Applied patch from BOLT system.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4673 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2014-12-04 06:01:24 +00:00
Parent 167e2a676d
Commit de10f1506d
41 changed files with 3045 additions and 178 deletions

View file

@@ -0,0 +1,95 @@
#!/bin/bash
# This is to be run after run_nnet2_multisplice.sh.
# It demonstrates discriminative training for the online-nnet2 models
. cmd.sh
stage=1
train_stage=-10
use_gpu=true
srcdir=exp/nnet2_online/nnet_ms_a_online
criterion=smbr
learning_rate=0.0016
drop_frames=false # only relevant for MMI
. cmd.sh
. ./path.sh
. ./utils/parse_options.sh
if [ ! -f $srcdir/final.mdl ]; then
echo "$0: expected $srcdir/final.mdl to exist; first run run_nnet2_multisplice.sh."
exit 1;
fi
if $use_gpu; then
if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed. Otherwise, call this script with --use-gpu false
EOF
fi
parallel_opts="-l gpu=1"
num_threads=1
minibatch_size=512
else
# Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
# almost the same, but this may be a little bit slower.
num_threads=16
minibatch_size=128
parallel_opts="-pe smp $num_threads"
fi
if [ $stage -le 1 ]; then
# use a wide beam because this is RM; these beam values would be too high for other setups.
nj=30
num_threads=6
steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads" \
--nj $nj --sub-split 40 --num-threads "$num_threads" --beam 20.0 --lattice-beam 10.0 \
data/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi
if [ $stage -le 2 ]; then
# hardcode no-GPU for alignment, although you could use GPU [you wouldn't
# get excellent GPU utilization though.]
nj=100
use_gpu=no
gpu_opts=
steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
--nj $nj data/train data/lang $srcdir ${srcdir}_ali || exit 1;
fi
if [ $stage -le 3 ]; then
# I tested the following with --max-temp-archives 3
# to test other branches of the code.
steps/online/nnet2/get_egs_discriminative2.sh \
--cmd "$decode_cmd -pe smp 5" \
--criterion $criterion --drop-frames $drop_frames \
data/train data/lang ${srcdir}{_ali,_denlats,,_degs} || exit 1;
fi
if [ $stage -le 4 ]; then
steps/nnet2/train_discriminative2.sh --cmd "$decode_cmd $parallel_opts" \
--learning-rate $learning_rate \
--criterion $criterion --drop-frames $drop_frames \
--num-epochs 6 \
--num-jobs-nnet 2 --num-threads $num_threads \
${srcdir}_degs ${srcdir}_${criterion}_${learning_rate} || exit 1;
fi
if [ $stage -le 5 ]; then
ln -sf $(readlink -f $srcdir/conf) ${srcdir}_${criterion}_${learning_rate}/conf # so it acts like an online-decoding directory
for epoch in 0 1 2 3 4 5 6; do
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--iter epoch$epoch exp/tri3b/graph data/test ${srcdir}_${criterion}_${learning_rate}/decode_epoch$epoch &
steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
--iter epoch$epoch exp/tri3b/graph_ug data/test ${srcdir}_${criterion}_${learning_rate}/decode_ug_epoch$epoch &
done
wait
for dir in ${srcdir}_${criterion}_${learning_rate}/decode*; do grep WER $dir/wer_* | utils/best_wer.sh; done
fi

View file

@@ -63,7 +63,7 @@ oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
cp -rH $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

View file

@@ -57,7 +57,7 @@ oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
cp -rH $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

View file

@@ -66,7 +66,7 @@ oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
cp -rH $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

View file

@@ -3,12 +3,7 @@
# 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments using MLP model
# If you supply the "--use-graphs true" option, it will use the training
# graphs from the source directory (where the model is). In this
# case the number of jobs must match with the source directory.
# Computes training alignments using DNN
# Begin configuration section.
nj=4

View file

@@ -53,7 +53,6 @@ if [ $# != 4 ]; then
echo " --splice-width <width;4> # Number of frames on each side to append for feature input"
echo " --left-context <width;4> # Number of frames on left side to append for feature input, overrides splice-width"
echo " --right-context <width;4> # Number of frames on right side to append for feature input, overrides splice-width"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
echo " # very end."
@@ -106,7 +105,7 @@ if [ -f $data/utt2uniq ]; then
fi
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;
[ -z "$transform_dir" ] && transform_dir=$alidir
@@ -210,7 +209,7 @@ if [ $stage -le 2 ]; then
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
"ark:$dir/egs/train_subset_all.egs" || touch $dir/.error &
wait;
[ -f $dir/.error ] && exit 1;
[ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
echo "Getting subsets of validation examples for diagnostics and combination."
$cmd $dir/log/create_valid_subset_combine.log \
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \

View file

@@ -75,7 +75,6 @@ if [ $# != 3 ]; then
echo " --frames-per-eg <frames;8> # number of frames per eg on disk"
echo " --left-context <width;4> # Number of frames on left side to append for feature input"
echo " --right-context <width;4> # Number of frames on right side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
echo " # very end."
@@ -236,7 +235,6 @@ if [ $stage -le 2 ]; then
gzip -c >$dir/ali_special.gz || exit 1;
set +o pipefail; # unset the pipefail option.
all_ids=$(seq -s, $nj) # e.g. 1,2,...39,40
$cmd $dir/log/create_valid_subset.log \
nnet-get-egs $ivectors_opt $nnet_context_opts "$valid_feats" \
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
@@ -246,7 +244,7 @@ if [ $stage -le 2 ]; then
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
"ark:$dir/train_subset_all.egs" || touch $dir/.error &
wait;
[ -f $dir/.error ] && exit 1;
[ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
echo "... Getting subsets of validation examples for diagnostics and combination."
$cmd $dir/log/create_valid_subset_combine.log \
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \

View file

@@ -0,0 +1,300 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script dumps examples for MPE, MMI or state-level minimum Bayes risk (sMBR)
# training of neural nets. Note: for "criterion", smbr > mpe > mmi in terms of
# compatibility of the dumped egs, meaning you can use the egs dumped with
# --criterion smbr for MPE or MMI, and egs dumped with --criterion mpe for MMI
# training. The discriminative training program itself doesn't enforce this and
# it would let you mix and match them arbitrarily; we are speaking in terms of
# the correctness of the algorithm that splits the lattices into pieces.
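# For illustration only (these paths are made up): egs dumped with --criterion smbr
# could later be reused for MMI training, e.g.
#   steps/nnet2/get_egs_discriminative2.sh --criterion smbr ... \
#     data/train data/lang exp/tri4_ali exp/tri4_denlats exp/tri4/final.mdl exp/tri4_degs
#   steps/nnet2/train_discriminative2.sh --criterion mmi ... exp/tri4_degs exp/tri4_mmi
# whereas egs dumped with --criterion mmi should not be reused for MPE or sMBR.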
# Begin configuration section.
cmd=run.pl
criterion=smbr
drop_frames=false # option relevant for MMI, affects how we dump examples.
samples_per_iter=400000 # measured in frames, not in "examples"
max_temp_archives=128 # maximum number of temp archives per input job, only
# affects the process of generating archives, not the
# final result.
stage=0
cleanup=true
transform_dir= # If this is a SAT system, directory for transforms
online_ivector_dir=
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 6 ]; then
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <denlat-dir> <src-model-file> <degs-dir>"
echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet_denlats exp/tri4/final.mdl exp/tri4_mpe/degs"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs (probably would be good to add -tc 5 or so if using"
echo " # GridEngine (to avoid excessive NFS traffic)."
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
echo " # online-neural-net setup. (but you may want to use"
echo " # steps/online/nnet2/get_egs_discriminative2.sh instead)"
exit 1;
fi
data=$1
lang=$2
alidir=$3
denlatdir=$4
src_model=$5
dir=$6
extra_files=
[ ! -z $online_ivector_dir ] && \
extra_files="$online_ivector_dir/ivector_period $online_ivector_dir/ivector_online.scp"
# Check some files.
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/num_jobs $alidir/tree \
$denlatdir/lat.1.gz $denlatdir/num_jobs $src_model $extra_files; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/log $dir/info || exit 1;
nj=$(cat $denlatdir/num_jobs) || exit 1; # $nj is the number of
# splits of the denlats and alignments.
nj_ali=$(cat $alidir/num_jobs) || exit 1;
sdata=$data/split$nj
utils/split_data.sh $data $nj
if [ $nj_ali -eq $nj ]; then
ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
else
ali_rspecifier="scp:$dir/ali.scp"
if [ $stage -le 1 ]; then
echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
all_ids=$(seq -s, $nj_ali)
copy-int-vector --print-args=false \
"ark:gunzip -c $alidir/ali.{$all_ids}.gz|" ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1;
fi
fi
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/cmvn_opts $dir 2>/dev/null
cp $alidir/tree $dir
cp $lang/phones/silence.csl $dir/info/
cp $src_model $dir/final.mdl || exit 1
if [ ! -z "$online_ivector_dir" ]; then
ivector_period=$(cat $online_ivector_dir/ivector_period)
ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
echo $ivector_dim >$dir/info/ivector_dim
# the 'const_dim_opt' allows it to write only one iVector per example,
# rather than one per time-index... it has to average over
const_dim_opt="--const-feat-dim=$ivector_dim"
else
echo 0 > $dir/info/ivector_dim
fi
## We don't support deltas here, only LDA or raw (mainly because deltas are less
## frequently used).
if [ -z $feat_type ]; then
if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi
fi
echo "$0: feature type is $feat_type"
case $feat_type in
raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
;;
lda)
splice_opts=`cat $alidir/splice_opts 2>/dev/null`
cp $alidir/final.mat $dir
feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ -z "$transform_dir" ]; then
if [ -f $alidir/trans.1 ] || [ -f $alidir/raw_trans.1 ]; then
transform_dir=$alidir
fi
fi
if [ ! -z "$transform_dir" ]; then
echo "$0: using transforms from $transform_dir"
[ ! -s $transform_dir/num_jobs ] && \
echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1;
nj_orig=$(cat $transform_dir/num_jobs)
if [ $feat_type == "raw" ]; then trans=raw_trans;
else trans=trans; fi
if [ $feat_type == "lda" ] && ! cmp $transform_dir/final.mat $alidir/final.mat; then
echo "$0: LDA transforms differ between $alidir and $transform_dir"
exit 1;
fi
if [ ! -f $transform_dir/$trans.1 ]; then
echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)"
exit 1;
fi
if [ $nj -ne $nj_orig ]; then
# Copy the transforms into an archive with an index.
for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \
copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1;
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |"
else
# number of jobs matches with alignment dir.
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |"
fi
fi
if [ ! -z $online_ivector_dir ]; then
# add iVectors to the features.
feats="$feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
fi
if [ $stage -le 2 ]; then
echo "$0: working out number of frames of training data"
num_frames=$(steps/nnet2/get_num_frames.sh $data)
echo $num_frames > $dir/info/num_frames
# Working out total number of archives. Add one on the assumption the
# num-frames won't divide exactly, and we want to round up.
num_archives=$[$num_frames/$samples_per_iter + 1]
# the next few lines relate to how we may temporarily split each input job
# into fewer than $num_archives pieces, to avoid using an excessive
# number of filehandles.
archive_ratio=$[$num_archives/$max_temp_archives+1]
num_archives_temp=$[$num_archives/$archive_ratio]
# change $num_archives slightly to make it an exact multiple
# of $archive_ratio.
num_archives=$[$num_archives_temp*$archive_ratio]
echo $num_archives >$dir/info/num_archives || exit 1
echo $num_archives_temp >$dir/info/num_archives_temp || exit 1
frames_per_archive=$[$num_frames/$num_archives]
# note, this is the number of frames per archive prior to discarding frames.
echo $frames_per_archive > $dir/info/frames_per_archive
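# Worked example with made-up numbers: num_frames=1000000 and
# samples_per_iter=400000 give num_archives = 1000000/400000 + 1 = 3; with the
# default max_temp_archives=128, archive_ratio=1, num_archives_temp=3, and
# frames_per_archive = 1000000/3 = 333333.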
else
num_archives=$(cat $dir/info/num_archives) || exit 1;
num_archives_temp=$(cat $dir/info/num_archives_temp) || exit 1;
frames_per_archive=$(cat $dir/info/frames_per_archive) || exit 1;
fi
echo "$0: Splitting the data up into $num_archives archives (using $num_archives_temp temporary pieces per input job)"
echo "$0: giving samples-per-iteration of $frames_per_archive (you requested $samples_per_iter)."
# we create these data links regardless of the stage, as there are situations
# where we would want to recreate a data link that had previously been deleted.
if [ -d $dir/storage ]; then
echo "$0: creating data links for distributed storage of degs"
# See utils/create_split_dir.pl for how this 'storage' directory is created.
for x in $(seq $nj); do
for y in $(seq $num_archives_temp); do
utils/create_data_link.pl $dir/degs_orig.$x.$y.ark
done
done
for z in $(seq $num_archives); do
utils/create_data_link.pl $dir/degs.$z.ark
done
if [ $num_archives_temp -ne $num_archives ]; then
for z in $(seq $num_archives); do
utils/create_data_link.pl $dir/degs_temp.$z.ark
done
fi
fi
if [ $stage -le 3 ]; then
echo "$0: getting initial training examples by splitting lattices"
degs_list=$(for n in $(seq $num_archives_temp); do echo ark:$dir/degs_orig.JOB.$n.ark; done)
$cmd JOB=1:$nj $dir/log/get_egs.JOB.log \
nnet-get-egs-discriminative --criterion=$criterion --drop-frames=$drop_frames \
"$src_model" "$feats" "$ali_rspecifier" "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz|" ark:- \| \
nnet-copy-egs-discriminative $const_dim_opt ark:- $degs_list || exit 1;
sleep 5; # wait a bit so NFS has time to write files.
fi
if [ $stage -le 4 ]; then
degs_list=$(for n in $(seq $nj); do echo $dir/degs_orig.$n.JOB.ark; done)
if [ $num_archives -eq $num_archives_temp ]; then
echo "$0: combining data into final archives and shuffling it"
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
cat $degs_list \| nnet-shuffle-egs-discriminative --srand=JOB ark:- \
ark:$dir/degs.JOB.ark || exit 1;
else
echo "$0: combining and re-splitting data into un-shuffled versions of final archives."
archive_ratio=$[$num_archives/$num_archives_temp]
! [ $archive_ratio -gt 1 ] && echo "$0: Bad archive_ratio $archive_ratio" && exit 1;
# note: the \$[ .. ] won't be evaluated until the job gets executed. The
# aim is to write to the archives with the final numbering, 1
# ... num_archives, which is more than num_archives_temp. The list with
# \$[... ] expressions in it computes the set of final indexes for each
# temporary index.
degs_list_out=$(for n in $(seq $archive_ratio); do echo "ark:$dir/degs_temp.\$[((JOB-1)*$archive_ratio)+$n].ark"; done)
# e.g. if dir=foo and archive_ratio=2, we'd have
# degs_list_out='foo/degs_temp.$[((JOB-1)*2)+1].ark foo/degs_temp.$[((JOB-1)*2)+2].ark'
$cmd JOB=1:$num_archives_temp $dir/log/resplit.JOB.log \
cat $degs_list \| nnet-copy-egs-discriminative --srand=JOB ark:- \
$degs_list_out || exit 1;
fi
fi
if [ $stage -le 5 ] && [ $num_archives -ne $num_archives_temp ]; then
echo "$0: shuffling final archives."
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
nnet-shuffle-egs-discriminative --srand=JOB ark:$dir/degs_temp.JOB.ark \
ark:$dir/degs.JOB.ark || exit 1
fi
if $cleanup; then
echo "$0: removing temporary archives."
for x in $(seq $nj); do
for y in $(seq $num_archives_temp); do
file=$dir/degs_orig.$x.$y.ark
[ -L $file ] && rm $(readlink -f $file); rm $file
done
done
if [ $num_archives_temp -ne $num_archives ]; then
for z in $(seq $num_archives); do
file=$dir/degs_temp.$z.ark
[ -L $file ] && rm $(readlink -f $file); rm $file
done
fi
fi
echo "$0: Done."

View file

@@ -10,16 +10,25 @@ if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# -ne 1 ]; then
echo "Usage: $0 <data-dir>"
echo "Prints the number of frames of data in the data-dir, via sampling rather"
echo "than trying to access all the data."
(
echo "Usage: $0 <data-dir>"
echo "Prints the number of frames of data in the data-dir, via sampling rather"
echo "than trying to access all the data."
) 1>&2
fi
data=$1
if [ ! -f $data/feats.scp ]; then
echo "$0: expected $data/feats.scp to exist"
exit 1;
if [ -f $data/segments ]; then
echo "$0: $data/feats.scp does not exist, but $data/segments does exist; using that and assuming 100 frames per second." 1>&2
num_frames=$(cat $data/segments | awk '{x += $4 - $3;} END{print int(x*100);}') || exit 1;
echo $num_frames
exit 0;
else
echo "$0: neither $data/feats.scp nor $data/segments exist." 1>&2
exit 1;
fi
fi

View file

@@ -32,7 +32,7 @@ echo "$0 $@" # Print the command line for logging
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/make_denlats.sh data/train data/lang exp/tri1 exp/tri1_denlats"
echo " e.g.: steps/make_denlats.sh data/train data/lang exp/nnet4 exp/nnet4_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
@@ -68,14 +68,12 @@ thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir
cp -r $lang $dir/
cp -rH $lang $dir/
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

View file

@@ -2,10 +2,8 @@
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does MPE or fMMI state-level minimum bayes risk (sMBR) training.
# Note: the temporary data is put in <exp-dir>/degs/, so if you want
# to use a different disk for that, just make that a soft link to some other
# volume.
# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training
# of neural nets.
# Begin configuration section.
cmd=run.pl
@@ -45,7 +43,6 @@ transform_dir=
degs_dir=
retroactive=false
online_ivector_dir=
use_preconditioning=false
# End configuration section.
@@ -76,7 +73,7 @@ if [ $# != 6 ]; then
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --samples-per-iter <#samples|200000> # Number of samples of data to process per iteration, per"
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
@@ -85,6 +82,8 @@ if [ $# != 6 ]; then
echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
echo " # changes across layers."
echo " --degs-dir <dir|""> # Directory for discriminative examples, e.g. exp/foo/degs"
echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
echo " # where the numerator transition-id is not in the denominator lattice."
echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
echo " # online-neural-net setup."
exit 1;
@@ -240,19 +239,17 @@ fi
if [ $stage -le -7 ]; then
echo "$0: Copying initial model and modifying preconditioning setup"
# We want online preconditioning with a larger number of samples of history, since
# in this setup the frames are only randomized at the segment level so they are highly
# correlated. It might make sense to tune this a little, later on, although I doubt
# it matters once it's large enough.
if $use_preconditioning; then
$cmd $dir/log/convert.log \
nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
else
$cmd $dir/log/convert.log \
nnet-am-copy --learning-rate=$learning_rate "$src_model" $dir/0.mdl || exit 1;
fi
# Note, the baseline model probably had preconditioning, and we'll keep it;
# but we want online preconditioning with a larger number of samples of
# history, since in this setup the frames are only randomized at the segment
# level so they are highly correlated. It might make sense to tune this a
# little, later on, although I doubt it matters once the --num-samples-history
# is large enough.
$cmd $dir/log/convert.log \
nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
fi
@@ -344,7 +341,7 @@ fi
x=0
while [ $x -lt $num_iters ]; do
if [ $x -ge 0 ] && [ $stage -le $x ]; then
if [ $stage -le $x ]; then
echo "Training neural net (pass $x)"
@@ -356,10 +353,7 @@ while [ $x -lt $num_iters ]; do
$dir/$[$x+1].JOB.mdl \
|| exit 1;
nnets_list=
for n in `seq 1 $num_jobs_nnet`; do
nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
done
nnets_list=$(for n in $(seq $num_jobs_nnet); do echo $dir/$[$x+1].$n.mdl; done)
$cmd $dir/log/average.$x.log \
nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;

View file

@@ -0,0 +1,219 @@
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training.
# This version (2) of the script uses a newer format for the discriminative-training
# egs, as obtained by steps/nnet2/get_egs_discriminative2.sh.
# Begin configuration section.
cmd=run.pl
num_epochs=4 # Number of epochs of training
learning_rate=0.00002
acoustic_scale=0.1 # acoustic scale for MMI/MPFE/SMBR training.
boost=0.0 # option relevant for MMI
criterion=smbr
drop_frames=false # option relevant for MMI
num_jobs_nnet=4 # Number of neural net jobs to run in parallel. Note: this
# will interact with the learning rates (if you decrease
# this, you'll have to decrease the learning rate, and vice
# versa).
modify_learning_rates=true
last_layer_factor=1.0 # relates to modify-learning-rates
first_layer_factor=1.0 # relates to modify-learning-rates
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
# disk I/O. Smaller is easier on disk and memory but less random. It's
# not a huge deal though, as samples are anyway randomized right at the start.
stage=-3
num_threads=16 # this is the default but you may want to change it, e.g. to 1 if
# using GPUs.
cleanup=true
retroactive=false
remove_egs=false
src_model= # will default to $degs_dir/final.mdl
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 2 ]; then
echo "Usage: $0 [opts] <degs-dir> <exp-dir>"
echo " e.g.: $0 exp/tri4_mpe_degs exp/tri4_mpe"
echo ""
echo "You have to first call get_egs_discriminative2.sh to dump the egs."
echo "Caution: the options 'drop_frames' and 'criterion' are taken here"
echo "even though they were required also by get_egs_discriminative2.sh,"
echo "and they should normally match."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|4> # Number of epochs of training"
echo " --initial-learning-rate <initial-learning-rate|0.0002> # Learning rate at start of training"
echo " --final-learning-rate <final-learning-rate|0.0004> # Learning rate at end of training"
echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
echo " # training (will affect results as well as speed; try 8, 16)"
echo " # Note: if you increase this, you may want to also increase"
echo " # the learning rate. Also note: if there are fewer archives"
echo " # of egs than this, it will get reduced automatically."
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size. With GPU, must be 1."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --stage <stage|-3> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
echo " --boost <boost|0.0> # Boosting factor for MMI (e.g., 0.1)"
echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
echo " # where the numerator transition-id is not in the denominator lattice."
echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
echo " # changes across layers."
exit 1;
fi
degs_dir=$1
dir=$2
[ -z "$src_model" ] && src_model=$degs_dir/final.mdl
# Check some files.
for f in $degs_dir/degs.1.ark $degs_dir/info/{num_archives,silence.csl,frames_per_archive} $src_model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/log || exit 1;
# copy some things
for f in splice_opts cmvn_opts tree final.mat; do
if [ -f $degs_dir/$f ]; then
cp $degs_dir/$f $dir/ || exit 1;
fi
done
silphonelist=`cat $degs_dir/info/silence.csl` || exit 1;
num_archives=$(cat $degs_dir/info/num_archives) || exit 1;
if [ $num_jobs_nnet -gt $num_archives ]; then
echo "$0: num-jobs-nnet $num_jobs_nnet exceeds number of archives $num_archives,"
echo " ... setting it to $num_archives."
num_jobs_nnet=$num_archives
fi
num_iters=$[($num_epochs*$num_archives)/$num_jobs_nnet]
echo "$0: Will train for $num_epochs epochs = $num_iters iterations"
if [ $stage -le -1 ]; then
echo "$0: Copying initial model and modifying preconditioning setup"
# Note, the baseline model probably had preconditioning, and we'll keep it;
# but we want online preconditioning with a larger number of samples of
# history, since in this setup the frames are only randomized at the segment
# level so they are highly correlated. It might make sense to tune this a
# little, later on, although I doubt it matters once the --num-samples-history
# is large enough.
$cmd $dir/log/convert.log \
nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
fi
if [ $num_threads -eq 1 ]; then
train_suffix="-simple" # this enables us to use GPU code if
# we have just one thread.
else
train_suffix="-parallel --num-threads=$num_threads"
fi
x=0
while [ $x -lt $num_iters ]; do
if [ $stage -le $x ]; then
echo "Training neural net (pass $x)"
# The \$ below delays the evaluation of the expression until the script runs (and JOB
# will be replaced by the job-id). That expression in $[..] is responsible for
# choosing the archive indexes to use for each job on each iteration... we cycle through
# all archives.
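# Worked example with made-up numbers: if num_jobs_nnet=2, num_archives=5,
# x=3 and JOB=2, the expression evaluates to ((2-1+(3*2))%5)+1 = (7%5)+1 = 3,
# i.e. that job reads degs.3.ark on this iteration.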
$cmd JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
nnet-combine-egs-discriminative \
"ark:$degs_dir/degs.\$[((JOB-1+($x*$num_jobs_nnet))%$num_archives)+1].ark" ark:- \| \
nnet-train-discriminative$train_suffix --silence-phones=$silphonelist \
--criterion=$criterion --drop-frames=$drop_frames \
--boost=$boost --acoustic-scale=$acoustic_scale \
$dir/$x.mdl ark:- $dir/$[$x+1].JOB.mdl || exit 1;
nnets_list=$(for n in $(seq $num_jobs_nnet); do echo $dir/$[$x+1].$n.mdl; done)
$cmd $dir/log/average.$x.log \
nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;
if $modify_learning_rates; then
$cmd $dir/log/modify_learning_rates.$x.log \
nnet-modify-learning-rates --retroactive=$retroactive \
--last-layer-factor=$last_layer_factor \
--first-layer-factor=$first_layer_factor \
$dir/$x.mdl $dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
fi
rm $nnets_list
fi
x=$[$x+1]
done
rm $dir/final.mdl 2>/dev/null
ln -s $x.mdl $dir/final.mdl
echo Done
epoch_final_iters=
for e in $(seq 0 $num_epochs); do
x=$[($e*$num_archives)/$num_jobs_nnet] # gives the iteration number.
ln -sf $x.mdl $dir/epoch$e.mdl
epoch_final_iters="$epoch_final_iters $x"
done
# function to remove egs that might be soft links.
remove () { for x in $*; do [ -L $x ] && rm $(readlink -f $x); rm $x; done }
if $cleanup && $remove_egs; then # note: this is false by default.
echo Removing training examples
for n in $(seq $num_archives); do
remove $degs_dir/degs.$n.ark
done
fi
if $cleanup; then
echo Removing most of the models
for x in `seq 0 $num_iters`; do
if ! echo $epoch_final_iters | grep -w $x >/dev/null; then
# if $x is not an epoch-final iteration..
rm $dir/$x.mdl 2>/dev/null
fi
done
fi

View file

@@ -0,0 +1,304 @@
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training,
# in the multi-language or at least multi-model setting where you have multiple "degs" directories.
# The input "degs" directories must be dumped by one of the get_egs_discriminative2.sh scripts.
# Begin configuration section.
cmd=run.pl
num_epochs=4 # Number of epochs of training
learning_rate=0.00002
acoustic_scale=0.1 # acoustic scale for MMI/MPFE/SMBR training.
boost=0.0 # option relevant for MMI
criterion=smbr
drop_frames=false # option relevant for MMI
num_jobs_nnet="4 4" # Number of neural net jobs to run in parallel, one per
# language. Note: this will interact with the learning
# rates (if you decrease this, you'll have to decrease
# the learning rate, and vice versa).
modify_learning_rates=true
last_layer_factor=1.0 # relates to modify-learning-rates
first_layer_factor=1.0 # relates to modify-learning-rates
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
# disk I/O. Smaller is easier on disk and memory but less random. It's
# not a huge deal though, as samples are anyway randomized right at the start.
stage=-3
num_threads=16 # this is the default but you may want to change it, e.g. to 1 if
# using GPUs.
cleanup=true
retroactive=false
remove_egs=false
src_models= # can be used to override the defaults of <degs-dir1>/final.mdl <degs-dir2>/final.mdl .. etc.
# set this to a space-separated list.
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# -lt 3 ]; then
echo "Usage: $0 [opts] <degs-dir1> <degs-dir2> ... <degs-dirN> <exp-dir>"
echo " e.g.: $0 exp/tri4_mpe_degs exp_other_lang/tri4_mpe_degs exp/tri4_mpe_multilang"
echo ""
echo "You have to first call get_egs_discriminative2.sh to dump the egs."
echo "Caution: the options 'drop_frames' and 'criterion' are taken here"
echo "even though they were required also by get_egs_discriminative2.sh,"
echo "and they should normally match."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|4> # Number of epochs of training"
echo " --initial-learning-rate <initial-learning-rate|0.0002> # Learning rate at start of training"
echo " --final-learning-rate <final-learning-rate|0.0004> # Learning rate at end of training"
echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
echo " # training (will affect results as well as speed; try 8, 16)"
echo " # Note: if you increase this, you may want to also increase"
echo " # the learning rate. Also note: if there are fewer archives"
echo " # of egs than this, it will get reduced automatically."
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size. With GPU, must be 1."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --stage <stage|-3> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
echo " --boost <boost|0.0> # Boosting factor for MMI (e.g., 0.1)"
echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
echo " # where the numerator transition-id is not in the denominator lattice."
echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
echo " # changes across layers."
exit 1;
fi
argv=("$@")
num_args=$#
num_lang=$[$num_args-1]
dir=${argv[$num_args-1]}
num_jobs_nnet_array=($num_jobs_nnet)
! [ "${#num_jobs_nnet_array[@]}" -eq "$num_lang" ] && \
echo "$0: --num-jobs-nnet option must have size equal to the number of languages" && exit 1;
for lang in $(seq 0 $[$num_lang-1]); do
degs_dir[$lang]=${argv[$lang]}
done
if [ ! -z "$src_models" ]; then
src_model_array=($src_models)
! [ "${#src_model_array[@]}" -eq "$num_lang" ] && \
echo "$0: --src-models option must have size equal to the number of languages" && exit 1;
else
for lang in $(seq 0 $[$num_lang-1]); do
src_model_array[$lang]=${degs_dir[$lang]}/final.mdl
done
fi
mkdir -p $dir/log || exit 1;
for lang in $(seq 0 $[$num_lang-1]); do
this_degs_dir=${degs_dir[$lang]}
mdl=${src_model_array[$lang]}
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
# Check inputs
for f in $this_degs_dir/degs.1.ark $this_degs_dir/info/{num_archives,silence.csl,frames_per_archive} $mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/$lang/log || exit 1;
# check for valid num-jobs-nnet.
! [ $this_num_jobs_nnet -gt 0 ] && echo "Bad num-jobs-nnet option '$num_jobs_nnet'" && exit 1;
this_num_archives=$(cat $this_degs_dir/info/num_archives) || exit 1;
num_archives_array[$lang]=$this_num_archives
silphonelist_array[$lang]=$(cat $this_degs_dir/info/silence.csl) || exit 1;
if [ $this_num_jobs_nnet -gt $this_num_archives ]; then
echo "$0: num-jobs-nnet $this_num_jobs_nnet exceeds number of archives $this_num_archives"
echo " ... for language $lang; setting it to $this_num_archives."
num_jobs_nnet_array[$lang]=$this_num_archives
fi
# copy some things from the input directories.
for f in splice_opts cmvn_opts tree final.mat; do
if [ -f $this_degs_dir/$f ]; then
cp $this_degs_dir/$f $dir/$lang/ || exit 1;
fi
done
if [ -f $this_degs_dir/conf ]; then
ln -sf $(readlink -f $this_degs_dir/conf) $dir/ || exit 1;
fi
done
# work out number of iterations.
num_archives0=$(cat ${degs_dir[0]}/info/num_archives) || exit 1;
num_jobs_nnet0=${num_jobs_nnet_array[0]}
! [ $num_epochs -gt 0 ] && echo "Error: num-epochs $num_epochs is not valid" && exit 1;
num_iters=$[($num_epochs*$num_archives0)/$num_jobs_nnet0]
echo "$0: Will train for $num_epochs epochs = $num_iters iterations (measured on language 0)"
# Work out the number of epochs we train for on the other languages... this is
# just informational.
for lang in $(seq 1 $[$num_lang-1]); do
this_degs_dir=${degs_dir[$lang]}
this_num_archives=${num_archives_array[$lang]}
this_num_epochs=$[($num_iters*${num_jobs_nnet_array[$lang]})/$this_num_archives]
echo "$0: $num_iters iterations is approximately $this_num_epochs epochs for language $lang"
done
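# Rough illustration with made-up sizes: if language 1 has 12 archives and
# runs 4 jobs while num_iters=18, it sees about (18*4)/12 = 6 epochs of its
# own data over the whole run.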
if [ $stage -le -1 ]; then
echo "$0: Copying initial models and modifying preconditioning setups"
# Note, the baseline model probably had preconditioning, and we'll keep it;
# but we want online preconditioning with a larger number of samples of
# history, since in this setup the frames are only randomized at the segment
# level so they are highly correlated. It might make sense to tune this a
# little, later on, although I doubt it matters once the --num-samples-history
# is large enough.
for lang in $(seq 0 $[$num_lang-1]); do
$cmd $dir/$lang/log/convert.log \
nnet-am-copy --learning-rate=$learning_rate ${src_model_array[$lang]} - \| \
nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/$lang/0.mdl || exit 1;
done
fi
if [ $num_threads -eq 1 ]; then
train_suffix="-simple" # this enables us to use GPU code if
# we have just one thread.
else
train_suffix="-parallel --num-threads=$num_threads"
fi
x=0
while [ $x -lt $num_iters ]; do
if [ $stage -le $x ]; then
echo "Training neural net (pass $x)"
rm $dir/.error 2>/dev/null
for lang in $(seq 0 $[$num_lang-1]); do
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
this_num_archives=${num_archives_array[$lang]}
this_degs_dir=${degs_dir[$lang]}
this_silphonelist=${silphonelist_array[$lang]}
# The \$ below delays the evaluation of the expression until the script runs (and JOB
# will be replaced by the job-id). That expression in $[..] is responsible for
# choosing the archive indexes to use for each job on each iteration... we cycle through
# all archives.
(
$cmd JOB=1:$this_num_jobs_nnet $dir/$lang/log/train.$x.JOB.log \
nnet-combine-egs-discriminative \
"ark:$this_degs_dir/degs.\$[((JOB-1+($x*$this_num_jobs_nnet))%$this_num_archives)+1].ark" ark:- \| \
nnet-train-discriminative$train_suffix --silence-phones=$this_silphonelist \
--criterion=$criterion --drop-frames=$drop_frames \
--boost=$boost --acoustic-scale=$acoustic_scale \
$dir/$lang/$x.mdl ark:- $dir/$lang/$[$x+1].JOB.mdl || exit 1;
nnets_list=$(for n in $(seq $this_num_jobs_nnet); do echo $dir/$lang/$[$x+1].$n.mdl; done)
# produce an average just within this language.
$cmd $dir/$lang/log/average.$x.log \
nnet-am-average $nnets_list $dir/$lang/$[$x+1].tmp.mdl || exit 1;
rm $nnets_list
) || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "$0: error on pass $x" && exit 1
# apply the modify-learning-rates thing to the model for the zero'th language;
# we'll use the resulting learning rates for the other languages.
if $modify_learning_rates; then
$cmd $dir/log/modify_learning_rates.$x.log \
nnet-modify-learning-rates --retroactive=$retroactive \
--last-layer-factor=$last_layer_factor \
--first-layer-factor=$first_layer_factor \
$dir/0/$x.mdl $dir/0/$[$x+1].tmp.mdl $dir/0/$[$x+1].tmp.mdl || exit 1;
fi
nnets_list=$(for lang in $(seq 0 $[$num_lang-1]); do echo $dir/$lang/$[$x+1].tmp.mdl; done)
weights_csl=$(echo $num_jobs_nnet | sed 's/ /:/g') # get as colon separated list.
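# e.g. num_jobs_nnet="4 4" becomes weights_csl="4:4", so each language's
# averaged model is weighted by the number of jobs it was averaged over.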
# the next command produces the cross-language averaged model containing the
# final layer corresponding to language zero. Note, if we did modify-learning-rates,
# it will also have the modified learning rates.
$cmd $dir/log/average.$x.log \
nnet-am-average --weights=$weights_csl --skip-last-layer=true \
$nnets_list $dir/0/$[$x+1].mdl || exit 1;
# we'll transfer these learning rates to the other models.
learning_rates=$(nnet-am-info --print-learning-rates=true $dir/0/$[$x+1].mdl 2>/dev/null)
for lang in $(seq 1 $[$num_lang-1]); do
# the next command takes the averaged hidden parameters from language zero, and
# the last layer from language $lang. It's not really doing averaging.
# we use nnet-am-copy to transfer the learning rates from model zero.
$cmd $dir/$lang/log/combine_average.$x.log \
nnet-am-average --weights=0.0:1.0 --skip-last-layer=true \
$dir/$lang/$[$x+1].tmp.mdl $dir/0/$[$x+1].mdl - \| \
nnet-am-copy --learning-rates=$learning_rates - $dir/$lang/$[$x+1].mdl || exit 1;
done
$cleanup && rm $dir/*/$[$x+1].tmp.mdl
fi
x=$[$x+1]
done
for lang in $(seq 0 $[$num_lang-1]); do
rm $dir/$lang/final.mdl 2>/dev/null
ln -s $x.mdl $dir/$lang/final.mdl
epoch_final_iters=
for e in $(seq 0 $num_epochs); do
x=$[($e*$num_archives0)/$num_jobs_nnet0] # gives the iteration number.
ln -sf $x.mdl $dir/$lang/epoch$e.mdl
epoch_final_iters="$epoch_final_iters $x"
done
if $cleanup; then
echo "Removing most of the models for language $lang"
for x in `seq 0 $num_iters`; do
if ! echo $epoch_final_iters | grep -w $x >/dev/null; then
# if $x is not an epoch-final iteration..
rm $dir/$lang/$x.mdl 2>/dev/null
fi
done
fi
done
echo Done

View file

@@ -64,8 +64,6 @@ if [ $# != 3 ]; then
echo " --num-epochs <#epochs|15> # Number of epochs of training"
echo " # while reducing learning rate (determines #iterations, together"
echo " # with --samples-per-iter and --num-jobs-nnet)"
echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training"
echo " # after learning rate fully reduced"
echo " --learning-rate-factor<factor|1.0> # Factor (e.g. 0.2) by which to change learning rate"
echo " # during the course of training"
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"

View file

@@ -0,0 +1,351 @@
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey).
# Apache 2.0.
# This script further trains an already-existing neural network,
# given an existing model and an examples (egs/) directory.
# This version of the script expects an egs/ directory in the newer
# format, as created by get_egs2.sh.
#
# Begin configuration section.
cmd=run.pl
num_epochs=10 # Number of epochs of training; number of iterations is
# worked out from this.
num_iters_final=20 # Maximum number of final iterations to give to the
# optimization over the validation set.
learning_rate_factor=1.0 # You can use this to gradually decrease the learning
# rate during training (e.g. use 0.2); the initial
# learning rates are as specified in the model, but it
# will decrease slightly on each iteration to achieve
# this ratio.
combine=true # controls whether or not to do the final model combination.
combine_regularizer=1.0e-14 # Small regularizer so that parameters won't go crazy.
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
# to the final 'combine' stage, but these models will themselves be averages of
# iteration-number ranges.
minibatch_size=128 # by default use a smallish minibatch size for neural net
# training; this controls instability which would otherwise
# be a problem with multi-threaded update. Note: it also
# interacts with the "preconditioned" update which generally
# works better with larger minibatch size, so it's not
# completely cost free.
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
# disk I/O. Smaller is easier on disk and memory but less random. It's
# not a huge deal though, as samples are anyway randomized right at the start.
num_jobs_nnet=4
mix_up=0
stage=-5
num_threads=16
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
combine_num_threads=8
cleanup=true
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
# more than enough.
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
remove_egs=false
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: $0 [opts] <input-model> <egs-dir> <exp-dir>"
echo " e.g.: $0 exp/nnet4c/final.mdl exp/nnet4c/egs exp/nnet5c/"
echo "see also the older script update_nnet.sh which creates the egs itself"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|15> # Number of epochs of training"
echo " # while reducing learning rate (determines #iterations, together"
echo " # with --samples-per-iter and --num-jobs-nnet)"
echo " --num-jobs-nnet <#jobs|4> # Number of neural-net jobs to run in parallel"
echo " --learning-rate-factor<factor|1.0> # Factor (e.g. 0.2) by which to change learning rate"
echo " # during the course of training"
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --mix-up <#mix|0> # If specified, add quasi-targets, analogous to a mixture of Gaussians vs."
echo " # single Gaussians. Only do this if not already mixed-up."
echo " --combine <true or false|true> # If true, do the final nnet-combine-fast stage."
echo " --stage <stage|-5> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
exit 1;
fi
input_mdl=$1
egs_dir=$2
dir=$3
# Check some files.
for f in $input_mdl $egs_dir/egs.1.ark; do
[ ! -f $f ] && echo "$0: expected file $f to exist." && exit 1;
done
mkdir -p $dir/log
# Copy some things from the directory where the input model is located, to the
# experimental directory, if they exist. These might be needed for things like
# decoding.
input_dir=$(dirname $input_mdl);
for f in tree splice_opts cmvn_opts final.mat; do
if [ -f $input_dir/$f ]; then
cp $input_dir/$f $dir/
fi
done
frames_per_eg=$(cat $egs_dir/info/frames_per_eg) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; }
num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/num_archives"; exit 1; }
# num_archives_expanded considers each separate label-position from
# 0..frames_per_eg-1 to be a separate archive.
num_archives_expanded=$[$num_archives*$frames_per_eg]
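# For example (hypothetical egs dir): num_archives=10 and frames_per_eg=8 give
# num_archives_expanded = 10*8 = 80.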
if [ $num_jobs_nnet -gt $num_archives_expanded ]; then
echo "$0: --num-jobs-nnet cannot exceed num-archives*frames-per-eg which is $num_archives_expanded"
echo "$0: setting --num-jobs-nnet to $num_archives_expanded"
num_jobs_nnet=$num_archives_expanded
fi
# set num_iters so that as close as possible, we process the data $num_epochs
# times, i.e. $num_iters*$num_jobs_nnet == $num_epochs*$num_archives_expanded
num_iters=$[($num_epochs*$num_archives_expanded)/$num_jobs_nnet]
echo "$0: Will train for $num_epochs epochs = $num_iters iterations"
per_iter_learning_rate_factor=$(perl -e "print ($learning_rate_factor ** (1.0 / $num_iters));")
mix_up_iter=$[$num_iters/2]
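# Continuing the made-up example above: num_epochs=10 and num_jobs_nnet=4 give
# num_iters = (10*80)/4 = 200 and mix_up_iter=100; a learning_rate_factor of
# 0.2 would then give a per-iteration factor of 0.2^(1/200), roughly 0.992.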
if [ $num_threads -eq 1 ]; then
parallel_suffix="-simple" # this enables us to use GPU code if
# we have just one thread.
parallel_train_opts=
if ! cuda-compiled; then
echo "$0: WARNING: you are running with one thread but you have not compiled"
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
fi
else
parallel_suffix="-parallel"
parallel_train_opts="--num-threads=$num_threads"
fi
approx_iters_per_epoch=$[$num_iters/$num_epochs]
# First work out how many models we want to combine over in the final
# nnet-combine-fast invocation. This equals
# min(max(max_models_combine, iters_per_epoch),
# 2/3 * iters_after_mixup)
num_models_combine=$max_models_combine
if [ $num_models_combine -lt $approx_iters_per_epoch ]; then
num_models_combine=$approx_iters_per_epoch
fi
iters_after_mixup_23=$[(($num_iters-$mix_up_iter-1)*2)/3]
if [ $num_models_combine -gt $iters_after_mixup_23 ]; then
num_models_combine=$iters_after_mixup_23
fi
first_model_combine=$[$num_iters-$num_models_combine+1]
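# Worked example (same made-up numbers): approx_iters_per_epoch = 200/10 = 20,
# iters_after_mixup_23 = ((200-100-1)*2)/3 = 66, so num_models_combine stays at
# the default 20 and first_model_combine = 200-20+1 = 181.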
cp $input_mdl $dir/0.mdl || exit 1;
x=0
while [ $x -lt $num_iters ]; do
if [ $x -ge 0 ] && [ $stage -le $x ]; then
# Set off jobs doing some diagnostics, in the background.
$cmd $dir/log/compute_prob_valid.$x.log \
nnet-compute-prob $dir/$x.mdl ark:$egs_dir/valid_diagnostic.egs &
$cmd $dir/log/compute_prob_train.$x.log \
nnet-compute-prob $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
if [ $x -gt 0 ] && [ ! -f $dir/log/mix_up.$[$x-1].log ]; then
$cmd $dir/log/progress.$x.log \
nnet-show-progress --use-gpu=no $dir/$[$x-1].mdl $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
fi
echo "Training neural net (pass $x)"
rm $dir/.error 2>/dev/null
( # this sub-shell is so that when we "wait" below,
# we only wait for the training jobs that we just spawned,
# not the diagnostic jobs that we spawned above.
# We can't easily use a single parallel SGE job to do the main training,
# because the computation of which archive and which --frame option
# to use for each job is a little complex, so we spawn each one separately.
for n in $(seq $num_jobs_nnet); do
k=$[$x*$num_jobs_nnet + $n - 1]; # k is a zero-based index that we'll derive
# the other indexes from.
archive=$[($k%$num_archives)+1]; # work out the 1-based archive index.
frame=$[(($k/$num_archives)%$frames_per_eg)]; # work out the 0-based frame
# index; this increases more slowly than the archive index because the
# same archive with different frame indexes will give similar gradients,
# so we want to separate them in time.
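# Worked example (same made-up sizes): num_jobs_nnet=4, num_archives=10,
# frames_per_eg=8, x=3 and n=2 give k = 3*4+2-1 = 13, archive = (13%10)+1 = 4
# and frame = (13/10)%8 = 1.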
$cmd $parallel_opts $dir/log/train.$x.$n.log \
nnet-train$parallel_suffix $parallel_train_opts \
--minibatch-size=$minibatch_size --srand=$x $dir/$x.mdl \
"ark:nnet-copy-egs --frame=$frame ark:$egs_dir/egs.$archive.ark ark:-|nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-|" \
$dir/$[$x+1].$n.mdl || touch $dir/.error &
done
wait
)
# the error message below is not that informative, but $cmd will
# have printed a more specific one.
[ -f $dir/.error ] && echo "$0: error on iteration $x of training" && exit 1;
nnets_list=
for n in `seq 1 $num_jobs_nnet`; do
nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
done
$cmd $dir/log/average.$x.log \
nnet-am-average $nnets_list - \| \
nnet-am-copy --learning-rate-factor=$per_iter_learning_rate_factor - $dir/$[$x+1].mdl || exit 1;
if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
# mix up.
echo Mixing up from $num_leaves to $mix_up components
$cmd $dir/log/mix_up.$x.log \
nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
$dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
fi
rm $nnets_list
fi
x=$[$x+1]
done
if [ $stage -le $num_iters ]; then
echo "Doing final combination to produce final.mdl"
# Now do combination.
nnets_list=()
# the if..else..fi statement below sets 'nnets_list'.
if [ $max_models_combine -lt $num_models_combine ]; then
# The number of models to combine is too large, e.g. > 20. In this case,
# each argument to nnet-combine-fast will be an average of multiple models.
cur_offset=0 # current offset from first_model_combine.
for n in $(seq $max_models_combine); do
next_offset=$[($n*$num_models_combine)/$max_models_combine]
sub_list=""
for o in $(seq $cur_offset $[$next_offset-1]); do
iter=$[$first_model_combine+$o]
mdl=$dir/$iter.mdl
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
sub_list="$sub_list $mdl"
done
nnets_list[$[$n-1]]="nnet-am-average $sub_list - |"
cur_offset=$next_offset
done
else
nnets_list=
for n in $(seq 0 $[num_models_combine-1]); do
iter=$[$first_model_combine+$n]
mdl=$dir/$iter.mdl
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
nnets_list[$n]=$mdl
done
fi
# Below, use --use-gpu=no to disable nnet-combine-fast from using a GPU, as
# if there are many models it can give out-of-memory error; set num-threads to 8
# to speed it up (this isn't ideal...)
num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
mb=$[($num_egs+$combine_num_threads-1)/$combine_num_threads]
[ $mb -gt 512 ] && mb=512
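# e.g. if combine.egs held 4000 examples and combine_num_threads=8, then
# mb = (4000+8-1)/8 = 500, below the 512 cap.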
# Setting --initial-model to a large value makes it initialize the combination
# with the average of all the models. It's important not to start with a
# single model, or, due to the invariance to scaling that these nonlinearities
# give us, we get zero diagonal entries in the fisher matrix that
# nnet-combine-fast uses for scaling, which after flooring and inversion, has
# the effect that the initial model chosen gets much higher learning rates
# than the others. This prevents the optimization from working well.
$cmd $combine_parallel_opts $dir/log/combine.log \
nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
--num-threads=$combine_num_threads \
--verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$egs_dir/combine.egs \
$dir/final.mdl || exit 1;
# Normalize stddev for affine or block affine layers that are followed by a
# pnorm layer and then a normalize layer.
$cmd $dir/log/normalize.log \
nnet-normalize-stddev $dir/final.mdl $dir/final.mdl || exit 1;
# Compute the probability of the final, combined model with
# the same subset we used for the previous compute_probs, as the
# different subsets will lead to different probs.
$cmd $dir/log/compute_prob_valid.final.log \
nnet-compute-prob $dir/final.mdl ark:$egs_dir/valid_diagnostic.egs &
$cmd $dir/log/compute_prob_train.final.log \
nnet-compute-prob $dir/final.mdl ark:$egs_dir/train_diagnostic.egs &
fi
if [ $stage -le $[$num_iters+1] ]; then
echo "Getting average posterior for purposes of adjusting the priors."
# Note: this just uses CPUs, using a smallish subset of data.
rm $dir/post.$x.*.vec 2>/dev/null
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
nnet-copy-egs --frame=random --srand=JOB ark:$egs_dir/egs.1.ark ark:- \| \
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
nnet-compute-from-egs "nnet-to-raw-nnet $dir/final.mdl -|" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
$cmd $dir/log/vector_sum.$x.log \
vector-sum $dir/post.$x.*.vec $dir/post.$x.vec || exit 1;
rm $dir/post.$x.*.vec;
echo "Re-adjusting priors based on computed posteriors"
$cmd $dir/log/adjust_priors.final.log \
nnet-adjust-priors $dir/final.mdl $dir/post.$x.vec $dir/final.mdl || exit 1;
fi
if [ ! -f $dir/final.mdl ]; then
echo "$0: $dir/final.mdl does not exist."
# we don't want to clean up if the training didn't succeed.
exit 1;
fi
sleep 2
echo Done
if $cleanup; then
echo Cleaning up data
if $remove_egs && [[ $egs_dir =~ $dir/egs* ]]; then
steps/nnet2/remove_egs.sh $egs_dir
fi
echo Removing most of the models
for x in `seq 0 $num_iters`; do
if [ $[$x%100] -ne 0 ] && [ $x -ne $num_iters ] && [ -f $dir/$x.mdl ]; then
# delete all but every 100th model; don't delete the ones which combine to form the final model.
rm $dir/$x.mdl
fi
done
fi

View file

@ -0,0 +1,543 @@
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
# 2013 Xiaohui Zhang
# 2013 Guoguo Chen
# 2014 Vimal Manohar
# 2014 Vijayaditya Peddinti
# Apache 2.0.
# train_multilang2.sh is for multi-language training of neural nets. It
# takes multiple egs directories which must be created by get_egs2.sh, and the
# corresponding alignment directories (only needed for training the transition
# models).
#
# This script requires you to supply a neural net partially trained for the 1st
# language, by one of the regular training scripts, to be used as the initial
# neural net (for use by other languages, we'll discard the last layer); it
# should not have been subject to "mix-up" (since this script does mix-up), or
# combination (since that would push the parameters into a range too large
# to be compatible with our normal learning rate schedules).
# Begin configuration section.
cmd=run.pl
num_epochs=10 # Number of epochs of training (for first language);
# the number of iterations is worked out from this.
initial_learning_rate=0.04
final_learning_rate=0.004
minibatch_size=128 # by default use a smallish minibatch size for neural net
# training; this controls instability which would otherwise
# be a problem with multi-threaded update.
num_jobs_nnet="2 2" # Number of neural net jobs to run in parallel. This option
# is passed to get_egs.sh. Array must be same length
# as number of separate languages.
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
# to the final 'combine' stage, but these models will themselves be averages of
# iteration-number ranges.
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
# disk I/O. Smaller is easier on disk and memory but less random. It's
# not a huge deal though, as samples are anyway randomized right at the start.
# (the point of this is to get data in different minibatches on different iterations,
# since in the preconditioning method, 2 samples in the same minibatch can
# affect each other's gradients.)
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
# more than enough.
stage=-4
mix_up="0 0" # Number of components to mix up to (should be > #tree leaves, if
# specified.) An array, one per language.
num_threads=16 # default suitable for CPU-based training
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # default suitable for CPU-based training.
# by default we use 16 threads; this lets the queue know.
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
combine_num_threads=8
combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
cleanup=false # while testing, leaving cleanup=false.
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# -lt 6 -o $[$#%2] -ne 0 ]; then
# num-args must be at least 6 and must be even.
echo "Usage: $0 [opts] <ali1> <egs1> <ali2> <egs2> ... <aliN> <egsN> <input-model> <exp-dir>"
echo " e.g.: $0 data/train exp/tri6_ali exp/tri6_egs exp_lang2/tri6_ali exp_lang2/tri6_egs exp/dnn6a/10.mdl exp/tri6_multilang"
echo ""
echo "Note: the first egs/ali should correspond to the language that you really want; this"
echo "only affects how the num-epochs is computed, and which model we link to final.mdl."
echo ""
echo "The --num-jobs-nnet should be an array saying how many jobs to allocate to each language,"
echo "e.g. --num-jobs-nnet '2 4'"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|15> # Number of epochs of training (figured from 1st corpus)"
echo " --initial-learning-rate <initial-learning-rate|0.02> # Learning rate at start of training, e.g. 0.02 for small"
echo " # data, 0.01 for large data"
echo " --final-learning-rate <final-learning-rate|0.004> # Learning rate at end of training, e.g. 0.004 for small"
echo " # data, 0.001 for large data"
echo " --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
echo " # per context-dependent state. Try a number several times #states."
echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
echo " # training (will affect results as well as speed; try 8, 16)"
echo " # Note: if you increase this, you may want to also increase"
echo " # the learning rate."
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
echo " # as well as speed; may interact with batch size; if you increase"
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
echo " --splice-indexes <string|layer0/-4:-3:-2:-1:0:1:2:3:4> "
echo " # Frame indices used for each splice layer."
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
echo " --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
exit 1;
fi
argv=("$@")
num_args=$#
num_lang=$[($num_args-2)/2]
dir=${argv[$num_args-1]}
input_model=${argv[$num_args-2]}
[ ! -f $input_model ] && echo "$0: Input model $input_model does not exist" && exit 1;
mkdir -p $dir/log
num_jobs_nnet_array=($num_jobs_nnet)
! [ "${#num_jobs_nnet_array[@]}" -eq "$num_lang" ] && \
echo "$0: --num-jobs-nnet option must have size equal to the number of languages" && exit 1;
mix_up_array=($mix_up)
! [ "${#mix_up_array[@]}" -eq "$num_lang" ] && \
echo "$0: --mix-up option must have size equal to the number of languages" && exit 1;
# Language index starts from 0.
for lang in $(seq 0 $[$num_lang-1]); do
alidir[$lang]=${argv[$lang*2]}
egs_dir[$lang]=${argv[$lang*2+1]}
for f in ${egs_dir[$lang]}/info/frames_per_eg ${egs_dir[$lang]}/egs.1.ark ${alidir[$lang]}/ali.1.gz ${alidir[$lang]}/tree; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/$lang/log
cp ${alidir[$lang]}/tree $dir/$lang/ || exit 1;
for f in ${egs_dir[$lang]}/{final.mat,cmvn_opts,splice_opts}; do
# Copy any of these files that exist.
cp $f $dir/$lang/ 2>/dev/null
done
done
for x in final.mat cmvn_opts splice_opts; do
if [ -f $dir/0/$x ]; then
for lang in $(seq 1 $[$num_lang-1]); do
if ! cmp $dir/0/$x $dir/$lang/$x; then
echo "$0: warning: files $dir/0/$x and $dir/$lang/$x are not identical."
fi
done
fi
done
# the input model is supposed to correspond to the first language.
nnet-am-copy --learning-rate=$initial_learning_rate $input_model $dir/0/0.mdl
if nnet-am-info --print-args=false $dir/0/0.mdl | grep SumGroupComponent 2>/dev/null; then
if [ "${mix_up_array[0]}" != "0" ]; then
echo "$0: Your input model already has mixtures, but you are asking to mix it up."
echo " ... best to use a model without mixtures as input. (e.g., earlier iter)."
exit 1;
fi
fi
if [ $stage -le -4 ]; then
echo "$0: initializing models for other languages"
for lang in $(seq 1 $[$num_lang-1]); do
# create the initial models for the other languages.
$cmd $dir/$lang/log/reinitialize.log \
nnet-am-reinitialize $input_model ${alidir[$lang]}/final.mdl $dir/$lang/0.mdl || exit 1;
done
fi
if [ $stage -le -3 ]; then
echo "Training transition probabilities and setting priors"
for lang in $(seq 0 $[$num_lang-1]); do
$cmd $dir/$lang/log/train_trans.log \
nnet-train-transitions $dir/$lang/0.mdl "ark:gunzip -c ${alidir[$lang]}/ali.*.gz|" $dir/$lang/0.mdl \
|| exit 1;
done
fi
# Work out the number of iterations... the number of epochs refers to the
# first language (language zero) and this, together with the num-jobs-nnet for
# that language and details of the egs, determine the number of epochs.
frames_per_eg0=$(cat ${egs_dir[0]}/info/frames_per_eg) || exit 1;
num_archives0=$(cat ${egs_dir[0]}/info/num_archives) || exit 1;
# num_archives_expanded considers each separate label-position from
# 0..frames_per_eg-1 to be a separate archive.
num_archives_expanded0=$[$num_archives0*$frames_per_eg0]
if [ ${num_jobs_nnet_array[0]} -gt $num_archives_expanded0 ]; then
echo "$0: --num-jobs-nnet[0] cannot exceed num-archives*frames-per-eg which is $num_archives_expanded"
exit 1;
fi
# set num_iters so that as close as possible, we process the data $num_epochs
# times, i.e. $num_iters*$num_jobs_nnet == $num_epochs*$num_archives_expanded
num_iters=$[($num_epochs*$num_archives_expanded0)/${num_jobs_nnet_array[0]}]
echo "$0: Will train for $num_epochs epochs (of language 0) = $num_iters iterations"
! [ $num_iters -gt 0 ] && exit 1;
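# Worked example with hypothetical values: if frames_per_eg0=8, num_archives0=4
# and num_jobs_nnet[0]=2, then num_archives_expanded0=32 and with num_epochs=10
# we get num_iters=(10*32)/2=160.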
# Work out the number of epochs we train for on the other languages... this is
# just informational.
for lang in $(seq 1 $[$num_lang-1]); do
frames_per_eg=$(cat ${egs_dir[$lang]}/info/frames_per_eg) || exit 1;
num_archives=$(cat ${egs_dir[$lang]}/info/num_archives) || exit 1;
num_archives_expanded=$[$num_archives*$frames_per_eg]
num_epochs=$[($num_iters*${num_jobs_nnet_array[$lang]})/$num_archives_expanded]
echo "$0: $num_iters iterations is approximately $num_epochs epochs for language $lang"
done
# do any mixing-up after half the iters.
mix_up_iter=$[$num_iters/2]
if [ $num_threads -eq 1 ]; then
parallel_suffix="-simple" # this enables us to use GPU code if
# we have just one thread.
parallel_train_opts=
if ! cuda-compiled; then
echo "$0: WARNING: you are running with one thread but you have not compiled"
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
fi
else
parallel_suffix="-parallel"
parallel_train_opts="--num-threads=$num_threads"
fi
approx_iters_per_epoch=$[$num_iters/$num_epochs]
# First work out how many models we want to combine over in the final
# nnet-combine-fast invocation. This equals
# min(max(max_models_combine, iters_per_epoch),
# 2/3 * iters_after_mixup).
# We use the same number of iterations for all languages, even though it is
# worked out from the first language only.
num_models_combine=$max_models_combine
if [ $num_models_combine -lt $approx_iters_per_epoch ]; then
num_models_combine=$approx_iters_per_epoch
fi
iters_after_mixup_23=$[(($num_iters-$mix_up_iter-1)*2)/3]
if [ $num_models_combine -gt $iters_after_mixup_23 ]; then
num_models_combine=$iters_after_mixup_23
fi
first_model_combine=$[$num_iters-$num_models_combine+1]
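# Worked example (continuing the hypothetical numbers above): with num_iters=160
# and num_epochs=10, approx_iters_per_epoch=16, so num_models_combine stays at
# max_models_combine=20; mix_up_iter=80 gives iters_after_mixup_23=52, which is
# not a further limit, so we would combine models 141.mdl through 160.mdl
# (first_model_combine=160-20+1=141).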
x=0
while [ $x -lt $num_iters ]; do
if [ $x -ge 0 ] && [ $stage -le $x ]; then
for lang in $(seq 0 $[$num_lang-1]); do
# Set off jobs doing some diagnostics, in the background.
$cmd $dir/$lang/log/compute_prob_valid.$x.log \
nnet-compute-prob $dir/$lang/$x.mdl ark:${egs_dir[$lang]}/valid_diagnostic.egs &
$cmd $dir/$lang/log/compute_prob_train.$x.log \
nnet-compute-prob $dir/$lang/$x.mdl ark:${egs_dir[$lang]}/train_diagnostic.egs &
if [ $x -gt 0 ] && [ ! -f $dir/$lang/log/mix_up.$[$x-1].log ]; then
$cmd $dir/$lang/log/progress.$x.log \
nnet-show-progress --use-gpu=no $dir/$lang/$[$x-1].mdl $dir/$lang/$x.mdl \
ark:${egs_dir[$lang]}/train_diagnostic.egs '&&' \
nnet-am-info $dir/$lang/$x.mdl &
fi
done
echo "Training neural net (pass $x)"
if [ $x -eq 0 ]; then
# on iteration zero, use a smaller minibatch size and only one quarter of the
# normal amount of training data: this will help, respectively, to ensure stability
# and to stop the models from moving so far that averaging hurts.
this_minibatch_size=$[$minibatch_size/2];
this_keep_proportion=0.25
else
this_minibatch_size=$minibatch_size
this_keep_proportion=1.0
# use half the examples on iteration 1, out of a concern that the model-averaging
# might not work if we move too far before getting close to convergence.
[ $x -eq 1 ] && this_keep_proportion=0.5
fi
rm $dir/.error 2>/dev/null
( # this sub-shell is so that when we "wait" below,
# we only wait for the training jobs that we just spawned,
# not the diagnostic jobs that we spawned above.
# We can't easily use a single parallel SGE job to do the main training,
# because the computation of which archive and which --frame option
# to use for each job is a little complex, so we spawn each one separately.
for lang in $(seq 0 $[$num_lang-1]); do
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
this_frames_per_eg=$(cat ${egs_dir[$lang]}/info/frames_per_eg) || exit 1;
this_num_archives=$(cat ${egs_dir[$lang]}/info/num_archives) || exit 1;
! [ $this_num_jobs_nnet -gt 0 -a $this_frames_per_eg -gt 0 -a $this_num_archives -gt 0 ] && exit 1
for n in $(seq $this_num_jobs_nnet); do
k=$[$x*$this_num_jobs_nnet + $n - 1]; # k is a zero-based index that we'll derive
# the other indexes from.
archive=$[($k%$this_num_archives)+1]; # work out the 1-based archive index.
frame=$[(($k/$this_num_archives)%$this_frames_per_eg)];
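# Worked example with hypothetical values: if this_num_jobs_nnet=2,
# this_num_archives=4 and this_frames_per_eg=8, then on iteration x=3 the job
# n=2 gets k=3*2+2-1=7, archive=(7%4)+1=4 and frame=(7/4)%8=1, so successive
# iterations cycle through all archive/frame combinations.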
$cmd $parallel_opts $dir/$lang/log/train.$x.$n.log \
nnet-train$parallel_suffix $parallel_train_opts \
--minibatch-size=$this_minibatch_size --srand=$x $dir/$lang/$x.mdl \
"ark:nnet-copy-egs --keep-proportion=$this_keep_proportion --frame=$frame ark:${egs_dir[$lang]}/egs.$archive.ark ark:-|nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-|" \
$dir/$lang/$[$x+1].$n.mdl || touch $dir/.error &
done
done
wait
)
# the error message below is not that informative, but $cmd will
# have printed a more specific one.
[ -f $dir/.error ] && echo "$0: error on iteration $x of training" && exit 1;
learning_rate=`perl -e '($x,$n,$i,$f)=@ARGV; print ($x >= $n ? $f : $i*exp($x*log($f/$i)/$n));' $[$x+1] $num_iters $initial_learning_rate $final_learning_rate`;
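# The learning rate decays geometrically from initial_learning_rate to
# final_learning_rate over num_iters iterations, i.e.
# lr(i) = initial * (final/initial)^(i/num_iters).
# For illustration, with the defaults 0.04 and 0.004 and a hypothetical
# num_iters=160, the rate at iteration 80 is 0.04*sqrt(0.1) ~= 0.0126.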
(
# First average within each language. Use a sub-shell so "wait" won't
# wait for the diagnostic jobs.
for lang in $(seq 0 $[$num_lang-1]); do
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
nnets_list=$(for n in `seq 1 $this_num_jobs_nnet`; do echo $dir/$lang/$[$x+1].$n.mdl; done)
# average the output of the different jobs.
$cmd $dir/$lang/log/average.$x.log \
nnet-am-average $nnets_list - \| \
nnet-am-copy --learning-rate=$learning_rate - $dir/$lang/$[$x+1].tmp.mdl || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "$0: error averaging models on iteration $x of training" && exit 1;
# Remove the models we just averaged.
for lang in $(seq 0 $[$num_lang-1]); do
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
for n in `seq 1 $this_num_jobs_nnet`; do rm $dir/$lang/$[$x+1].$n.mdl; done
done
)
nnets_list=$(for lang in $(seq 0 $[$num_lang-1]); do echo $dir/$lang/$[$x+1].tmp.mdl; done)
weights_csl=$(echo $num_jobs_nnet | sed 's/ /:/g') # get as colon separated list.
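# e.g. (illustration): --num-jobs-nnet "2 4" gives weights_csl=2:4, so language 0
# and language 1 are weighted by their respective numbers of jobs in the average.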
# the next command produces the cross-language averaged model containing the
# final layer corresponding to language zero.
$cmd $dir/log/average.$x.log \
nnet-am-average --weights=$weights_csl --skip-last-layer=true \
$nnets_list $dir/0/$[$x+1].mdl || exit 1;
for lang in $(seq 1 $[$num_lang-1]); do
# the next command takes the averaged hidden parameters from language zero, and
# the last layer from language $lang. It's not really doing averaging.
$cmd $dir/$lang/log/combine_average.$x.log \
nnet-am-average --weights=0.0:1.0 --skip-last-layer=true \
$dir/$lang/$[$x+1].tmp.mdl $dir/0/$[$x+1].mdl $dir/$lang/$[$x+1].mdl || exit 1;
done
$cleanup && rm $dir/*/$[$x+1].tmp.mdl
if [ $x -eq $mix_up_iter ]; then
for lang in $(seq 0 $[$num_lang-1]); do
this_mix_up=${mix_up_array[$lang]}
if [ $this_mix_up -gt 0 ]; then
echo "$0: for language $lang, mixing up to $this_mix_up components"
$cmd $dir/$lang/log/mix_up.$x.log \
nnet-am-mixup --min-count=10 --num-mixtures=$this_mix_up \
$dir/$lang/$[$x+1].mdl $dir/$lang/$[$x+1].mdl || exit 1;
fi
done
fi
# Now average across languages.
rm $nnets_list
for lang in $(seq 0 $[$num_lang-1]); do # check the new models exist and clean up older ones.
[ ! -f $dir/$lang/$[$x+1].mdl ] && echo "No such file $dir/$lang/$[$x+1].mdl" && exit 1;
if [ -f $dir/$lang/$[$x-1].mdl ] && $cleanup && \
[ $[($x-1)%100] -ne 0 ] && [ $[$x-1] -lt $first_model_combine ]; then
rm $dir/$lang/$[$x-1].mdl
fi
done
fi
x=$[$x+1]
done
if [ $stage -le $num_iters ]; then
echo "Doing combination to produce final models"
rm $dir/.error 2>/dev/null
for lang in $(seq 0 $[$num_lang-1]); do
nnets_list=()
# the if..else..fi statement below sets 'nnets_list'.
if [ $max_models_combine -lt $num_models_combine ]; then
# The number of models to combine is too large, e.g. > 20. In this case,
# each argument to nnet-combine-fast will be an average of multiple models.
cur_offset=0 # current offset from first_model_combine.
for n in $(seq $max_models_combine); do
next_offset=$[($n*$num_models_combine)/$max_models_combine]
sub_list=""
for o in $(seq $cur_offset $[$next_offset-1]); do
iter=$[$first_model_combine+$o]
mdl=$dir/$lang/$iter.mdl
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
sub_list="$sub_list $mdl"
done
nnets_list[$[$n-1]]="nnet-am-average $sub_list - |"
cur_offset=$next_offset
done
else
nnets_list=
for n in $(seq 0 $[num_models_combine-1]); do
iter=$[$first_model_combine+$n]
mdl=$dir/$lang/$iter.mdl
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
nnets_list[$n]=$mdl
done
fi
# Below, use --use-gpu=no to prevent nnet-combine-fast from using a GPU, since
# with many models it can run out of memory; set num-threads
# to 8 to speed it up (this isn't ideal...)
num_egs=`nnet-copy-egs ark:${egs_dir[$lang]}/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
mb=$[($num_egs+$combine_num_threads-1)/$combine_num_threads]
[ $mb -gt 512 ] && mb=512
# Setting --initial-model to a large value makes it initialize the combination
# with the average of all the models. It's important not to start with a
# single model, or, due to the invariance to scaling that these nonlinearities
# give us, we get zero diagonal entries in the Fisher matrix that
# nnet-combine-fast uses for scaling, which after flooring and inversion, has
# the effect that the initial model chosen gets much higher learning rates
# than the others. This prevents the optimization from working well.
$cmd $combine_parallel_opts $dir/$lang/log/combine.log \
nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
--num-threads=$combine_num_threads \
--verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:${egs_dir[$lang]}/combine.egs \
- \| nnet-normalize-stddev - $dir/$lang/final.mdl || touch $dir/.error &
done
wait
[ -f $dir/.error ] && echo "$0: error doing model combination" && exit 1;
fi
if [ $stage -le $[$num_iters+1] ]; then
for lang in $(seq 0 $[$num_lang-1]); do
# Run the diagnostics for the final models.
$cmd $dir/$lang/log/compute_prob_valid.final.log \
nnet-compute-prob $dir/$lang/final.mdl ark:${egs_dir[$lang]}/valid_diagnostic.egs &
$cmd $dir/$lang/log/compute_prob_train.final.log \
nnet-compute-prob $dir/$lang/final.mdl ark:${egs_dir[$lang]}/train_diagnostic.egs &
done
wait
fi
if [ $stage -le $[$num_iters+2] ]; then
# Note: this just uses CPUs, using a smallish subset of data.
for lang in $(seq 0 $[$num_lang-1]); do
echo "$0: Getting average posterior for purposes of adjusting the priors (language $lang)."
rm $dir/$lang/.error 2>/dev/null
rm $dir/$lang/post.$x.*.vec 2>/dev/null
$cmd JOB=1:$num_jobs_compute_prior $dir/$lang/log/get_post.JOB.log \
nnet-copy-egs --frame=random --srand=JOB ark:${egs_dir[$lang]}/egs.1.ark ark:- \| \
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
nnet-compute-from-egs "nnet-to-raw-nnet $dir/$lang/final.mdl -|" ark:- ark:- \| \
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/$lang/post.JOB.vec || touch $dir/$lang/.error &
done
echo "$0: ... waiting for jobs for all languages to complete."
wait
sleep 3; # make sure there is time for $dir/$lang/post.$x.*.vec to appear.
for lang in $(seq 0 $[$num_lang-1]); do
[ -f $dir/$lang/.error ] && \
echo "$0: error getting posteriors for adjusting the priors for language $lang" && exit 1;
$cmd $dir/$lang/log/vector_sum.log \
vector-sum $dir/$lang/post.*.vec $dir/$lang/post.vec || exit 1;
rm $dir/$lang/post.*.vec;
echo "Re-adjusting priors based on computed posteriors for language $lang"
$cmd $dir/$lang/log/adjust_priors.final.log \
nnet-adjust-priors $dir/$lang/final.mdl $dir/$lang/post.vec $dir/$lang/final.mdl || exit 1;
done
fi
for lang in $(seq 0 $[$num_lang-1]); do
if [ ! -f $dir/$lang/final.mdl ]; then
echo "$0: $dir/final.mdl does not exist."
# we don't want to clean up if the training didn't succeed.
exit 1;
fi
done
sleep 2
echo Done
if $cleanup; then
echo Cleaning up data
for lang in $(seq 0 $[$num_lang-1]); do
if [[ ${egs_dir[$lang]} =~ $dir/egs* ]]; then
steps/nnet2/remove_egs.sh ${egs_dir[$lang]}
fi
echo "Removing most of the models for language $lang"
for x in `seq 0 $num_iters`; do
if [ $[$x%100] -ne 0 ] && [ $x -ne $num_iters ] && [ -f $dir/$lang/$x.mdl ]; then
# delete all but every 100th model; don't delete the ones which combine to form the final model.
rm $dir/$lang/$x.mdl
fi
done
done
fi
exit 0

View file

@ -140,8 +140,8 @@ if [ $# != 4 ]; then
echo " # Frame indices used for each splice layer."
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --realign-epochs <list-of-epochs|\"\"> # A list of space-separated epoch indices the beginning of which"
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
echo " # realignment is to be done"
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"

View file

@ -133,8 +133,8 @@ if [ $# != 4 ]; then
echo " # Frame indices used for each splice layer."
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --realign-epochs <list-of-epochs|\"\"> # A list of space-separated epoch indices the beginning of which"
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
echo " # realignment is to be done"
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"

View file

@ -0,0 +1,80 @@
#!/bin/bash
# Copyright 2012 Brno University of Technology (Author: Karel Vesely)
# 2013-2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# Computes training alignments using DNN. This takes as input a directory
# prepared as for online-nnet2 decoding (e.g. by
# steps/online/nnet2/prepare_online_decoding.sh), and it computes the features
# directly from the wav.scp instead of relying on features dumped on disk;
# this avoids the hassle of having to dump suitably matched features.
# Begin configuration section.
nj=4
cmd=run.pl
# Begin configuration.
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
beam=10
retry_beam=40
iter=final
use_gpu=no
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali"
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
oov=`cat $lang/oov.int` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
for f in $srcdir/tree $srcdir/${iter}.mdl $data/wav.scp $lang/L.fst \
$srcdir/conf/online_nnet2_decoding.conf; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
cp $srcdir/{tree,${iter}.mdl} $dir || exit 1;
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
if [ -f $data/segments ]; then
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
# previous utterances within a speaker, we do the filtering after extracting the features.
echo "$0 [info]: segments file exists: using that."
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
else
echo "$0 [info]: no segments file exists, using wav.scp."
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
fi
echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
$cmd JOB=1:$nj $dir/log/align.JOB.log \
compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \
nnet-align-compiled $scale_opts --use-gpu=$use_gpu --beam=$beam --retry-beam=$retry_beam \
$srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
echo "$0: done aligning data."

View file

@ -78,4 +78,8 @@ for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do
done
echo "$0: copied data from $srcdir to $destdir, with --utts-per-spk-max $utts_per_spk_max"
utils/validate_data_dir.sh $destdir
opts=
[ ! -f $srcdir/feats.scp ] && opts="--no-feats"
[ ! -f $srcdir/text ] && opts="$opts --no-text"
utils/validate_data_dir.sh $opts $destdir

View file

@ -8,7 +8,7 @@
# it uses the program online2-wav-dump-feature to do all parts of feature
# extraction: MFCC/PLP/fbank, possibly plus pitch, plus iVectors. This script
# is intended mostly for cross-system training for online decoding, where you
# initialize the nnet from an existing, larger systme.
# initialize the nnet from an existing, larger system.
# Begin configuration section.
@ -69,7 +69,7 @@ dir=$4
mdl=$online_nnet_dir/final.mdl # only needed for left and right context.
feature_conf=$online_nnet_dir/conf/online_nnet2_decoding.conf
for f in $data/feats.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $feature_conf $mdl; do
for f in $data/wav.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $feature_conf $mdl; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
@ -100,7 +100,7 @@ if [ -f $data/utt2uniq ]; then
fi
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid/uttlist | \
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
for subdir in valid train_subset; do

View file

@ -0,0 +1,288 @@
#!/bin/bash
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
#
# This is modified from ../../nnet2/get_egs2.sh. [note: get_egs2.sh is as get_egs.sh,
# but uses the newer, more compact way of writing egs. where we write multiple
# frames of labels in order to share the context.]
# This script combines the
# nnet-example extraction with the feature extraction directly from wave files;
# it uses the program online2-wav-dump-feature to do all parts of feature
# extraction: MFCC/PLP/fbank, possibly plus pitch, plus iVectors. This script
# is intended mostly for cross-system training for online decoding, where you
# initialize the nnet from an existing, larger system.
#
# Begin configuration section.
cmd=run.pl
frames_per_eg=8 # number of frames of labels per example. more->less disk space and
# less time preparing egs, but more I/O during training.
# note: the script may reduce this if reduce_frames_per_eg is true.
reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg
# if there is only one archive and even with the
# reduced frames_per_eg, the number of
# samples_per_iter that would result is less than or
# equal to the user-specified value.
num_utts_subset=300 # number of utterances in validation and training
# subsets used for shrinkage and diagnostics.
num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
num_train_frames_combine=10000 # # train frames for the above.
num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
samples_per_iter=400000 # each iteration of training, see this many samples
# per job. This is just a guideline; it will pick a number
# that divides the number of samples in the entire data.
stage=0
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
random_copy=false
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: $0 [opts] <data> <ali-dir> <online-nnet-dir> <egs-dir>"
echo " e.g.: $0 data/train exp/tri3_ali exp/nnet2_online/nnet_a_gpu_online/ exp/nnet2_online/nnet_b/egs"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl;utils/queue.pl <queue opts>) # how to run jobs."
echo " --samples-per-iter <#samples;400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --feat-type <lda|raw> # (by default it tries to guess). The feature type you want"
echo " # to use as input to the neural net."
echo " --frames-per-eg <frames;8> # number of frames per eg on disk"
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
echo " # very end."
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
exit 1;
fi
data=$1
alidir=$2
online_nnet_dir=$3
dir=$4
mdl=$online_nnet_dir/final.mdl # only needed for left and right context.
feature_conf=$online_nnet_dir/conf/online_nnet2_decoding.conf
for f in $data/wav.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $mdl $feature_conf; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
sdata=$data/split$nj
utils/split_data.sh $data $nj
mkdir -p $dir/log $dir/info
! cmp $alidir/tree $online_nnet_dir/tree && \
echo "$0: warning, tree from alignment dir does not match tree from online-nnet dir"
cp $alidir/tree $dir
grep -v '^--endpoint' $feature_conf >$dir/feature.conf || exit 1;
mkdir -p $dir/valid $dir/train_subset
# Get list of validation utterances.
awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
> $dir/valid/uttlist || exit 1;
if [ -f $data/utt2uniq ]; then
echo "File $data/utt2uniq exists, so augmenting valid/uttlist to"
echo "include all perturbed versions of the same 'real' utterances."
mv $dir/valid/uttlist $dir/valid/uttlist.tmp
utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
cat $dir/valid/uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
sort | uniq | utils/apply_map.pl $dir/uniq2utt | \
awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid/uttlist
rm $dir/uniq2utt $dir/valid/uttlist.tmp
fi
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid/uttlist | \
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
for subdir in valid train_subset; do
# In order for the iVector extraction to work right, we need to process all
# utterances of the speakers which have utterances in valid/uttlist, and the
# same for train_subset/uttlist. We produce $dir/valid/uttlist_extended which
# will contain all utterances of all speakers which have utterances in
# $dir/valid/uttlist, and the same for $dir/train_subset/.
utils/filter_scp.pl $dir/$subdir/uttlist <$data/utt2spk | awk '{print $2}' > $dir/$subdir/spklist || exit 1;
utils/filter_scp.pl -f 2 $dir/$subdir/spklist <$data/utt2spk >$dir/$subdir/utt2spk || exit 1;
utils/utt2spk_to_spk2utt.pl <$dir/$subdir/utt2spk >$dir/$subdir/spk2utt || exit 1;
awk '{print $1}' <$dir/$subdir/utt2spk >$dir/$subdir/uttlist_extended || exit 1;
rm $dir/$subdir/spklist
done
if [ -f $data/segments ]; then
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
# previous utterances within a speaker, we do the filtering after extracting the features.
echo "$0 [info]: segments file exists: using that."
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- | subset-feats --exclude=$dir/valid/uttlist ark:- ark:- |"
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid/uttlist_extended $data/segments | extract-segments scp:$data/wav.scp - ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/valid/spk2utt ark,s,cs:- ark:- | subset-feats --include=$dir/valid/uttlist ark:- ark:- |"
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset/uttlist_extended $data/segments | extract-segments scp:$data/wav.scp - ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/train_subset/spk2utt ark,s,cs:- ark:- | subset-feats --include=$dir/train_subset/uttlist ark:- ark:- |"
else
echo "$0 [info]: no segments file exists, using wav.scp."
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- | subset-feats --exclude=$dir/valid/uttlist ark:- ark:- |"
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid/uttlist_extended $data/wav.scp | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/valid/spk2utt scp:- ark:- | subset-feats --include=$dir/valid/uttlist ark:- ark:- |"
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset/uttlist_extended $data/wav.scp | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/train_subset/spk2utt scp:- ark:- | subset-feats --include=$dir/train_subset/uttlist ark:- ark:- |"
fi
ivector_dim=$(online2-wav-dump-features --config=$dir/feature.conf --print-ivector-dim=true) || exit 1;
! [ $ivector_dim -ge 0 ] && echo "$0: error getting iVector dim" && exit 1;
set -o pipefail
left_context=$(nnet-am-info $mdl | grep '^left-context' | awk '{print $2}') || exit 1;
right_context=$(nnet-am-info $mdl | grep '^right-context' | awk '{print $2}') || exit 1;
set +o pipefail
if [ $stage -le 0 ]; then
echo "$0: working out number of frames of training data"
num_frames=$(steps/nnet2/get_num_frames.sh $data)
echo $num_frames > $dir/info/num_frames
else
num_frames=`cat $dir/info/num_frames` || exit 1;
fi
# the + 1 is to round up, not down... we assume it doesn't divide exactly.
num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1]
# (for small data)- while reduce_frames_per_eg == true and the number of
# archives is 1 and would still be 1 if we reduced frames_per_eg by 1, reduce it
# by 1.
reduced=false
while $reduce_frames_per_eg && [ $frames_per_eg -gt 1 ] && \
[ $[$num_frames/(($frames_per_eg-1)*$samples_per_iter)] -eq 0 ]; do
frames_per_eg=$[$frames_per_eg-1]
num_archives=1
reduced=true
done
$reduced && echo "$0: reduced frames_per_eg to $frames_per_eg because amount of data is small."
echo $num_archives >$dir/info/num_archives
echo $frames_per_eg >$dir/info/frames_per_eg
# Working out number of egs per archive
egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)]
! [ $egs_per_archive -le $samples_per_iter ] && \
echo "$0: script error: egs_per_archive=$egs_per_archive not <= samples_per_iter=$samples_per_iter" \
&& exit 1;
echo $egs_per_archive > $dir/info/egs_per_archive
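# Worked example with made-up numbers: for about 10 hours of data,
# num_frames=3600000; with frames_per_eg=8 and samples_per_iter=400000 we get
# num_archives=3600000/(8*400000)+1=2 and egs_per_archive=3600000/(8*2)=225000,
# which satisfies egs_per_archive <= samples_per_iter.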
echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with"
echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)"
# Making soft links to storage directories. This is a no-op unless
# the subdirectory $dir/storage/ exists. See utils/create_split_dir.pl
for x in `seq $num_archives`; do
utils/create_data_link.pl $dir/egs.$x.ark
for y in `seq $nj`; do
utils/create_data_link.pl $dir/egs_orig.$x.$y.ark
done
done
nnet_context_opts="--left-context=$left_context --right-context=$right_context"
if [ $stage -le 2 ]; then
echo "$0: Getting validation and training subset examples."
rm $dir/.error 2>/dev/null
echo "$0: ... extracting validation and training-subset alignments."
set -o pipefail;
for id in $(seq $nj); do gunzip -c $alidir/ali.$id.gz; done | \
copy-int-vector ark:- ark,t:- | \
utils/filter_scp.pl <(cat $dir/valid/uttlist $dir/train_subset/uttlist) | \
gzip -c >$dir/ali_special.gz || exit 1;
set +o pipefail; # unset the pipefail option.
$cmd $dir/log/create_valid_subset.log \
nnet-get-egs $ivectors_opt $nnet_context_opts "$valid_feats" \
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
"ark:$dir/valid_all.egs" || touch $dir/.error &
$cmd $dir/log/create_train_subset.log \
nnet-get-egs $ivectors_opt $nnet_context_opts "$train_subset_feats" \
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
"ark:$dir/train_subset_all.egs" || touch $dir/.error &
wait;
[ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1;
echo "... Getting subsets of validation examples for diagnostics and combination."
$cmd $dir/log/create_valid_subset_combine.log \
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
ark:$dir/valid_combine.egs || touch $dir/.error &
$cmd $dir/log/create_valid_subset_diagnostic.log \
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
ark:$dir/valid_diagnostic.egs || touch $dir/.error &
$cmd $dir/log/create_train_subset_combine.log \
nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
ark:$dir/train_combine.egs || touch $dir/.error &
$cmd $dir/log/create_train_subset_diagnostic.log \
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
ark:$dir/train_diagnostic.egs || touch $dir/.error &
wait
sleep 5 # wait for file system to sync.
cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
for f in $dir/{combine,train_diagnostic,valid_diagnostic}.egs; do
[ ! -s $f ] && echo "No examples in file $f" && exit 1;
done
rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_combine.egs $dir/ali_special.gz
fi
if [ $stage -le 3 ]; then
# create egs_orig.*.*.ark; the first index goes to $num_archives,
# the second to $nj (which is the number of jobs in the original alignment
# dir)
egs_list=
for n in $(seq $num_archives); do
egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark"
done
echo "$0: Generating training examples on disk"
# The examples will go round-robin to egs_list.
$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \
"ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
nnet-copy-egs ark:- $egs_list || exit 1;
fi
if [ $stage -le 4 ]; then
echo "$0: recombining and shuffling order of archives on disk"
# combine all the "egs_orig.JOB.*.scp" (over the $nj splits of the data) and
# shuffle the order, writing to the egs.JOB.ark
egs_list=
for n in $(seq $nj); do
egs_list="$egs_list $dir/egs_orig.JOB.$n.ark"
done
$cmd $io_opts $extra_opts JOB=1:$num_archives $dir/log/shuffle.JOB.log \
nnet-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1;
fi
if [ $stage -le 5 ]; then
echo "$0: removing temporary archives"
for x in `seq $num_archives`; do
for y in `seq $nj`; do
file=$dir/egs_orig.$x.$y.ark
[ -L $file ] && rm $(readlink -f $file)
rm $file
done
done
fi
echo "$0: Finished preparing training examples"

View file

@ -0,0 +1,244 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# This script dumps examples for MPE, MMI or state-level minimum Bayes risk (sMBR)
# training of neural nets. Note: for "criterion", smbr > mpe > mmi in terms of
# compatibility of the dumped egs, meaning you can use the egs dumped with
# --criterion smbr for MPE or MMI, and egs dumped with --criterion mpe for MMI
# training. The discriminative training program itself doesn't enforce this and
# it would let you mix and match them arbitrarily; we are speaking in terms of
# the correctness of the algorithm that splits the lattices into pieces.
# Begin configuration section.
cmd=run.pl
criterion=smbr
drop_frames=false # option relevant for MMI, affects how we dump examples.
samples_per_iter=400000 # measured in frames, not in "examples"
max_temp_archives=128 # maximum number of temp archives per input job, only
# affects the process of generating archives, not the
# final result.
stage=0
iter=final
cleanup=true
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 6 ]; then
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <denlat-dir> <src-online-nnet2-dir> <degs-dir>"
echo " e.g.: $0 data/train data/lang exp/nnet2_online/nnet_a_online{_ali,_denlats,_degs}"
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config file containing options"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs (probably would be good to add -tc 5 or so if using"
echo " # GridEngine (to avoid excessive NFS traffic)."
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
echo " # online-neural-net setup. (but you may want to use"
echo " # steps/online/nnet2/get_egs_discriminative2.sh instead)"
exit 1;
fi
data=$1
lang=$2
alidir=$3
denlatdir=$4
srcdir=$5
dir=$6
# Check some files.
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/num_jobs $alidir/tree \
$denlatdir/lat.1.gz $denlatdir/num_jobs $srcdir/$iter.mdl $srcdir/conf/online_nnet2_decoding.conf; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
mkdir -p $dir/log $dir/info || exit 1;
nj=$(cat $denlatdir/num_jobs) || exit 1; # $nj is the number of
# splits of the denlats and alignments.
nj_ali=$(cat $alidir/num_jobs) || exit 1;
sdata=$data/split$nj
utils/split_data.sh $data $nj
if [ $nj_ali -eq $nj ]; then
ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
else
ali_rspecifier="scp:$dir/ali.scp"
if [ $stage -le 1 ]; then
echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
all_ids=$(seq -s, $nj_ali)
copy-int-vector --print-args=false \
"ark:gunzip -c $alidir/ali.{$all_ids}.gz|" ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1;
fi
fi
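# Illustration of the fallback above (hypothetical numbers): if the denlats were
# made with nj=30 but the alignments with nj_ali=40, the 40 ali.*.gz archives are
# copied into a single $dir/ali.ark with an scp index, and each of the 30 egs
# jobs then looks alignments up by utterance id via scp:$dir/ali.scp.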
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
cp $alidir/tree $dir
cp $lang/phones/silence.csl $dir/info || exit 1;
cp $srcdir/$iter.mdl $dir/final.mdl || exit 1;
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
ivector_dim=$(online2-wav-dump-features --config=$dir/feature.conf --print-ivector-dim=true) || exit 1;
echo $ivector_dim > $dir/info/ivector_dim
! [ $ivector_dim -ge 0 ] && echo "$0: error getting iVector dim" && exit 1;
if [ -f $data/segments ]; then
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
# previous utterances within a speaker, we do the filtering after extracting the features.
echo "$0 [info]: segments file exists: using that."
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
else
echo "$0 [info]: no segments file exists, using wav.scp."
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
fi
if [ $stage -le 2 ]; then
echo "$0: working out number of frames of training data"
num_frames=$(steps/nnet2/get_num_frames.sh $data)
echo $num_frames > $dir/info/num_frames
# Working out total number of archives. Add one on the assumption the
# num-frames won't divide exactly, and we want to round up.
num_archives=$[$num_frames/$samples_per_iter + 1]
# the next few lines relate to how we may temporarily split each input job
# into fewer than $num_archives pieces, to avoid using an excessive
# number of filehandles.
archive_ratio=$[$num_archives/$max_temp_archives+1]
num_archives_temp=$[$num_archives/$archive_ratio]
# change $num_archives slightly to make it an exact multiple
# of $archive_ratio.
num_archives=$[$num_archives_temp*$archive_ratio]
echo $num_archives >$dir/info/num_archives || exit 1
echo $num_archives_temp >$dir/info/num_archives_temp || exit 1
frames_per_archive=$[$num_frames/$num_archives]
# note, this is the number of frames per archive prior to discarding frames.
echo $frames_per_archive > $dir/info/frames_per_archive
else
num_archives=$(cat $dir/info/num_archives) || exit 1;
num_archives_temp=$(cat $dir/info/num_archives_temp) || exit 1;
frames_per_archive=$(cat $dir/info/frames_per_archive) || exit 1;
fi
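# Worked example with made-up numbers: for ~500 hours, num_frames=180000000 and
# samples_per_iter=400000 give num_archives=451; with max_temp_archives=128,
# archive_ratio=451/128+1=4, num_archives_temp=451/4=112, and num_archives is
# rounded to 112*4=448, so frames_per_archive=180000000/448~=401785.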
echo "$0: Splitting the data up into $num_archives archives (using $num_archives_temp temporary pieces per input job)"
echo "$0: giving samples-per-iteration of $frames_per_archive (you requested $samples_per_iter)."
# we create these data links regardless of the stage, as there are situations
# where we would want to recreate a data link that had previously been deleted.
if [ -d $dir/storage ]; then
echo "$0: creating data links for distributed storage of degs"
# See utils/create_split_dir.pl for how this 'storage' directory is created.
for x in $(seq $nj); do
for y in $(seq $num_archives_temp); do
utils/create_data_link.pl $dir/degs_orig.$x.$y.ark
done
done
for z in $(seq $num_archives); do
utils/create_data_link.pl $dir/degs.$z.ark
done
if [ $num_archives_temp -ne $num_archives ]; then
for z in $(seq $num_archives); do
utils/create_data_link.pl $dir/degs_temp.$z.ark
done
fi
fi
if [ $stage -le 3 ]; then
echo "$0: getting initial training examples by splitting lattices"
degs_list=$(for n in $(seq $num_archives_temp); do echo ark:$dir/degs_orig.JOB.$n.ark; done)
$cmd JOB=1:$nj $dir/log/get_egs.JOB.log \
nnet-get-egs-discriminative --criterion=$criterion --drop-frames=$drop_frames \
"$srcdir/$iter.mdl" "$feats" "$ali_rspecifier" "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz|" ark:- \| \
nnet-copy-egs-discriminative $const_dim_opt ark:- $degs_list || exit 1;
sleep 5; # wait a bit so NFS has time to write files.
fi
if [ $stage -le 4 ]; then
degs_list=$(for n in $(seq $nj); do echo $dir/degs_orig.$n.JOB.ark; done)
if [ $num_archives -eq $num_archives_temp ]; then
echo "$0: combining data into final archives and shuffling it"
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
cat $degs_list \| nnet-shuffle-egs-discriminative --srand=JOB ark:- \
ark:$dir/degs.JOB.ark || exit 1;
else
echo "$0: combining and re-splitting data into un-shuffled versions of final archives."
archive_ratio=$[$num_archives/$num_archives_temp]
! [ $archive_ratio -gt 1 ] && echo "$0: Bad archive_ratio $archive_ratio" && exit 1;
# note: the \$[ .. ] won't be evaluated until the job gets executed. The
# aim is to write to the archives with the final numbering, 1
# ... num_archives, which is more than num_archives_temp. The list with
# \$[... ] expressions in it computes the set of final indexes for each
# temporary index.
degs_list_out=$(for n in $(seq $archive_ratio); do echo "ark:$dir/degs_temp.\$[((JOB-1)*$archive_ratio)+$n].ark"; done)
# e.g. if dir=foo and archive_ratio=2, we'd have
# degs_list_out='foo/degs_temp.$[((JOB-1)*2)+1].ark foo/degs_temp.$[((JOB-1)*2)+2].ark'
$cmd JOB=1:$num_archives_temp $dir/log/resplit.JOB.log \
cat $degs_list \| nnet-copy-egs-discriminative --srand=JOB ark:- \
$degs_list_out || exit 1;
fi
fi
if [ $stage -le 5 ] && [ $num_archives -ne $num_archives_temp ]; then
echo "$0: shuffling final archives."
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
nnet-shuffle-egs-discriminative --srand=JOB ark:$dir/degs_temp.JOB.ark \
ark:$dir/degs.JOB.ark || exit 1
fi
if $cleanup; then
echo "$0: removing temporary archives."
for x in $(seq $nj); do
for y in $(seq $num_archives_temp); do
file=$dir/degs_orig.$x.$y.ark
[ -L $file ] && rm $(readlink -f $file); rm $file
done
done
if [ $num_archives_temp -ne $num_archives ]; then
for z in $(seq $num_archives); do
file=$dir/degs_temp.$z.ark
[ -L $file ] && rm $(readlink -f $file); rm $file
done
fi
fi
echo "$0: Done."

View file

@ -0,0 +1,168 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
# Create denominator lattices for MMI/MPE training.
# This version uses the online-nnet2 features.
#
# Creates its output in $dir/lat.*.gz
# Begin configuration section.
stage=0
nj=4
cmd=run.pl
sub_split=1
beam=13.0
lattice_beam=7.0
acwt=0.1
max_active=5000
max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes-- you have to multiply
# by something like 5 or 10 to get real bytes (not sure why so large)
num_threads=1
parallel_opts=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 4 ]; then
echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
echo " e.g.: steps/make_denlats.sh data/train data/lang exp/nnet2_online/nnet_a_online exp/nnet2_online/nnet_a_denlats"
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
echo " plus transforms."
echo ""
echo "Main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --sub-split <n-split> # e.g. 40; use this for "
echo " # large databases so your jobs will be smaller and"
echo " # will (individually) finish reasonably soon."
echo " --num-threads <n> # number of threads per decoding job"
echo " --parallel-opts <string> # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
exit 1;
fi
data=$1
lang=$2
srcdir=$3
dir=$4
for f in $data/wav.scp $lang/L.fst $srcdir/final.mdl $srcdir/conf/online_nnet2_decoding.conf; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
done
sdata=$data/split$nj
thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
mkdir -p $dir/log
split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
oov=`cat $lang/oov.int` || exit 1;
# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
if [ $stage -le 0 ]; then
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph.
cp -rH $lang $dir/
echo "Compiling decoding graph in $dir/dengraph"
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
else
echo "Making unigram grammar FST in $new_lang"
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \
|| exit 1;
utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1;
fi
fi
if [ -f $data/segments ]; then
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
# previous utterances within a speaker, we do the filtering after extracting the features.
echo "$0 [info]: segments file exists: using that."
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
else
echo "$0 [info]: no segments file exists, using wav.scp."
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
fi
# if this job is interrupted by the user, we want any background jobs to be
# killed too.
cleanup() {
local pids=$(jobs -pr)
[ -n "$pids" ] && kill $pids
}
trap "cleanup" INT QUIT TERM EXIT
if [ $sub_split -eq 1 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
# each job from 1 to $nj is split into multiple pieces (sub-split), and we aim
# to have at most two jobs running at any given time. The idea is that if one job
# has stragglers, we can already be processing the next one.
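# Illustration (hypothetical numbers): with nj=30 and --sub-split 40, subset 1 is
# decoded as 40 small jobs; while subset 2 is decoding, the pieces lat.1.*.gz are
# merged into lat.1.gz and .done.1 is written. The loop runs to nj+1 so that the
# last subset also gets merged.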
rm $dir/.error 2>/dev/null
prev_pid=
for n in `seq $[nj+1]`; do
if [ $n -gt $nj ]; then
this_pid=
elif [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
this_pid=
else
sdata2=$data/split$nj/$n/split$sub_split;
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
fi
mkdir -p $dir/log/$n
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
$cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
this_pid=$!
fi
if [ ! -z "$prev_pid" ]; then # Wait for the previous job; merge the previous set of lattices.
wait $prev_pid
[ -f $dir/.error ] && echo "$0: error generating denominator lattices" && exit 1;
rm $dir/.merge_error 2>/dev/null
echo Merging archives for data subset $prev_n
for k in `seq $sub_split`; do
gunzip -c $dir/lat.$prev_n.$k.gz || touch $dir/.merge_error;
done | gzip -c > $dir/lat.$prev_n.gz || touch $dir/.merge_error;
[ -f $dir/.merge_error ] && echo "$0: Merging lattices for subset $prev_n failed (or maybe some other error)" && exit 1;
rm $dir/lat.$prev_n.*.gz
touch $dir/.done.$prev_n
fi
prev_n=$n
prev_pid=$this_pid
done
fi
echo "$0: done generating denominator lattices."

View file

@ -27,26 +27,39 @@ echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: $0 [options] <orig-nnet-online-dir> <new-nnet-dir> <new-nnet-online-dir>"
echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
echo "main options (for others, see top of script file)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --config <config-file> # config containing options"
echo " --stage <stage> # stage to do partial re-run from."
exit 1;
if [ $# -ne 3 ] && [ $# -ne 4 ]; then
echo "Usage: $0 [options] <orig-nnet-online-dir> [<new-lang-dir>] <new-nnet-dir> <new-nnet-online-dir>"
echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
echo "main options (for others, see top of script file)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --config <config-file> # config containing options"
echo " --stage <stage> # stage to do partial re-run from."
exit 1;
fi
online_src=$1
nnet_src=$2
dir=$3
if [ $# -eq 3 ]; then
echo "$0: warning: it's better if you add the new <lang> directory as the 2nd argument."
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree; do
online_src=$1
lang=
nnet_src=$2
dir=$3
else
online_src=$1
lang=$2
nnet_src=$3
dir=$4
extra_files=$lang/words.txt
fi
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $extra_files; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
origdir=$dir
dir_as_given=$dir
dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
# configuration files we write will contain absolute
# pathnames.
@ -89,8 +102,16 @@ $cmd $dir/log/append_nnet.log \
nnet-insert --randomize-next-component=false --insert-at=0 \
$nnet_src/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1;
cp $nnet_src/tree $dir/ || exit 1;
$cleanup && rm $dir/first_nnet.raw
echo "$0: formatted neural net for online decoding in $origdir"
if [ ! -z "$lang" ]; then
# if the $lang option was provided, modify the silence-phones in the config;
# these are only used for the endpointing code, but we should get this right.
cp $dir/conf/online_nnet2_decoding.conf{,.tmp}
silphones=$(cat $lang/phones/silence.csl) || exit 1;
cat $dir/conf/online_nnet2_decoding.conf.tmp | \
sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf
rm $dir/conf/online_nnet2_decoding.conf.tmp
fi
echo "$0: formatted neural net for online decoding in $dir_as_given"

View file

@ -0,0 +1,76 @@
#!/bin/bash
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This is as prepare_online_decoding.sh, but for transfer learning -- the case where
# you have an existing online-decoding directory with all the feature and iVector
# configuration that you don't want to change, but a newly trained model (and
# possibly a new tree/lang) that you want to combine with it.
# Begin configuration.
stage=0 # This allows restarting partway through, when something went wrong.
cmd=run.pl
iter=final
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;
if [ $# -ne 4 ]; then
echo "Usage: $0 [options] <orig-nnet-online-dir> <new-lang-dir> <new-nnet-dir> <new-nnet-online-dir>"
echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
echo "main options (for others, see top of script file)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --config <config-file> # config containing options"
echo " --stage <stage> # stage to do partial re-run from."
exit 1;
fi
online_src=$1
lang=$2
nnet_src=$3
dir=$4
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $lang/words.txt; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
dir_as_given=$dir
dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
# configuration files we write will contain absolute
# pathnames.
mkdir -p $dir/conf $dir/log
cp $nnet_src/tree $dir/ || exit 1;
cp $nnet_src/$iter.mdl $dir/ || exit 1;
# There are a bunch of files that we will need to copy from $online_src, because
# we're aiming to have one self-contained directory that has everything in it.
cp -rT $online_src/ivector_extractor/ $dir/ivector_extractor
[ ! -d $online_src/conf ] && \
echo "Expected directory $online_src/conf to exist" && exit 1;
for x in $online_src/conf/*conf; do
# Replace directory names starting with $online_src with ones starting with $dir.
# We actually replace any directory names ending in /ivector_extractor/ or /conf/
# with $dir/ivector_extractor/ or $dir/conf/
cat $x | perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" > $dir/conf/$(basename $x)
done
# modify the silence-phones in the config; these are only used for the
# endpointing code.
cp $dir/conf/online_nnet2_decoding.conf{,.tmp}
silphones=$(cat $lang/phones/silence.csl) || exit 1;
cat $dir/conf/online_nnet2_decoding.conf.tmp | \
sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf
rm $dir/conf/online_nnet2_decoding.conf.tmp
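# Illustration of the substitution above (hypothetical values): if silence.csl
# contains "1:2:3", then
#   echo '--endpoint.silence-phones=1:2:15' | sed s/silence-phones=.\\+/silence-phones=1:2:3/
# prints '--endpoint.silence-phones=1:2:3'.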
echo "$0: formatted neural net for online decoding in $dir_as_given"

View file

@ -9,7 +9,7 @@
if (@ARGV > 0 && $ARGV[0] eq "-f") {
shift @ARGV;
shift @ARGV;
$field_spec = shift @ARGV;
if ($field_spec =~ m/^\d+$/) {
$field_begin = $field_spec - 1; $field_end = $field_spec - 1;
@ -36,6 +36,7 @@ if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
}
if(@ARGV != 1) {
print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
print STDERR "Usage: apply_map.pl [options] map <input >output\n" .
"options: [-f <field-range> ]\n" .
"Applies the map 'map' to all input text, where each line of the map\n" .

View file

@ -196,7 +196,8 @@ cat $srcdir/nonsilence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt
cp $dir/phones/silence.txt $dir/phones/context_indep.txt
cat $srcdir/extra_questions.txt | utils/apply_map.pl $tmpdir/phone_map.txt \
# if extra_questions.txt is empty or missing, that's OK.
cat $srcdir/extra_questions.txt 2>/dev/null | utils/apply_map.pl $tmpdir/phone_map.txt \
>$dir/phones/extra_questions.txt
# Want extra questions about the word-start/word-end stuff. Make it separate for

View file

@ -42,14 +42,14 @@ utt2spks=""
texts=""
nu=`cat $data/utt2spk | wc -l`
nf=`cat $data/feats.scp | wc -l`
nf=`cat $data/feats.scp 2>/dev/null | wc -l`
nt=`cat $data/text 2>/dev/null | wc -l` # take it as zero if no such file
if [ $nu -ne $nf ]; then
if [ -f $data/feats.scp ] && [ $nu -ne $nf ]; then
echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf); this script "
echo " may produce incorrectly split data."
echo "use utils/fix_data_dir.sh to fix this."
fi
if [ $nt -ne 0 -a $nu -ne $nt ]; then
if [ -f $data/text ] && [ $nu -ne $nt ]; then
echo "split_data.sh: warning, #lines is (utt2spk,text) is ($nu,$nt); this script "
echo " may produce incorrectly split data."
echo "use utils/fix_data_dir.sh to fix this."
@ -89,7 +89,7 @@ fi
utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1
utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats || exit 1
[ -f $data/feats.scp ] && utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats
[ -f $data/text ] && utils/split_scp.pl $utt2spk_opt $data/text $texts
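# (Sketch, hypothetical directory:) with feats.scp and text now optional, a data
# directory holding only wav.scp, utt2spk and spk2utt -- e.g. one prepared purely
# for online decoding -- can still be split, as in:
#   utils/split_data.sh data/test_wavonly 8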

View file

@ -28,15 +28,16 @@
$quiet = 0;
$first = 0;
$last = 0;
if ($ARGV[0] eq "--quiet") {
if (@ARGV > 0 && $ARGV[0] eq "--quiet") {
shift;
$quiet = 1;
}
if ($ARGV[0] eq "--first") {
if (@ARGV > 0 && $ARGV[0] eq "--first") {
shift;
$first = 1;
}
if ($ARGV[0] eq "--last") {
if (@ARGV > 0 && $ARGV[0] eq "--last") {
shift;
$last = 1;
}
@ -44,7 +45,8 @@ if ($ARGV[0] eq "--last") {
if(@ARGV < 2 ) {
die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" .
" --quiet causes it to not die if N < num lines in scp.\n" .
" --first and --last make it equivalent to head or tail.\n";
" --first and --last make it equivalent to head or tail.\n" .
"See also: filter_scp.pl\n";
}
$N = shift @ARGV;

View file

@ -73,6 +73,11 @@ void AmNnet::Init(const Nnet &nnet) {
}
}
void AmNnet::ResizeOutputLayer(int32 new_num_pdfs) {
nnet_.ResizeOutputLayer(new_num_pdfs);
priors_.Resize(new_num_pdfs);
priors_.Set(1.0 / new_num_pdfs);
}
} // namespace nnet2
} // namespace kaldi

View file

@ -68,6 +68,10 @@ class AmNnet {
std::string Info() const;
/// This function is used when doing transfer learning to a new system.
/// It will set the priors to be all the same.
void ResizeOutputLayer(int32 new_num_pdfs);
private:
const AmNnet &operator = (const AmNnet &other); // Disallow.
Nnet nnet_;

View file

@ -1021,6 +1021,13 @@ void AffineComponent::Scale(BaseFloat scale) {
bias_params_.Scale(scale);
}
// virtual
void AffineComponent::Resize(int32 input_dim, int32 output_dim) {
KALDI_ASSERT(input_dim > 0 && output_dim > 0);
bias_params_.Resize(output_dim);
linear_params_.Resize(output_dim, input_dim);
}
void AffineComponent::Add(BaseFloat alpha, const UpdatableComponent &other_in) {
const AffineComponent *other =
dynamic_cast<const AffineComponent*>(&other_in);
@ -1590,6 +1597,22 @@ void AffineComponentPreconditioned::Update(
in_value_precon_part, kNoTrans, 1.0);
}
// virtual
void AffineComponentPreconditionedOnline::Resize(
int32 input_dim, int32 output_dim) {
KALDI_ASSERT(input_dim > 1 && output_dim > 1);
if (rank_in_ >= input_dim) rank_in_ = input_dim - 1;
if (rank_out_ >= output_dim) rank_out_ = output_dim - 1;
bias_params_.Resize(output_dim);
linear_params_.Resize(output_dim, input_dim);
OnlinePreconditioner temp;
preconditioner_in_ = temp;
preconditioner_out_ = temp;
SetPreconditionerConfigs();
}
void AffineComponentPreconditionedOnline::Read(std::istream &is, bool binary) {
std::ostringstream ostr_beg, ostr_end;
ostr_beg << "<" << Type() << ">";

View file

@ -740,6 +740,10 @@ class AffineComponent: public UpdatableComponent {
void Init(BaseFloat learning_rate,
std::string matrix_filename);
// This function resizes the dimensions of the component, setting the
// parameters to zero, while leaving any other configuration values the same.
virtual void Resize(int32 input_dim, int32 output_dim);
// The following functions are used for collapsing multiple layers
// together. They return a pointer to a new Component equivalent to
// the sequence of two components. We haven't implemented this for
@ -895,6 +899,8 @@ class AffineComponentPreconditionedOnline: public AffineComponent {
BaseFloat alpha, BaseFloat max_change_per_sample,
std::string matrix_filename);
virtual void Resize(int32 input_dim, int32 output_dim);
// This constructor is used when converting neural networks partway through
// training, from AffineComponent or AffineComponentPreconditioned to
// AffineComponentPreconditionedOnline.

View file

@ -339,68 +339,34 @@ void Nnet::SetLearningRates(BaseFloat learning_rate) {
KALDI_LOG << "Set learning rates to " << learning_rate;
}
void Nnet::AdjustLearningRates(
const VectorBase<BaseFloat> &old_model_old_gradient,
const VectorBase<BaseFloat> &new_model_old_gradient,
const VectorBase<BaseFloat> &old_model_new_gradient,
const VectorBase<BaseFloat> &new_model_new_gradient,
BaseFloat measure_at, // where to measure gradient,
// on line between old and new model;
// 0.5 < measure_at <= 1.0.
BaseFloat ratio, // e.g. 1.1; ratio by which we change learning rate.
BaseFloat max_learning_rate) {
std::vector<BaseFloat> new_lrates;
KALDI_ASSERT(old_model_old_gradient.Dim() == NumUpdatableComponents() &&
new_model_old_gradient.Dim() == NumUpdatableComponents() &&
old_model_new_gradient.Dim() == NumUpdatableComponents() &&
new_model_new_gradient.Dim() == NumUpdatableComponents());
KALDI_ASSERT(ratio >= 1.0);
KALDI_ASSERT(measure_at > 0.5 && measure_at <= 1.0);
std::string changes_str;
std::string dotprod_str;
BaseFloat inv_ratio = 1.0 / ratio;
int32 index = 0;
for (int32 c = 0; c < NumComponents(); c++) {
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[c]);
if (uc == NULL) { // Non-updatable component.
KALDI_ASSERT(old_model_old_gradient(c) == 0.0);
continue;
} else {
BaseFloat grad_dotprod_at_end =
new_model_new_gradient(index) - old_model_new_gradient(index),
grad_dotprod_at_start =
new_model_old_gradient(index) - old_model_old_gradient(index),
grad_dotprod_interp =
measure_at * grad_dotprod_at_end +
(1.0 - measure_at) * grad_dotprod_at_start;
// grad_dotprod_interp will be positive if we
// want more of the gradient term
// -> faster learning rate for this component
BaseFloat lrate = uc->LearningRate();
lrate *= (grad_dotprod_interp > 0 ? ratio : inv_ratio);
changes_str = changes_str +
(grad_dotprod_interp > 0 ? " increase" : " decrease");
dotprod_str = dotprod_str +
(new_model_new_gradient(index) > 0 ? " positive" : " negative");
if (lrate > max_learning_rate) lrate = max_learning_rate;
new_lrates.push_back(lrate);
uc->SetLearningRate(lrate);
index++;
}
void Nnet::ResizeOutputLayer(int32 new_num_pdfs) {
KALDI_ASSERT(new_num_pdfs > 0);
KALDI_ASSERT(NumComponents() > 2);
int32 nc = NumComponents();
SoftmaxComponent *sc;
if ((sc = dynamic_cast<SoftmaxComponent*>(components_[nc - 1])) == NULL)
KALDI_ERR << "Expected last component to be SoftmaxComponent.";
SumGroupComponent *sgc = dynamic_cast<SumGroupComponent*>(components_[nc - 2]);
if (sgc != NULL) {
// Remove it. We'll resize things later.
delete sgc;
components_.erase(components_.begin() + nc - 2,
components_.begin() + nc - 1);
nc--;
}
KALDI_ASSERT(index == NumUpdatableComponents());
KALDI_VLOG(1) << "Changes to learning rates: " << changes_str;
KALDI_VLOG(1) << "Dot product of model with validation gradient is "
<< dotprod_str;
std::ostringstream lrate_str;
for (size_t i = 0; i < new_lrates.size(); i++)
lrate_str << new_lrates[i] << ' ';
KALDI_VLOG(1) << "Learning rates are " << lrate_str.str();
}
// note: it could be a child class of AffineComponent.
AffineComponent *ac = dynamic_cast<AffineComponent*>(components_[nc - 2]);
if (ac == NULL)
KALDI_ERR << "Network doesn't have expected structure (didn't find final "
<< "AffineComponent).";
ac->Resize(ac->InputDim(), new_num_pdfs);
// Remove the softmax component, and replace it with a new one
delete components_[nc - 1];
components_[nc - 1] = new SoftmaxComponent(new_num_pdfs);
this->Check();
}
int32 Nnet::NumUpdatableComponents() const {
int32 ans = 0;

View file

@ -234,21 +234,11 @@ class Nnet {
// with things of type NonlinearComponent.
/// [This function is only used in the binary nnet-train.cc which is currently not
/// being used]. This is used to separately adjust learning rates of each layer,
/// after each "phase" of training. We basically ask (using the validation
/// gradient), do we wish we had gone further in this direction? Yes->
/// increase learning rate, no -> decrease it. The inputs have dimension
/// NumUpdatableComponents().
void AdjustLearningRates(
const VectorBase<BaseFloat> &old_model_old_gradient,
const VectorBase<BaseFloat> &new_model_old_gradient,
const VectorBase<BaseFloat> &old_model_new_gradient,
const VectorBase<BaseFloat> &new_model_new_gradient,
BaseFloat measure_at, // where to measure gradient, on line between old
// and new model; 0.5 < measure_at <= 1.0.
BaseFloat learning_rate_ratio,
BaseFloat max_learning_rate);
/// This function is used when doing transfer learning to a new system. It
/// resizes the final affine and softmax components. If your system has a
/// SumGroupComponent before the final softmax, it will be discarded.
void ResizeOutputLayer(int32 new_num_pdfs);
/// Scale all the learning rates in the neural net by this factor.
void ScaleLearningRates(BaseFloat factor);

View file

@ -27,7 +27,7 @@ BINFILES = nnet-am-info nnet-init \
nnet-perturb-egs-fmllr nnet-get-weighted-egs nnet-adjust-priors \
cuda-compiled nnet-replace-last-layers nnet-am-switch-preconditioning \
nnet-train-simple-perturbed nnet-train-parallel-perturbed \
nnet1-to-raw-nnet raw-nnet-copy nnet-relabel-egs
nnet1-to-raw-nnet raw-nnet-copy nnet-relabel-egs nnet-am-reinitialize
OBJFILES =

View file

@ -23,6 +23,36 @@
#include "nnet2/combine-nnet-a.h"
#include "nnet2/am-nnet.h"
namespace kaldi {
void GetWeights(const std::string &weights_str,
int32 num_inputs,
vector<BaseFloat> *weights) {
KALDI_ASSERT(num_inputs >= 1);
if (!weights_str.empty()) {
SplitStringToFloats(weights_str, ":", true, weights);
if (weights->size() != num_inputs) {
KALDI_ERR << "--weights option must be a colon-separated list "
<< "with " << num_inputs << " elements, got: "
<< weights_str;
}
} else {
for (int32 i = 0; i < num_inputs; i++)
weights->push_back(1.0 / num_inputs);
}
// normalize the weights to sum to one.
float weight_sum = 0.0;
for (int32 i = 0; i < num_inputs; i++)
weight_sum += (*weights)[i];
for (int32 i = 0; i < num_inputs; i++)
(*weights)[i] = (*weights)[i] / weight_sum;
if (fabs(weight_sum - 1.0) > 0.01) {
KALDI_WARN << "Normalizing weights to sum to one, sum was " << weight_sum;
}
}
}
int main(int argc, char *argv[]) {
try {
@ -32,60 +62,109 @@ int main(int argc, char *argv[]) {
typedef kaldi::int64 int64;
const char *usage =
"This program average (or sums, if --sum=true) the parameters over a number of neural nets.\n"
"This program averages (or sums, if --sum=true) the parameters over a\n"
"number of neural nets. If you supply the option --skip-last-layer=true,\n"
"the parameters of the last updatable layer are copied from <model1> instead\n"
"of being averaged (useful in multi-language scenarios).\n"
"The --weights option can be used to weight each model differently.\n"
"\n"
"Usage: nnet-am-average [options] <model1> <model2> ... <modelN> <model-out>\n"
"\n"
"e.g.:\n"
" nnet-am-average 1.1.nnet 1.2.nnet 1.3.nnet 2.nnet\n";
bool binary_write = true;
bool sum = false;
ParseOptions po(usage);
po.Register("sum", &sum, "If true, sums instead of averages.");
po.Register("binary", &binary_write, "Write output in binary mode");
string weights_str;
bool skip_last_layer = false;
po.Register("weights", &weights_str, "Colon-separated list of weights, one "
"for each input model. These will be normalized to sum to one.");
po.Register("skip-last-layer", &skip_last_layer, "If true, averaging of "
"the last updatable layer is skipped (result comes from model1)");
po.Read(argc, argv);
if (po.NumArgs() < 2) {
po.PrintUsage();
exit(1);
}
std::string
nnet1_rxfilename = po.GetArg(1),
nnet_wxfilename = po.GetArg(po.NumArgs());
TransitionModel trans_model;
TransitionModel trans_model1;
AmNnet am_nnet1;
{
bool binary_read;
Input ki(nnet1_rxfilename, &binary_read);
trans_model.Read(ki.Stream(), binary_read);
trans_model1.Read(ki.Stream(), binary_read);
am_nnet1.Read(ki.Stream(), binary_read);
}
int32 num_inputs = po.NumArgs() - 1;
BaseFloat scale = (sum ? 1.0 : 1.0 / num_inputs);
am_nnet1.GetNnet().Scale(scale);
vector<BaseFloat> model_weights;
GetWeights(weights_str, num_inputs, &model_weights);
int32 c_begin = 0,
c_end = (skip_last_layer ?
am_nnet1.GetNnet().LastUpdatableComponent() :
am_nnet1.GetNnet().NumComponents());
KALDI_ASSERT(c_end != -1 && "Network has no updatable components.");
// scale the components - except the last layer, if skip_last_layer == true.
for (int32 c = c_begin; c < c_end; c++) {
UpdatableComponent *uc =
dynamic_cast<UpdatableComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
if (uc != NULL) uc->Scale(model_weights[0]);
NonlinearComponent *nc =
dynamic_cast<NonlinearComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
if (nc != NULL)
nc->Scale(model_weights[0]);
}
for (int32 i = 2; i <= num_inputs; i++) {
bool binary_read;
Input ki(po.GetArg(i), &binary_read);
TransitionModel trans_model;
trans_model.Read(ki.Stream(), binary_read);
AmNnet am_nnet;
am_nnet.Read(ki.Stream(), binary_read);
am_nnet1.GetNnet().AddNnet(scale, am_nnet.GetNnet());
for (int32 c = c_begin; c < c_end; c++) {
UpdatableComponent *uc_average =
dynamic_cast<UpdatableComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
const UpdatableComponent *uc_this =
dynamic_cast<const UpdatableComponent*>(&(am_nnet.GetNnet().GetComponent(c)));
if (uc_average != NULL) {
KALDI_ASSERT(uc_this != NULL &&
"Networks must have the same structure.");
uc_average->Add(model_weights[i-1], *uc_this);
}
NonlinearComponent *nc_average =
dynamic_cast<NonlinearComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
const NonlinearComponent *nc_this =
dynamic_cast<const NonlinearComponent*>(&(am_nnet.GetNnet().GetComponent(c)));
if (nc_average != NULL) {
KALDI_ASSERT(nc_this != NULL &&
"Networks must have the same structure.");
nc_average->Add(model_weights[i-1], *nc_this);
}
}
}
{
Output ko(nnet_wxfilename, binary_write);
trans_model.Write(ko.Stream(), binary_write);
trans_model1.Write(ko.Stream(), binary_write);
am_nnet1.Write(ko.Stream(), binary_write);
}
KALDI_LOG << "Averaged parameters of " << num_inputs
<< " neural nets, and wrote to " << nnet_wxfilename;
return 0; // it will throw an exception if there are any problems.
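// Example invocation (a sketch; model names hypothetical): average three models
// with explicit weights, keeping the last updatable layer from the first model:
//   nnet-am-average --weights=0.5:0.3:0.2 --skip-last-layer=true \
//     lang1/final.mdl lang2/final.mdl lang3/final.mdl avg.mdl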

View file

@ -36,6 +36,14 @@ int main(int argc, char *argv[]) {
" nnet-am-info 1.nnet\n";
ParseOptions po(usage);
bool print_learning_rates = false;
po.Register("print-learning-rates", &print_learning_rates,
"If true, instead of printing the normal info, print a "
"colon-separated list of the learning rates for each updatable "
"layer, suitable to give to nnet-am-copy as the argument to"
"--learning-rates.");
po.Read(argc, argv);
@ -55,9 +63,19 @@ int main(int argc, char *argv[]) {
am_nnet.Read(ki.Stream(), binary_read);
}
std::cout << am_nnet.Info();
if (print_learning_rates) {
Vector<BaseFloat> learning_rates(am_nnet.GetNnet().NumUpdatableComponents());
am_nnet.GetNnet().GetLearningRates(&learning_rates);
int32 nc = learning_rates.Dim();
for (int32 i = 0; i < nc; i++)
std::cout << learning_rates(i) << (i < nc - 1 ? ":" : "");
std::cout << std::endl;
KALDI_LOG << "Printed learning-rate info for " << nnet_rxfilename;
} else {
std::cout << am_nnet.Info();
KALDI_LOG << "Printed info about " << nnet_rxfilename;
}
KALDI_LOG << "Printed info about " << nnet_rxfilename;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
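// Example (a sketch; paths hypothetical): copy one model's learning rates onto another:
//   lrates=$(nnet-am-info --print-learning-rates=true exp/nnet2/10.mdl)
//   nnet-am-copy --learning-rates=$lrates exp/nnet2/src.mdl exp/nnet2/src_newlr.mdl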

View file

@ -0,0 +1,88 @@
// nnet2bin/nnet-am-reinitialize.cc
// Copyright 2014 Johns Hopkins University (author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet2/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
using namespace kaldi::nnet2;
typedef kaldi::int32 int32;
const char *usage =
"This program can used when transferring a neural net from one language\n"
"to another (or one tree to another). It takes a neural net and a\n"
"transition model from a different neural net, resizes the last layer\n"
"to match the new transition model, zeroes it, and writes out the new,\n"
"resized .mdl file. If the original model had been 'mixed-up', the associated\n"
"SumGroupComponent will be removed.\n"
"\n"
"Usage: nnet-am-reinitialize [options] <nnet-in> <new-transition-model> <nnet-out>\n"
"e.g.:\n"
" nnet-am-reinitialize 1.mdl exp/tri6/final.mdl 2.mdl\n";
bool binary_write = true;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
std::string nnet_rxfilename = po.GetArg(1),
transition_model_rxfilename = po.GetArg(2),
nnet_wxfilename = po.GetArg(3);
TransitionModel orig_trans_model;
AmNnet am_nnet;
{
bool binary;
Input ki(nnet_rxfilename, &binary);
orig_trans_model.Read(ki.Stream(), binary);
am_nnet.Read(ki.Stream(), binary);
}
TransitionModel new_trans_model;
ReadKaldiObject(transition_model_rxfilename, &new_trans_model);
am_nnet.ResizeOutputLayer(new_trans_model.NumPdfs());
{
Output ko(nnet_wxfilename, binary_write);
new_trans_model.Write(ko.Stream(), binary_write);
am_nnet.Write(ko.Stream(), binary_write);
}
KALDI_LOG << "Resized neural net from " << nnet_rxfilename
<< " to " << am_nnet.NumPdfs()
<< " pdfs, and wrote to " << nnet_wxfilename;
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}
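// Example workflow (a sketch; directory names hypothetical): resize a model trained
// on one language to another language's tree, then inspect the result:
//   nnet-am-reinitialize exp_lang1/nnet_a/final.mdl exp_lang2/tri5/final.mdl exp_lang2/nnet_a/0.mdl
//   nnet-am-info exp_lang2/nnet_a/0.mdl   # number of pdfs should now match the tri5 tree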

View file

@ -33,10 +33,10 @@ int main(int argc, char *argv[]) {
const char *usage =
"Get examples of data for discriminative neural network training;\n"
"each one corresponds to part of a file, of variable (and configurable\n"
"each one corresponds to part of a file, of variable (and configurable)\n"
"length.\n"
"\n"
"Usage: nnet-get-egs-discriminative [options] <model|transition-model> "
"Usage: nnet-get-egs-discriminative [options] <model> "
"<features-rspecifier> <ali-rspecifier> <den-lat-rspecifier> "
"<training-examples-out>\n"
"\n"