Mirror of https://github.com/mozilla/kaldi.git
Applied patch from BOLT system.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4673 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Parent: 167e2a676d
Commit: de10f1506d
@@ -0,0 +1,95 @@
#!/bin/bash

# This is to be run after run_nnet2_multisplice.sh.
# It demonstrates discriminative training for the online-nnet2 models.

. cmd.sh

stage=1
train_stage=-10
use_gpu=true
srcdir=exp/nnet2_online/nnet_ms_a_online
criterion=smbr
learning_rate=0.0016

drop_frames=false  # only relevant for MMI

. cmd.sh
. ./path.sh
. ./utils/parse_options.sh

if [ ! -f $srcdir/final.mdl ]; then
  echo "$0: expected $srcdir/final.mdl to exist; first run run_nnet2_multisplice.sh."
  exit 1;
fi

if $use_gpu; then
  if ! cuda-compiled; then
    cat <<EOF && exit 1
This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.  Otherwise, call this script with --use-gpu false.
EOF
  fi
  parallel_opts="-l gpu=1"
  num_threads=1
  minibatch_size=512
else
  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
  # almost the same, but this may be a little bit slow.
  num_threads=16
  minibatch_size=128
  parallel_opts="-pe smp $num_threads"
fi

if [ $stage -le 1 ]; then
  # Use wide beams because this is RM; these values would be too high for other setups.
  nj=30
  num_threads=6
  steps/online/nnet2/make_denlats.sh --cmd "$decode_cmd -l mem_free=1G,ram_free=1G -pe smp $num_threads" \
    --nj $nj --sub-split 40 --num-threads "$num_threads" --beam 20.0 --lattice-beam 10.0 \
    data/train data/lang $srcdir ${srcdir}_denlats || exit 1;
fi

if [ $stage -le 2 ]; then
  # Hardcode no-GPU for alignment, although you could use a GPU
  # (you wouldn't get good GPU utilization, though).
  nj=100
  use_gpu=no
  gpu_opts=
  steps/online/nnet2/align.sh --cmd "$decode_cmd $gpu_opts" --use-gpu "$use_gpu" \
    --nj $nj data/train data/lang $srcdir ${srcdir}_ali || exit 1;
fi

if [ $stage -le 3 ]; then
  # I tested the following with --max-temp-archives 3
  # to test other branches of the code.
  # (The brace expansion on the last line supplies the _ali, _denlats, source and
  #  _degs directories; see the expansion sketch just after this block.)
  steps/online/nnet2/get_egs_discriminative2.sh \
    --cmd "$decode_cmd -pe smp 5" \
    --criterion $criterion --drop-frames $drop_frames \
    data/train data/lang ${srcdir}{_ali,_denlats,,_degs} || exit 1;
fi
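# A minimal illustration (added for clarity; not part of the original recipe):
# the brace expansion above hands four separate directory arguments to the
# egs-dumping script.  Assuming srcdir=exp/nnet2_online/nnet_ms_a_online, it
# expands to the alignment dir, the denominator-lattice dir, the source
# online-model dir, and the degs dir to be created, in that order:
if false; then  # illustration only, never executed
  srcdir=exp/nnet2_online/nnet_ms_a_online
  echo ${srcdir}{_ali,_denlats,,_degs}
  # prints: exp/nnet2_online/nnet_ms_a_online_ali exp/nnet2_online/nnet_ms_a_online_denlats
  #         exp/nnet2_online/nnet_ms_a_online exp/nnet2_online/nnet_ms_a_online_degs
fi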

if [ $stage -le 4 ]; then
  steps/nnet2/train_discriminative2.sh --cmd "$decode_cmd $parallel_opts" \
    --learning-rate $learning_rate \
    --criterion $criterion --drop-frames $drop_frames \
    --num-epochs 6 \
    --num-jobs-nnet 2 --num-threads $num_threads \
    ${srcdir}_degs ${srcdir}_${criterion}_${learning_rate} || exit 1;
fi

if [ $stage -le 5 ]; then
  ln -sf $(readlink -f $srcdir/conf) ${srcdir}_${criterion}_${learning_rate}/conf  # so it acts like an online-decoding directory

  for epoch in 0 1 2 3 4 5 6; do
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      --iter epoch$epoch exp/tri3b/graph data/test ${srcdir}_${criterion}_${learning_rate}/decode_epoch$epoch &
    steps/online/nnet2/decode.sh --config conf/decode.config --cmd "$decode_cmd" --nj 20 \
      --iter epoch$epoch exp/tri3b/graph_ug data/test ${srcdir}_${criterion}_${learning_rate}/decode_ug_epoch$epoch &
  done
  wait
  for dir in ${srcdir}_${criterion}_${learning_rate}/decode*; do grep WER $dir/wer_* | utils/best_wer.sh; done
fi

@@ -63,7 +63,7 @@ oov=`cat $lang/oov.int` || exit 1;

mkdir -p $dir

cp -r $lang $dir/
cp -rH $lang $dir/

# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

@@ -57,7 +57,7 @@ oov=`cat $lang/oov.int` || exit 1;

mkdir -p $dir

cp -r $lang $dir/
cp -rH $lang $dir/

# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

@@ -66,7 +66,7 @@ oov=`cat $lang/oov.int` || exit 1;

mkdir -p $dir

cp -r $lang $dir/
cp -rH $lang $dir/

# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

@@ -3,12 +3,7 @@
# 2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0

# Computes training alignments using MLP model

# If you supply the "--use-graphs true" option, it will use the training
# graphs from the source directory (where the model is).  In this
# case the number of jobs must match with the source directory.

# Computes training alignments using DNN

# Begin configuration section.
nj=4

@@ -53,7 +53,6 @@ if [ $# != 4 ]; then
  echo " --splice-width <width;4> # Number of frames on each side to append for feature input"
  echo " --left-context <width;4> # Number of frames on left side to append for feature input, overrides splice-width"
  echo " --right-context <width;4> # Number of frames on right side to append for feature input, overrides splice-width"
  echo " # (note: we splice processed, typically 40-dimensional frames"
  echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
  echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
  echo " # very end."
@@ -106,7 +105,7 @@ if [ -f $data/utt2uniq ]; then
fi

awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
  utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;

[ -z "$transform_dir" ] && transform_dir=$alidir

@@ -210,7 +209,7 @@ if [ $stage -le 2 ]; then
    "ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
    "ark:$dir/egs/train_subset_all.egs" || touch $dir/.error &
  wait;
  [ -f $dir/.error ] && exit 1;
  [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
  echo "Getting subsets of validation examples for diagnostics and combination."
  $cmd $dir/log/create_valid_subset_combine.log \
    nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/egs/valid_all.egs \

@@ -75,7 +75,6 @@ if [ $# != 3 ]; then
  echo " --frames-per-eg <frames;8> # number of frames per eg on disk"
  echo " --left-context <width;4> # Number of frames on left side to append for feature input"
  echo " --right-context <width;4> # Number of frames on right side to append for feature input"
  echo " # (note: we splice processed, typically 40-dimensional frames"
  echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
  echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
  echo " # very end."
@@ -236,7 +235,6 @@ if [ $stage -le 2 ]; then
    gzip -c >$dir/ali_special.gz || exit 1;
  set +o pipefail; # unset the pipefail option.

  all_ids=$(seq -s, $nj)  # e.g. 1,2,...39,40
  $cmd $dir/log/create_valid_subset.log \
    nnet-get-egs $ivectors_opt $nnet_context_opts "$valid_feats" \
    "ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
@@ -246,7 +244,7 @@ if [ $stage -le 2 ]; then
    "ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
    "ark:$dir/train_subset_all.egs" || touch $dir/.error &
  wait;
  [ -f $dir/.error ] && exit 1;
  [ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1
  echo "... Getting subsets of validation examples for diagnostics and combination."
  $cmd $dir/log/create_valid_subset_combine.log \
    nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \

@@ -0,0 +1,300 @@
#!/bin/bash

# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.

# This script dumps examples for MPE or MMI or state-level minimum Bayes risk (sMBR)
# training of neural nets.  Note: for "criterion", smbr > mpe > mmi in terms of
# compatibility of the dumped egs, meaning you can use the egs dumped with
# --criterion smbr for MPE or MMI, and egs dumped with --criterion mpe for MMI
# training.  The discriminative training program itself doesn't enforce this and
# it would let you mix and match them arbitrarily; we are speaking in terms of
# the correctness of the algorithm that splits the lattices into pieces.

# Begin configuration section.
cmd=run.pl
criterion=smbr
drop_frames=false       # option relevant for MMI, affects how we dump examples.
samples_per_iter=400000 # measured in frames, not in "examples"
max_temp_archives=128   # maximum number of temp archives per input job, only
                        # affects the process of generating archives, not the
                        # final result.

stage=0

cleanup=true
transform_dir=       # If this is a SAT system, directory for transforms
online_ivector_dir=
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 6 ]; then
  echo "Usage: $0 [opts] <data> <lang> <ali-dir> <denlat-dir> <src-model-file> <degs-dir>"
  echo " e.g.: $0 data/train data/lang exp/tri3_ali exp/tri4_nnet_denlats exp/tri4/final.mdl exp/tri4_mpe/degs"
  echo ""
  echo "Main options (for others, see top of script file)"
  echo " --config <config-file> # config file containing options"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs (probably would be good to add -tc 5 or so if using"
  echo " # GridEngine (to avoid excessive NFS traffic)."
  echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
  echo " # process."
  echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
  echo " # the middle."
  echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
  echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
  echo " # online-neural-net setup. (but you may want to use"
  echo " # steps/online/nnet2/get_egs_discriminative2.sh instead)"
  exit 1;
fi

data=$1
lang=$2
alidir=$3
denlatdir=$4
src_model=$5
dir=$6

extra_files=
[ ! -z $online_ivector_dir ] && \
  extra_files="$online_ivector_dir/ivector_period $online_ivector_dir/ivector_online.scp"

# Check some files.
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/num_jobs $alidir/tree \
         $denlatdir/lat.1.gz $denlatdir/num_jobs $src_model $extra_files; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

mkdir -p $dir/log $dir/info || exit 1;

nj=$(cat $denlatdir/num_jobs) || exit 1; # $nj is the number of
                                         # splits of the denlats and alignments.

nj_ali=$(cat $alidir/num_jobs) || exit 1;

sdata=$data/split$nj
utils/split_data.sh $data $nj

if [ $nj_ali -eq $nj ]; then
  ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
else
  ali_rspecifier="scp:$dir/ali.scp"
  if [ $stage -le 1 ]; then
    echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
    all_ids=$(seq -s, $nj_ali)
    copy-int-vector --print-args=false \
      "ark:gunzip -c $alidir/ali.{$all_ids}.gz|" ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1;
  fi
fi

splice_opts=`cat $alidir/splice_opts 2>/dev/null`
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
cmvn_opts=`cat $alidir/cmvn_opts 2>/dev/null`
cp $alidir/splice_opts $dir 2>/dev/null
cp $alidir/cmvn_opts $dir 2>/dev/null
cp $alidir/tree $dir
cp $lang/phones/silence.csl $dir/info/
cp $src_model $dir/final.mdl || exit 1

if [ ! -z "$online_ivector_dir" ]; then
  ivector_period=$(cat $online_ivector_dir/ivector_period)
  ivector_dim=$(feat-to-dim scp:$online_ivector_dir/ivector_online.scp -) || exit 1;
  echo $ivector_dim >$dir/info/ivector_dim
  # the 'const_dim_opt' allows it to write only one iVector per example,
  # rather than one per time-index... it has to average over
  const_dim_opt="--const-feat-dim=$ivector_dim"
else
  echo 0 > $dir/info/ivector_dim
fi

## We don't support deltas here, only LDA or raw (mainly because deltas are less
## frequently used).
if [ -z $feat_type ]; then
  if [ -f $alidir/final.mat ] && [ ! -f $transform_dir/raw_trans.1 ]; then feat_type=lda; else feat_type=raw; fi
fi
echo "$0: feature type is $feat_type"

case $feat_type in
  raw) feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- |"
    ;;
  lda)
    splice_opts=`cat $alidir/splice_opts 2>/dev/null`
    cp $alidir/final.mat $dir
    feats="ark,s,cs:apply-cmvn $cmvn_opts --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
    ;;
  *) echo "$0: invalid feature type $feat_type" && exit 1;
esac

if [ -z "$transform_dir" ]; then
  if [ -f $transform_dir/trans.1 ] || [ -f $transform_dir/raw_trans.1 ]; then
    transform_dir=$alidir
  fi
fi

if [ ! -z "$transform_dir" ]; then
  echo "$0: using transforms from $transform_dir"
  [ ! -s $transform_dir/num_jobs ] && \
    echo "$0: expected $transform_dir/num_jobs to contain the number of jobs." && exit 1;
  nj_orig=$(cat $transform_dir/num_jobs)

  if [ $feat_type == "raw" ]; then trans=raw_trans;
  else trans=trans; fi
  if [ $feat_type == "lda" ] && ! cmp $transform_dir/final.mat $alidir/final.mat; then
    echo "$0: LDA transforms differ between $alidir and $transform_dir"
    exit 1;
  fi
  if [ ! -f $transform_dir/$trans.1 ]; then
    echo "$0: expected $transform_dir/$trans.1 to exist (--transform-dir option)"
    exit 1;
  fi
  if [ $nj -ne $nj_orig ]; then
    # Copy the transforms into an archive with an index.
    for n in $(seq $nj_orig); do cat $transform_dir/$trans.$n; done | \
      copy-feats ark:- ark,scp:$dir/$trans.ark,$dir/$trans.scp || exit 1;
    feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk scp:$dir/$trans.scp ark:- ark:- |"
  else
    # number of jobs matches with alignment dir.
    feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$transform_dir/$trans.JOB ark:- ark:- |"
  fi
fi
if [ ! -z $online_ivector_dir ]; then
  # add iVectors to the features.
  feats="$feats paste-feats --length-tolerance=$ivector_period ark:- 'ark,s,cs:utils/filter_scp.pl $sdata/JOB/utt2spk $online_ivector_dir/ivector_online.scp | subsample-feats --n=-$ivector_period scp:- ark:- |' ark:- |"
fi

if [ $stage -le 2 ]; then
  echo "$0: working out number of frames of training data"
  num_frames=$(steps/nnet2/get_num_frames.sh $data)

  echo $num_frames > $dir/info/num_frames

  # Working out total number of archives.  Add one on the assumption the
  # num-frames won't divide exactly, and we want to round up.
  num_archives=$[$num_frames/$samples_per_iter + 1]

  # the next few lines relate to how we may temporarily split each input job
  # into fewer than $num_archives pieces, to avoid using an excessive
  # number of filehandles.
  archive_ratio=$[$num_archives/$max_temp_archives+1]
  num_archives_temp=$[$num_archives/$archive_ratio]
  # change $num_archives slightly to make it an exact multiple
  # of $archive_ratio.
  num_archives=$[$num_archives_temp*$archive_ratio]

  echo $num_archives >$dir/info/num_archives || exit 1
  echo $num_archives_temp >$dir/info/num_archives_temp || exit 1

  frames_per_archive=$[$num_frames/$num_archives]

  # note, this is the number of frames per archive prior to discarding frames.
  echo $frames_per_archive > $dir/info/frames_per_archive
else
  num_archives=$(cat $dir/info/num_archives) || exit 1;
  num_archives_temp=$(cat $dir/info/num_archives_temp) || exit 1;
  frames_per_archive=$(cat $dir/info/frames_per_archive) || exit 1;
fi

echo "$0: Splitting the data up into $num_archives archives (using $num_archives_temp temporary pieces per input job)"
echo "$0: giving samples-per-iteration of $frames_per_archive (you requested $samples_per_iter)."

# we create these data links regardless of the stage, as there are situations
# where we would want to recreate a data link that had previously been deleted.

if [ -d $dir/storage ]; then
  echo "$0: creating data links for distributed storage of degs"
  # See utils/create_split_dir.pl for how this 'storage' directory is created.
  for x in $(seq $nj); do
    for y in $(seq $num_archives_temp); do
      utils/create_data_link.pl $dir/degs_orig.$x.$y.ark
    done
  done
  for z in $(seq $num_archives); do
    utils/create_data_link.pl $dir/degs.$z.ark
  done
  if [ $num_archives_temp -ne $num_archives ]; then
    for z in $(seq $num_archives); do
      utils/create_data_link.pl $dir/degs_temp.$z.ark
    done
  fi
fi

if [ $stage -le 3 ]; then
  echo "$0: getting initial training examples by splitting lattices"

  degs_list=$(for n in $(seq $num_archives_temp); do echo ark:$dir/degs_orig.JOB.$n.ark; done)

  $cmd JOB=1:$nj $dir/log/get_egs.JOB.log \
    nnet-get-egs-discriminative --criterion=$criterion --drop-frames=$drop_frames \
    "$src_model" "$feats" "$ali_rspecifier" "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz|" ark:- \| \
    nnet-copy-egs-discriminative $const_dim_opt ark:- $degs_list || exit 1;
  sleep 5;  # wait a bit so NFS has time to write files.
fi

if [ $stage -le 4 ]; then

  degs_list=$(for n in $(seq $nj); do echo $dir/degs_orig.$n.JOB.ark; done)

  if [ $num_archives -eq $num_archives_temp ]; then
    echo "$0: combining data into final archives and shuffling it"

    $cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
      cat $degs_list \| nnet-shuffle-egs-discriminative --srand=JOB ark:- \
      ark:$dir/degs.JOB.ark || exit 1;
  else
    echo "$0: combining and re-splitting data into un-shuffled versions of final archives."

    archive_ratio=$[$num_archives/$num_archives_temp]
    ! [ $archive_ratio -gt 1 ] && echo "$0: Bad archive_ratio $archive_ratio" && exit 1;

    # note: the \$[ .. ] won't be evaluated until the job gets executed.  The
    # aim is to write to the archives with the final numbering, 1
    # ... num_archives, which is more than num_archives_temp.  The list with
    # \$[... ] expressions in it computes the set of final indexes for each
    # temporary index.
    degs_list_out=$(for n in $(seq $archive_ratio); do echo "ark:$dir/degs_temp.\$[((JOB-1)*$archive_ratio)+$n].ark"; done)
    # e.g. if dir=foo and archive_ratio=2, we'd have
    # degs_list_out='foo/degs_temp.$[((JOB-1)*2)+1].ark foo/degs_temp.$[((JOB-1)*2)+2].ark'

    $cmd JOB=1:$num_archives_temp $dir/log/resplit.JOB.log \
      cat $degs_list \| nnet-copy-egs-discriminative --srand=JOB ark:- \
      $degs_list_out || exit 1;
  fi
fi

if [ $stage -le 5 ] && [ $num_archives -ne $num_archives_temp ]; then
  echo "$0: shuffling final archives."

  $cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
    nnet-shuffle-egs-discriminative --srand=JOB ark:$dir/degs_temp.JOB.ark \
    ark:$dir/degs.JOB.ark || exit 1
fi

if $cleanup; then
  echo "$0: removing temporary archives."
  for x in $(seq $nj); do
    for y in $(seq $num_archives_temp); do
      file=$dir/degs_orig.$x.$y.ark
      [ -L $file ] && rm $(readlink -f $file); rm $file
    done
  done
  if [ $num_archives_temp -ne $num_archives ]; then
    for z in $(seq $num_archives); do
      file=$dir/degs_temp.$z.ark
      [ -L $file ] && rm $(readlink -f $file); rm $file
    done
  fi
fi

echo "$0: Done."

@@ -10,16 +10,25 @@ if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -ne 1 ]; then
  echo "Usage: $0 <data-dir>"
  echo "Prints the number of frames of data in the data-dir, via sampling rather"
  echo "than trying to access all the data."
  (
    echo "Usage: $0 <data-dir>"
    echo "Prints the number of frames of data in the data-dir, via sampling rather"
    echo "than trying to access all the data."
  ) 1>&2
fi

data=$1

if [ ! -f $data/feats.scp ]; then
  echo "$0: expected $data/feats.scp to exist"
  exit 1;
  if [ -f $data/segments ]; then
    echo "$0: $data/feats.scp does not exist, but $data/segments does exist; using that and assuming 100 frames per second." 1>&2
    num_frames=$(cat $data/segments | awk '{x += $4 - $3;} END{print int(x*100);}') || exit 1;
    echo $num_frames
    exit 0;
  else
    echo "$0: neither $data/feats.scp nor $data/segments exist." 1>&2
    exit 1;
  fi
fi
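# Illustrative sketch (assumed example data, added for clarity; not part of the
# original script): at the assumed 100 frames per second, segments covering
# 2.5 s and 1.75 s in total give int((2.5 + 1.75) * 100) = 425 frames:
if false; then  # never executed
  printf 'utt1 rec1 0.00 2.50\nutt2 rec1 3.00 4.75\n' | \
    awk '{x += $4 - $3;} END{print int(x*100);}'  # prints 425
fi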

@@ -32,7 +32,7 @@ echo "$0 $@" # Print the command line for logging

if [ $# != 4 ]; then
  echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
  echo " e.g.: steps/make_denlats.sh data/train data/lang exp/tri1 exp/tri1_denlats"
  echo " e.g.: steps/make_denlats.sh data/train data/lang exp/nnet4 exp/nnet4_denlats"
  echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
  echo " plus transforms."
  echo ""

@@ -68,14 +68,12 @@ thread_string=
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"

mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

oov=`cat $lang/oov.int` || exit 1;

mkdir -p $dir

cp -r $lang $dir/
cp -rH $lang $dir/

# Compute grammar FST which corresponds to unigram decoding graph.
new_lang="$dir/"$(basename "$lang")

@@ -2,10 +2,8 @@

# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.

# This script does MPE or fMMI state-level minimum bayes risk (sMBR) training.
# Note: the temporary data is put in <exp-dir>/degs/, so if you want
# to use a different disk for that, just make that a soft link to some other
# volume.
# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training
# of neural nets.

# Begin configuration section.
cmd=run.pl

@@ -45,7 +43,6 @@ transform_dir=
degs_dir=
retroactive=false
online_ivector_dir=
use_preconditioning=false
# End configuration section.

@@ -76,7 +73,7 @@ if [ $# != 6 ]; then
  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
  echo " --samples-per-iter <#samples|200000> # Number of samples of data to process per iteration, per"
  echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
  echo " # process."
  echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
  echo " # the middle."
@@ -85,6 +82,8 @@ if [ $# != 6 ]; then
  echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
  echo " # changes across layers."
  echo " --degs-dir <dir|""> # Directory for discriminative examples, e.g. exp/foo/degs"
  echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
  echo " # where the numerator transition-id is not in the denominator lattice."
  echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
  echo " # online-neural-net setup."
  exit 1;
@@ -240,19 +239,17 @@ fi

if [ $stage -le -7 ]; then
  echo "$0: Copying initial model and modifying preconditioning setup"
  # We want online preconditioning with a larger number of samples of history, since
  # in this setup the frames are only randomized at the segment level so they are highly
  # correlated.  It might make sense to tune this a little, later on, although I doubt
  # it matters once it's large enough.

  if $use_preconditioning; then
    $cmd $dir/log/convert.log \
      nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
      nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
  else
    $cmd $dir/log/convert.log \
      nnet-am-copy --learning-rate=$learning_rate "$src_model" $dir/0.mdl || exit 1;
  fi
  # Note, the baseline model probably had preconditioning, and we'll keep it;
  # but we want online preconditioning with a larger number of samples of
  # history, since in this setup the frames are only randomized at the segment
  # level so they are highly correlated.  It might make sense to tune this a
  # little, later on, although I doubt it matters once the --num-samples-history
  # is large enough.

  $cmd $dir/log/convert.log \
    nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
    nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
fi

@@ -344,7 +341,7 @@ fi

x=0
while [ $x -lt $num_iters ]; do
  if [ $x -ge 0 ] && [ $stage -le $x ]; then
  if [ $stage -le $x ]; then

    echo "Training neural net (pass $x)"

|
|||
$dir/$[$x+1].JOB.mdl \
|
||||
|| exit 1;
|
||||
|
||||
nnets_list=
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
|
||||
done
|
||||
nnets_list=$(for n in $(seq $num_jobs_nnet); do echo $dir/$[$x+1].$n.mdl; done)
|
||||
|
||||
$cmd $dir/log/average.$x.log \
|
||||
nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;
|
||||
|
|
|
@@ -0,0 +1,219 @@
#!/bin/bash

# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.

# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training.
# This version (2) of the script uses a newer format for the discriminative-training
# egs, as obtained by steps/nnet2/get_egs_discriminative2.sh.

# Begin configuration section.
cmd=run.pl
num_epochs=4       # Number of epochs of training
learning_rate=0.00002
acoustic_scale=0.1 # acoustic scale for MMI/MPFE/SMBR training.
boost=0.0          # option relevant for MMI

criterion=smbr
drop_frames=false  # option relevant for MMI
num_jobs_nnet=4    # Number of neural net jobs to run in parallel.  Note: this
                   # will interact with the learning rates (if you decrease
                   # this, you'll have to decrease the learning rate, and vice
                   # versa).

modify_learning_rates=true
last_layer_factor=1.0  # relates to modify-learning-rates
first_layer_factor=1.0 # relates to modify-learning-rates
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
                         # on each iter.  You could set it to 0 or to a large value for complete
                         # randomization, but this would both consume memory and cause spikes in
                         # disk I/O.  Smaller is easier on disk and memory but less random.  It's
                         # not a huge deal though, as samples are anyway randomized right at the start.

stage=-3

num_threads=16  # this is the default but you may want to change it, e.g. to 1 if
                # using GPUs.
cleanup=true
retroactive=false
remove_egs=false
src_model=  # will default to $degs_dir/final.mdl
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 2 ]; then
  echo "Usage: $0 [opts] <degs-dir> <exp-dir>"
  echo " e.g.: $0 exp/tri4_mpe_degs exp/tri4_mpe"
  echo ""
  echo "You have to first call get_egs_discriminative2.sh to dump the egs."
  echo "Caution: the options 'drop_frames' and 'criterion' are taken here"
  echo "even though they were required also by get_egs_discriminative2.sh,"
  echo "and they should normally match."
  echo ""
  echo "Main options (for others, see top of script file)"
  echo " --config <config-file> # config file containing options"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --num-epochs <#epochs|4> # Number of epochs of training"
  echo " --initial-learning-rate <initial-learning-rate|0.0002> # Learning rate at start of training"
  echo " --final-learning-rate <final-learning-rate|0.0004> # Learning rate at end of training"
  echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
  echo " # training (will affect results as well as speed; try 8, 16)"
  echo " # Note: if you increase this, you may want to also increase"
  echo " # the learning rate. Also note: if there are fewer archives"
  echo " # of egs than this, it will get reduced automatically."
  echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
  echo " # as well as speed; may interact with batch size; if you increase"
  echo " # this, you may want to decrease the batch size. With GPU, must be 1."
  echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo " --stage <stage|-3> # Used to run a partially-completed training process from somewhere in"
  echo " # the middle."
  echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
  echo " --boost <boost|0.0> # Boosting factor for MMI (e.g., 0.1)"
  echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
  echo " # where the numerator transition-id is not in the denominator lattice."
  echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
  echo " # changes across layers."
  exit 1;
fi

degs_dir=$1
dir=$2

[ -z "$src_model" ] && src_model=$degs_dir/final.mdl

# Check some files.
for f in $degs_dir/degs.1.ark $degs_dir/info/{num_archives,silence.csl,frames_per_archive} $src_model; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

mkdir -p $dir/log || exit 1;

# copy some things
for f in splice_opts cmvn_opts tree final.mat; do
  if [ -f $degs_dir/$f ]; then
    cp $degs_dir/$f $dir/ || exit 1;
  fi
done

silphonelist=`cat $degs_dir/info/silence.csl` || exit 1;

num_archives=$(cat $degs_dir/info/num_archives) || exit 1;

if [ $num_jobs_nnet -gt $num_archives ]; then
  echo "$0: num-jobs-nnet $num_jobs_nnet exceeds number of archives $num_archives,"
  echo " ... setting it to $num_archives."
  num_jobs_nnet=$num_archives
fi

num_iters=$[($num_epochs*$num_archives)/$num_jobs_nnet]

echo "$0: Will train for $num_epochs epochs = $num_iters iterations"

if [ $stage -le -1 ]; then
  echo "$0: Copying initial model and modifying preconditioning setup"

  # Note, the baseline model probably had preconditioning, and we'll keep it;
  # but we want online preconditioning with a larger number of samples of
  # history, since in this setup the frames are only randomized at the segment
  # level so they are highly correlated.  It might make sense to tune this a
  # little, later on, although I doubt it matters once the --num-samples-history
  # is large enough.

  $cmd $dir/log/convert.log \
    nnet-am-copy --learning-rate=$learning_rate "$src_model" - \| \
    nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/0.mdl || exit 1;
fi

if [ $num_threads -eq 1 ]; then
  train_suffix="-simple"  # this enables us to use GPU code if
                          # we have just one thread.
else
  train_suffix="-parallel --num-threads=$num_threads"
fi

x=0
while [ $x -lt $num_iters ]; do
  if [ $stage -le $x ]; then

    echo "Training neural net (pass $x)"

    # The \$ below delays the evaluation of the expression until the script runs (and JOB
    # will be replaced by the job-id).  That expression in $[..] is responsible for
    # choosing the archive indexes to use for each job on each iteration... we cycle through
    # all archives.

    $cmd JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
      nnet-combine-egs-discriminative \
      "ark:$degs_dir/degs.\$[((JOB-1+($x*$num_jobs_nnet))%$num_archives)+1].ark" ark:- \| \
      nnet-train-discriminative$train_suffix --silence-phones=$silphonelist \
      --criterion=$criterion --drop-frames=$drop_frames \
      --boost=$boost --acoustic-scale=$acoustic_scale \
      $dir/$x.mdl ark:- $dir/$[$x+1].JOB.mdl || exit 1;
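    # Illustrative sketch (added for clarity; assumed numbers, not part of the
    # original script): with num_jobs_nnet=2 and num_archives=5, the archive-index
    # expression above cycles through all archives; e.g. on iteration x=3, jobs 1
    # and 2 read archives 2 and 3 respectively.
    if false; then  # never executed
      num_jobs_nnet=2; num_archives=5; x=3
      for JOB in 1 2; do echo $[((JOB-1+($x*$num_jobs_nnet))%$num_archives)+1]; done  # prints 2, then 3
    fi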

    nnets_list=$(for n in $(seq $num_jobs_nnet); do echo $dir/$[$x+1].$n.mdl; done)

    $cmd $dir/log/average.$x.log \
      nnet-am-average $nnets_list $dir/$[$x+1].mdl || exit 1;

    if $modify_learning_rates; then
      $cmd $dir/log/modify_learning_rates.$x.log \
        nnet-modify-learning-rates --retroactive=$retroactive \
        --last-layer-factor=$last_layer_factor \
        --first-layer-factor=$first_layer_factor \
        $dir/$x.mdl $dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
    fi
    rm $nnets_list
  fi

  x=$[$x+1]
done

rm $dir/final.mdl 2>/dev/null
ln -s $x.mdl $dir/final.mdl

echo Done

epoch_final_iters=
for e in $(seq 0 $num_epochs); do
  x=$[($e*$num_archives)/$num_jobs_nnet]  # gives the iteration number.
  ln -sf $x.mdl $dir/epoch$e.mdl
  epoch_final_iters="$epoch_final_iters $x"
done

# function to remove egs that might be soft links.
remove () { for x in $*; do [ -L $x ] && rm $(readlink -f $x); rm $x; done }

if $cleanup && $remove_egs; then  # note: this is false by default.
  echo Removing training examples
  for n in $(seq $num_archives); do
    remove $degs_dir/degs.*
  done
fi

if $cleanup; then
  echo Removing most of the models
  for x in `seq 0 $num_iters`; do
    if ! echo $epoch_final_iters | grep -w $x >/dev/null; then
      # if $x is not an epoch-final iteration..
      rm $dir/$x.mdl 2>/dev/null
    fi
  done
fi

@@ -0,0 +1,304 @@
#!/bin/bash

# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.

# This script does MPE or MMI or state-level minimum bayes risk (sMBR) training,
# in the multi-language or at least multi-model setting where you have multiple "degs" directories.
# The input "degs" directories must be dumped by one of the get_egs_discriminative2.sh scripts.

# Begin configuration section.
cmd=run.pl
num_epochs=4       # Number of epochs of training
learning_rate=0.00002
acoustic_scale=0.1 # acoustic scale for MMI/MPFE/SMBR training.
boost=0.0          # option relevant for MMI

criterion=smbr
drop_frames=false  # option relevant for MMI
num_jobs_nnet="4 4"  # Number of neural net jobs to run in parallel, one per
                     # language.  Note: this will interact with the learning
                     # rates (if you decrease this, you'll have to decrease
                     # the learning rate, and vice versa).

modify_learning_rates=true
last_layer_factor=1.0  # relates to modify-learning-rates
first_layer_factor=1.0 # relates to modify-learning-rates
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
                         # on each iter.  You could set it to 0 or to a large value for complete
                         # randomization, but this would both consume memory and cause spikes in
                         # disk I/O.  Smaller is easier on disk and memory but less random.  It's
                         # not a huge deal though, as samples are anyway randomized right at the start.

stage=-3

num_threads=16  # this is the default but you may want to change it, e.g. to 1 if
                # using GPUs.
cleanup=true
retroactive=false
remove_egs=false
src_models=  # can be used to override the defaults of <degs-dir1>/final.mdl <degs-dir2>/final.mdl .. etc.
             # set this to a space-separated list.
# End configuration section.

echo "$0 $@"  # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# -lt 3 ]; then
  echo "Usage: $0 [opts] <degs-dir1> <degs-dir2> ... <degs-dirN> <exp-dir>"
  echo " e.g.: $0 exp/tri4_mpe_degs exp_other_lang/tri4_mpe_degs exp/tri4_mpe_multilang"
  echo ""
  echo "You have to first call get_egs_discriminative2.sh to dump the egs."
  echo "Caution: the options 'drop_frames' and 'criterion' are taken here"
  echo "even though they were required also by get_egs_discriminative2.sh,"
  echo "and they should normally match."
  echo ""
  echo "Main options (for others, see top of script file)"
  echo " --config <config-file> # config file containing options"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --num-epochs <#epochs|4> # Number of epochs of training"
  echo " --initial-learning-rate <initial-learning-rate|0.0002> # Learning rate at start of training"
  echo " --final-learning-rate <final-learning-rate|0.0004> # Learning rate at end of training"
  echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
  echo " # training (will affect results as well as speed; try 8, 16)"
  echo " # Note: if you increase this, you may want to also increase"
  echo " # the learning rate. Also note: if there are fewer archives"
  echo " # of egs than this, it will get reduced automatically."
  echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
  echo " # as well as speed; may interact with batch size; if you increase"
  echo " # this, you may want to decrease the batch size. With GPU, must be 1."
  echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
  echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
  echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
  echo " --stage <stage|-3> # Used to run a partially-completed training process from somewhere in"
  echo " # the middle."
  echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
  echo " --boost <boost|0.0> # Boosting factor for MMI (e.g., 0.1)"
  echo " --drop-frames <true,false|false> # Option that affects MMI training: if true, we exclude gradients from frames"
  echo " # where the numerator transition-id is not in the denominator lattice."
  echo " --modify-learning-rates <true,false|false> # If true, modify learning rates to try to equalize relative"
  echo " # changes across layers."
  exit 1;
fi

argv=("$@")
num_args=$#
num_lang=$[$num_args-1]

dir=${argv[$num_args-1]}

num_jobs_nnet_array=($num_jobs_nnet)
! [ "${#num_jobs_nnet_array[@]}" -eq "$num_lang" ] && \
  echo "$0: --num-jobs-nnet option must have size equal to the number of languages" && exit 1;

for lang in $(seq 0 $[$num_lang-1]); do
  degs_dir[$lang]=${argv[$lang]}
done

if [ ! -z "$src_models" ]; then
  src_model_array=($src_models)
  ! [ "${#src_model_array[@]}" -eq "$num_lang" ] && \
    echo "$0: --src-models option must have size equal to the number of languages" && exit 1;
else
  for lang in $(seq 0 $[$num_lang-1]); do
    src_model_array[$lang]=${degs_dir[$lang]}/final.mdl
  done
fi

mkdir -p $dir/log || exit 1;

for lang in $(seq 0 $[$num_lang-1]); do
  this_degs_dir=${degs_dir[$lang]}
  mdl=${src_model_array[$lang]}
  this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
  # Check inputs
  for f in $this_degs_dir/degs.1.ark $this_degs_dir/info/{num_archives,silence.csl,frames_per_archive} $mdl; do
    [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
  done
  mkdir -p $dir/$lang/log || exit 1;

  # check for valid num-jobs-nnet.
  ! [ $this_num_jobs_nnet -gt 0 ] && echo "Bad num-jobs-nnet option '$num_jobs_nnet'" && exit 1;
  this_num_archives=$(cat $this_degs_dir/info/num_archives) || exit 1;
  num_archives_array[$lang]=$this_num_archives
  silphonelist_array[$lang]=$(cat $this_degs_dir/info/silence.csl) || exit 1;

  if [ $this_num_jobs_nnet -gt $this_num_archives ]; then
    echo "$0: num-jobs-nnet $this_num_jobs_nnet exceeds number of archives $this_num_archives"
    echo " ... for language $lang; setting it to $this_num_archives."
    num_jobs_nnet_array[$lang]=$this_num_archives
  fi

  # copy some things from the input directories.
  for f in splice_opts cmvn_opts tree final.mat; do
    if [ -f $this_degs_dir/$f ]; then
      cp $this_degs_dir/$f $dir/$lang/ || exit 1;
    fi
  done
  if [ -f $this_degs_dir/conf ]; then
    ln -sf $(readlink -f $this_degs_dir/conf) $dir/ || exit 1;
  fi
done

# work out number of iterations.
num_archives0=$(cat ${degs_dir[0]}/info/num_archives) || exit 1;
num_jobs_nnet0=${num_jobs_nnet_array[0]}

! [ $num_epochs -gt 0 ] && echo "Error: num-epochs $num_epochs is not valid" && exit 1;

num_iters=$[($num_epochs*$num_archives0)/$num_jobs_nnet0]

echo "$0: Will train for $num_epochs epochs = $num_iters iterations (measured on language 0)"
# Work out the number of epochs we train for on the other languages... this is
# just informational.
for lang in $(seq 1 $[$num_lang-1]); do
  this_degs_dir=${degs_dir[$lang]}
  this_num_archives=${num_archives_array[$lang]}
  this_num_epochs=$[($num_iters*${num_jobs_nnet_array[$lang]})/$this_num_archives]
  echo "$0: $num_iters iterations is approximately $this_num_epochs epochs for language $lang"
done
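# Worked example (added for illustration, with assumed numbers): with
# --num-jobs-nnet "4 4", 4 epochs, 40 archives for language 0 and 60 archives
# for language 1, the two calculations above give:
#   num_iters          = (4 * 40) / 4  = 40 iterations (set by language 0)
#   epochs for lang 1  = (40 * 4) / 60 = 2 epochs (integer division)
if false; then  # never executed
  echo $[ (4 * 40) / 4 ]   # 40
  echo $[ (40 * 4) / 60 ]  # 2
fi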

if [ $stage -le -1 ]; then
  echo "$0: Copying initial models and modifying preconditioning setups"

  # Note, the baseline model probably had preconditioning, and we'll keep it;
  # but we want online preconditioning with a larger number of samples of
  # history, since in this setup the frames are only randomized at the segment
  # level so they are highly correlated.  It might make sense to tune this a
  # little, later on, although I doubt it matters once the --num-samples-history
  # is large enough.

  for lang in $(seq 0 $[$num_lang-1]); do
    $cmd $dir/$lang/log/convert.log \
      nnet-am-copy --learning-rate=$learning_rate ${src_model_array[$lang]} - \| \
      nnet-am-switch-preconditioning --num-samples-history=50000 - $dir/$lang/0.mdl || exit 1;
  done
fi

if [ $num_threads -eq 1 ]; then
  train_suffix="-simple"  # this enables us to use GPU code if
                          # we have just one thread.
else
  train_suffix="-parallel --num-threads=$num_threads"
fi

x=0
while [ $x -lt $num_iters ]; do
  if [ $stage -le $x ]; then

    echo "Training neural net (pass $x)"

    rm $dir/.error 2>/dev/null

    for lang in $(seq 0 $[$num_lang-1]); do
      this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
      this_num_archives=${num_archives_array[$lang]}
      this_degs_dir=${degs_dir[$lang]}
      this_silphonelist=${silphonelist_array[$lang]}

      # The \$ below delays the evaluation of the expression until the script runs (and JOB
      # will be replaced by the job-id).  That expression in $[..] is responsible for
      # choosing the archive indexes to use for each job on each iteration... we cycle through
      # all archives.

      (
        $cmd JOB=1:$this_num_jobs_nnet $dir/$lang/log/train.$x.JOB.log \
          nnet-combine-egs-discriminative \
          "ark:$this_degs_dir/degs.\$[((JOB-1+($x*$this_num_jobs_nnet))%$this_num_archives)+1].ark" ark:- \| \
          nnet-train-discriminative$train_suffix --silence-phones=$this_silphonelist \
          --criterion=$criterion --drop-frames=$drop_frames \
          --boost=$boost --acoustic-scale=$acoustic_scale \
          $dir/$lang/$x.mdl ark:- $dir/$lang/$[$x+1].JOB.mdl || exit 1;

        nnets_list=$(for n in $(seq $this_num_jobs_nnet); do echo $dir/$lang/$[$x+1].$n.mdl; done)

        # produce an average just within this language.
        $cmd $dir/$lang/log/average.$x.log \
          nnet-am-average $nnets_list $dir/$lang/$[$x+1].tmp.mdl || exit 1;

        rm $nnets_list
      ) || touch $dir/.error &
    done
    wait
    [ -f $dir/.error ] && echo "$0: error on pass $x" && exit 1

    # apply the modify-learning-rates thing to the model for the zero'th language;
    # we'll use the resulting learning rates for the other languages.
    if $modify_learning_rates; then
      $cmd $dir/log/modify_learning_rates.$x.log \
        nnet-modify-learning-rates --retroactive=$retroactive \
        --last-layer-factor=$last_layer_factor \
        --first-layer-factor=$first_layer_factor \
        $dir/0/$x.mdl $dir/0/$[$x+1].tmp.mdl $dir/0/$[$x+1].tmp.mdl || exit 1;
    fi

    nnets_list=$(for lang in $(seq 0 $[$num_lang-1]); do echo $dir/$lang/$[$x+1].tmp.mdl; done)
    weights_csl=$(echo $num_jobs_nnet | sed 's/ /:/g')  # get as colon separated list.

    # the next command produces the cross-language averaged model containing the
    # final layer corresponding to language zero.  Note, if we did modify-learning-rates,
    # it will also have the modified learning rates.
    $cmd $dir/log/average.$x.log \
      nnet-am-average --weights=$weights_csl --skip-last-layer=true \
      $nnets_list $dir/0/$[$x+1].mdl || exit 1;
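    # Illustrative note (added for clarity; not part of the original script): with
    # --num-jobs-nnet "4 4", weights_csl above becomes "4:4", so the hidden layers
    # of the two per-language models are averaged with equal weight, while the
    # final layer is taken from language 0 (the first model in the list).
    if false; then  # never executed
      num_jobs_nnet="4 4"
      echo $num_jobs_nnet | sed 's/ /:/g'  # prints 4:4
    fi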

    # we'll transfer these learning rates to the other models.
    learning_rates=$(nnet-am-info --print-learning-rates=true $dir/0/$[$x+1].mdl 2>/dev/null)

    for lang in $(seq 1 $[$num_lang-1]); do
      # the next command takes the averaged hidden parameters from language zero, and
      # the last layer from language $lang.  It's not really doing averaging.
      # we use nnet-am-copy to transfer the learning rates from model zero.
      $cmd $dir/$lang/log/combine_average.$x.log \
        nnet-am-average --weights=0.0:1.0 --skip-last-layer=true \
        $dir/$lang/$[$x+1].tmp.mdl $dir/0/$[$x+1].mdl - \| \
        nnet-am-copy --learning-rates=$learning_rates - $dir/$lang/$[$x+1].mdl || exit 1;
    done

    $cleanup && rm $dir/*/$[$x+1].tmp.mdl

  fi

  x=$[$x+1]
done

for lang in $(seq 0 $[$num_lang-1]); do
  rm $dir/$lang/final.mdl 2>/dev/null
  ln -s $x.mdl $dir/$lang/final.mdl

  epoch_final_iters=
  for e in $(seq 0 $num_epochs); do
    x=$[($e*$num_archives0)/$num_jobs_nnet0]  # gives the iteration number.
    ln -sf $x.mdl $dir/$lang/epoch$e.mdl
    epoch_final_iters="$epoch_final_iters $x"
  done

  if $cleanup; then
    echo "Removing most of the models for language $lang"
    for x in `seq 0 $num_iters`; do
      if ! echo $epoch_final_iters | grep -w $x >/dev/null; then
        # if $x is not an epoch-final iteration..
        rm $dir/$lang/$x.mdl 2>/dev/null
      fi
    done
  fi
done

echo Done

@@ -64,8 +64,6 @@ if [ $# != 3 ]; then
  echo " --num-epochs <#epochs|15> # Number of epochs of training"
  echo " # while reducing learning rate (determines #iterations, together"
  echo " # with --samples-per-iter and --num-jobs-nnet)"
  echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training"
  echo " # after learning rate fully reduced"
  echo " --learning-rate-factor<factor|1.0> # Factor (e.g. 0.2) by which to change learning rate"
  echo " # during the course of training"
  echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"

@ -0,0 +1,351 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey).
|
||||
# Apache 2.0.
|
||||
|
||||
# This script further trains an already-existing neural network,
|
||||
# given an existing model and an examples (egs/) directory.
|
||||
# This version of the script epects an egs/ directory in the newer
|
||||
# format, as created by get_egs2.sh.
|
||||
#
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
num_epochs=10 # Number of epochs of training; number of iterations is
|
||||
# worked out from this.
|
||||
num_iters_final=20 # Maximum number of final iterations to give to the
|
||||
# optimization over the validation set.
|
||||
learning_rate_factor=1.0 # You can use this to gradually decrease the learning
|
||||
# rate during training (e.g. use 0.2); the initial
|
||||
# learning rates are as specified in the model, but it
|
||||
# will decrease slightly on each iteration to achieve
|
||||
# this ratio.
|
||||
|
||||
combine=true # controls whether or not to do the final model combination.
|
||||
combine_regularizer=1.0e-14 # Small regularizer so that parameters won't go crazy.
|
||||
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
|
||||
# to the final 'combine' stage, but these models will themselves be averages of
|
||||
# iteration-number ranges.
|
||||
|
||||
minibatch_size=128 # by default use a smallish minibatch size for neural net
|
||||
# training; this controls instability which would otherwise
|
||||
# be a problem with multi-threaded update. Note: it also
|
||||
# interacts with the "preconditioned" update which generally
|
||||
# works better with larger minibatch size, so it's not
|
||||
# completely cost free.
|
||||
|
||||
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
|
||||
# on each iter. You could set it to 0 or to a large value for complete
|
||||
# randomization, but this would both consume memory and cause spikes in
|
||||
# disk I/O. Smaller is easier on disk and memory but less random. It's
|
||||
# not a huge deal though, as samples are anyway randomized right at the start.
|
||||
num_jobs_nnet=4
|
||||
mix_up=0
|
||||
stage=-5
|
||||
num_threads=16
|
||||
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # by default we use 16 threads; this lets the queue know.
|
||||
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
|
||||
combine_num_threads=8
|
||||
cleanup=true
|
||||
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
|
||||
# more than enough.
|
||||
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
|
||||
remove_egs=false
|
||||
# End configuration section.
|
||||
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: $0 [opts] <input-model> <egs-dir> <exp-dir>"
|
||||
echo " e.g.: $0 exp/nnet4c/final.mdl exp/nnet4c/egs exp/nnet5c/"
|
||||
echo "see also the older script update_nnet.sh which creates the egs itself"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --num-epochs <#epochs|15> # Number of epochs of training"
|
||||
echo " # while reducing learning rate (determines #iterations, together"
|
||||
echo " # with --samples-per-iter and --num-jobs-nnet)"
|
||||
echo " --num-jobs-nnet <#jobs|4> # Number of neural-net jobs to run in parallel"
|
||||
echo " --learning-rate-factor<factor|1.0> # Factor (e.g. 0.2) by which to change learning rate"
|
||||
echo " # during the course of training"
|
||||
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
|
||||
echo " # as well as speed; may interact with batch size; if you increase"
|
||||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
|
||||
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
echo " --num-iters-final <#iters|20> # Number of final iterations to give to nnet-combine-fast to "
|
||||
echo " # interpolate parameters (the weights are learned with a validation set)"
|
||||
echo " --mix-up <#mix|0> # If specified, add quasi-targets, analogous to a mixture of Gaussians vs."
|
||||
echo " # single Gaussians. Only do this if not already mixed-up."
|
||||
echo " --combine <true or false|true> # If true, do the final nnet-combine-fast stage."
|
||||
echo " --stage <stage|-5> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
input_mdl=$1
|
||||
egs_dir=$2
|
||||
dir=$3
|
||||
|
||||
# Check some files.
|
||||
for f in $input_mdl $egs_dir/egs.1.ark; do
|
||||
[ ! -f $f ] && echo "$0: expected file $f to exist." && exit 1;
|
||||
done
|
||||
|
||||
mkdir -p $dir/log
|
||||
|
||||
# Copy some things from the directory where the input model is located, to the
|
||||
# experimental directory, if they exist. These might be needed for things like
|
||||
# decoding.
|
||||
input_dir=$(dirname $input_mdl);
|
||||
for f in tree splice_opts cmvn_opts final.mat; do
|
||||
if [ -f $input_dir/$f ]; then
|
||||
cp $input_dir/$f $dir/
|
||||
fi
|
||||
done
|
||||
|
||||
frames_per_eg=$(cat $egs_dir/info/frames_per_eg) || { echo "error: no such file $egs_dir/info/frames_per_eg"; exit 1; }
|
||||
num_archives=$(cat $egs_dir/info/num_archives) || { echo "error: no such file $egs_dir/info/num_archives"; exit 1; }
|
||||
|
||||
# num_archives_expanded considers each separate label-position from
|
||||
# 0..frames_per_eg-1 to be a separate archive.
|
||||
num_archives_expanded=$[$num_archives*$frames_per_eg]
|
||||
|
||||
if [ $num_jobs_nnet -gt $num_archives_expanded ]; then
|
||||
echo "$0: --num-jobs-nnet cannot exceed num-archives*frames-per-eg which is $num_archives_expanded"
|
||||
echo "$0: setting --num-jobs-nnet to $num_archives_expanded"
|
||||
num_jobs_nnet=$num_archives_expanded
|
||||
fi
|
||||
|
||||
|
||||
# set num_iters so that as close as possible, we process the data $num_epochs
|
||||
# times, i.e. $num_iters*$num_jobs_nnet == $num_epochs*$num_archives_expanded
|
||||
num_iters=$[($num_epochs*$num_archives_expanded)/$num_jobs_nnet]
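# Worked example with made-up numbers: if num_epochs=20, num_archives=10 and
# frames_per_eg=8 (so num_archives_expanded=80), and num_jobs_nnet=4, then
# num_iters = (20*80)/4 = 400, and each iteration consumes 4 of the 80
# archive/frame-shift combinations.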
|
||||
|
||||
echo "$0: Will train for $num_epochs epochs = $num_iters iterations"
|
||||
|
||||
per_iter_learning_rate_factor=$(perl -e "print ($learning_rate_factor ** (1.0 / $num_iters));")
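# For example (hypothetical values, using the 0.2 suggested in the help text):
# with learning_rate_factor=0.2 and num_iters=100, the per-iteration factor is
# 0.2^(1/100) ~= 0.984, so the learning rate decays by a total factor of 0.2
# over the whole run.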
|
||||
|
||||
mix_up_iter=$[$num_iters/2]
|
||||
|
||||
if [ $num_threads -eq 1 ]; then
|
||||
parallel_suffix="-simple" # this enables us to use GPU code if
|
||||
# we have just one thread.
|
||||
parallel_train_opts=
|
||||
if ! cuda-compiled; then
|
||||
echo "$0: WARNING: you are running with one thread but you have not compiled"
|
||||
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
|
||||
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
|
||||
fi
|
||||
else
|
||||
parallel_suffix="-parallel"
|
||||
parallel_train_opts="--num-threads=$num_threads"
|
||||
fi
|
||||
|
||||
|
||||
approx_iters_per_epoch=$[$num_iters/$num_epochs]
|
||||
# First work out how many models we want to combine over in the final
|
||||
# nnet-combine-fast invocation. This equals
|
||||
# min(max(max_models_combine, iters_per_epoch),
|
||||
# 2/3 * iters_after_mixup)
|
||||
num_models_combine=$max_models_combine
|
||||
if [ $num_models_combine -lt $approx_iters_per_epoch ]; then
|
||||
num_models_combine=$approx_iters_per_epoch
|
||||
fi
|
||||
iters_after_mixup_23=$[(($num_iters-$mix_up_iter-1)*2)/3]
|
||||
if [ $num_models_combine -gt $iters_after_mixup_23 ]; then
|
||||
num_models_combine=$iters_after_mixup_23
|
||||
fi
|
||||
first_model_combine=$[$num_iters-$num_models_combine+1]
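# Continuing the made-up numbers above (num_iters=400, num_epochs=20, so
# approx_iters_per_epoch=20 and mix_up_iter=200): iters_after_mixup_23 =
# ((400-200-1)*2)/3 = 132, num_models_combine stays at max_models_combine=20,
# and first_model_combine = 400-20+1 = 381.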
|
||||
|
||||
cp $input_mdl $dir/0.mdl || exit 1;
|
||||
|
||||
x=0
|
||||
|
||||
while [ $x -lt $num_iters ]; do
|
||||
if [ $x -ge 0 ] && [ $stage -le $x ]; then
|
||||
# Set off jobs doing some diagnostics, in the background.
|
||||
$cmd $dir/log/compute_prob_valid.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$egs_dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
|
||||
if [ $x -gt 0 ] && [ ! -f $dir/log/mix_up.$[$x-1].log ]; then
|
||||
$cmd $dir/log/progress.$x.log \
|
||||
nnet-show-progress --use-gpu=no $dir/$[$x-1].mdl $dir/$x.mdl ark:$egs_dir/train_diagnostic.egs &
|
||||
fi
|
||||
|
||||
echo "Training neural net (pass $x)"
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
( # this sub-shell is so that when we "wait" below,
|
||||
# we only wait for the training jobs that we just spawned,
|
||||
# not the diagnostic jobs that we spawned above.
|
||||
|
||||
# We can't easily use a single parallel SGE job to do the main training,
|
||||
# because the computation of which archive and which --frame option
|
||||
# to use for each job is a little complex, so we spawn each one separately.
|
||||
for n in $(seq $num_jobs_nnet); do
|
||||
k=$[$x*$num_jobs_nnet + $n - 1]; # k is a zero-based index that we'll derive
|
||||
# the other indexes from.
|
||||
archive=$[($k%$num_archives)+1]; # work out the 1-based archive index.
|
||||
frame=$[(($k/$num_archives)%$frames_per_eg)]; # work out the 0-based frame
|
||||
# index; this increases more slowly than the archive index because the
|
||||
# same archive with different frame indexes will give similar gradients,
|
||||
# so we want to separate them in time.
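# Hypothetical illustration (num_archives=10, frames_per_eg=8, num_jobs_nnet=4):
# on iteration x=3, job n=2 gives k = 3*4+2-1 = 13, hence archive = (13%10)+1 = 4
# and frame = (13/10)%8 = 1.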
|
||||
|
||||
$cmd $parallel_opts $dir/log/train.$x.$n.log \
|
||||
nnet-train$parallel_suffix $parallel_train_opts \
|
||||
--minibatch-size=$minibatch_size --srand=$x $dir/$x.mdl \
|
||||
"ark:nnet-copy-egs --frame=$frame ark:$egs_dir/egs.$archive.ark ark:-|nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-|" \
|
||||
$dir/$[$x+1].$n.mdl || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
)
|
||||
# the error message below is not that informative, but $cmd will
|
||||
# have printed a more specific one.
|
||||
[ -f $dir/.error ] && echo "$0: error on iteration $x of training" && exit 1;
|
||||
|
||||
nnets_list=
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
nnets_list="$nnets_list $dir/$[$x+1].$n.mdl"
|
||||
done
|
||||
|
||||
$cmd $dir/log/average.$x.log \
|
||||
nnet-am-average $nnets_list - \| \
|
||||
nnet-am-copy --learning-rate-factor=$per_iter_learning_rate_factor - $dir/$[$x+1].mdl || exit 1;
|
||||
|
||||
if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
|
||||
# mix up.
|
||||
echo "Mixing up to $mix_up components"
|
||||
$cmd $dir/log/mix_up.$x.log \
|
||||
nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
|
||||
$dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
rm $nnets_list
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
|
||||
if [ $stage -le $num_iters ]; then
|
||||
echo "Doing final combination to produce final.mdl"
|
||||
|
||||
# Now do combination.
|
||||
nnets_list=()
|
||||
# the if..else..fi statement below sets 'nnets_list'.
|
||||
if [ $max_models_combine -lt $num_models_combine ]; then
|
||||
# The number of models to combine is too large, e.g. > 20. In this case,
|
||||
# each argument to nnet-combine-fast will be an average of multiple models.
|
||||
cur_offset=0 # current offset from first_model_combine.
|
||||
for n in $(seq $max_models_combine); do
|
||||
next_offset=$[($n*$num_models_combine)/$max_models_combine]
|
||||
sub_list=""
|
||||
for o in $(seq $cur_offset $[$next_offset-1]); do
|
||||
iter=$[$first_model_combine+$o]
|
||||
mdl=$dir/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
sub_list="$sub_list $mdl"
|
||||
done
|
||||
nnets_list[$[$n-1]]="nnet-am-average $sub_list - |"
|
||||
cur_offset=$next_offset
|
||||
done
|
||||
else
|
||||
nnets_list=
|
||||
for n in $(seq 0 $[num_models_combine-1]); do
|
||||
iter=$[$first_model_combine+$n]
|
||||
mdl=$dir/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
nnets_list[$n]=$mdl
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Below, use --use-gpu=no to disable nnet-combine-fast from using a GPU, as
|
||||
# if there are many models it can give out-of-memory error; set num-threads to 8
|
||||
# to speed it up (this isn't ideal...)
|
||||
num_egs=`nnet-copy-egs ark:$egs_dir/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
|
||||
mb=$[($num_egs+$combine_num_threads-1)/$combine_num_threads]
|
||||
[ $mb -gt 512 ] && mb=512
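# E.g. (made-up count): if combine.egs contains num_egs=3000 examples and
# combine_num_threads=8, then mb = ceil(3000/8) = 375; with num_egs=10000 the
# cap would kick in and mb would be 512.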
|
||||
# Setting --initial-model to a large value makes it initialize the combination
|
||||
# with the average of all the models. It's important not to start with a
|
||||
# single model, or, due to the invariance to scaling that these nonlinearities
|
||||
# give us, we get zero diagonal entries in the fisher matrix that
|
||||
# nnet-combine-fast uses for scaling, which, after flooring and inversion, have
|
||||
# the effect that the initial model chosen gets much higher learning rates
|
||||
# than the others. This prevents the optimization from working well.
|
||||
$cmd $combine_parallel_opts $dir/log/combine.log \
|
||||
nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
|
||||
--num-threads=$combine_num_threads \
|
||||
--verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:$egs_dir/combine.egs \
|
||||
$dir/final.mdl || exit 1;
|
||||
|
||||
# Normalize stddev for affine or block affine layers that are followed by a
|
||||
# pnorm layer and then a normalize layer.
|
||||
$cmd $dir/log/normalize.log \
|
||||
nnet-normalize-stddev $dir/final.mdl $dir/final.mdl || exit 1;
|
||||
|
||||
# Compute the probability of the final, combined model with
|
||||
# the same subset we used for the previous compute_probs, as the
|
||||
# different subsets will lead to different probs.
|
||||
$cmd $dir/log/compute_prob_valid.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$egs_dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$egs_dir/train_diagnostic.egs &
|
||||
fi
|
||||
|
||||
if [ $stage -le $[$num_iters+1] ]; then
|
||||
echo "Getting average posterior for purposes of adjusting the priors."
|
||||
# Note: this just uses CPUs, using a smallish subset of data.
|
||||
rm $dir/post.$x.*.vec 2>/dev/null
|
||||
$cmd JOB=1:$num_jobs_compute_prior $dir/log/get_post.$x.JOB.log \
|
||||
nnet-copy-egs --frame=random --srand=JOB ark:$egs_dir/egs.1.ark ark:- \| \
|
||||
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
|
||||
nnet-compute-from-egs "nnet-to-raw-nnet $dir/final.mdl -|" ark:- ark:- \| \
|
||||
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/post.$x.JOB.vec || exit 1;
|
||||
|
||||
sleep 3; # make sure there is time for $dir/post.$x.*.vec to appear.
|
||||
|
||||
$cmd $dir/log/vector_sum.$x.log \
|
||||
vector-sum $dir/post.$x.*.vec $dir/post.$x.vec || exit 1;
|
||||
|
||||
rm $dir/post.$x.*.vec;
|
||||
|
||||
echo "Re-adjusting priors based on computed posteriors"
|
||||
$cmd $dir/log/adjust_priors.final.log \
|
||||
nnet-adjust-priors $dir/final.mdl $dir/post.$x.vec $dir/final.mdl || exit 1;
|
||||
fi
|
||||
|
||||
|
||||
if [ ! -f $dir/final.mdl ]; then
|
||||
echo "$0: $dir/final.mdl does not exist."
|
||||
# we don't want to clean up if the training didn't succeed.
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
sleep 2
|
||||
|
||||
echo Done
|
||||
|
||||
if $cleanup; then
|
||||
echo Cleaning up data
|
||||
if $remove_egs && [[ $egs_dir =~ $dir/egs* ]]; then
|
||||
steps/nnet2/remove_egs.sh $egs_dir
|
||||
fi
|
||||
|
||||
echo Removing most of the models
|
||||
for x in `seq 0 $num_iters`; do
|
||||
if [ $[$x%100] -ne 0 ] && [ $x -ne $num_iters ] && [ -f $dir/$x.mdl ]; then
|
||||
# delete all but every 100th model; don't delete the ones which combine to form the final model.
|
||||
rm $dir/$x.mdl
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
|
@ -0,0 +1,543 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey).
|
||||
# 2013 Xiaohui Zhang
|
||||
# 2013 Guoguo Chen
|
||||
# 2014 Vimal Manohar
|
||||
# 2014 Vijayaditya Peddinti
|
||||
# Apache 2.0.
|
||||
|
||||
|
||||
# train_multilang2.sh is for multi-language training of neural nets. It
|
||||
# takes multiple egs directories which must be created by get_egs2.sh, and the
|
||||
# corresponding alignment directories (only needed for training the transition
|
||||
# models).
|
||||
#
|
||||
# This script requires you to supply a neural net partially trained for the 1st
|
||||
# language, by one of the regular training scripts, to be used as the initial
|
||||
# neural net (for use by other languages, we'll discard the last layer); it
|
||||
# should not have been subject to "mix-up" (since this script does mix-up), or
|
||||
# combination (since it would increase the parameter range to a value too large
|
||||
# to be compatible with our normal learning rate schedules).
|
||||
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
num_epochs=10 # Number of epochs of training (for first language);
|
||||
# the number of iterations is worked out from this.
|
||||
initial_learning_rate=0.04
|
||||
final_learning_rate=0.004
|
||||
|
||||
minibatch_size=128 # by default use a smallish minibatch size for neural net
|
||||
# training; this controls instability which would otherwise
|
||||
# be a problem with multi-threaded update.
|
||||
|
||||
num_jobs_nnet="2 2" # Number of neural net jobs to run in parallel. This option
|
||||
# is passed to get_egs.sh. Array must be same length
|
||||
# as number of separate languages.
|
||||
num_jobs_compute_prior=10 # these are single-threaded, run on CPU.
|
||||
|
||||
max_models_combine=20 # The "max_models_combine" is the maximum number of models we give
|
||||
# to the final 'combine' stage, but these models will themselves be averages of
|
||||
# iteration-number ranges.
|
||||
|
||||
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
|
||||
# on each iter. You could set it to 0 or to a large value for complete
|
||||
# randomization, but this would both consume memory and cause spikes in
|
||||
# disk I/O. Smaller is easier on disk and memory but less random. It's
|
||||
# not a huge deal though, as samples are anyway randomized right at the start.
|
||||
# (the point of this is to get data in different minibatches on different iterations,
|
||||
# since in the preconditioning method, 2 samples in the same minibatch can
|
||||
# affect each other's gradients.)
|
||||
|
||||
prior_subset_size=10000 # 10k samples per job, for computing priors. Should be
|
||||
# more than enough.
|
||||
|
||||
stage=-4
|
||||
|
||||
|
||||
mix_up="0 0" # Number of components to mix up to (should be > #tree leaves, if
|
||||
# specified.) An array, one per language.
|
||||
|
||||
num_threads=16 # default suitable for CPU-based training
|
||||
parallel_opts="-pe smp 16 -l ram_free=1G,mem_free=1G" # default suitable for CPU-based training.
|
||||
# by default we use 16 threads; this lets the queue know.
|
||||
# note: parallel_opts doesn't automatically get adjusted if you adjust num-threads.
|
||||
combine_num_threads=8
|
||||
combine_parallel_opts="-pe smp 8" # queue options for the "combine" stage.
|
||||
cleanup=false # while testing, leaving cleanup=false.
|
||||
# End configuration section.
|
||||
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# -lt 6 -o $[$#%2] -ne 0 ]; then
|
||||
# num-args must be at least 6 and must be even.
|
||||
echo "Usage: $0 [opts] <ali1> <egs1> <ali2> <egs2> ... <aliN> <egsN> <input-model> <exp-dir>"
|
||||
echo " e.g.: $0 data/train exp/tri6_ali exp/tri6_egs exp_lang2/tri6_ali exp_lang2/tri6_egs exp/dnn6a/10.mdl exp/tri6_multilang"
|
||||
echo ""
|
||||
echo "Note: the first egs/ali should correspond to the language that you really want; this"
|
||||
echo "only affects how the num-epochs is computed, and which model we link to final.mdl."
|
||||
echo ""
|
||||
echo "The --num-jobs-nnet should be an array saying how many jobs to allocate to each language,"
|
||||
echo "e.g. --num-jobs-nnet '2 4'"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --num-epochs <#epochs|15> # Number of epochs of training (figured from 1st corpus)"
|
||||
echo " --initial-learning-rate <initial-learning-rate|0.02> # Learning rate at start of training, e.g. 0.02 for small"
|
||||
echo " # data, 0.01 for large data"
|
||||
echo " --final-learning-rate <final-learning-rate|0.004> # Learning rate at end of training, e.g. 0.004 for small"
|
||||
echo " # data, 0.001 for large data"
|
||||
echo " --num-hidden-layers <#hidden-layers|2> # Number of hidden layers, e.g. 2 for 3 hours of data, 4 for 100hrs"
|
||||
echo " --add-layers-period <#iters|2> # Number of iterations between adding hidden layers"
|
||||
echo " --mix-up <#pseudo-gaussians|0> # Can be used to have multiple targets in final output layer,"
|
||||
echo " # per context-dependent state. Try a number several times #states."
|
||||
echo " --num-jobs-nnet <num-jobs|8> # Number of parallel jobs to use for main neural net"
|
||||
echo " # training (will affect results as well as speed; try 8, 16)"
|
||||
echo " # Note: if you increase this, you may want to also increase"
|
||||
echo " # the learning rate."
|
||||
echo " --num-threads <num-threads|16> # Number of parallel threads per job (will affect results"
|
||||
echo " # as well as speed; may interact with batch size; if you increase"
|
||||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16 -l ram_free=1G,mem_free=1G\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads... note, you might have to reduce mem_free,ram_free"
|
||||
echo " # versus your defaults, because it gets multiplied by the -pe smp argument."
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
echo " --splice-indexes <string|layer0/-4:-3:-2:-1:0:1:2:3:4> "
|
||||
echo " # Frame indices used for each splice layer."
|
||||
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
|
||||
echo " # (note: we splice processed, typically 40-dimensional frames"
|
||||
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --stage <stage|-4> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
||||
argv=("$@")
|
||||
num_args=$#
|
||||
num_lang=$[($num_args-2)/2]
|
||||
|
||||
dir=${argv[$num_args-1]}
|
||||
input_model=${argv[$num_args-2]}
|
||||
|
||||
[ ! -f $input_model ] && echo "$0: Input model $input_model does not exist" && exit 1;
|
||||
|
||||
mkdir -p $dir/log
|
||||
|
||||
num_jobs_nnet_array=($num_jobs_nnet)
|
||||
! [ "${#num_jobs_nnet_array[@]}" -eq "$num_lang" ] && \
|
||||
echo "$0: --num-jobs-nnet option must have size equal to the number of languages" && exit 1;
|
||||
mix_up_array=($mix_up)
|
||||
! [ "${#mix_up_array[@]}" -eq "$num_lang" ] && \
|
||||
echo "$0: --mix-up option must have size equal to the number of languages" && exit 1;
|
||||
|
||||
|
||||
# Language index starts from 0.
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
alidir[$lang]=${argv[$lang*2]}
|
||||
egs_dir[$lang]=${argv[$lang*2+1]}
|
||||
for f in ${egs_dir[$lang]}/info/frames_per_eg ${egs_dir[$lang]}/egs.1.ark ${alidir[$lang]}/ali.1.gz ${alidir[$lang]}/tree; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
mkdir -p $dir/$lang/log
|
||||
cp ${alidir[$lang]}/tree $dir/$lang/ || exit 1;
|
||||
|
||||
for f in ${egs_dir[$lang]}/{final.mat,cmvn_opts,splice_opts}; do
|
||||
# Copy any of these files that exist.
|
||||
cp $f $dir/$lang/ 2>/dev/null
|
||||
done
|
||||
done
|
||||
|
||||
|
||||
for x in final.mat cmvn_opts splice_opts; do
|
||||
if [ -f $dir/0/$x ]; then
|
||||
for lang in $(seq 1 $[$num_lang-1]); do
|
||||
if ! cmp $dir/0/$x $dir/$lang/$x; then
|
||||
echo "$0: warning: files $dir/0/$x and $dir/$lang/$x are not identical."
|
||||
fi
|
||||
done
|
||||
fi
|
||||
done
|
||||
|
||||
# the input model is supposed to correspond to the first language.
|
||||
nnet-am-copy --learning-rate=$initial_learning_rate $input_model $dir/0/0.mdl
|
||||
|
||||
if nnet-am-info --print-args=false $dir/0/0.mdl | grep SumGroupComponent >/dev/null 2>&1; then
|
||||
if [ "${mix_up_array[0]}" != "0" ]; then
|
||||
echo "$0: Your input model already has mixtures, but you are asking to mix it up."
|
||||
echo " ... best to use a model without mixtures as input. (e.g., earlier iter)."
|
||||
exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le -4 ]; then
|
||||
echo "$0: initializing models for other languages"
|
||||
for lang in $(seq 1 $[$num_lang-1]); do
|
||||
# create the initial models for the other languages.
|
||||
$cmd $dir/$lang/log/reinitialize.log \
|
||||
nnet-am-reinitialize $input_model ${alidir[$lang]}/final.mdl $dir/$lang/0.mdl || exit 1;
|
||||
done
|
||||
fi
|
||||
|
||||
if [ $stage -le -3 ]; then
|
||||
echo "Training transition probabilities and setting priors"
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
$cmd $dir/$lang/log/train_trans.log \
|
||||
nnet-train-transitions $dir/$lang/0.mdl "ark:gunzip -c ${alidir[$lang]}/ali.*.gz|" $dir/$lang/0.mdl \
|
||||
|| exit 1;
|
||||
done
|
||||
fi
|
||||
|
||||
# Work out the number of iterations... the number of epochs refers to the
|
||||
# first language (language zero) and this, together with the num-jobs-nnet for
|
||||
# that language and details of the egs, determines the number of iterations.
|
||||
|
||||
frames_per_eg0=$(cat ${egs_dir[0]}/info/frames_per_eg) || exit 1;
|
||||
num_archives0=$(cat ${egs_dir[0]}/info/num_archives) || exit 1;
|
||||
# num_archives_expanded considers each separate label-position from
|
||||
# 0..frames_per_eg-1 to be a separate archive.
|
||||
num_archives_expanded0=$[$num_archives0*$frames_per_eg0]
|
||||
|
||||
if [ ${num_jobs_nnet_array[0]} -gt $num_archives_expanded0 ]; then
|
||||
echo "$0: --num-jobs-nnet[0] cannot exceed num-archives*frames-per-eg which is $num_archives_expanded"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
# set num_iters so that as close as possible, we process the data $num_epochs
|
||||
# times, i.e. $num_iters*$num_jobs_nnet == $num_epochs*$num_archives_expanded
|
||||
num_iters=$[($num_epochs*$num_archives_expanded0)/${num_jobs_nnet_array[0]}]
|
||||
|
||||
echo "$0: Will train for $num_epochs epochs (of language 0) = $num_iters iterations"
|
||||
|
||||
! [ $num_iters -gt 0 ] && exit 1;
|
||||
|
||||
# Work out the number of epochs we train for on the other languages... this is
|
||||
# just informational.
|
||||
for lang in $(seq 1 $[$num_lang-1]); do
|
||||
frames_per_eg=$(cat ${egs_dir[$lang]}/info/frames_per_eg) || exit 1;
|
||||
num_archives=$(cat ${egs_dir[$lang]}/info/num_archives) || exit 1;
|
||||
num_archives_expanded=$[$num_archives*$frames_per_eg]
|
||||
this_num_epochs=$[($num_iters*${num_jobs_nnet_array[$lang]})/$num_archives_expanded]
|
||||
echo "$0: $num_iters iterations is approximately $num_epochs epochs for language $lang"
|
||||
done
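# For instance (made-up numbers): if language 1 has num_archives_expanded=200
# and num_jobs_nnet_array[1]=2, then with num_iters=400 it is trained for
# roughly (400*2)/200 = 4 epochs of its own data.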
|
||||
|
||||
# do any mixing-up after half the iters.
|
||||
mix_up_iter=$[$num_iters/2]
|
||||
|
||||
if [ $num_threads -eq 1 ]; then
|
||||
parallel_suffix="-simple" # this enables us to use GPU code if
|
||||
# we have just one thread.
|
||||
parallel_train_opts=
|
||||
if ! cuda-compiled; then
|
||||
echo "$0: WARNING: you are running with one thread but you have not compiled"
|
||||
echo " for CUDA. You may be running a setup optimized for GPUs. If you have"
|
||||
echo " GPUs and have nvcc installed, go to src/ and do ./configure; make"
|
||||
fi
|
||||
else
|
||||
parallel_suffix="-parallel"
|
||||
parallel_train_opts="--num-threads=$num_threads"
|
||||
fi
|
||||
|
||||
|
||||
approx_iters_per_epoch=$[$num_iters/$num_epochs]
|
||||
# First work out how many models we want to combine over in the final
|
||||
# nnet-combine-fast invocation. This equals
|
||||
# min(max(max_models_combine, iters_per_epoch),
|
||||
# 2/3 * iters_after_mixup).
|
||||
# We use the same numbers of iterations for all languages, even though it's just
|
||||
# worked out for the first language.
|
||||
num_models_combine=$max_models_combine
|
||||
if [ $num_models_combine -lt $approx_iters_per_epoch ]; then
|
||||
num_models_combine=$approx_iters_per_epoch
|
||||
fi
|
||||
iters_after_mixup_23=$[(($num_iters-$mix_up_iter-1)*2)/3]
|
||||
if [ $num_models_combine -gt $iters_after_mixup_23 ]; then
|
||||
num_models_combine=$iters_after_mixup_23
|
||||
fi
|
||||
first_model_combine=$[$num_iters-$num_models_combine+1]
|
||||
|
||||
x=0
|
||||
|
||||
|
||||
while [ $x -lt $num_iters ]; do
|
||||
|
||||
if [ $x -ge 0 ] && [ $stage -le $x ]; then
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
# Set off jobs doing some diagnostics, in the background.
|
||||
$cmd $dir/$lang/log/compute_prob_valid.$x.log \
|
||||
nnet-compute-prob $dir/$lang/$x.mdl ark:${egs_dir[$lang]}/valid_diagnostic.egs &
|
||||
$cmd $dir/$lang/log/compute_prob_train.$x.log \
|
||||
nnet-compute-prob $dir/$lang/$x.mdl ark:${egs_dir[$lang]}/train_diagnostic.egs &
|
||||
if [ $x -gt 0 ] && [ ! -f $dir/$lang/log/mix_up.$[$x-1].log ]; then
|
||||
$cmd $dir/$lang/log/progress.$x.log \
|
||||
nnet-show-progress --use-gpu=no $dir/$lang/$[$x-1].mdl $dir/$lang/$x.mdl \
|
||||
ark:${egs_dir[$lang]}/train_diagnostic.egs '&&' \
|
||||
nnet-am-info $dir/$lang/$x.mdl &
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Training neural net (pass $x)"
|
||||
|
||||
if [ $x -eq 0 ]; then
|
||||
# on iteration zero, use a smaller minibatch size and only one quarter of the
|
||||
# normal amount of training data: this will help, respectively, to ensure stability
|
||||
# and to stop the models from moving so far that averaging hurts.
|
||||
this_minibatch_size=$[$minibatch_size/2];
|
||||
this_keep_proportion=0.25
|
||||
else
|
||||
this_minibatch_size=$minibatch_size
|
||||
this_keep_proportion=1.0
|
||||
# use half the examples on iteration 1, out of a concern that the model-averaging
|
||||
# might not work if we move too far before getting close to convergence.
|
||||
[ $x -eq 1 ] && this_keep_proportion=0.5
|
||||
fi
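# To summarize the schedule above (assuming the default minibatch_size=128):
#   iter 0:  minibatch 64,  keep-proportion 0.25
#   iter 1:  minibatch 128, keep-proportion 0.5
#   iter 2+: minibatch 128, keep-proportion 1.0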
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
|
||||
( # this sub-shell is so that when we "wait" below,
|
||||
# we only wait for the training jobs that we just spawned,
|
||||
# not the diagnostic jobs that we spawned above.
|
||||
|
||||
# We can't easily use a single parallel SGE job to do the main training,
|
||||
# because the computation of which archive and which --frame option
|
||||
# to use for each job is a little complex, so we spawn each one separately.
|
||||
|
||||
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
|
||||
this_frames_per_eg=$(cat ${egs_dir[$lang]}/info/frames_per_eg) || exit 1;
|
||||
this_num_archives=$(cat ${egs_dir[$lang]}/info/num_archives) || exit 1;
|
||||
|
||||
! [ $this_num_jobs_nnet -gt 0 -a $this_frames_per_eg -gt 0 -a $this_num_archives -gt 0 ] && exit 1
|
||||
|
||||
for n in $(seq $this_num_jobs_nnet); do
|
||||
k=$[$x*$this_num_jobs_nnet + $n - 1]; # k is a zero-based index that we'll derive
|
||||
# the other indexes from.
|
||||
archive=$[($k%$this_num_archives)+1]; # work out the 1-based archive index.
|
||||
frame=$[(($k/$this_num_archives)%$this_frames_per_eg)];
|
||||
|
||||
$cmd $parallel_opts $dir/$lang/log/train.$x.$n.log \
|
||||
nnet-train$parallel_suffix $parallel_train_opts \
|
||||
--minibatch-size=$this_minibatch_size --srand=$x $dir/$lang/$x.mdl \
|
||||
"ark:nnet-copy-egs --keep-proportion=$this_keep_proportion --frame=$frame ark:${egs_dir[$lang]}/egs.$archive.ark ark:-|nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x ark:- ark:-|" \
|
||||
$dir/$lang/$[$x+1].$n.mdl || touch $dir/.error &
|
||||
done
|
||||
done
|
||||
wait
|
||||
)
|
||||
# the error message below is not that informative, but $cmd will
|
||||
# have printed a more specific one.
|
||||
[ -f $dir/.error ] && echo "$0: error on iteration $x of training" && exit 1;
|
||||
|
||||
|
||||
learning_rate=`perl -e '($x,$n,$i,$f)=@ARGV; print ($x >= $n ? $f : $i*exp($x*log($f/$i)/$n));' $[$x+1] $num_iters $initial_learning_rate $final_learning_rate`;
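# This is a geometric interpolation from initial_learning_rate to
# final_learning_rate.  E.g., with the defaults 0.04 and 0.004 and a
# hypothetical num_iters=200, the model written after iteration 99 gets
# 0.04 * 0.1^(100/200) ~= 0.0126.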
|
||||
|
||||
(
|
||||
# First average within each language. Use a sub-shell so "wait" won't
|
||||
# wait for the diagnostic jobs.
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
|
||||
nnets_list=$(for n in `seq 1 $this_num_jobs_nnet`; do echo $dir/$lang/$[$x+1].$n.mdl; done)
|
||||
# average the output of the different jobs.
|
||||
$cmd $dir/$lang/log/average.$x.log \
|
||||
nnet-am-average $nnets_list - \| \
|
||||
nnet-am-copy --learning-rate=$learning_rate - $dir/$lang/$[$x+1].tmp.mdl || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "$0: error averaging models on iteration $x of training" && exit 1;
|
||||
# Remove the models we just averaged.
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
this_num_jobs_nnet=${num_jobs_nnet_array[$lang]}
|
||||
for n in `seq 1 $this_num_jobs_nnet`; do rm $dir/$lang/$[$x+1].$n.mdl; done
|
||||
done
|
||||
)
|
||||
|
||||
|
||||
nnets_list=$(for lang in $(seq 0 $[$num_lang-1]); do echo $dir/$lang/$[$x+1].tmp.mdl; done)
|
||||
weights_csl=$(echo $num_jobs_nnet | sed 's/ /:/g') # get as colon separated list.
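# E.g. --num-jobs-nnet "2 4" (the value suggested in the usage message above)
# becomes --weights=2:4 below, presumably weighting each language's model in
# proportion to its number of training jobs when the hidden layers are averaged.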
|
||||
|
||||
# the next command produces the cross-language averaged model containing the
|
||||
# final layer corresponding to language zero.
|
||||
$cmd $dir/log/average.$x.log \
|
||||
nnet-am-average --weights=$weights_csl --skip-last-layer=true \
|
||||
$nnets_list $dir/0/$[$x+1].mdl || exit 1;
|
||||
|
||||
for lang in $(seq 1 $[$num_lang-1]); do
|
||||
# the next command takes the averaged hidden parameters from language zero, and
|
||||
# the last layer from language $lang. It's not really doing averaging.
|
||||
$cmd $dir/$lang/log/combine_average.$x.log \
|
||||
nnet-am-average --weights=0.0:1.0 --skip-last-layer=true \
|
||||
$dir/$lang/$[$x+1].tmp.mdl $dir/0/$[$x+1].mdl $dir/$lang/$[$x+1].mdl || exit 1;
|
||||
done
|
||||
|
||||
$cleanup && rm $dir/*/$[$x+1].tmp.mdl
|
||||
|
||||
if [ $x -eq $mix_up_iter ]; then
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
this_mix_up=${mix_up_array[$lang]}
|
||||
if [ $this_mix_up -gt 0 ]; then
|
||||
echo "$0: for language $lang, mixing up to $this_mix_up components"
|
||||
$cmd $dir/$lang/log/mix_up.$x.log \
|
||||
nnet-am-mixup --min-count=10 --num-mixtures=$this_mix_up \
|
||||
$dir/$lang/$[$x+1].mdl $dir/$lang/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Remove the temporary per-language models (already combined above).
|
||||
|
||||
rm $nnets_list
|
||||
|
||||
for lang in $(seq 0 $[$num_lang-1]); do # check new models exist, and clean up older ones.
|
||||
[ ! -f $dir/$lang/$[$x+1].mdl ] && echo "No such file $dir/$lang/$[$x+1].mdl" && exit 1;
|
||||
if [ -f $dir/$lang/$[$x-1].mdl ] && $cleanup && \
|
||||
[ $[($x-1)%100] -ne 0 ] && [ $[$x-1] -lt $first_model_combine ]; then
|
||||
rm $dir/$lang/$[$x-1].mdl
|
||||
fi
|
||||
done
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
|
||||
if [ $stage -le $num_iters ]; then
|
||||
echo "Doing combination to produce final models"
|
||||
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
nnets_list=()
|
||||
# the if..else..fi statement below sets 'nnets_list'.
|
||||
if [ $max_models_combine -lt $num_models_combine ]; then
|
||||
# The number of models to combine is too large, e.g. > 20. In this case,
|
||||
# each argument to nnet-combine-fast will be an average of multiple models.
|
||||
cur_offset=0 # current offset from first_model_combine.
|
||||
for n in $(seq $max_models_combine); do
|
||||
next_offset=$[($n*$num_models_combine)/$max_models_combine]
|
||||
sub_list=""
|
||||
for o in $(seq $cur_offset $[$next_offset-1]); do
|
||||
iter=$[$first_model_combine+$o]
|
||||
mdl=$dir/$lang/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
sub_list="$sub_list $mdl"
|
||||
done
|
||||
nnets_list[$[$n-1]]="nnet-am-average $sub_list - |"
|
||||
cur_offset=$next_offset
|
||||
done
|
||||
else
|
||||
nnets_list=
|
||||
for n in $(seq 0 $[num_models_combine-1]); do
|
||||
iter=$[$first_model_combine+$n]
|
||||
mdl=$dir/$lang/$iter.mdl
|
||||
[ ! -f $mdl ] && echo "Expected $mdl to exist" && exit 1;
|
||||
nnets_list[$n]=$mdl
|
||||
done
|
||||
fi
|
||||
|
||||
# Below, use --use-gpu=no to disable nnet-combine-fast from using a GPU, as
|
||||
# if there are many models it can give out-of-memory error; set num-threads
|
||||
# to 8 to speed it up (this isn't ideal...)
|
||||
num_egs=`nnet-copy-egs ark:${egs_dir[$lang]}/combine.egs ark:/dev/null 2>&1 | tail -n 1 | awk '{print $NF}'`
|
||||
|
||||
mb=$[($num_egs+$combine_num_threads-1)/$combine_num_threads]
|
||||
[ $mb -gt 512 ] && mb=512
|
||||
# Setting --initial-model to a large value makes it initialize the combination
|
||||
# with the average of all the models. It's important not to start with a
|
||||
# single model, or, due to the invariance to scaling that these nonlinearities
|
||||
# give us, we get zero diagonal entries in the fisher matrix that
|
||||
# nnet-combine-fast uses for scaling, which, after flooring and inversion, have
|
||||
# the effect that the initial model chosen gets much higher learning rates
|
||||
# than the others. This prevents the optimization from working well.
|
||||
$cmd $combine_parallel_opts $dir/$lang/log/combine.log \
|
||||
nnet-combine-fast --initial-model=100000 --num-lbfgs-iters=40 --use-gpu=no \
|
||||
--num-threads=$combine_num_threads \
|
||||
--verbose=3 --minibatch-size=$mb "${nnets_list[@]}" ark:${egs_dir[$lang]}/combine.egs \
|
||||
- \| nnet-normalize-stddev - $dir/$lang/final.mdl || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
|
||||
[ -f $dir/.error ] && echo "$0: error doing model combination" && exit 1;
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le $[$num_iters+1] ]; then
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
# Run the diagnostics for the final models.
|
||||
$cmd $dir/$lang/log/compute_prob_valid.final.log \
|
||||
nnet-compute-prob $dir/$lang/final.mdl ark:${egs_dir[$lang]}/valid_diagnostic.egs &
|
||||
$cmd $dir/$lang/log/compute_prob_train.final.log \
|
||||
nnet-compute-prob $dir/$lang/final.mdl ark:${egs_dir[$lang]}/train_diagnostic.egs &
|
||||
done
|
||||
wait
|
||||
fi
|
||||
|
||||
if [ $stage -le $[$num_iters+2] ]; then
|
||||
# Note: this just uses CPUs, using a smallish subset of data.
|
||||
|
||||
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
echo "$0: Getting average posterior for purposes of adjusting the priors (language $lang)."
|
||||
rm $dir/$lang/.error 2>/dev/null
|
||||
rm $dir/$lang/post.*.vec 2>/dev/null
|
||||
$cmd JOB=1:$num_jobs_compute_prior $dir/$lang/log/get_post.JOB.log \
|
||||
nnet-copy-egs --frame=random --srand=JOB ark:${egs_dir[$lang]}/egs.1.ark ark:- \| \
|
||||
nnet-subset-egs --srand=JOB --n=$prior_subset_size ark:- ark:- \| \
|
||||
nnet-compute-from-egs "nnet-to-raw-nnet $dir/$lang/final.mdl -|" ark:- ark:- \| \
|
||||
matrix-sum-rows ark:- ark:- \| vector-sum ark:- $dir/$lang/post.JOB.vec || touch $dir/$lang/.error &
|
||||
done
|
||||
echo "$0: ... waiting for jobs for all languages to complete."
|
||||
wait
|
||||
sleep 3; # make sure there is time for $dir/$lang/post.*.vec to appear.
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
[ -f $dir/$lang/.error ] && \
|
||||
echo "$0: error getting posteriors for adjusting the priors for language $lang" && exit 1;
|
||||
|
||||
$cmd $dir/$lang/log/vector_sum.log \
|
||||
vector-sum $dir/$lang/post.*.vec $dir/$lang/post.vec || exit 1;
|
||||
|
||||
rm $dir/$lang/post.*.vec;
|
||||
|
||||
echo "Re-adjusting priors based on computed posteriors for language $lang"
|
||||
$cmd $dir/$lang/log/adjust_priors.final.log \
|
||||
nnet-adjust-priors $dir/$lang/final.mdl $dir/$lang/post.vec $dir/$lang/final.mdl || exit 1;
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
|
||||
if [ ! -f $dir/$lang/final.mdl ]; then
|
||||
echo "$0: $dir/final.mdl does not exist."
|
||||
# we don't want to clean up if the training didn't succeed.
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
sleep 2
|
||||
|
||||
echo Done
|
||||
|
||||
if $cleanup; then
|
||||
echo Cleaning up data
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
    if [[ ${egs_dir[$lang]} =~ $dir/egs* ]]; then
|
||||
steps/nnet2/remove_egs.sh ${egs_dir[$lang]}
|
||||
fi
  done
|
||||
|
||||
echo Removing most of the models
|
||||
for x in `seq 0 $num_iters`; do
|
||||
for lang in $(seq 0 $[$num_lang-1]); do
      if [ $[$x%100] -ne 0 ] && [ $x -ne $num_iters ] && [ -f $dir/$lang/$x.mdl ]; then
|
||||
# delete all but every 100th model; don't delete the ones which combine to form the final model.
|
||||
rm $dir/$lang/$x.mdl
|
||||
fi
    done
|
||||
done
|
||||
fi
|
||||
|
||||
exit 0
|
|
@ -140,8 +140,8 @@ if [ $# != 4 ]; then
|
|||
echo " # Frame indices used for each splice layer."
|
||||
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
|
||||
echo " # (note: we splice processed, typically 40-dimensional frames"
|
||||
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --realign-epochs <list-of-epochs|\"\"> # A list of space-separated epoch indices the beginning of which"
|
||||
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
|
||||
echo " # realignment is to be done"
|
||||
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
|
||||
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"
|
||||
|
|
|
@ -133,8 +133,8 @@ if [ $# != 4 ]; then
|
|||
echo " # Frame indices used for each splice layer."
|
||||
echo " # Format : layer<hidden_layer_index>/<frame_indices>....layer<hidden_layer>/<frame_indices> "
|
||||
echo " # (note: we splice processed, typically 40-dimensional frames"
|
||||
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --realign-epochs <list-of-epochs|\"\"> # A list of space-separated epoch indices the beginning of which"
|
||||
echo " --lda-dim <dim|''> # Dimension to reduce spliced features to with LDA"
|
||||
echo " --realign-epochs <list-of-epochs|''> # A list of space-separated epoch indices the beginning of which"
|
||||
echo " # realignment is to be done"
|
||||
echo " --align-cmd (utils/run.pl|utils/queue.pl <queue opts>) # passed to align.sh"
|
||||
echo " --align-use-gpu (yes/no) # specify is gpu is to be used for realignment"
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2012 Brno University of Technology (Author: Karel Vesely)
|
||||
# 2013-2014 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0
|
||||
|
||||
# Computes training alignments using DNN. This takes as input a directory
|
||||
# prepared as for online-nnet2 decoding (e.g. by
|
||||
# steps/online/nnet2/prepare_online_decoding.sh), and it computes the features
|
||||
# directly from the wav.scp instead of relying on features dumped on disk;
|
||||
# this avoids the hassle of having to dump suitably matched features.
|
||||
|
||||
|
||||
# Begin configuration section.
|
||||
nj=4
|
||||
cmd=run.pl
|
||||
# Begin configuration.
|
||||
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
|
||||
beam=10
|
||||
retry_beam=40
|
||||
iter=final
|
||||
use_gpu=no
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
[ -f path.sh ] && . ./path.sh # source the path.
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: $0 <data-dir> <lang-dir> <src-dir> <align-dir>"
|
||||
echo "e.g.: $0 data/train data/lang exp/nnet4 exp/nnet4_ali"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --nj <nj> # number of parallel jobs"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
srcdir=$3
|
||||
dir=$4
|
||||
|
||||
oov=`cat $lang/oov.int` || exit 1;
|
||||
mkdir -p $dir/log
|
||||
echo $nj > $dir/num_jobs
|
||||
sdata=$data/split$nj
|
||||
[[ -d $sdata && $data/wav.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
|
||||
|
||||
|
||||
for f in $srcdir/tree $srcdir/${iter}.mdl $data/wav.scp $lang/L.fst \
|
||||
$srcdir/conf/online_nnet2_decoding.conf; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
cp $srcdir/{tree,${iter}.mdl} $dir || exit 1;
|
||||
|
||||
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
|
||||
|
||||
|
||||
if [ -f $data/segments ]; then
|
||||
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
|
||||
# previous utterances within a speaker, we do the filtering after extracting the features.
|
||||
echo "$0 [info]: segments file exists: using that."
|
||||
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
|
||||
else
|
||||
echo "$0 [info]: no segments file exists, using wav.scp."
|
||||
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
|
||||
fi
|
||||
|
||||
echo "$0: aligning data in $data using model from $srcdir, putting alignments in $dir"
|
||||
|
||||
tra="ark:utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt $sdata/JOB/text|";
|
||||
|
||||
$cmd JOB=1:$nj $dir/log/align.JOB.log \
|
||||
compile-train-graphs $dir/tree $srcdir/${iter}.mdl $lang/L.fst "$tra" ark:- \| \
|
||||
nnet-align-compiled $scale_opts --use-gpu=$use_gpu --beam=$beam --retry-beam=$retry_beam \
|
||||
$srcdir/${iter}.mdl ark:- "$feats" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
|
||||
|
||||
echo "$0: done aligning data."
|
||||
|
|
@ -78,4 +78,8 @@ for f in feats.scp segments wav.scp reco2file_and_channel text stm glm ctm; do
|
|||
done
|
||||
|
||||
echo "$0: copied data from $srcdir to $destdir, with --utts-per-spk-max $utts_per_spk_max"
|
||||
utils/validate_data_dir.sh $destdir
|
||||
opts=
|
||||
[ ! -f $srcdir/feats.scp ] && opts="--no-feats"
|
||||
[ ! -f $srcdir/text ] && opts="$opts --no-text"
|
||||
|
||||
utils/validate_data_dir.sh $opts $destdir
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
# it uses the program online2-wav-dump-feature to do all parts of feature
|
||||
# extraction: MFCC/PLP/fbank, possibly plus pitch, plus iVectors. This script
|
||||
# is intended mostly for cross-system training for online decoding, where you
|
||||
# initialize the nnet from an existing, larger systme.
|
||||
# initialize the nnet from an existing, larger system.
|
||||
|
||||
|
||||
# Begin configuration section.
|
||||
|
@ -69,7 +69,7 @@ dir=$4
|
|||
mdl=$online_nnet_dir/final.mdl # only needed for left and right context.
|
||||
feature_conf=$online_nnet_dir/conf/online_nnet2_decoding.conf
|
||||
|
||||
for f in $data/feats.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $feature_conf $mdl; do
|
||||
for f in $data/wav.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $feature_conf $mdl; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
@ -100,7 +100,7 @@ if [ -f $data/utt2uniq ]; then
|
|||
fi
|
||||
|
||||
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid/uttlist | \
|
||||
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
|
||||
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
|
||||
|
||||
|
||||
for subdir in valid train_subset; do
|
||||
|
|
|
@ -0,0 +1,288 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012-2014 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
|
||||
#
|
||||
# This is modified from ../../nnet2/get_egs2.sh. [note: get_egs2.sh is like get_egs.sh,
|
||||
# but uses the newer, more compact way of writing egs, where we write multiple
|
||||
# frames of labels in order to share the context.]
|
||||
# This script combines the
|
||||
# nnet-example extraction with the feature extraction directly from wave files;
|
||||
# it uses the program online2-wav-dump-feature to do all parts of feature
|
||||
# extraction: MFCC/PLP/fbank, possibly plus pitch, plus iVectors. This script
|
||||
# is intended mostly for cross-system training for online decoding, where you
|
||||
# initialize the nnet from an existing, larger system.
|
||||
#
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
frames_per_eg=8 # number of frames of labels per example. more->less disk space and
|
||||
# less time preparing egs, but more I/O during training.
|
||||
# note: the script may reduce this if reduce_frames_per_eg is true.
|
||||
|
||||
reduce_frames_per_eg=true # If true, this script may reduce the frames_per_eg
|
||||
# if there is only one archive and even with the
|
||||
# reduced frames_per_eg, the number of
|
||||
# samples_per_iter that would result is less than or
|
||||
# equal to the user-specified value.
|
||||
num_utts_subset=300 # number of utterances in validation and training
|
||||
# subsets used for shrinkage and diagnostics.
|
||||
num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
|
||||
num_train_frames_combine=10000 # # train frames for the above.
|
||||
num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
|
||||
samples_per_iter=400000 # each iteration of training, see this many samples
|
||||
# per job. This is just a guideline; it will pick a number
|
||||
# that divides the number of samples in the entire data.
|
||||
|
||||
stage=0
|
||||
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
|
||||
random_copy=false
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: $0 [opts] <data> <ali-dir> <online-nnet-dir> <egs-dir>"
|
||||
echo " e.g.: $0 data/train exp/tri3_ali exp/nnet2_online/nnet_a_gpu_online/ exp/nnet2_online/nnet_b/egs"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl;utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --samples-per-iter <#samples;400000> # Number of samples of data to process per iteration, per"
|
||||
echo " # process."
|
||||
echo " --feat-type <lda|raw> # (by default it tries to guess). The feature type you want"
|
||||
echo " # to use as input to the neural net."
|
||||
echo " --frames-per-eg <frames;8> # number of frames per eg on disk"
|
||||
echo " --num-frames-diagnostic <#frames;4000> # Number of frames used in computing (train,valid) diagnostics"
|
||||
echo " --num-valid-frames-combine <#frames;10000> # Number of frames used in getting combination weights at the"
|
||||
echo " # very end."
|
||||
echo " --stage <stage|0> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
alidir=$2
|
||||
online_nnet_dir=$3
|
||||
dir=$4
|
||||
|
||||
mdl=$online_nnet_dir/final.mdl # only needed for left and right context.
|
||||
feature_conf=$online_nnet_dir/conf/online_nnet2_decoding.conf
|
||||
|
||||
|
||||
for f in $data/wav.scp $alidir/ali.1.gz $alidir/final.mdl $alidir/tree $mdl $feature_conf; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
nj=`cat $alidir/num_jobs` || exit 1; # number of jobs in alignment dir...
|
||||
|
||||
sdata=$data/split$nj
|
||||
utils/split_data.sh $data $nj
|
||||
|
||||
mkdir -p $dir/log $dir/info
|
||||
! cmp $alidir/tree $online_nnet_dir/tree && \
|
||||
echo "$0: warning, tree from alignment dir does not match tree from online-nnet dir"
|
||||
cp $alidir/tree $dir
|
||||
grep -v '^--endpoint' $feature_conf >$dir/feature.conf || exit 1;
|
||||
mkdir -p $dir/valid $dir/train_subset
|
||||
|
||||
# Get list of validation utterances.
|
||||
awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
|
||||
> $dir/valid/uttlist || exit 1;
|
||||
|
||||
if [ -f $data/utt2uniq ]; then
|
||||
echo "File $data/utt2uniq exists, so augmenting valid/uttlist to"
|
||||
echo "include all perturbed versions of the same 'real' utterances."
|
||||
mv $dir/valid/uttlist $dir/valid/uttlist.tmp
|
||||
utils/utt2spk_to_spk2utt.pl $data/utt2uniq > $dir/uniq2utt
|
||||
cat $dir/valid/uttlist.tmp | utils/apply_map.pl $data/utt2uniq | \
|
||||
sort | uniq | utils/apply_map.pl $dir/uniq2utt | \
|
||||
awk '{for(n=1;n<=NF;n++) print $n;}' | sort > $dir/valid/uttlist
|
||||
rm $dir/uniq2utt $dir/valid/uttlist.tmp
|
||||
fi
|
||||
|
||||
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid/uttlist | \
|
||||
utils/shuffle_list.pl | head -$num_utts_subset > $dir/train_subset/uttlist || exit 1;
|
||||
|
||||
|
||||
for subdir in valid train_subset; do
|
||||
# In order for the iVector extraction to work right, we need to process all
|
||||
# utterances of the speakers which have utterances in valid/uttlist, and the
|
||||
# same for train_subset/uttlist. We produce $dir/valid/uttlist_extended which
|
||||
# will contain all utterances of all speakers which have utterances in
|
||||
# $dir/valid/uttlist, and the same for $dir/train_subset/.
|
||||
|
||||
utils/filter_scp.pl $dir/$subdir/uttlist <$data/utt2spk | awk '{print $2}' > $dir/$subdir/spklist || exit 1;
|
||||
utils/filter_scp.pl -f 2 $dir/$subdir/spklist <$data/utt2spk >$dir/$subdir/utt2spk || exit 1;
|
||||
utils/utt2spk_to_spk2utt.pl <$dir/$subdir/utt2spk >$dir/$subdir/spk2utt || exit 1;
|
||||
awk '{print $1}' <$dir/$subdir/utt2spk >$dir/$subdir/uttlist_extended || exit 1;
|
||||
rm $dir/$subdir/spklist
|
||||
done
|
||||
|
||||
|
||||
if [ -f $data/segments ]; then
|
||||
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
|
||||
# previous utterances within a speaker, we do the filtering after extracting the features.
|
||||
echo "$0 [info]: segments file exists: using that."
|
||||
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- | subset-feats --exclude=$dir/valid/uttlist ark:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid/uttlist_extended $data/segments | extract-segments scp:$data/wav.scp - ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/valid/spk2utt ark,s,cs:- ark:- | subset-feats --include=$dir/valid/uttlist ark:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset/uttlist_extended $data/segments | extract-segments scp:$data/wav.scp - ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/train_subset/spk2utt ark,s,cs:- ark:- | subset-feats --include=$dir/train_subset/uttlist ark:- ark:- |"
|
||||
else
|
||||
echo "$0 [info]: no segments file exists, using wav.scp."
|
||||
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- | subset-feats --exclude=$dir/valid/uttlist ark:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid/uttlist_extended $data/wav.scp | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/valid/spk2utt scp:- ark:- | subset-feats --include=$dir/valid/uttlist ark:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset/uttlist_extended $data/wav.scp | online2-wav-dump-features --config=$dir/feature.conf ark:$dir/train_subset/spk2utt scp:- ark:- | subset-feats --include=$dir/train_subset/uttlist ark:- ark:- |"
|
||||
fi
|
||||
|
||||
ivector_dim=$(online2-wav-dump-features --config=$dir/feature.conf --print-ivector-dim=true) || exit 1;
|
||||
|
||||
! [ $ivector_dim -ge 0 ] && echo "$0: error getting iVector dim" && exit 1;
|
||||
|
||||
|
||||
|
||||
set -o pipefail
|
||||
left_context=$(nnet-am-info $mdl | grep '^left-context' | awk '{print $2}') || exit 1;
|
||||
right_context=$(nnet-am-info $mdl | grep '^right-context' | awk '{print $2}') || exit 1;
|
||||
set +o pipefail
|
||||
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
echo "$0: working out number of frames of training data"
|
||||
num_frames=$(steps/nnet2/get_num_frames.sh $data)
|
||||
echo $num_frames > $dir/info/num_frames
|
||||
else
|
||||
num_frames=`cat $dir/info/num_frames` || exit 1;
|
||||
fi
|
||||
|
||||
# the + 1 is to round up, not down... we assume it doesn't divide exactly.
|
||||
num_archives=$[$num_frames/($frames_per_eg*$samples_per_iter)+1]
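# Worked example with made-up numbers: num_frames=20,000,000, frames_per_eg=8
# and samples_per_iter=400,000 give num_archives = 20000000/(8*400000)+1 = 7.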
|
||||
# (for small data)- while reduce_frames_per_eg == true and the number of
|
||||
# archives is 1 and would still be 1 if we reduced frames_per_eg by 1, reduce it
|
||||
# by 1.
|
||||
reduced=false
|
||||
while $reduce_frames_per_eg && [ $frames_per_eg -gt 1 ] && \
|
||||
[ $[$num_frames/(($frames_per_eg-1)*$samples_per_iter)] -eq 0 ]; do
|
||||
frames_per_eg=$[$frames_per_eg-1]
|
||||
num_archives=1
|
||||
reduced=true
|
||||
done
|
||||
$reduced && echo "$0: reduced frames_per_eg to $frames_per_eg because amount of data is small."
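# Hypothetical small-data example: with num_frames=1,500,000 and
# samples_per_iter=400,000, the loop above lowers frames_per_eg from 8 to 4,
# the first value for which num_frames/((frames_per_eg-1)*samples_per_iter)
# is nonzero, and num_archives stays at 1.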
|
||||
|
||||
echo $num_archives >$dir/info/num_archives
|
||||
echo $frames_per_eg >$dir/info/frames_per_eg
|
||||
|
||||
# Working out number of egs per archive
|
||||
egs_per_archive=$[$num_frames/($frames_per_eg*$num_archives)]
|
||||
! [ $egs_per_archive -le $samples_per_iter ] && \
|
||||
echo "$0: script error: egs_per_archive=$egs_per_archive not <= samples_per_iter=$samples_per_iter" \
|
||||
&& exit 1;
|
||||
|
||||
echo $egs_per_archive > $dir/info/egs_per_archive
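# With the hypothetical 20M-frame example above (num_archives=7, frames_per_eg=8),
# egs_per_archive = 20000000/(8*7) = 357142, comfortably below samples_per_iter.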
|
||||
|
||||
echo "$0: creating $num_archives archives, each with $egs_per_archive egs, with"
|
||||
echo "$0: $frames_per_eg labels per example, and (left,right) context = ($left_context,$right_context)"
|
||||
|
||||
# Making soft links to storage directories. This is a no-op unless
|
||||
# the subdirectory $dir/storage/ exists. See utils/create_split_dir.pl
|
||||
for x in `seq $num_archives`; do
|
||||
utils/create_data_link.pl $dir/egs.$x.ark
|
||||
for y in `seq $nj`; do
|
||||
utils/create_data_link.pl $dir/egs_orig.$x.$y.ark
|
||||
done
|
||||
done
|
||||
|
||||
nnet_context_opts="--left-context=$left_context --right-context=$right_context"
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
echo "$0: Getting validation and training subset examples."
|
||||
rm $dir/.error 2>/dev/null
|
||||
echo "$0: ... extracting validation and training-subset alignments."
|
||||
set -o pipefail;
|
||||
for id in $(seq $nj); do gunzip -c $alidir/ali.$id.gz; done | \
|
||||
copy-int-vector ark:- ark,t:- | \
|
||||
utils/filter_scp.pl <(cat $dir/valid/uttlist $dir/train_subset/uttlist) | \
|
||||
gzip -c >$dir/ali_special.gz || exit 1;
|
||||
set +o pipefail; # unset the pipefail option.
|
||||
|
||||
$cmd $dir/log/create_valid_subset.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts "$valid_feats" \
|
||||
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/valid_all.egs" || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts "$train_subset_feats" \
|
||||
"ark,s,cs:gunzip -c $dir/ali_special.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/train_subset_all.egs" || touch $dir/.error &
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo "Error detected while creating train/valid egs" && exit 1;
|
||||
echo "... Getting subsets of validation examples for diagnostics and combination."
|
||||
$cmd $dir/log/create_valid_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_valid_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_diagnostic.egs || touch $dir/.error &
|
||||
|
||||
$cmd $dir/log/create_train_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_diagnostic.egs || touch $dir/.error &
|
||||
wait
|
||||
sleep 5 # wait for file system to sync.
|
||||
cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
|
||||
|
||||
for f in $dir/{combine,train_diagnostic,valid_diagnostic}.egs; do
|
||||
[ ! -s $f ] && echo "No examples in file $f" && exit 1;
|
||||
done
|
||||
rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_combine.egs $dir/ali_special.gz
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
# create egs_orig.*.*.ark; the first index goes to $num_archives,
|
||||
# the second to $nj (which is the number of jobs in the original alignment
|
||||
# dir)
|
||||
|
||||
egs_list=
|
||||
for n in $(seq $num_archives); do
|
||||
egs_list="$egs_list ark:$dir/egs_orig.$n.JOB.ark"
|
||||
done
|
||||
echo "$0: Generating training examples on disk"
|
||||
|
||||
# The examples will go round-robin to egs_list.
|
||||
$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
|
||||
nnet-get-egs $ivectors_opt $nnet_context_opts --num-frames=$frames_per_eg "$feats" \
|
||||
"ark,s,cs:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
nnet-copy-egs ark:- $egs_list || exit 1;
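# (Given the round-robin distribution noted above: with e.g. 4 archives, the 1st
# example goes to egs_orig.1.JOB.ark, the 2nd to egs_orig.2.JOB.ark, and the 5th
# wraps back around to egs_orig.1.JOB.ark.)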
|
||||
fi
|
||||
|
||||
if [ $stage -le 4 ]; then
|
||||
echo "$0: recombining and shuffling order of archives on disk"
|
||||
# combine all the "egs_orig.JOB.*.ark" (over the $nj splits of the data) and
|
||||
# shuffle the order, writing to the egs.JOB.ark
|
||||
|
||||
egs_list=
|
||||
for n in $(seq $nj); do
|
||||
egs_list="$egs_list $dir/egs_orig.JOB.$n.ark"
|
||||
done
|
||||
|
||||
$cmd $io_opts $extra_opts JOB=1:$num_archives $dir/log/shuffle.JOB.log \
|
||||
nnet-shuffle-egs --srand=JOB "ark:cat $egs_list|" ark:$dir/egs.JOB.ark || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ]; then
|
||||
echo "$0: removing temporary archives"
|
||||
for x in `seq $num_archives`; do
|
||||
for y in `seq $nj`; do
|
||||
file=$dir/egs_orig.$x.$y.ark
|
||||
[ -L $file ] && rm $(readlink -f $file)
|
||||
rm $file
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
echo "$0: Finished preparing training examples"
|
|
@ -0,0 +1,244 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
|
||||
|
||||
# This script dumps examples for MPE, MMI or state-level minimum Bayes risk (sMBR)
|
||||
# training of neural nets. Note: for "criterion", smbr > mpe > mmi in terms of
|
||||
# compatibility of the dumped egs, meaning you can use the egs dumped with
|
||||
# --criterion smbr for MPE or MMI, and egs dumped with --criterion mpe for MMI
|
||||
# training. The discriminative training program itself doesn't enforce this and
|
||||
# it would let you mix and match them arbitrarily; we are speaking in terms of
|
||||
# the correctness of the algorithm that splits the lattices into pieces.
|
||||
|
||||
# Begin configuration section.
|
||||
cmd=run.pl
|
||||
criterion=smbr
|
||||
drop_frames=false # option relevant for MMI, affects how we dump examples.
|
||||
samples_per_iter=400000 # measured in frames, not in "examples"
|
||||
max_temp_archives=128 # maximum number of temp archives per input job, only
|
||||
# affects the process of generating archives, not the
|
||||
# final result.
|
||||
|
||||
stage=0
|
||||
iter=final
|
||||
cleanup=true
|
||||
# End configuration section.
|
||||
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
if [ -f path.sh ]; then . ./path.sh; fi
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
|
||||
if [ $# != 6 ]; then
|
||||
echo "Usage: $0 [opts] <data> <lang> <ali-dir> <denlat-dir> <src-online-nnet2-dir> <degs-dir>"
|
||||
echo " e.g.: $0 data/train data/lang exp/nnet2_online/nnet_a_online{_ali,_denlats,_degs}"
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config file containing options"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs (probably would be good to add -tc 5 or so if using"
|
||||
echo " # GridEngine (to avoid excessive NFS traffic)."
|
||||
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
|
||||
echo " # process."
|
||||
echo " --stage <stage|-8> # Used to run a partially-completed training process from somewhere in"
|
||||
echo " # the middle."
|
||||
echo " --criterion <criterion|smbr> # Training criterion: may be smbr, mmi or mpfe"
|
||||
echo " --online-ivector-dir <dir|""> # Directory for online-estimated iVectors, used in the"
|
||||
echo " # online-neural-net setup. (but you may want to use"
|
||||
echo " # steps/online/nnet2/get_egs_discriminative2.sh instead)"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
alidir=$3
|
||||
denlatdir=$4
|
||||
srcdir=$5
|
||||
dir=$6
|
||||
|
||||
|
||||
# Check some files.
|
||||
for f in $data/feats.scp $lang/L.fst $alidir/ali.1.gz $alidir/num_jobs $alidir/tree \
|
||||
$denlatdir/lat.1.gz $denlatdir/num_jobs $srcdir/$iter.mdl $srcdir/conf/online_nnet2_decoding.conf; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
mkdir -p $dir/log $dir/info || exit 1;
|
||||
|
||||
|
||||
nj=$(cat $denlatdir/num_jobs) || exit 1; # $nj is the number of
|
||||
# splits of the denlats and alignments.
|
||||
|
||||
|
||||
nj_ali=$(cat $alidir/num_jobs) || exit 1;
|
||||
|
||||
sdata=$data/split$nj
|
||||
utils/split_data.sh $data $nj
|
||||
|
||||
|
||||
|
||||
|
||||
if [ $nj_ali -eq $nj ]; then
|
||||
ali_rspecifier="ark,s,cs:gunzip -c $alidir/ali.JOB.gz |"
|
||||
else
|
||||
ali_rspecifier="scp:$dir/ali.scp"
|
||||
if [ $stage -le 1 ]; then
|
||||
echo "$0: number of jobs in den-lats versus alignments differ: dumping them as single archive and index."
|
||||
all_ids=$(seq -s, $nj_ali)
|
||||
copy-int-vector --print-args=false \
|
||||
"ark:gunzip -c $alidir/ali.{$all_ids}.gz|" ark,scp:$dir/ali.ark,$dir/ali.scp || exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
silphonelist=`cat $lang/phones/silence.csl` || exit 1;
|
||||
|
||||
cp $alidir/tree $dir
|
||||
cp $lang/phones/silence.csl $dir/info || exit 1;
|
||||
cp $srcdir/$iter.mdl $dir/final.mdl || exit 1;
|
||||
|
||||
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
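# (The grep above copies the online decoding config minus the --endpoint* options,
# which are not needed when dumping examples; the resulting feature.conf is used
# only for feature extraction.)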
|
||||
|
||||
ivector_dim=$(online2-wav-dump-features --config=$dir/feature.conf --print-ivector-dim=true) || exit 1;
|
||||
|
||||
echo $ivector_dim > $dir/info/ivector_dim
|
||||
|
||||
! [ $ivector_dim -ge 0 ] && echo "$0: error getting iVector dim" && exit 1;
|
||||
|
||||
if [ -f $data/segments ]; then
|
||||
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
|
||||
# previous utterances within a speaker, we do the filtering after extracting the features.
|
||||
echo "$0 [info]: segments file exists: using that."
|
||||
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
|
||||
else
|
||||
echo "$0 [info]: no segments file exists, using wav.scp."
|
||||
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
|
||||
fi
|
||||
|
||||
|
||||
if [ $stage -le 2 ]; then
|
||||
echo "$0: working out number of frames of training data"
|
||||
num_frames=$(steps/nnet2/get_num_frames.sh $data)
|
||||
|
||||
echo $num_frames > $dir/info/num_frames
|
||||
|
||||
# Working out total number of archives. Add one on the assumption the
|
||||
# num-frames won't divide exactly, and we want to round up.
|
||||
num_archives=$[$num_frames/$samples_per_iter + 1]
|
||||
|
||||
# the next few lines relate to how we may temporarily split each input job
|
||||
# into fewer than $num_archives pieces, to avoid using an excessive
|
||||
# number of filehandles.
|
||||
archive_ratio=$[$num_archives/$max_temp_archives+1]
|
||||
num_archives_temp=$[$num_archives/$archive_ratio]
|
||||
# change $num_archives slightly to make it an exact multiple
|
||||
# of $archive_ratio.
|
||||
num_archives=$[$num_archives_temp*$archive_ratio]
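# Illustrative example (hypothetical numbers): num_frames=200,000,000 and
# samples_per_iter=400,000 give num_archives=501; with max_temp_archives=128,
# archive_ratio = 501/128 + 1 = 4, num_archives_temp = 501/4 = 125, and
# num_archives is rounded to 125 * 4 = 500.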
|
||||
|
||||
echo $num_archives >$dir/info/num_archives || exit 1
|
||||
echo $num_archives_temp >$dir/info/num_archives_temp || exit 1
|
||||
|
||||
frames_per_archive=$[$num_frames/$num_archives]
|
||||
|
||||
# note, this is the number of frames per archive prior to discarding frames.
|
||||
echo $frames_per_archive > $dir/info/frames_per_archive
|
||||
else
|
||||
num_archives=$(cat $dir/info/num_archives) || exit 1;
|
||||
num_archives_temp=$(cat $dir/info/num_archives_temp) || exit 1;
|
||||
frames_per_archive=$(cat $dir/info/frames_per_archive) || exit 1;
|
||||
fi
|
||||
|
||||
echo "$0: Splitting the data up into $num_archives archives (using $num_archives_temp temporary pieces per input job)"
|
||||
echo "$0: giving samples-per-iteration of $frames_per_archive (you requested $samples_per_iter)."
|
||||
|
||||
# we create these data links regardless of the stage, as there are situations
|
||||
# where we would want to recreate a data link that had previously been deleted.
|
||||
|
||||
if [ -d $dir/storage ]; then
|
||||
echo "$0: creating data links for distributed storage of degs"
|
||||
# See utils/create_split_dir.pl for how this 'storage' directory is created.
|
||||
for x in $(seq $nj); do
|
||||
for y in $(seq $num_archives_temp); do
|
||||
utils/create_data_link.pl $dir/degs_orig.$x.$y.ark
|
||||
done
|
||||
done
|
||||
for z in $(seq $num_archives); do
|
||||
utils/create_data_link.pl $dir/degs.$z.ark
|
||||
done
|
||||
if [ $num_archives_temp -ne $num_archives ]; then
|
||||
for z in $(seq $num_archives); do
|
||||
utils/create_data_link.pl $dir/degs_temp.$z.ark
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 3 ]; then
|
||||
echo "$0: getting initial training examples by splitting lattices"
|
||||
|
||||
degs_list=$(for n in $(seq $num_archives_temp); do echo ark:$dir/degs_orig.JOB.$n.ark; done)
|
||||
|
||||
$cmd JOB=1:$nj $dir/log/get_egs.JOB.log \
|
||||
nnet-get-egs-discriminative --criterion=$criterion --drop-frames=$drop_frames \
|
||||
"$srcdir/$iter.mdl" "$feats" "$ali_rspecifier" "ark,s,cs:gunzip -c $denlatdir/lat.JOB.gz|" ark:- \| \
|
||||
nnet-copy-egs-discriminative $const_dim_opt ark:- $degs_list || exit 1;
|
||||
sleep 5; # wait a bit so NFS has time to write files.
|
||||
fi
|
||||
|
||||
if [ $stage -le 4 ]; then
|
||||
|
||||
degs_list=$(for n in $(seq $nj); do echo $dir/degs_orig.$n.JOB.ark; done)
|
||||
|
||||
if [ $num_archives -eq $num_archives_temp ]; then
|
||||
echo "$0: combining data into final archives and shuffling it"
|
||||
|
||||
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
|
||||
cat $degs_list \| nnet-shuffle-egs-discriminative --srand=JOB ark:- \
|
||||
ark:$dir/degs.JOB.ark || exit 1;
|
||||
else
|
||||
echo "$0: combining and re-splitting data into un-shuffled versions of final archives."
|
||||
|
||||
archive_ratio=$[$num_archives/$num_archives_temp]
|
||||
! [ $archive_ratio -gt 1 ] && echo "$0: Bad archive_ratio $archive_ratio" && exit 1;
|
||||
|
||||
# note: the \$[ .. ] won't be evaluated until the job gets executed. The
|
||||
# aim is to write to the archives with the final numbering, 1
|
||||
# ... num_archives, which is more than num_archives_temp. The list with
|
||||
# \$[... ] expressions in it computes the set of final indexes for each
|
||||
# temporary index.
|
||||
degs_list_out=$(for n in $(seq $archive_ratio); do echo "ark:$dir/degs_temp.\$[((JOB-1)*$archive_ratio)+$n].ark"; done)
|
||||
# e.g. if dir=foo and archive_ratio=2, we'd have
|
||||
# degs_list_out='foo/degs_temp.$[((JOB-1)*2)+1].ark foo/degs_temp.$[((JOB-1)*2)+2].ark'
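# so for JOB=3 (and archive_ratio=2) the outputs would be degs_temp.5.ark and degs_temp.6.ark.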
|
||||
|
||||
$cmd JOB=1:$num_archives_temp $dir/log/resplit.JOB.log \
|
||||
cat $degs_list \| nnet-copy-egs-discriminative --srand=JOB ark:- \
|
||||
$degs_list_out || exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le 5 ] && [ $num_archives -ne $num_archives_temp ]; then
|
||||
echo "$0: shuffling final archives."
|
||||
|
||||
$cmd JOB=1:$num_archives $dir/log/shuffle.JOB.log \
|
||||
nnet-shuffle-egs-discriminative --srand=JOB ark:$dir/degs_temp.JOB.ark \
|
||||
ark:$dir/degs.JOB.ark || exit 1
|
||||
|
||||
fi
|
||||
|
||||
if $cleanup; then
|
||||
echo "$0: removing temporary archives."
|
||||
for x in $(seq $nj); do
|
||||
for y in $(seq $num_archives_temp); do
|
||||
file=$dir/degs_orig.$x.$y.ark
|
||||
[ -L $file ] && rm $(readlink -f $file); rm $file
|
||||
done
|
||||
done
|
||||
if [ $num_archives_temp -ne $num_archives ]; then
|
||||
for z in $(seq $num_archives); do
|
||||
file=$dir/degs_temp.$z.ark
|
||||
[ -L $file ] && rm $(readlink -f $file); rm $file
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "$0: Done."
|
|
@ -0,0 +1,168 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey). Apache 2.0.
|
||||
|
||||
# Create denominator lattices for MMI/MPE training.
|
||||
# This version uses the online-nnet2 features.
|
||||
#
|
||||
# Creates its output in $dir/lat.*.gz
|
||||
|
||||
# Begin configuration section.
|
||||
stage=0
|
||||
nj=4
|
||||
cmd=run.pl
|
||||
sub_split=1
|
||||
beam=13.0
|
||||
lattice_beam=7.0
|
||||
acwt=0.1
|
||||
max_active=5000
|
||||
max_mem=20000000 # This will stop the processes getting too large.
|
||||
# This is in bytes, but not "real" bytes-- you have to multiply
|
||||
# by something like 5 or 10 to get real bytes (not sure why so large)
|
||||
num_threads=1
|
||||
parallel_opts=
|
||||
# End configuration section.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
[ -f ./path.sh ] && . ./path.sh; # source the path.
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/make_denlats.sh [options] <data-dir> <lang-dir> <src-dir> <exp-dir>"
|
||||
echo " e.g.: steps/make_denlats.sh data/train data/lang exp/nnet2_online/nnet_a_online exp/nnet2_online/nnet_a_denlats"
|
||||
echo "Works for (delta|lda) features, and (with --transform-dir option) such features"
|
||||
echo " plus transforms."
|
||||
echo ""
|
||||
echo "Main options (for others, see top of script file)"
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --nj <nj> # number of parallel jobs"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --sub-split <n-split> # e.g. 40; use this for "
|
||||
echo " # large databases so your jobs will be smaller and"
|
||||
echo " # will (individually) finish reasonably soon."
|
||||
echo " --num-threads <n> # number of threads per decoding job"
|
||||
echo " --parallel-opts <string> # if >1 thread, add this to 'cmd', e.g. -pe smp 6"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
srcdir=$3
|
||||
dir=$4
|
||||
|
||||
for f in $data/wav.scp $lang/L.fst $srcdir/final.mdl $srcdir/conf/online_nnet2_decoding.conf; do
|
||||
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1;
|
||||
done
|
||||
|
||||
sdata=$data/split$nj
|
||||
|
||||
thread_string=
|
||||
[ $num_threads -gt 1 ] && thread_string="-parallel --num-threads=$num_threads"
|
||||
|
||||
mkdir -p $dir/log
|
||||
split_data.sh $data $nj || exit 1;
|
||||
echo $nj > $dir/num_jobs
|
||||
|
||||
oov=`cat $lang/oov.int` || exit 1;
|
||||
|
||||
|
||||
# Compute grammar FST which corresponds to unigram decoding graph.
|
||||
new_lang="$dir/"$(basename "$lang")
|
||||
|
||||
|
||||
grep -v '^--endpoint' $srcdir/conf/online_nnet2_decoding.conf >$dir/feature.conf || exit 1;
|
||||
|
||||
if [ $stage -le 0 ]; then
|
||||
# mkgraph.sh expects a whole directory "lang", so put everything in one directory...
|
||||
# it gets L_disambig.fst and G.fst (among other things) from $dir/lang, and
|
||||
# final.mdl from $srcdir; the output HCLG.fst goes in $dir/graph.
|
||||
|
||||
cp -rH $lang $dir/
|
||||
|
||||
echo "Compiling decoding graph in $dir/dengraph"
|
||||
if [ -s $dir/dengraph/HCLG.fst ] && [ $dir/dengraph/HCLG.fst -nt $srcdir/final.mdl ]; then
|
||||
echo "Graph $dir/dengraph/HCLG.fst already exists: skipping graph creation."
|
||||
else
|
||||
echo "Making unigram grammar FST in $new_lang"
|
||||
cat $data/text | utils/sym2int.pl --map-oov $oov -f 2- $lang/words.txt | \
|
||||
awk '{for(n=2;n<=NF;n++){ printf("%s ", $n); } printf("\n"); }' | \
|
||||
utils/make_unigram_grammar.pl | fstcompile | fstarcsort --sort_type=ilabel > $new_lang/G.fst \
|
||||
|| exit 1;
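# (In the pipeline above, sym2int.pl maps the words to integers and the awk
# command strips the utterance-id (first field), leaving just the word sequences
# from which the unigram grammar is estimated.)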
|
||||
utils/mkgraph.sh $new_lang $srcdir $dir/dengraph || exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
if [ -f $data/segments ]; then
|
||||
# note: in the feature extraction, because the program online2-wav-dump-features is sensitive to the
|
||||
# previous utterances within a speaker, we do the filtering after extracting the features.
|
||||
echo "$0 [info]: segments file exists: using that."
|
||||
feats="ark,s,cs:extract-segments scp:$sdata/JOB/wav.scp $sdata/JOB/segments ark:- | online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt ark,s,cs:- ark:- |"
|
||||
else
|
||||
echo "$0 [info]: no segments file exists, using wav.scp."
|
||||
feats="ark,s,cs:online2-wav-dump-features --config=$dir/feature.conf ark:$sdata/JOB/spk2utt scp:$sdata/JOB/wav.scp ark:- |"
|
||||
fi
|
||||
|
||||
|
||||
|
||||
# if this job is interrupted by the user, we want any background jobs to be
|
||||
# killed too.
|
||||
cleanup() {
|
||||
local pids=$(jobs -pr)
|
||||
[ -n "$pids" ] && kill $pids
|
||||
}
|
||||
trap "cleanup" INT QUIT TERM EXIT
|
||||
|
||||
|
||||
if [ $sub_split -eq 1 ]; then
|
||||
$cmd $parallel_opts JOB=1:$nj $dir/log/decode_den.JOB.log \
|
||||
nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
|
||||
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
|
||||
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
|
||||
else
|
||||
# each job from 1 to $nj is split into multiple pieces (sub-split), and we aim
|
||||
# to have at most two jobs running at a time. The idea is that if we have stragglers
|
||||
# from one job, we can be processing another one at the same time.
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
prev_pid=
|
||||
for n in `seq $[nj+1]`; do
|
||||
if [ $n -gt $nj ]; then
|
||||
this_pid=
|
||||
elif [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $srcdir/final.mdl ]; then
|
||||
echo "Not processing subset $n as already done (delete $dir/.done.$n if not)";
|
||||
this_pid=
|
||||
else
|
||||
sdata2=$data/split$nj/$n/split$sub_split;
|
||||
if [ ! -d $sdata2 ] || [ $sdata2 -ot $sdata/$n/feats.scp ]; then
|
||||
split_data.sh --per-utt $sdata/$n $sub_split || exit 1;
|
||||
fi
|
||||
mkdir -p $dir/log/$n
|
||||
mkdir -p $dir/part
|
||||
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
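# (The sed commands above rewrite the JOB placeholders in $feats so that each
# sub-split job reads its own per-utterance split of data subset $n; the
# trans.JOB substitution is a no-op for these online features.)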
|
||||
|
||||
$cmd $parallel_opts JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
|
||||
nnet-latgen-faster$thread_string --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
|
||||
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
|
||||
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || touch $dir/.error &
|
||||
this_pid=$!
|
||||
fi
|
||||
if [ ! -z "$prev_pid" ]; then # Wait for the previous job; merge the previous set of lattices.
|
||||
wait $prev_pid
|
||||
[ -f $dir/.error ] && echo "$0: error generating denominator lattices" && exit 1;
|
||||
rm $dir/.merge_error 2>/dev/null
|
||||
echo Merging archives for data subset $prev_n
|
||||
for k in `seq $sub_split`; do
|
||||
gunzip -c $dir/lat.$prev_n.$k.gz || touch $dir/.merge_error;
|
||||
done | gzip -c > $dir/lat.$prev_n.gz || touch $dir/.merge_error;
|
||||
[ -f $dir/.merge_error ] && echo "$0: Merging lattices for subset $prev_n failed (or maybe some other error)" && exit 1;
|
||||
rm $dir/lat.$prev_n.*.gz
|
||||
touch $dir/.done.$prev_n
|
||||
fi
|
||||
prev_n=$n
|
||||
prev_pid=$this_pid
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
echo "$0: done generating denominator lattices."
|
|
@ -27,26 +27,39 @@ echo "$0 $@" # Print the command line for logging
|
|||
[ -f path.sh ] && . ./path.sh;
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# -ne 3 ]; then
|
||||
echo "Usage: $0 [options] <orig-nnet-online-dir> <new-nnet-dir> <new-nnet-online-dir>"
|
||||
echo "e.g.: $0 data/lang exp/nnet2_online/extractor exp/nnet2_online/nnet exp/nnet2_online/nnet_online"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --stage <stage> # stage to do partial re-run from."
|
||||
exit 1;
|
||||
if [ $# -ne 3 ] && [ $# -ne 4 ]; then
|
||||
echo "Usage: $0 [options] <orig-nnet-online-dir> [<new-lang-dir>] <new-nnet-dir> <new-nnet-online-dir>"
|
||||
echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --stage <stage> # stage to do partial re-run from."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
online_src=$1
|
||||
nnet_src=$2
|
||||
dir=$3
|
||||
if [ $# -eq 3 ]; then
|
||||
echo "$0: warning: it's better if you add the new <lang> directory as the 2nd argument."
|
||||
|
||||
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree; do
|
||||
online_src=$1
|
||||
lang=
|
||||
nnet_src=$2
|
||||
dir=$3
|
||||
else
|
||||
online_src=$1
|
||||
lang=$2
|
||||
nnet_src=$3
|
||||
dir=$4
|
||||
|
||||
extra_files=$lang/words.txt
|
||||
fi
|
||||
|
||||
|
||||
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $extra_files; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
origdir=$dir
|
||||
dir_as_given=$dir
|
||||
dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
|
||||
# configuration files we write will contain absolute
|
||||
# pathnames.
|
||||
|
@ -89,8 +102,16 @@ $cmd $dir/log/append_nnet.log \
|
|||
nnet-insert --randomize-next-component=false --insert-at=0 \
|
||||
$nnet_src/final.mdl $dir/first_nnet.raw $dir/final.mdl || exit 1;
|
||||
|
||||
cp $nnet_src/tree $dir/ || exit 1;
|
||||
|
||||
$cleanup && rm $dir/first_nnet.raw
|
||||
|
||||
echo "$0: formatted neural net for online decoding in $origdir"
|
||||
if [ ! -z "$lang" ]; then
|
||||
# if the $lang option was provided, modify the silence-phones in the config;
|
||||
# these are only used for the endpointing code, but we should get this right.
|
||||
cp $dir/conf/online_nnet2_decoding.conf{,.tmp}
|
||||
silphones=$(cat $lang/phones/silence.csl) || exit 1;
|
||||
cat $dir/conf/online_nnet2_decoding.conf.tmp | \
|
||||
sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf
|
||||
rm $dir/conf/online_nnet2_decoding.conf.tmp
|
||||
fi
|
||||
|
||||
echo "$0: formatted neural net for online decoding in $dir_as_given"
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2014 Johns Hopkins University (Author: Daniel Povey)
|
||||
# Apache 2.0
|
||||
|
||||
# This is as prepare_online_decoding.sh, but for transfer learning-- the case where
|
||||
# you have an existing online-decoding directory where you have all the feature
|
||||
# stuff, that you don't want to change, but you have a new model (e.g. built on a
# different tree or for a different language) that you want to set up for online
# decoding with the same feature configuration.
|
||||
|
||||
# Begin configuration.
|
||||
stage=0 # This allows restarting partway through, when something went wrong.
|
||||
cmd=run.pl
|
||||
iter=final
|
||||
# End configuration.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
||||
[ -f path.sh ] && . ./path.sh;
|
||||
. parse_options.sh || exit 1;
|
||||
|
||||
if [ $# -ne 4 ]; then
|
||||
echo "Usage: $0 [options] <orig-nnet-online-dir> <new-lang-dir> <new-nnet-dir> <new-nnet-online-dir>"
|
||||
echo "e.g.: $0 exp_other/nnet2_online/nnet_a_online data/lang exp/nnet2_online/nnet_a exp/nnet2_online/nnet_a_online"
|
||||
echo "main options (for others, see top of script file)"
|
||||
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
|
||||
echo " --config <config-file> # config containing options"
|
||||
echo " --stage <stage> # stage to do partial re-run from."
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
online_src=$1
|
||||
lang=$2
|
||||
nnet_src=$3
|
||||
dir=$4
|
||||
|
||||
for f in $online_src/conf/online_nnet2_decoding.conf $nnet_src/final.mdl $nnet_src/tree $lang/words.txt; do
|
||||
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
|
||||
done
|
||||
|
||||
|
||||
dir_as_given=$dir
|
||||
dir=$(readlink -f $dir) # Convert $dir to an absolute pathname, so that the
|
||||
# configuration files we write will contain absolute
|
||||
# pathnames.
|
||||
mkdir -p $dir/conf $dir/log
|
||||
|
||||
|
||||
cp $nnet_src/tree $dir/ || exit 1;
|
||||
|
||||
cp $nnet_src/$iter.mdl $dir/ || exit 1;
|
||||
|
||||
|
||||
# There are a bunch of files that we will need to copy from $online_src, because
|
||||
# we're aiming to have one self-contained directory that has everything in it.
|
||||
cp -rT $online_src/ivector_extractor/ $dir/ivector_extractor
|
||||
|
||||
[ ! -d $online_src/conf ] && \
|
||||
echo "Expected directory $online_src/conf to exist" && exit 1;
|
||||
|
||||
for x in $online_src/conf/*conf; do
|
||||
# Replace directory name starting $online_src with those starting with $dir.
|
||||
# We actually replace any directory names ending in /ivector_extractor/ or /conf/
|
||||
# with $dir/ivector_extractor/ or $dir/conf/
|
||||
cat $x | perl -ape "s:=(.+)/(ivector_extractor|conf)/:=$dir/\$2/:;" > $dir/conf/$(basename $x)
|
||||
done
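# (For instance, a hypothetical config line "--mfcc-config=/some/old/dir/conf/mfcc.conf"
# would be rewritten by the substitution above as "--mfcc-config=$dir/conf/mfcc.conf".)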
|
||||
|
||||
|
||||
# modify the silence-phones in the config; these are only used for the
|
||||
# endpointing code.
|
||||
cp $dir/conf/online_nnet2_decoding.conf{,.tmp}
|
||||
silphones=$(cat $lang/phones/silence.csl) || exit 1;
|
||||
cat $dir/conf/online_nnet2_decoding.conf.tmp | \
|
||||
sed s/silence-phones=.\\+/silence-phones=$silphones/ > $dir/conf/online_nnet2_decoding.conf
|
||||
rm $dir/conf/online_nnet2_decoding.conf.tmp
|
||||
|
||||
echo "$0: formatted neural net for online decoding in $dir_as_given"
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
|
||||
if (@ARGV > 0 && $ARGV[0] eq "-f") {
|
||||
shift @ARGV;
|
||||
shift @ARGV;
|
||||
$field_spec = shift @ARGV;
|
||||
if ($field_spec =~ m/^\d+$/) {
|
||||
$field_begin = $field_spec - 1; $field_end = $field_spec - 1;
|
||||
|
@ -36,6 +36,7 @@ if (@ARGV > 0 && $ARGV[0] eq '--permissive') {
|
|||
}
|
||||
|
||||
if(@ARGV != 1) {
|
||||
print STDERR "Invalid usage: " . join(" ", @ARGV) . "\n";
|
||||
print STDERR "Usage: apply_map.pl [options] map <input >output\n" .
|
||||
"options: [-f <field-range> ]\n" .
|
||||
"Applies the map 'map' to all input text, where each line of the map\n" .
|
||||
|
|
|
@ -196,7 +196,8 @@ cat $srcdir/nonsilence_phones.txt | utils/apply_map.pl $tmpdir/phone_map.txt | \
|
|||
cp $srcdir/optional_silence.txt $dir/phones/optional_silence.txt
|
||||
cp $dir/phones/silence.txt $dir/phones/context_indep.txt
|
||||
|
||||
cat $srcdir/extra_questions.txt | utils/apply_map.pl $tmpdir/phone_map.txt \
|
||||
# if extra_questions.txt is empty, it's OK.
|
||||
cat $srcdir/extra_questions.txt 2>/dev/null | utils/apply_map.pl $tmpdir/phone_map.txt \
|
||||
>$dir/phones/extra_questions.txt
|
||||
|
||||
# Want extra questions about the word-start/word-end stuff. Make it separate for
|
||||
|
|
|
@ -42,14 +42,14 @@ utt2spks=""
|
|||
texts=""
|
||||
|
||||
nu=`cat $data/utt2spk | wc -l`
|
||||
nf=`cat $data/feats.scp | wc -l`
|
||||
nf=`cat $data/feats.scp 2>/dev/null | wc -l`
|
||||
nt=`cat $data/text 2>/dev/null | wc -l` # take it as zero if no such file
|
||||
if [ $nu -ne $nf ]; then
|
||||
if [ -f $data/feats.scp ] && [ $nu -ne $nf ]; then
|
||||
echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf); this script "
|
||||
echo " may produce incorrectly split data."
|
||||
echo "use utils/fix_data_dir.sh to fix this."
|
||||
fi
|
||||
if [ $nt -ne 0 -a $nu -ne $nt ]; then
|
||||
if [ -f $data/text ] && [ $nu -ne $nt ]; then
|
||||
echo "split_data.sh: warning, #lines is (utt2spk,text) is ($nu,$nt); this script "
|
||||
echo " may produce incorrectly split data."
|
||||
echo "use utils/fix_data_dir.sh to fix this."
|
||||
|
@ -89,7 +89,7 @@ fi
|
|||
|
||||
utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1
|
||||
|
||||
utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats || exit 1
|
||||
[ -f $data/feats.scp ] && utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats
|
||||
|
||||
[ -f $data/text ] && utils/split_scp.pl $utt2spk_opt $data/text $texts
|
||||
|
||||
|
|
|
@ -28,15 +28,16 @@
|
|||
$quiet = 0;
|
||||
$first = 0;
|
||||
$last = 0;
|
||||
if ($ARGV[0] eq "--quiet") {
|
||||
|
||||
if (@ARGV > 0 && $ARGV[0] eq "--quiet") {
|
||||
shift;
|
||||
$quiet = 1;
|
||||
}
|
||||
if ($ARGV[0] eq "--first") {
|
||||
if (@ARGV > 0 && $ARGV[0] eq "--first") {
|
||||
shift;
|
||||
$first = 1;
|
||||
}
|
||||
if ($ARGV[0] eq "--last") {
|
||||
if (@ARGV > 0 && $ARGV[0] eq "--last") {
|
||||
shift;
|
||||
$last = 1;
|
||||
}
|
||||
|
@ -44,7 +45,8 @@ if ($ARGV[0] eq "--last") {
|
|||
if(@ARGV < 2 ) {
|
||||
die "Usage: subset_scp.pl [--quiet][--first|--last] N in.scp\n" .
|
||||
" --quiet causes it to not die if N < num lines in scp.\n" .
|
||||
" --first and --last make it equivalent to head or tail.\n";
|
||||
" --first and --last make it equivalent to head or tail.\n" .
|
||||
"See also: filter_scp.pl\n";
|
||||
}
|
||||
|
||||
$N = shift @ARGV;
|
||||
|
|
|
@ -73,6 +73,11 @@ void AmNnet::Init(const Nnet &nnet) {
|
|||
}
|
||||
}
|
||||
|
||||
void AmNnet::ResizeOutputLayer(int32 new_num_pdfs) {
|
||||
nnet_.ResizeOutputLayer(new_num_pdfs);
|
||||
priors_.Resize(new_num_pdfs);
|
||||
priors_.Set(1.0 / new_num_pdfs);
|
||||
}
|
||||
|
||||
} // namespace nnet2
|
||||
} // namespace kaldi
|
||||
|
|
|
@ -68,6 +68,10 @@ class AmNnet {
|
|||
|
||||
std::string Info() const;
|
||||
|
||||
/// This function is used when doing transfer learning to a new system.
|
||||
/// It will set the priors to be all the same.
|
||||
void ResizeOutputLayer(int32 new_num_pdfs);
|
||||
|
||||
private:
|
||||
const AmNnet &operator = (const AmNnet &other); // Disallow.
|
||||
Nnet nnet_;
|
||||
|
|
|
@ -1021,6 +1021,13 @@ void AffineComponent::Scale(BaseFloat scale) {
|
|||
bias_params_.Scale(scale);
|
||||
}
|
||||
|
||||
// virtual
|
||||
void AffineComponent::Resize(int32 input_dim, int32 output_dim) {
|
||||
KALDI_ASSERT(input_dim > 0 && output_dim > 0);
|
||||
bias_params_.Resize(output_dim);
|
||||
linear_params_.Resize(output_dim, input_dim);
|
||||
}
|
||||
|
||||
void AffineComponent::Add(BaseFloat alpha, const UpdatableComponent &other_in) {
|
||||
const AffineComponent *other =
|
||||
dynamic_cast<const AffineComponent*>(&other_in);
|
||||
|
@ -1590,6 +1597,22 @@ void AffineComponentPreconditioned::Update(
|
|||
in_value_precon_part, kNoTrans, 1.0);
|
||||
}
|
||||
|
||||
|
||||
// virtual
|
||||
void AffineComponentPreconditionedOnline::Resize(
|
||||
int32 input_dim, int32 output_dim) {
|
||||
KALDI_ASSERT(input_dim > 1 && output_dim > 1);
|
||||
if (rank_in_ >= input_dim) rank_in_ = input_dim - 1;
|
||||
if (rank_out_ >= output_dim) rank_out_ = output_dim - 1;
|
||||
bias_params_.Resize(output_dim);
|
||||
linear_params_.Resize(output_dim, input_dim);
|
||||
OnlinePreconditioner temp;
|
||||
preconditioner_in_ = temp;
|
||||
preconditioner_out_ = temp;
|
||||
SetPreconditionerConfigs();
|
||||
}
|
||||
|
||||
|
||||
void AffineComponentPreconditionedOnline::Read(std::istream &is, bool binary) {
|
||||
std::ostringstream ostr_beg, ostr_end;
|
||||
ostr_beg << "<" << Type() << ">";
|
||||
|
|
|
@ -740,6 +740,10 @@ class AffineComponent: public UpdatableComponent {
|
|||
void Init(BaseFloat learning_rate,
|
||||
std::string matrix_filename);
|
||||
|
||||
// This function resizes the dimensions of the component, setting the
|
||||
// parameters to zero, while leaving any other configuration values the same.
|
||||
virtual void Resize(int32 input_dim, int32 output_dim);
|
||||
|
||||
// The following functions are used for collapsing multiple layers
|
||||
// together. They return a pointer to a new Component equivalent to
|
||||
// the sequence of two components. We haven't implemented this for
|
||||
|
@ -895,6 +899,8 @@ class AffineComponentPreconditionedOnline: public AffineComponent {
|
|||
BaseFloat alpha, BaseFloat max_change_per_sample,
|
||||
std::string matrix_filename);
|
||||
|
||||
virtual void Resize(int32 input_dim, int32 output_dim);
|
||||
|
||||
// This constructor is used when converting neural networks partway through
|
||||
// training, from AffineComponent or AffineComponentPreconditioned to
|
||||
// AffineComponentPreconditionedOnline.
|
||||
|
|
|
@ -339,68 +339,34 @@ void Nnet::SetLearningRates(BaseFloat learning_rate) {
|
|||
KALDI_LOG << "Set learning rates to " << learning_rate;
|
||||
}
|
||||
|
||||
|
||||
void Nnet::AdjustLearningRates(
|
||||
const VectorBase<BaseFloat> &old_model_old_gradient,
|
||||
const VectorBase<BaseFloat> &new_model_old_gradient,
|
||||
const VectorBase<BaseFloat> &old_model_new_gradient,
|
||||
const VectorBase<BaseFloat> &new_model_new_gradient,
|
||||
BaseFloat measure_at, // where to measure gradient,
|
||||
// on line between old and new model;
|
||||
// 0.5 < measure_at <= 1.0.
|
||||
BaseFloat ratio, // e.g. 1.1; ratio by which we change learning rate.
|
||||
BaseFloat max_learning_rate) {
|
||||
std::vector<BaseFloat> new_lrates;
|
||||
KALDI_ASSERT(old_model_old_gradient.Dim() == NumUpdatableComponents() &&
|
||||
new_model_old_gradient.Dim() == NumUpdatableComponents() &&
|
||||
old_model_new_gradient.Dim() == NumUpdatableComponents() &&
|
||||
new_model_new_gradient.Dim() == NumUpdatableComponents());
|
||||
KALDI_ASSERT(ratio >= 1.0);
|
||||
KALDI_ASSERT(measure_at > 0.5 && measure_at <= 1.0);
|
||||
std::string changes_str;
|
||||
std::string dotprod_str;
|
||||
BaseFloat inv_ratio = 1.0 / ratio;
|
||||
int32 index = 0;
|
||||
for (int32 c = 0; c < NumComponents(); c++) {
|
||||
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(components_[c]);
|
||||
if (uc == NULL) { // Non-updatable component.
|
||||
KALDI_ASSERT(old_model_old_gradient(c) == 0.0);
|
||||
continue;
|
||||
} else {
|
||||
BaseFloat grad_dotprod_at_end =
|
||||
new_model_new_gradient(index) - old_model_new_gradient(index),
|
||||
grad_dotprod_at_start =
|
||||
new_model_old_gradient(index) - old_model_old_gradient(index),
|
||||
grad_dotprod_interp =
|
||||
measure_at * grad_dotprod_at_end +
|
||||
(1.0 - measure_at) * grad_dotprod_at_start;
|
||||
// grad_dotprod_interp will be positive if we
|
||||
// want more of the gradient term
|
||||
// -> faster learning rate for this component
|
||||
|
||||
BaseFloat lrate = uc->LearningRate();
|
||||
lrate *= (grad_dotprod_interp > 0 ? ratio : inv_ratio);
|
||||
changes_str = changes_str +
|
||||
(grad_dotprod_interp > 0 ? " increase" : " decrease");
|
||||
dotprod_str = dotprod_str +
|
||||
(new_model_new_gradient(index) > 0 ? " positive" : " negative");
|
||||
if (lrate > max_learning_rate) lrate = max_learning_rate;
|
||||
|
||||
new_lrates.push_back(lrate);
|
||||
uc->SetLearningRate(lrate);
|
||||
index++;
|
||||
}
|
||||
void Nnet::ResizeOutputLayer(int32 new_num_pdfs) {
|
||||
KALDI_ASSERT(new_num_pdfs > 0);
|
||||
KALDI_ASSERT(NumComponents() > 2);
|
||||
int32 nc = NumComponents();
|
||||
SoftmaxComponent *sc;
|
||||
if ((sc = dynamic_cast<SoftmaxComponent*>(components_[nc - 1])) == NULL)
|
||||
KALDI_ERR << "Expected last component to be SoftmaxComponent.";
|
||||
SumGroupComponent *sgc = dynamic_cast<SumGroupComponent*>(components_[nc - 2]);
|
||||
if (sgc != NULL) {
|
||||
// Remove it. We'll resize things later.
|
||||
delete sgc;
|
||||
components_.erase(components_.begin() + nc - 2,
|
||||
components_.begin() + nc - 1);
|
||||
nc--;
|
||||
}
|
||||
KALDI_ASSERT(index == NumUpdatableComponents());
|
||||
KALDI_VLOG(1) << "Changes to learning rates: " << changes_str;
|
||||
KALDI_VLOG(1) << "Dot product of model with validation gradient is "
|
||||
<< dotprod_str;
|
||||
std::ostringstream lrate_str;
|
||||
for (size_t i = 0; i < new_lrates.size(); i++)
|
||||
lrate_str << new_lrates[i] << ' ';
|
||||
KALDI_VLOG(1) << "Learning rates are " << lrate_str.str();
|
||||
}
|
||||
|
||||
// note: it could be child class of AffineComponent.
|
||||
AffineComponent *ac = dynamic_cast<AffineComponent*>(components_[nc - 2]);
|
||||
if (ac == NULL)
|
||||
KALDI_ERR << "Network doesn't have expected structure (didn't find final "
|
||||
<< "AffineComponent).";
|
||||
|
||||
ac->Resize(ac->InputDim(), new_num_pdfs);
|
||||
// Remove the softmax component, and replace it with a new one
|
||||
delete components_[nc - 1];
|
||||
components_[nc - 1] = new SoftmaxComponent(new_num_pdfs);
|
||||
this->Check();
|
||||
}
|
||||
|
||||
int32 Nnet::NumUpdatableComponents() const {
|
||||
int32 ans = 0;
|
||||
|
|
|
@ -234,21 +234,11 @@ class Nnet {
|
|||
// with things of type NonlinearComponent.
|
||||
|
||||
|
||||
/// [This function is only used in the binary nnet-train.cc which is currently not
|
||||
/// being used]. This is used to separately adjust learning rates of each layer,
|
||||
/// after each "phase" of training. We basically ask (using the validation
|
||||
/// gradient), do we wish we had gone further in this direction? Yes->
|
||||
/// increase learning rate, no -> decrease it. The inputs have dimension
|
||||
/// NumUpdatableComponents().
|
||||
void AdjustLearningRates(
|
||||
const VectorBase<BaseFloat> &old_model_old_gradient,
|
||||
const VectorBase<BaseFloat> &new_model_old_gradient,
|
||||
const VectorBase<BaseFloat> &old_model_new_gradient,
|
||||
const VectorBase<BaseFloat> &new_model_new_gradient,
|
||||
BaseFloat measure_at, // where to measure gradient, on line between old
|
||||
// and new model; 0.5 < measure_at <= 1.0.
|
||||
BaseFloat learning_rate_ratio,
|
||||
BaseFloat max_learning_rate);
|
||||
/// This function is used when doing transfer learning to a new system. It
|
||||
/// resizes the final affine and softmax components. If your system has a
|
||||
/// SumGroupComponent before the final softmax, it will be discarded.
|
||||
void ResizeOutputLayer(int32 new_num_pdfs);
|
||||
|
||||
|
||||
/// Scale all the learning rates in the neural net by this factor.
|
||||
void ScaleLearningRates(BaseFloat factor);
|
||||
|
|
|
@ -27,7 +27,7 @@ BINFILES = nnet-am-info nnet-init \
|
|||
nnet-perturb-egs-fmllr nnet-get-weighted-egs nnet-adjust-priors \
|
||||
cuda-compiled nnet-replace-last-layers nnet-am-switch-preconditioning \
|
||||
nnet-train-simple-perturbed nnet-train-parallel-perturbed \
|
||||
nnet1-to-raw-nnet raw-nnet-copy nnet-relabel-egs
|
||||
nnet1-to-raw-nnet raw-nnet-copy nnet-relabel-egs nnet-am-reinitialize
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -23,6 +23,36 @@
|
|||
#include "nnet2/combine-nnet-a.h"
|
||||
#include "nnet2/am-nnet.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
void GetWeights(const std::string &weights_str,
|
||||
int32 num_inputs,
|
||||
vector<BaseFloat> *weights) {
|
||||
KALDI_ASSERT(num_inputs >= 1);
|
||||
if (!weights_str.empty()) {
|
||||
SplitStringToFloats(weights_str, ":", true, weights);
|
||||
if (weights->size() != num_inputs) {
|
||||
KALDI_ERR << "--weights option must be a colon-separated list "
|
||||
<< "with " << num_inputs << " elements, got: "
|
||||
<< weights_str;
|
||||
}
|
||||
} else {
|
||||
for (int32 i = 0; i < num_inputs; i++)
|
||||
weights->push_back(1.0 / num_inputs);
|
||||
}
|
||||
// normalize the weights to sum to one.
|
||||
float weight_sum = 0.0;
|
||||
for (int32 i = 0; i < num_inputs; i++)
|
||||
weight_sum += (*weights)[i];
|
||||
for (int32 i = 0; i < num_inputs; i++)
|
||||
(*weights)[i] = (*weights)[i] / weight_sum;
|
||||
if (fabs(weight_sum - 1.0) > 0.01) {
|
||||
KALDI_WARN << "Normalizing weights to sum to one, sum was " << weight_sum;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
|
@ -32,60 +62,109 @@ int main(int argc, char *argv[]) {
|
|||
typedef kaldi::int64 int64;
|
||||
|
||||
const char *usage =
|
||||
"This program average (or sums, if --sum=true) the parameters over a number of neural nets.\n"
|
||||
"This program averages (or sums, if --sum=true) the parameters over a\n"
|
||||
"number of neural nets. If you supply the option --skip-last-layer=true,\n"
|
||||
"the parameters of the last updatable layer are copied from <model1> instead\n"
|
||||
"of being averaged (useful in multi-language scenarios).\n"
|
||||
"The --weights option can be used to weight each model differently.\n"
|
||||
"\n"
|
||||
"Usage: nnet-am-average [options] <model1> <model2> ... <modelN> <model-out>\n"
|
||||
"\n"
|
||||
"e.g.:\n"
|
||||
" nnet-am-average 1.1.nnet 1.2.nnet 1.3.nnet 2.nnet\n";
|
||||
|
||||
|
||||
bool binary_write = true;
|
||||
bool sum = false;
|
||||
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("sum", &sum, "If true, sums instead of averages.");
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
string weights_str;
|
||||
bool skip_last_layer = false;
|
||||
po.Register("weights", &weights_str, "Colon-separated list of weights, one "
|
||||
"for each input model. These will be normalized to sum to one.");
|
||||
po.Register("skip-last-layer", &skip_last_layer, "If true, averaging of "
|
||||
"the last updatable layer is skipped (result comes from model1)");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
||||
if (po.NumArgs() < 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
std::string
|
||||
nnet1_rxfilename = po.GetArg(1),
|
||||
nnet_wxfilename = po.GetArg(po.NumArgs());
|
||||
|
||||
TransitionModel trans_model;
|
||||
|
||||
TransitionModel trans_model1;
|
||||
AmNnet am_nnet1;
|
||||
{
|
||||
bool binary_read;
|
||||
Input ki(nnet1_rxfilename, &binary_read);
|
||||
trans_model.Read(ki.Stream(), binary_read);
|
||||
trans_model1.Read(ki.Stream(), binary_read);
|
||||
am_nnet1.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
int32 num_inputs = po.NumArgs() - 1;
|
||||
BaseFloat scale = (sum ? 1.0 : 1.0 / num_inputs);
|
||||
|
||||
am_nnet1.GetNnet().Scale(scale);
|
||||
vector<BaseFloat> model_weights;
|
||||
GetWeights(weights_str, num_inputs, &model_weights);
|
||||
|
||||
int32 c_begin = 0,
|
||||
c_end = (skip_last_layer ?
|
||||
am_nnet1.GetNnet().LastUpdatableComponent() :
|
||||
am_nnet1.GetNnet().NumComponents());
|
||||
KALDI_ASSERT(c_end != -1 && "Network has no updatable components.");
|
||||
|
||||
// scale the components - except the last layer, if skip_last_layer == true.
|
||||
for (int32 c = c_begin; c < c_end; c++) {
|
||||
UpdatableComponent *uc =
|
||||
dynamic_cast<UpdatableComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
|
||||
if (uc != NULL) uc->Scale(model_weights[0]);
|
||||
NonlinearComponent *nc =
|
||||
dynamic_cast<NonlinearComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
|
||||
if (nc != NULL)
|
||||
nc->Scale(model_weights[0]);
|
||||
}
|
||||
|
||||
for (int32 i = 2; i <= num_inputs; i++) {
|
||||
bool binary_read;
|
||||
Input ki(po.GetArg(i), &binary_read);
|
||||
TransitionModel trans_model;
|
||||
trans_model.Read(ki.Stream(), binary_read);
|
||||
AmNnet am_nnet;
|
||||
am_nnet.Read(ki.Stream(), binary_read);
|
||||
am_nnet1.GetNnet().AddNnet(scale, am_nnet.GetNnet());
|
||||
|
||||
for (int32 c = c_begin; c < c_end; c++) {
|
||||
UpdatableComponent *uc_average =
|
||||
dynamic_cast<UpdatableComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
|
||||
const UpdatableComponent *uc_this =
|
||||
dynamic_cast<const UpdatableComponent*>(&(am_nnet.GetNnet().GetComponent(c)));
|
||||
if (uc_average != NULL) {
|
||||
KALDI_ASSERT(uc_this != NULL &&
|
||||
"Networks must have the same structure.");
|
||||
uc_average->Add(model_weights[i-1], *uc_this);
|
||||
}
|
||||
|
||||
NonlinearComponent *nc_average =
|
||||
dynamic_cast<NonlinearComponent*>(&(am_nnet1.GetNnet().GetComponent(c)));
|
||||
const NonlinearComponent *nc_this =
|
||||
dynamic_cast<const NonlinearComponent*>(&(am_nnet.GetNnet().GetComponent(c)));
|
||||
if (nc_average != NULL) {
|
||||
KALDI_ASSERT(nc_this != NULL &&
|
||||
"Networks must have the same structure.");
|
||||
nc_average->Add(model_weights[i-1], *nc_this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
{
|
||||
Output ko(nnet_wxfilename, binary_write);
|
||||
trans_model.Write(ko.Stream(), binary_write);
|
||||
trans_model1.Write(ko.Stream(), binary_write);
|
||||
am_nnet1.Write(ko.Stream(), binary_write);
|
||||
}
|
||||
|
||||
|
||||
KALDI_LOG << "Averaged parameters of " << num_inputs
|
||||
<< " neural nets, and wrote to " << nnet_wxfilename;
|
||||
return 0; // it will throw an exception if there are any problems.
|
||||
|
|
|
@ -36,6 +36,14 @@ int main(int argc, char *argv[]) {
|
|||
" nnet-am-info 1.nnet\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool print_learning_rates = false;
|
||||
|
||||
po.Register("print-learning-rates", &print_learning_rates,
|
||||
"If true, instead of printing the normal info, print a "
|
||||
"colon-separated list of the learning rates for each updatable "
|
||||
"layer, suitable to give to nnet-am-copy as the argument to"
|
||||
"--learning-rates.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@ -55,9 +63,19 @@ int main(int argc, char *argv[]) {
|
|||
am_nnet.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
std::cout << am_nnet.Info();
|
||||
if (print_learning_rates) {
|
||||
Vector<BaseFloat> learning_rates(am_nnet.GetNnet().NumUpdatableComponents());
|
||||
am_nnet.GetNnet().GetLearningRates(&learning_rates);
|
||||
int32 nc = learning_rates.Dim();
|
||||
for (int32 i = 0; i < nc; i++)
|
||||
std::cout << learning_rates(i) << (i < nc - 1 ? ":" : "");
|
||||
std::cout << std::endl;
|
||||
KALDI_LOG << "Printed learning-rate info for " << nnet_rxfilename;
|
||||
} else {
|
||||
std::cout << am_nnet.Info();
|
||||
KALDI_LOG << "Printed info about " << nnet_rxfilename;
|
||||
}
|
||||
|
||||
KALDI_LOG << "Printed info about " << nnet_rxfilename;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
// nnet2bin/nnet-am-reinitialize.cc
|
||||
|
||||
// Copyright 2014 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "nnet2/am-nnet.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "tree/context-dep.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
using namespace kaldi::nnet2;
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"This program can used when transferring a neural net from one language\n"
|
||||
"to another (or one tree to another). It takes a neural net and a\n"
|
||||
"transition model from a different neural net, resizes the last layer\n"
|
||||
"to match the new transition model, zeroes it, and writes out the new,\n"
|
||||
"resized .mdl file. If the original model had been 'mixed-up', the associated\n"
|
||||
"SumGroupComponent will be removed.\n"
|
||||
"\n"
|
||||
"Usage: nnet-am-reinitialize [options] <nnet-in> <new-transition-model> <nnet-out>\n"
|
||||
"e.g.:\n"
|
||||
" nnet-am-reinitialize 1.mdl exp/tri6/final.mdl 2.mdl\n";
|
||||
|
||||
bool binary_write = true;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string nnet_rxfilename = po.GetArg(1),
|
||||
transition_model_rxfilename = po.GetArg(2),
|
||||
nnet_wxfilename = po.GetArg(3);
|
||||
|
||||
TransitionModel orig_trans_model;
|
||||
AmNnet am_nnet;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(nnet_rxfilename, &binary);
|
||||
orig_trans_model.Read(ki.Stream(), binary);
|
||||
am_nnet.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
TransitionModel new_trans_model;
|
||||
ReadKaldiObject(transition_model_rxfilename, &new_trans_model);
|
||||
|
||||
am_nnet.ResizeOutputLayer(new_trans_model.NumPdfs());
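// This resizes the final affine and softmax components to the new number of
// pdfs (discarding any SumGroupComponent left over from mixing-up) and resets
// the priors to be uniform; see AmNnet::ResizeOutputLayer and
// Nnet::ResizeOutputLayer.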
|
||||
|
||||
{
|
||||
Output ko(nnet_wxfilename, binary_write);
|
||||
new_trans_model.Write(ko.Stream(), binary_write);
|
||||
am_nnet.Write(ko.Stream(), binary_write);
|
||||
}
|
||||
KALDI_LOG << "Resized neural net from " << nnet_rxfilename
|
||||
<< " to " << am_nnet.NumPdfs()
|
||||
<< " pdfs, and wrote to " << nnet_wxfilename;
|
||||
return 0;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -33,10 +33,10 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
const char *usage =
|
||||
"Get examples of data for discriminative neural network training;\n"
|
||||
"each one corresponds to part of a file, of variable (and configurable\n"
|
||||
"each one corresponds to part of a file, of variable (and configurable)\n"
|
||||
"length.\n"
|
||||
"\n"
|
||||
"Usage: nnet-get-egs-discriminative [options] <model|transition-model> "
|
||||
"Usage: nnet-get-egs-discriminative [options] <model> "
|
||||
"<features-rspecifier> <ali-rspecifier> <den-lat-rspecifier> "
|
||||
"<training-examples-out>\n"
|
||||
"\n"
|
||||
|
|