Mirror of https://github.com/mozilla/kaldi.git
A lot of changes: script changes relating to neural nets (more efficient I/O, slightly better WERs); various new functionality for nnets, and improvements to some feature-related binaries' interfaces.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1976 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Parent
c1944a7209
Commit
f699fd2be1
@@ -1,11 +1,5 @@
#!/bin/bash

# CAUTION: I changed e.g. 1.trans to trans.1 in the scripts. If you ran it
# part-way through prior to this, to convert to the new naming
# convention, run:
# for x in `find . -name '*.trans'`; do mv $x `echo $x | perl -ane 's/(\d+)\.trans/trans.$1/;print;'`; done
# but be careful as this will not follow soft links.

. cmd.sh

# call the next line with the directory where the RM data is
@@ -1,5 +1,2 @@
beam=11.0 # beam for decoding. Was 13.0 in the scripts.
first_beam=8.0 # beam for 1st-pass decoding in SAT.
@@ -21,8 +21,8 @@
)

# Here are the results (copied from RESULTS file)
#exp/nnet6a/decode_train_dev/wer_10:%WER 24.87 [ 12053 / 48460, 1590 ins, 3017 del, 7446 sub ]
#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.1 16.0 6.9 2.7 25.6 62.6 |
#exp/nnet6a/decode_train_dev/wer_11:%WER 24.30 [ 11774 / 48460, 1619 ins, 2877 del, 7278 sub ]
#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.8 16.0 6.3 3.0 25.3 62.6 |


# Here are some older results when the system had 2k not 4k leaves and ran from a worse SAT
@@ -191,6 +191,7 @@ exp/tri4a_dnn/decode_bd_tgpr_eval92/wer_10:%WER 4.00 [ 226 / 5643, 34 ins, 12 de
# and for eval92 is 3.79, the same system. (On this setup, discriminative training helped a lot,
# which seems to be the reason we can't beat the SGMM+MMI numbers here.)

exp/nnet5c1/decode_bd_tgpr_dev93/wer_10:%WER 7.48 [ 616 / 8234, 73 ins, 98 del, 445 sub ]
exp/nnet5c1/decode_bd_tgpr_eval92/wer_11:%WER 4.41 [ 249 / 5643, 29 ins, 19 del, 201 sub ]
# Note: my 4.41% result is worse than Karel's 4.00%.

exp/nnet5c1/decode_bd_tgpr_dev93/wer_14:%WER 7.32 [ 603 / 8234, 61 ins, 101 del, 441 sub ]
exp/nnet5c1/decode_bd_tgpr_eval92/wer_14:%WER 4.39 [ 248 / 5643, 32 ins, 17 del, 199 sub ]
# Note: my 4.39% result is worse than Karel's 4.00%.
@@ -47,7 +47,7 @@ cat links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \
grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si84.flist

nl=`cat train_si84.flist | wc -l`
[ "$nl" -eq 7138 ] || echo "Warning: expected 37416 lines in train_si84.flist, got $nl"
[ "$nl" -eq 7138 ] || echo "Warning: expected 7138 lines in train_si84.flist, got $nl"

# This version for SI-284
cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \
@@ -281,7 +281,6 @@ steps/train_quick.sh --cmd "$train_cmd" \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/tri4b/decode_bd_tgpr_eval92 || exit 1;
) &


# Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
# all the data). Use 30 jobs.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
@@ -0,0 +1,68 @@
#!/bin/bash

# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This script appends the features in two data directories.

# To be run from .. (one directory up from here)
# see ../run.sh for example
# This config creates MFCC features with half the window size and window shift,
# and splices and sub-samples them. We'll use another script append_feats.sh
# to combine (append) the data directories.

# Begin configuration section.
cmd=run.pl
nj=4
# End configuration section.

echo "$0 $@" # Print the command line for logging

if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;

if [ $# != 5 ]; then
  echo "usage: append_feats.sh [options] <src-data-dir1> <src-data-dir2> <dest-data-dir> <log-dir> <path-to-storage-dir>";
  echo "options: "
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  exit 1;
fi

data_src1=$1
data_src2=$2
data=$3
logdir=$4
mfccdir=$5

utils/split_data.sh $data_src1 $nj || exit 1;
utils/split_data.sh $data_src2 $nj || exit 1;

mkdir -p $mfccdir $logdir

rm -rf $data
mkdir -p `dirname $data` # Make sure directory one level up exists.
cp -r $data_src1 $data # so we get the other files, such as utt2spk.
rm $data/cmvn.scp
rm -r $data/split* 2>/dev/null

# use "name" as part of name of the archive.
name=`basename $data`

$cmd JOB=1:$nj $logdir/append.JOB.log \
  append-feats --truncate-frames=true \
  scp:$data_src1/split$nj/JOB/feats.scp scp:$data_src2/split$nj/JOB/feats.scp \
  ark,scp:$mfccdir/appended_$name.JOB.ark,$mfccdir/appended_$name.JOB.scp || exit 1;

# concatenate the .scp files together.
for ((n=1; n<=nj; n++)); do
  cat $mfccdir/appended_$name.$n.scp || exit 1;
done > $data/feats.scp

nf=`cat $data/feats.scp | wc -l`
nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
  echo "It seems not all of the feature files were created successfully ($nf != $nu);"
  echo "consider using utils/fix_data_dir.sh $data"
fi

echo "Succeeded creating MFCC features for $name"
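For reference, an invocation of the new script might look like the sketch below; the data-directory names and the steps/ path are placeholders rather than anything taken from this commit, and the five positional arguments follow the usage string printed by the script.

# Hypothetical example; arguments are <src-data-dir1> <src-data-dir2> <dest-data-dir> <log-dir> <path-to-storage-dir>
steps/append_feats.sh --cmd "$train_cmd" --nj 8 \
  data/train_mfcc data/train_mfcc_half data/train_appended \
  exp/append_feats/log mfcc_appended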
@@ -18,6 +18,10 @@ max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes-- you have to multiply
# by something like 5 or 10 to get real bytes (not sure why so large)
# End configuration section.
num_threads=1 # Number of threads used in nnet-logprob computation. If you set
# this to a different value, make sure to also set the appropriate
# queue options. If you set this too high it won't use all the
# threads as most of the time will be taken in the decoder.

echo "$0 $@" # Print the command line for logging
@@ -104,9 +108,10 @@ fi

if [ $sub_split -eq 1 ]; then
$cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats" ark:- \| \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then
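To spell out the shape of the new two-stage decoding pipeline (log-prob computation piped into a mapped decoder), here is a sketch with placeholder paths and values; only the binary names, options and argument order come from the lines above.

# Sketch only; all paths and numeric values below are placeholders.
nnet-logprob-parallel --num-threads=4 exp/nnet/final.mdl "$feats" ark:- | \
  latgen-faster-mapped --beam=13.0 --lattice-beam=7.0 --acoustic-scale=0.1 \
    --max-mem=20000000 --max-active=7000 --word-symbol-table=data/lang/words.txt \
    exp/nnet/final.mdl exp/nnet_denlats/dengraph/HCLG.fst ark:- "ark:|gzip -c >lat.1.gz"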
@@ -120,9 +125,10 @@ else
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
$cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats_subset" ark:- \| \
latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
@@ -14,18 +14,24 @@ num_iters_final=10 # Number of final iterations to give to the
# optimization over the validation set.
initial_learning_rate=0.02 # for RM; or 0.01 is suitable for Swbd.
final_learning_rate=0.004 # for RM; or 0.001 is suitable for Swbd.
num_valid_utts=300 # held-out utterances, used only for diagnostics.
num_valid_frames_shrink=2000 # a subset of the frames in "valid_utts", used only
# for estimating shrinkage parameters and for
# objective-function reporting.
num_utts_subset=300 # number of utterances in validation and training
# subsets used for shrinkage and diagnostics
num_valid_frames_shrink=0 # number of validation frames in the subset
# used for shrinking
num_train_frames_shrink=2000 # number of training frames in the subset used
# for shrinking (by default we use all training
# frames for this.)
shrink_interval=3 # shrink every $shrink_interval iters,
# except at the start of training when we do it every iter.
num_valid_frames_combine=10000 # combination weights at the very end.
num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
num_train_frames_combine=10000 # # train frames for the above.
num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
minibatch_size=128 # by default use a smallish minibatch size for neural net training; this controls instability
# which would otherwise be a problem with multi-threaded update. Note:
# it also interacts with the "preconditioned" update, so it's not completely cost free.
samples_per_iteration=400000 # each iteration of training, see this many samples
# per job.
samples_per_iter=400000 # each iteration of training, see this many samples
# per job. This is just a guideline; it will pick a number
# that divides the number of samples in the entire data.
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
@@ -37,13 +43,13 @@ add_layers_period=2 # by default, add new layers every 2 iterations.
num_hidden_layers=2
initial_num_hidden_layers=1 # we'll add the rest one by one.
num_parameters=2000000 # 2 million parameters by default.
stage=-7
stage=-9
realign_iters=""
beam=10 # for realignment.
retry_beam=40
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
nnet_config_opts=
splice_width=4 # meaning +- 4 frames on each side for second LDA
lda_dim=250
@@ -54,7 +60,11 @@ shrink=true
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
# specified.)
num_threads=16
mkl_num_threads=1

valid_is_heldout=false # For some reason, holding out the validation set from the training set
# seems to hurt, so by default we don't do it (i.e. it's included in training)
random_copy=false
cleanup=true
# End configuration section.

echo "$0 $@" # Print the command line for logging
@@ -72,7 +82,7 @@ if [ $# != 4 ]; then
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|15> # Number of epochs of main training"
echo " # while reducing learning rate (determines #iterations, together"
echo " # with --samples-per-iteration and --num-jobs-nnet)"
echo " # with --samples-per-iter and --num-jobs-nnet)"
echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training"
echo " # after learning rate fully reduced"
echo " --initial-learning-rate <initial-learning-rate|0.02> # Learning rate at start of training, e.g. 0.02 for small"
@@ -95,21 +105,27 @@ if [ $# != 4 ]; then
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads."
echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
echo " # data. (prevents stressing the disk). "
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
echo " --samples-per-iteration <#samples|400000> # Number of samples of data to process per iteration, per"
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
echo " --stage <stage|-7> # Used to run a partially-completed training process from somewhere in"
echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
echo " # (the validation subset is held out from training)"
echo " --num-valid-frames-shrink <#frames|2000> # Number of frames from the validation set used for shrinking"
echo " --num-train-frames-shrink <#frames|0> # Number of frames from the training set used for shrinking"
echo " # (by default it's included in training, which for some reason helps)."
echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
echo " # very end."
echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."


exit 1;
fi
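As a quick sanity check on the defaults quoted earlier in this script (minibatch_size=128, num_threads=16), their product sits right at the ~2k ceiling that the --minibatch-size help text warns about; the snippet below is purely illustrative.

# Illustrative check of the defaults shown above:
minibatch_size=128; num_threads=16
echo $[$minibatch_size * $num_threads]   # 2048, right at the suggested ~2k limit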
@@ -144,8 +160,11 @@ cp $alidir/tree $dir


# Get list of validation utterances.
awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_valid_utts \
awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
> $dir/valid_uttlist || exit 1;
awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;


## Set up features. Note: these are different from the normal features
## because we have one rspecifier that has the features for the entire
@ -154,33 +173,49 @@ if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
|
|||
echo "$0: feature type is $feat_type"
|
||||
|
||||
case $feat_type in
|
||||
delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
|
||||
split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
|
||||
delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
|
||||
;;
|
||||
lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
|
||||
cp $alidir/final.mat $dir
|
||||
;;
|
||||
*) echo "$0: invalid feature type $feat_type" && exit 1;
|
||||
esac
|
||||
if [ -f $alidir/trans.1 ]; then
|
||||
echo "$0: using transforms from $alidir"
|
||||
feats="$feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
|
||||
split_feats="$split_feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
|
||||
feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
|
||||
valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
|
||||
train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
|
||||
fi
|
||||
|
||||
if [ $stage -le -9 ]; then
|
||||
echo "$0: working out number of frames of training data"
|
||||
num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;
|
||||
echo $num_frames > $dir/num_frames
|
||||
else
|
||||
num_frames=`cat $dir/num_frames` || exit 1;
|
||||
fi
|
||||
|
||||
# Working out number of iterations per epoch.
|
||||
iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1;
|
||||
[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1
|
||||
samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]
|
||||
echo "Every epoch, splitting the data up into $iters_per_epoch iterations,"
|
||||
echo "giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)."
|
||||
|
||||
|
||||
## Do LDA on top of whatever features we already have; store the matrix which
|
||||
## we'll put into the neural network as a constant.
|
||||
|
||||
if [ $stage -le -7 ]; then
|
||||
echo "Accumulating LDA statistics."
|
||||
if [ $stage -le -8 ]; then
|
||||
echo "$0: Accumulating LDA statistics."
|
||||
$cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
|
||||
ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
|
||||
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \
|
||||
acc-lda --rand-prune=$randprune $alidir/final.mdl "$split_feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
|
||||
acc-lda --rand-prune=$randprune $alidir/final.mdl "$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
|
||||
$dir/lda.JOB.acc || exit 1;
|
||||
est-lda --dim=$lda_dim $dir/lda.mat $dir/lda.*.acc \
|
||||
2>$dir/log/lda_est.log || exit 1;
|
||||
|
@ -195,7 +230,7 @@ if [ $initial_num_hidden_layers -gt $num_hidden_layers ]; then
|
|||
fi
|
||||
|
||||
|
||||
if [ $stage -le -6 ]; then
|
||||
if [ $stage -le -7 ]; then
|
||||
echo "$0: initializing neural net";
|
||||
# to hidden.config it will write the part of the config corresponding to a
|
||||
# single hidden layer; we need this to add new layers.
|
||||
|
@ -219,14 +254,14 @@ if [ $stage -le -6 ]; then
|
|||
$dir/0.mdl || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le -5 ]; then
|
||||
if [ $stage -le -6 ]; then
|
||||
echo "Training transition probabilities and setting priors"
|
||||
$cmd $dir/log/train_trans.log \
|
||||
nnet-train-transitions $dir/0.mdl "ark:gunzip -c $alidir/ali.*.gz|" $dir/0.mdl \
|
||||
|| exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le -4 ]; then
|
||||
if [ $stage -le -5 ]; then
|
||||
echo "Compiling graphs of transcripts"
|
||||
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
|
||||
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
|
||||
|
@ -239,118 +274,113 @@ cp $alidir/ali.*.gz $dir
|
|||
|
||||
nnet_context_opts="--left-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w left-context | awk '{print $2}'` --right-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w right-context | awk '{print $2}'`" || exit 1;
|
||||
|
||||
if [ $stage -le -3 ]; then
|
||||
echo "Getting validation examples."
|
||||
$cmd $dir/log/create_valid_subset_shrink.log \
|
||||
if [ $stage -le -4 ]; then
|
||||
echo "Getting validation and training subset examples."
|
||||
rm $dir/.error 2>/dev/null
|
||||
$cmd $dir/log/create_valid_subset.log \
|
||||
nnet-get-egs $nnet_context_opts "$valid_feats" \
|
||||
"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/valid_all.egs" || exit 1;
|
||||
echo "Getting subsets of validation examples for shrinking and combination."
|
||||
"ark:$dir/valid_all.egs" || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset.log \
|
||||
nnet-get-egs $nnet_context_opts "$train_subset_feats" \
|
||||
"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
|
||||
"ark:$dir/train_subset_all.egs" || touch $dir/.error &
|
||||
wait;
|
||||
[ -f $dir/.error ] && exit 1;
|
||||
echo "Getting subsets of validation examples for shrinking, diagnostics and combination."
|
||||
$cmd $dir/log/create_valid_subset_shrink.log \
|
||||
nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs ark:$dir/valid_shrink.egs &
|
||||
nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_shrink.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_valid_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs ark:$dir/valid_combine.egs &
|
||||
nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_valid_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
|
||||
ark:$dir/valid_diagnostic.egs || touch $dir/.error &
|
||||
|
||||
$cmd $dir/log/create_train_subset_shrink.log \
|
||||
nnet-subset-egs --n=$num_train_frames_shrink ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_shrink.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset_combine.log \
|
||||
nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_combine.egs || touch $dir/.error &
|
||||
$cmd $dir/log/create_train_subset_diagnostic.log \
|
||||
nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
|
||||
ark:$dir/train_diagnostic.egs || touch $dir/.error &
|
||||
wait
|
||||
[ ! -s $dir/valid_shrink.egs ] && echo "No validation examples for shrinking" && exit 1;
|
||||
[ ! -s $dir/valid_combine.egs ] && echo "No validation examples for combination" && exit 1;
|
||||
rm $dir/valid_all.egs
|
||||
cat $dir/valid_shrink.egs $dir/train_shrink.egs > $dir/shrink.egs
|
||||
cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
|
||||
|
||||
for f in $dir/{shrink,combine,train_diagnostic,valid_diagnostic}.egs; do
|
||||
[ ! -s $f ] && echo "No examples in file $f" && exit 1;
|
||||
done
|
||||
rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_{shrink,combine}.egs
|
||||
fi
|
||||
|
||||
if [ $stage -le -2 ]; then
|
||||
if [ $stage -le -3 ]; then
|
||||
mkdir -p $dir/egs
|
||||
mkdir -p $dir/temp
|
||||
echo "Creating training examples";
|
||||
# in $dir/egs, create $num_jobs_nnet separate files with training examples,
|
||||
# with randomly shuffled order. We shuffle the order of examples in each
|
||||
# file. Then on each iteration, for each training process, we'll take a
|
||||
# random subset of blocks of examples within that process's file.
|
||||
# We take them in blocks, because it avoids the overhead of fseek() while
|
||||
# creating the examples.
|
||||
# in $dir/egs, create $num_jobs_nnet separate files with training examples.
|
||||
# The order is not randomized at this point.
|
||||
|
||||
egs_list=
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
egs_list="$egs_list ark,scp:$dir/egs/egs_orig.$n.ark,$dir/egs/egs_orig.$n.scp"
|
||||
egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark"
|
||||
done
|
||||
echo "Generating training examples on disk"
|
||||
# The examples will go round-robin to egs_list.
|
||||
$cmd $dir/log/get_egs.log \
|
||||
$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
|
||||
nnet-get-egs $nnet_context_opts "$feats" \
|
||||
"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
"ark,cs:gunzip -c $dir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
nnet-copy-egs ark:- $egs_list || exit 1;
|
||||
fi
|
||||
|
||||
if [ $stage -le -2 ]; then
|
||||
# combine all the "egs_orig.JOB.*.scp" (over the $nj splits of the data) and
|
||||
# then split into multiple parts egs.JOB.*.scp for different parts of the
|
||||
# data, 0 .. $iters_per_epoch-1.
|
||||
|
||||
if [ $iters_per_epoch -eq 1 ]; then
|
||||
echo "Since iters-per-epoch == 1, just concatenating the data."
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
|
||||
rm $dir/egs/egs_orig.$n.*.ark || exit 1;
|
||||
done
|
||||
else # We'll have to split it up using nnet-copy-egs.
|
||||
egs_list=
|
||||
for n in `seq 0 $[$iters_per_epoch-1]`; do
|
||||
egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
|
||||
done
|
||||
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
|
||||
nnet-copy-egs --random=$random_copy --srand=JOB \
|
||||
"ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
|
||||
rm $dir/egs/egs_orig.JOB.*.ark || exit 1;
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $stage -le -1 ]; then
|
||||
# Next, shuffle the order of the examples in each of those files.
|
||||
# In order to not use too much memory (in case the size of the files is
|
||||
# huge) we do this by randomizing the order of the .scp file and then
|
||||
# just call nnet-copy-egs. If the file system is willing to store
|
||||
# stuff in memory, it is free to do so. This is not super-optimal in
|
||||
# terms of file system performance but it's simple and it won't fail when
|
||||
# the data gets large.
|
||||
# Each one should not be too large, so we can do this in memory.
|
||||
echo "Shuffling the order of training examples"
|
||||
echo "(in order to avoid stressing the disk, these won't all run at once)."
|
||||
$cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
|
||||
utils/shuffle_list.pl --srand JOB $dir/egs/egs_orig.JOB.scp \| \
|
||||
nnet-copy-egs scp:- ark,scp:$dir/egs/egs.JOB.ark,$dir/egs/egs.JOB.scp \
|
||||
'&&' rm $dir/egs/egs_orig.JOB.ark $dir/egs/egs_orig.JOB.scp
|
||||
smallest_len=`wc -l $dir/egs/egs.*.scp | sort -n -k1 | awk '{print $1}' | head -1`
|
||||
# If the $samples_per_iteration is more than each split of the data,
|
||||
# append to each .scp file the .scp files from the next one or two
|
||||
# splits (or more), so each one is larger...
|
||||
rm $dir/egs/egs.*.scp.orig 2>/dev/null
|
||||
if [ $samples_per_iteration -gt $smallest_len ]; then
|
||||
extra_files=$[($samples_per_iteration-1) / $smallest_len]
|
||||
echo Each part of the data has about $smallest_len lines which is less than the
|
||||
echo samples per iteration $samples_per_iteration, so appending next $extra_files
|
||||
echo files to each scp file
|
||||
for n in `seq $num_jobs_nnet`; do mv $dir/egs/egs.$n.scp $dir/egs/egs.$n.scp.orig; done
|
||||
for n in `seq $num_jobs_nnet`; do
|
||||
for e in `seq 0 $extra_files`; do
|
||||
m=$[(($n + $e - 1)%$num_jobs_nnet)+1]
|
||||
cat $dir/egs/egs.$m.scp.orig
|
||||
done > $dir/egs/egs.$n.scp
|
||||
|
||||
for n in `seq 0 $[$iters_per_epoch-1]`; do
|
||||
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \
|
||||
nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \
|
||||
ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \
|
||||
rm $dir/egs/egs_tmp.JOB.$n.ark || exit 1;
|
||||
done
|
||||
fi
|
||||
fi
|
||||
|
||||
num_egs=`grep wrote $dir/log/get_egs.log | tail -1 | awk '{print $NF}'` || exit 1;
|
||||
! [ $num_egs -gt 0 ] && echo "bad num_egs $num_egs" && exit 1;
|
||||
num_iters_reduce=$[ 1 + (($num_egs * $num_epochs)/($num_jobs_nnet * $samples_per_iteration))]
|
||||
num_iters_extra=$[1 + (($num_egs * $num_epochs_extra)/($num_jobs_nnet * $samples_per_iteration))]
|
||||
num_iters_reduce=$[$num_epochs * $iters_per_epoch];
|
||||
num_iters_extra=$[$num_epochs_extra * $iters_per_epoch];
|
||||
num_iters=$[$num_iters_reduce+$num_iters_extra]
|
||||
|
||||
echo "Will train for $num_epochs + $num_epochs_extra epochs, equalling "
|
||||
echo " $num_iters_reduce + $num_iters_extra = $num_iters iterations, "
|
||||
echo " (while reducing learning rate) + (with constant learning rate)."
|
||||
|
||||
function get_list {
|
||||
# usage: get_list <samples-per-iter> <iter> <input-file> >output
|
||||
#
|
||||
# Outputs an scp file for this job for this iteration. The
|
||||
# output will have <samples-per-iter> lines, and will contain lines from
|
||||
# egs.JOB.scp, possibly with repeats. It will be sorted numerically on its
|
||||
# first field, so the .ark file is accessed in order (we then pipe to
|
||||
# nnet-shuffle-egs to randomize the order). The way we do it is, we imagine
|
||||
# we had concatenated the file $dir/egs/egs.JOB.scp infinite times, and
|
||||
# taken from the concatenated file, the lines
|
||||
# <samples-per-iter> * <iter> ... <samples-per-iter> * (<iter> + 1) - 1,
|
||||
# and then sorted them on the first field (which is a number).
|
||||
# We don't actually implement it this way, we do it a bit more efficiently.
|
||||
# We require that samples-per-iter <= (#lines in input-file).
|
||||
[ $# -ne 3 ] && echo "get_list: bad usage" && exit 1;
|
||||
samples_per_iter=$1
|
||||
my_iter=$2
|
||||
input_file=$3
|
||||
start=$[$my_iter * $samples_per_iter]; # starting-point in concatenated file.
|
||||
input_len=`cat $input_file | wc -l`
|
||||
start=$[$start - $input_len*($start/$input_len)]; # remove whole multiples of input_len
|
||||
# we have to concatenate the input file to itself.
|
||||
cat $input_file $input_file | \
|
||||
head -n $[$start + $samples_per_iter] | tail -n $samples_per_iter | \
|
||||
sort -k2 -k1n
|
||||
}
|
||||
|
||||
|
||||
# up till $last_normal_shrink_iter we will shrink the parameters
|
||||
# in the normal way using the dev set, but after that we will
|
||||
# only re-compute the shrinkage parameters periodically.
|
||||
|
@ -361,22 +391,19 @@ x=0
|
|||
while [ $x -lt $num_iters ]; do
|
||||
if [ $x -ge 0 ] && [ $stage -le $x ]; then
|
||||
|
||||
# Set off a job that does diagnostics, in the background.
|
||||
$cmd $parallel_opts $dir/log/compute_prob.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$dir/valid_shrink.egs &
|
||||
# Set off jobs doing some diagnostics, in the background.
|
||||
$cmd $dir/log/compute_prob_valid.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.$x.log \
|
||||
nnet-compute-prob $dir/$x.mdl ark:$dir/train_diagnostic.egs &
|
||||
|
||||
if echo $realign_iters | grep -w $x >/dev/null; then
|
||||
echo "Realigning data (pass $x)"
|
||||
$cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
|
||||
nnet-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$dir/$x.mdl" \
|
||||
"ark:gunzip -c $dir/fsts.JOB.gz|" "$split_feats" \
|
||||
"ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
|
||||
"ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
|
||||
fi
|
||||
for n in `seq $num_jobs_nnet`; do
|
||||
# the following command gets a subset of the n'th scp file, containing
|
||||
# $samples_per_iteration lines.
|
||||
get_list $samples_per_iteration $x $dir/egs/egs.$n.scp > $dir/temp/egs.$x.$n.scp
|
||||
done
|
||||
|
||||
echo "Training neural net (pass $x)"
|
||||
if [ $x -gt 0 ] && \
|
||||
|
@ -388,9 +415,8 @@ while [ $x -lt $num_iters ]; do
|
|||
fi
|
||||
|
||||
$cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
|
||||
MKL_NUM_THREADS=$mkl_num_threads \
|
||||
nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x \
|
||||
scp:$dir/temp/egs.$x.JOB.scp ark:- \| \
|
||||
ark:$dir/egs/egs.JOB.$[$x%$iters_per_epoch].ark ark:- \| \
|
||||
nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
|
||||
"$mdl" ark:- $dir/$[$x+1].JOB.mdl \
|
||||
|| exit 1;
|
||||
|
@ -410,10 +436,10 @@ while [ $x -lt $num_iters ]; do
|
|||
if [ $x -le $last_normal_shrink_iter ] || [ $[$x % $shrink_interval] -eq 0 ]; then
|
||||
# For earlier iterations (while we've recently been adding layers), or every
|
||||
# $shrink_interval=3 iters, just do shrinking normally.
|
||||
mb=$[($num_valid_frames_shrink+$num_train_frames_shrink+$num_threads-1)/$num_threads]
|
||||
$cmd $parallel_opts $dir/log/shrink.$x.log \
|
||||
MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads --verbose=3 \
|
||||
--minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
|
||||
$dir/$[$x+1].mdl ark:$dir/valid_shrink.egs $dir/$[$x+1].mdl || exit 1;
|
||||
nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
|
||||
$dir/$[$x+1].mdl ark:$dir/shrink.egs $dir/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
fi
|
||||
if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
|
||||
|
@ -423,7 +449,7 @@ while [ $x -lt $num_iters ]; do
|
|||
nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
|
||||
$dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
|
||||
fi
|
||||
rm $nnets_list $dir/temp/egs.$x.*.scp
|
||||
rm $nnets_list
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
@ -435,15 +461,32 @@ nnets_list=
|
|||
for x in `seq $[$num_iters-$num_iters_final+1] $num_iters`; do
|
||||
[ $x -gt $mix_up_iter ] && nnets_list="$nnets_list $dir/$x.mdl"
|
||||
done
|
||||
if [ $stage -le $num_iters ]; then
|
||||
mb=$[($num_valid_frames_combine+$num_train_frames_combine+$num_threads-1)/$num_threads]
|
||||
$cmd $parallel_opts $dir/log/combine.log \
|
||||
MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads \
|
||||
--verbose=3 --minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
|
||||
$nnets_list ark:$dir/valid_combine.egs $dir/final.mdl || exit 1;
|
||||
nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
|
||||
$nnets_list ark:$dir/combine.egs $dir/final.mdl || exit 1;
|
||||
fi
|
||||
|
||||
# Compute the probability of the final, combined model with
|
||||
# the same subset we used for the previous compute_probs, as the
|
||||
# different subsets will lead to different probs.
|
||||
$cmd $parallel_opts $dir/log/compute_prob.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$dir/valid_shrink.egs || exit 1;
|
||||
$cmd $dir/log/compute_prob_valid.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$dir/valid_diagnostic.egs &
|
||||
$cmd $dir/log/compute_prob_train.final.log \
|
||||
nnet-compute-prob $dir/final.mdl ark:$dir/train_diagnostic.egs &
|
||||
|
||||
echo Done
|
||||
|
||||
if $cleanup; then
|
||||
echo Cleaning up data
|
||||
echo Removing training examples
|
||||
rm -r $dir/egs
|
||||
echo Removing most of the models
|
||||
for x in `seq 0 $num_iters`; do
|
||||
if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then
|
||||
# delete all but every 10th model; don't delete the ones which combine to form the final model.
|
||||
rm $dir/$x.mdl
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
|
|
@ -31,16 +31,17 @@ num_jobs_nnet=8 # Number of neural net training jobs to run in parallel.
|
|||
# not the same as the num-jobs (nj) which will be the same as the
|
||||
# alignment and denlat directories.
|
||||
stage=0
|
||||
sub_stage=-2 # this can be used to start from a particular sub-iteration of an
|
||||
sub_stage=-3 # this can be used to start from a particular sub-iteration of an
|
||||
# iteration
|
||||
acwt=0.1
|
||||
boost=0.0 # boosting for BMMI (you can try 0.1).. this is applied per frame.
|
||||
transform_dir= # Note: by default any transforms in $alidir will be used.
|
||||
|
||||
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
|
||||
shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
|
||||
io_opts="-tc 10" # max 10 jobs running at one time (a lot of I/O.)
|
||||
num_threads=16 # number of threads for neural net trainer..
|
||||
mkl_num_threads=1
|
||||
random_copy=false
|
||||
# End configuration section.
|
||||
|
||||
echo "$0 $@" # Print the command line for logging
|
||||
|
@ -71,8 +72,7 @@ if [ $# != 6 ]; then
|
|||
echo " # this, you may want to decrease the batch size."
|
||||
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
|
||||
echo " # use multiple threads."
|
||||
echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
|
||||
echo " # data. (prevents stressing the disk). "
|
||||
echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for any especially I/O intensive jobs"
|
||||
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
|
||||
echo " # should not get too large, e.g. >2k)."
|
||||
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, for each"
|
||||
|
@ -181,34 +181,37 @@ while [ $x -lt $num_epochs ]; do
|
|||
echo "Epoch $x of $num_epochs"
|
||||
|
||||
if [ $stage -le $x ] && $first_iter_of_epoch; then
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le -3 ]; then
|
||||
# First get the per-frame posteriors, by rescoring the lattices; this
|
||||
# process also gives us at the same time the posteriors of each state for
|
||||
# each frame (by default, pruned to 0.01 with a randomized algorithm).
|
||||
# The matrix-logprob stage produces a diagnostic and passes the pseudo-log-like
|
||||
# matrix through unchanged.
|
||||
$cmd JOB=1:$nj $dir/log/post.$z.JOB.log \
|
||||
nnet-logprob2 $dir/$x.1.mdl "$feats" "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
|
||||
# matrix through unchanged. (Note: nnet-logprob2-parallel can use up to
|
||||
# $num_threads threads, but in practice it may be limited by the speed of
|
||||
# the other elements of the pipe.)
|
||||
$cmd $parallel_opts JOB=1:$nj $dir/log/post.$z.JOB.log \
|
||||
nnet-logprob2-parallel --num-threads=$num_threads $dir/$x.1.mdl "$feats" \
|
||||
"ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
|
||||
matrix-logprob ark:- "ark:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $dir/$x.1.mdl ark:- ark:-|" ark:- \| \
|
||||
lattice-rescore-mapped $dir/$x.1.mdl "ark:gunzip -c $denlatdir/lat.JOB.gz|" ark:- ark:- \| \
|
||||
lattice-boost-ali --b=$boost --silence-phones=$silphonelist $dir/$x.1.mdl ark:- "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
|
||||
post-to-pdf-post $dir/$x.1.mdl ark:- "ark:|gzip -c >$dir/post/den_post.$z.JOB.gz" || exit 1;
|
||||
fi
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
|
||||
# run nnet-get-egs for all files, to get the training examples for each frame--
|
||||
# combines the feature and label/posterior information. The posterior information
|
||||
# consists of 2 things: the numerator posteriors from the alignments, the denominator
|
||||
# posteriors from the lattices (times -1), and the smoothing posteriors from the
|
||||
# neural net log-probs (times E).
|
||||
# We copy the examples for each job round-robin to multiple archives, one for each
|
||||
# of 1...$num_jobs_nnet. We write these along with .scp files, for more convenient
|
||||
# and memory-efficient randomization.
|
||||
# of 1...$num_jobs_nnet.
|
||||
egs_out=""
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
egs_out="$egs_out ark,scp:$dir/egs/egs.$z.$n.JOB.ark,$dir/egs/egs.$z.$n.JOB.scp"
|
||||
# indexes are egs_orig.$z.$num_jobs_nnet.$nj
|
||||
egs_out="$egs_out ark:$dir/egs/egs_orig.$z.$n.JOB.ark"
|
||||
done
|
||||
$cmd JOB=1:$nj $dir/log/egs.$z.JOB.log \
|
||||
$cmd JOB=1:$nj $dir/log/get_egs.$z.JOB.log \
|
||||
ali-to-pdf $dir/$x.1.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
|
||||
ali-to-post ark:- ark:- \| \
|
||||
sum-post --scale2=$E ark:- "ark:gunzip -c $dir/post/smooth_post.$z.JOB.gz|" ark:- \| \
|
||||
|
@ -223,23 +226,33 @@ while [ $x -lt $num_epochs ]; do
|
|||
tail -n 50 $dir/log/post.$z.*.log | perl -e '$acwt=shift @ARGV; $acwt>0.0 || die "bad acwt"; while(<STDIN>) { if (m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames += $2; } if (m|matrix-logprob.+Average log-prob per frame is (\S+) over (\S+) frames|) { $tot_num_like += $1*$2; $tot_num_frames += $2; } } if (abs($tot_frames - $tot_num_frames) > 0.01*($tot_frames + $tot_num_frames)) { print STDERR "#frames differ $tot_frames vs $tot_num_frames\n"; } $tot_den_lat_like /= $tot_frames; $tot_num_like /= $tot_num_frames; $objf = $acwt * $tot_num_like - $tot_den_lat_like; print $objf."\n"; ' $acwt > $dir/log/objf.$z.log
|
||||
echo "Objf on EBW iter $z is `cat $dir/log/objf.$z.log`"
|
||||
fi
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
|
||||
echo "Shuffling the order of training examples and splitting them up"
|
||||
echo "(in order to avoid stressing the disk, these won't all run at once)."
|
||||
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
|
||||
echo "Merging training examples across original #jobs ($nj), and "
|
||||
echo "splitting across number of nnet jobs $num_jobs_nnet"
|
||||
egs_out2=""
|
||||
for n in `seq 1 $iters_per_epoch`; do
|
||||
egs_out2="$egs_out2 ark:$dir/egs/egs_split.$z.$n.JOB.ark"
|
||||
# indexes of egs_merged are: egs_merged.$z.$iters_per_epoch.$num_jobs_nnet
|
||||
egs_out2="$egs_out2 ark:$dir/egs/egs_merged.$z.$n.JOB.ark"
|
||||
done
|
||||
# Note: in the following command, JOB goes from 1 to $num_jobs_nnet, so one
|
||||
# job per parallel training job (different from the previous command).
|
||||
# We sum up over the index JOB in the previous $cmd, and write to multiple
|
||||
# archives, this time one for each "sub-iter".
|
||||
$cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
|
||||
cat $dir/egs/egs.$z.JOB.*.scp \| \
|
||||
utils/shuffle_list.pl --srand "\$[($z*$num_jobs_nnet)+JOB]" \| \
|
||||
nnet-copy-egs scp:- $egs_out2 || exit 1; ##'&&' \
|
||||
##rm $dir/egs/egs.$z.JOB.*.scp $dir/egs/egs.$z.JOB.*.ark || exit 1;
|
||||
# indexes of egs_orig are: egs_orig.$z.$num_jobs_nnet.$nj
|
||||
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/merge_and_split.$x.JOB.log \
|
||||
cat $dir/egs/egs_orig.$z.JOB.*.ark \| \
|
||||
nnet-copy-egs --random=$random_copy "--srand=\$[JOB+($x*$num_jobs_nnet)]" \
|
||||
ark:- $egs_out2 '&&' rm $dir/egs/egs_orig.$z.JOB.*.ark || exit 1;
|
||||
fi
|
||||
if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
|
||||
echo "Randomizing order of examples in each job"
|
||||
for n in `seq 1 $iters_per_epoch`; do
|
||||
s=$[$num_jobs_nnet*($n+($iters_per_epoch*$z))] # for srand
|
||||
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$z.$n.JOB.log \
|
||||
nnet-shuffle-egs "--srand=\$[JOB+$s]" \
|
||||
ark:$dir/egs/egs_merged.$z.$n.JOB.ark ark:$dir/egs/egs.$z.$n.JOB.ark '&&' \
|
||||
rm $dir/egs/egs_merged.$z.$n.JOB.ark || exit 1;
|
||||
done
|
||||
fi
|
||||
fi
|
||||
if [ $stage -le $x ]; then
|
||||
|
@ -250,7 +263,7 @@ while [ $x -lt $num_epochs ]; do
|
|||
if [ $stage -lt $x ] || [ $sub_stage -le $y ]; then
|
||||
$cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.$y.JOB.log \
|
||||
nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
|
||||
$dir/$x.$y.mdl ark:$dir/egs/egs_split.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
|
||||
$dir/$x.$y.mdl ark:$dir/egs/egs.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
|
||||
|| exit 1;
|
||||
nnets_list=
|
||||
for n in `seq 1 $num_jobs_nnet`; do
|
||||
|
|
|
@@ -68,7 +68,7 @@ Options:
--input-left-context <n> # #frames of left context for input features; default 0.
--input-right-context <n> # #frames of right context for input features; default 0.
--param-stdddev-factor <f> # Factor which can be used to modify the standard deviation of
# randomly nitialized features (default, 1. Gets multiplied by
# randomly initialized features (default, 1. Gets multiplied by
# 1/sqrt of number of inputs).
--initial-num-hidden-layers <n> <config-file> # If >0, number of hidden layers to initialize the network with.
# In this case, the positional parameter <num-hidden-layers> is only
|
@ -19,7 +19,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
|
|||
align-mapped align-compiled-mapped latgen-faster-mapped \
|
||||
hmm-info pdf-to-counts analyze-counts extract-ctx post-to-phone-post \
|
||||
post-to-pdf-post duplicate-matrix logprob-to-post prob-to-post copy-post \
|
||||
matrix-logprob
|
||||
matrix-logprob matrix-sum
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
// bin/matrix-sum.cc
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "matrix/kaldi-matrix.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
|
||||
const char *usage =
|
||||
"Sum (and optionally scale) two archives of input matrices\n"
|
||||
"of the same dimension\n"
|
||||
"\n"
|
||||
"Usage: matrix-sum [options] <matrix-rspecifier1> <matrix-rspecifier2> <sum-wspecifier>\n";
|
||||
|
||||
BaseFloat scale1 = 1.0, scale2 = 1.0;
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
po.Register("scale1", &scale1, "Scale applied to first matrix");
|
||||
po.Register("scale2", &scale2, "Scale applied to second matrix");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
std::string rspecifier1 = po.GetArg(1);
|
||||
std::string rspecifier2 = po.GetArg(2);
|
||||
std::string wspecifier = po.GetArg(3);
|
||||
|
||||
SequentialBaseFloatMatrixReader mat1_reader(rspecifier1);
|
||||
RandomAccessBaseFloatMatrixReader mat2_reader(rspecifier2);
|
||||
BaseFloatMatrixWriter mat_writer(wspecifier);
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
for (; !mat1_reader.Done(); mat1_reader.Next()) {
|
||||
std::string key = mat1_reader.Key();
|
||||
Matrix<BaseFloat> mat1 (mat1_reader.Value());
|
||||
if (!mat2_reader.HasKey(key)) {
|
||||
KALDI_WARN << "No such key " << key << " in second table.";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
const Matrix<BaseFloat> &mat2 (mat2_reader.Value(key));
|
||||
if (!SameDim(mat1, mat2)) {
|
||||
KALDI_WARN << "Matrices for key " << key << " have different dims "
|
||||
<< mat1.NumRows() << " x " << mat1.NumCols() << " vs. "
|
||||
<< mat2.NumRows() << " x " << mat2.NumCols();
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
if (scale1 != 1.0) mat1.Scale(scale1);
|
||||
mat1.AddMat(scale2, mat2);
|
||||
mat_writer.Write(key, mat1);
|
||||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Added " << num_done << " matrices; " << num_err
|
||||
<< " had errors.";
|
||||
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
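For context, a hypothetical use of the new binary is sketched below; the archive names are placeholders, while the options and argument order come from the code above. Note that the second argument is read through a random-access table reader, so an scp file would also work there.

# Hypothetical usage (archive names are placeholders): subtract one set of matrices from another.
matrix-sum --scale1=1.0 --scale2=-1.0 ark:mats_a.ark ark:mats_b.ark ark:diff.ark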
|
@ -93,10 +93,13 @@ Real* CuMatrix<Real>::RowData(MatrixIndexT r) {
|
|||
|
||||
|
||||
template<typename Real>
|
||||
CuMatrix<Real>& CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols) {
|
||||
void CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols,
|
||||
MatrixResizeType resize_type) {
|
||||
// This code does not currently support the other resize_type options.
|
||||
KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
|
||||
if (num_rows_ == rows && num_cols_ == cols) {
|
||||
// SetZero();
|
||||
return *this;
|
||||
if (resize_type == kSetZero) SetZero();
|
||||
return;
|
||||
}
|
||||
|
||||
Destroy();
|
||||
|
@ -108,17 +111,15 @@ CuMatrix<Real>& CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols) {
|
|||
cuSafeCall(cudaMallocPitch((void**)&data_, &pitch, row_bytes, rows));
|
||||
num_rows_ = rows; num_cols_ = cols;
|
||||
stride_ = pitch/sizeof(Real);
|
||||
SetZero();
|
||||
if (resize_type == kSetZero) SetZero();
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
mat_.Resize(rows, cols);
|
||||
mat_.Resize(rows, cols, resize_type);
|
||||
num_rows_=rows;
|
||||
num_cols_=cols;
|
||||
stride_= mat_.Stride();
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
@ -134,7 +135,7 @@ void CuMatrix<Real>::Destroy() {
|
|||
} else
|
||||
#endif
|
||||
{
|
||||
mat_.Destroy();
|
||||
mat_.Resize(0, 0);
|
||||
}
|
||||
num_rows_ = num_cols_ = stride_ = 0;
|
||||
}
|
||||
|
@ -142,9 +143,8 @@ void CuMatrix<Real>::Destroy() {
|
|||
|
||||
|
||||
template<typename Real>
|
||||
CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
|
||||
Resize(src.NumRows(), src.NumCols());
|
||||
|
||||
void CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
|
||||
KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
|
@ -152,7 +152,8 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
|
|||
MatrixIndexT dst_pitch = stride_*sizeof(Real);
|
||||
MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
|
||||
MatrixIndexT width = src.NumCols()*sizeof(Real);
|
||||
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyDeviceToDevice));
|
||||
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
|
||||
width, src.NumRows(), cudaMemcpyDeviceToDevice));
|
||||
|
||||
CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatD2D",tim.Elapsed());
|
||||
} else
|
||||
|
@ -160,16 +161,13 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
|
|||
{
|
||||
mat_.CopyFromMat(src.mat_);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
|
||||
Resize(src.NumRows(), src.NumCols());
|
||||
|
||||
void CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
|
||||
KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
|
@ -177,7 +175,8 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
|
|||
MatrixIndexT dst_pitch = stride_*sizeof(Real);
|
||||
MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
|
||||
MatrixIndexT width = src.NumCols()*sizeof(Real);
|
||||
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyHostToDevice));
|
||||
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
|
||||
width, src.NumRows(), cudaMemcpyHostToDevice));
|
||||
|
||||
CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatH2D",tim.Elapsed());
|
||||
} else
|
||||
|
@ -185,17 +184,12 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
|
|||
{
|
||||
mat_.CopyFromMat(src);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuMatrix<Real>::CopyToMat(Matrix<Real> *dst) const {
|
||||
if (dst->NumRows() != NumRows() || dst->NumCols() != NumCols()) {
|
||||
dst->Resize(NumRows(), NumCols());
|
||||
}
|
||||
KALDI_ASSERT(dst->NumRows() == NumRows() && dst->NumCols() == NumCols());
|
||||
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
|
@ -257,7 +251,7 @@ void CuMatrix<Real>::Read(std::istream &is, bool binary) {
|
|||
|
||||
template<typename Real>
|
||||
void CuMatrix<Real>::Write(std::ostream &os, bool binary) const {
|
||||
Matrix<BaseFloat> tmp;
|
||||
Matrix<BaseFloat> tmp(NumRows(), NumCols(), kUndefined);
|
||||
CopyToMat(&tmp);
|
||||
tmp.Write(os, binary);
|
||||
}
|
||||
|
|
|
@ -46,15 +46,41 @@ class CuMatrix {
|
|||
public:
|
||||
|
||||
/// Default Constructor
|
||||
CuMatrix<Real>()
|
||||
: num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
|
||||
}
|
||||
CuMatrix<Real>():
|
||||
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) { }
|
||||
|
||||
/// Constructor with memory initialisation
|
||||
CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols)
|
||||
: num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
|
||||
CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols):
|
||||
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
|
||||
Resize(rows, cols);
|
||||
}
|
||||
|
||||
// Note: we had to remove the "explicit" keyword due
|
||||
// to problems with STL vectors of CuMatrix.
|
||||
CuMatrix<Real>(const CuMatrix<Real> &other):
|
||||
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
CopyFromMat(other);
|
||||
}
|
||||
|
||||
explicit CuMatrix<Real>(const Matrix<Real> &other):
|
||||
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
CopyFromMat(other);
|
||||
}
|
||||
|
||||
CuMatrix<Real> &operator = (const CuMatrix<Real> &other) {
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
CopyFromMat(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
CuMatrix<Real> &operator = (const Matrix<Real> &other) {
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
CopyFromMat(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~CuMatrix() {
|
||||
Destroy();
|
||||
|
@ -65,14 +91,12 @@ class CuMatrix {
|
|||
return num_rows_;
|
||||
}
|
||||
|
||||
MatrixIndexT NumCols() const {
|
||||
return num_cols_;
|
||||
}
|
||||
MatrixIndexT NumCols() const { return num_cols_; }
|
||||
|
||||
MatrixIndexT Stride() const {
|
||||
return stride_;
|
||||
}
|
||||
MatrixIndexT Stride() const { return stride_; }
|
||||
|
||||
// MatrixDim is a struct containing "rows", "cols" and "stride",
|
||||
// that is an argument of most CUDA kernels.
|
||||
::MatrixDim Dim() const {
|
||||
::MatrixDim d = { num_rows_, num_cols_, stride_ };
|
||||
return d;
|
||||
|
@ -87,29 +111,22 @@ class CuMatrix {
|
|||
Real* RowData(MatrixIndexT r);
|
||||
|
||||
/// Get size of matrix in bytes
|
||||
MatrixIndexT SizeInBytes() const {
|
||||
return num_rows_*stride_*sizeof(Real);
|
||||
}
|
||||
MatrixIndexT SizeInBytes() const { return num_rows_*stride_*sizeof(Real); }
|
||||
|
||||
/// Get size of matrix row in bytes
|
||||
MatrixIndexT RowSizeInBytes() const {
|
||||
return num_cols_*sizeof(Real);
|
||||
}
|
||||
MatrixIndexT RowSizeInBytes() const { return num_cols_*sizeof(Real); }
|
||||
|
||||
/// Get size of matrix stride in bytes
|
||||
MatrixIndexT StrideSizeInBytes() const {
|
||||
return stride_*sizeof(Real);
|
||||
}
|
||||
MatrixIndexT StrideSizeInBytes() const { return stride_*sizeof(Real); }
|
||||
|
||||
/// Allocate the memory
|
||||
ThisType& Resize(MatrixIndexT rows, MatrixIndexT cols);
|
||||
void Resize(MatrixIndexT rows, MatrixIndexT cols,
|
||||
MatrixResizeType resize_type = kSetZero);
|
||||
|
||||
/// Deallocate the memory
|
||||
void Destroy();
|
||||
|
||||
/// Copy functions (reallocates when needed)
|
||||
ThisType& CopyFromMat(const CuMatrix<Real> &src);
|
||||
ThisType& CopyFromMat(const Matrix<Real> &src);
|
||||
/// Copy functions (reallocates when needed, but note from Dan: eventually
|
||||
/// I'll change it to just die if the sizes don't match, like the Matrix class.)
|
||||
void CopyFromMat(const CuMatrix<Real> &src);
|
||||
void CopyFromMat(const Matrix<Real> &src);
|
||||
void CopyToMat(Matrix<Real> *dst) const;
|
||||
|
||||
/// Copy row interval from matrix
|
||||
|
@ -154,6 +171,8 @@ class CuMatrix {
|
|||
}
|
||||
|
||||
private:
|
||||
void Destroy();
|
||||
|
||||
MatrixIndexT num_rows_;
|
||||
MatrixIndexT num_cols_;
|
||||
MatrixIndexT stride_;
|
||||
|
|
|
@@ -44,8 +44,6 @@ const Real* CuVector<Real>::Data() const {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
Real* CuVector<Real>::Data() {
|
||||
#if HAVE_CUDA==1
|
||||
|
@ -58,15 +56,12 @@ Real* CuVector<Real>::Data() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
CuVector<Real>& CuVector<Real>::Resize(MatrixIndexT dim) {
|
||||
void CuVector<Real>::Resize(MatrixIndexT dim) {
|
||||
if (dim_ == dim) {
|
||||
// SetZero();
|
||||
return *this;
|
||||
SetZero();
|
||||
return;
|
||||
}
|
||||
|
||||
Destroy();
|
||||
|
||||
#if HAVE_CUDA==1
|
||||
|
@ -80,8 +75,6 @@ CuVector<Real>& CuVector<Real>::Resize(MatrixIndexT dim) {
|
|||
|
||||
dim_ = dim;
|
||||
SetZero();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
@ -106,9 +99,7 @@ void CuVector<Real>::Destroy() {
|
|||
|
||||
|
||||
template<typename Real>
|
||||
CuVector<Real>& CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
|
||||
Resize(src.Dim());
|
||||
|
||||
void CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
|
@ -119,16 +110,13 @@ CuVector<Real>& CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
|
|||
{
|
||||
vec_.CopyFromVec(src.vec_);
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
CuVector<Real>& CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
|
||||
Resize(src.Dim());
|
||||
|
||||
void CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
|
||||
KALDI_ASSERT(src.Dim() == dim_);
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
Timer tim;
|
||||
|
@ -141,16 +129,14 @@ CuVector<Real>& CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
|
|||
{
|
||||
vec_.CopyFromVec(src);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Real>
|
||||
void CuVector<Real>::CopyToVec(Vector<Real> *dst) const {
|
||||
if (dst->Dim() != dim_) {
|
||||
dst->Resize(dim_);
|
||||
}
|
||||
KALDI_ASSERT(dst->Dim() == dim_);
|
||||
|
||||
|
||||
#if HAVE_CUDA==1
|
||||
if (CuDevice::Instantiate().Enabled()) {
|
||||
|
@ -177,7 +163,7 @@ void CuVector<Real>::Read(std::istream &is, bool binary) {
|
|||
|
||||
template<typename Real>
|
||||
void CuVector<Real>::Write(std::ostream &os, bool binary) const {
|
||||
Vector<BaseFloat> tmp;
|
||||
Vector<BaseFloat> tmp(Dim());
|
||||
CopyToVec(&tmp);
|
||||
tmp.Write(os, binary);
|
||||
}
|
||||
|
|
|
@@ -46,6 +46,16 @@ class CuVector {
|
|||
Resize(dim);
|
||||
}
|
||||
|
||||
CuVector<Real>(const CuVector<Real> &v): dim_(0), data_(NULL) {
|
||||
Resize(v.dim_);
|
||||
CopyFromVec(v);
|
||||
}
|
||||
|
||||
CuVector<Real>(const Vector<Real> &v): dim_(0), data_(NULL) {
|
||||
Resize(v.Dim());
|
||||
CopyFromVec(v);
|
||||
}
|
||||
|
||||
/// Destructor
|
||||
~CuVector() {
|
||||
Destroy();
|
||||
|
@ -61,14 +71,11 @@ class CuVector {
|
|||
Real* Data();
|
||||
|
||||
/// Allocate the memory
|
||||
ThisType& Resize(MatrixIndexT dim);
|
||||
|
||||
/// Deallocate the memory
|
||||
void Destroy();
|
||||
void Resize(MatrixIndexT dim);
|
||||
|
||||
/// Copy functions (lazy reallocation when needed)
|
||||
ThisType& CopyFromVec(const CuVector<Real> &src);
|
||||
ThisType& CopyFromVec(const Vector<Real> &src);
|
||||
void CopyFromVec(const CuVector<Real> &src);
|
||||
void CopyFromVec(const Vector<Real> &src);
|
||||
void CopyToVec(Vector<Real> *dst) const;
|
||||
|
||||
/// I/O
|
||||
|
@ -94,6 +101,7 @@ class CuVector {
|
|||
}
|
||||
|
||||
private:
|
||||
void Destroy();
|
||||
MatrixIndexT dim_; ///< dimension of the vector
|
||||
Real *data_; ///< GPU data pointer
|
||||
Vector<Real> vec_; ///< non-GPU vector as back-up
|
||||
|
|
|
@@ -234,7 +234,7 @@ class SimpleDecoder {
|
|||
const Arc &arc = aiter.Value();
|
||||
if (arc.ilabel == 0) { // propagate nonemitting only...
|
||||
Token *new_tok = new Token(arc, tok);
|
||||
if (new_tok->arc_.weight.Value() > cutoff) {
|
||||
if (new_tok->weight_.Value() > cutoff) {
|
||||
Token::TokenDelete(new_tok);
|
||||
} else {
|
||||
unordered_map<StateId, Token*>::iterator find_iter
|
||||
|
|
|
@@ -213,6 +213,10 @@ void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
|
|||
int32 offset = bins_[i].first;
|
||||
const Vector<BaseFloat> &v (bins_[i].second);
|
||||
(*mel_energies_out)(i) = VecVec(v, power_spectrum.Range(offset, v.Dim()));
|
||||
// The following assert was added due to a problem with OpenBlas that
|
||||
// we had at one point (it was a bug in that library). Just to detect
|
||||
// it early.
|
||||
KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
|
||||
}
|
||||
|
||||
if (debug_) {
|
||||
|
|
|
@@ -9,7 +9,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
|
|||
feat-to-len feat-to-dim fmpe-apply-transform fmpe-acc-stats fmpe-init \
|
||||
fmpe-est fmpe-copy fmpe-sum-accs append-feats extend-transform-dim \
|
||||
get-full-lda-mat compute-spectrogram-feats extract-feature-segments \
|
||||
reverse-feats paste-feats select-feats
|
||||
reverse-feats paste-feats select-feats subsample-feats
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@@ -1,6 +1,7 @@
|
|||
// featbin/append-feats.cc
|
||||
|
||||
// Copyright 2012 Petr Motlicek; Pawel Swietojanski
|
||||
// Copyright 2012 Petr Motlicek Pawel Swietojanski
|
||||
// Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -32,15 +33,11 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
ParseOptions po(usage);
|
||||
|
||||
int32 feats_offset_in1 = 0;
|
||||
int32 feats_offset_in2 = 0;
|
||||
int32 num_feats_in1 = 0;
|
||||
int32 num_feats_in2 = 0;
|
||||
bool truncate_frames = false;
|
||||
|
||||
po.Register("feats-offset-in1", &feats_offset_in1, "Feats 1 offset");
|
||||
po.Register("num-feats-in1", &num_feats_in1, "Take num-feats from in1-rspeciifier");
|
||||
po.Register("feats-offset-in2", &feats_offset_in2, "Feats 2 offset");
|
||||
po.Register("num-feats-in2", &num_feats_in2, "Take num-feats from in2-rspeciifier");
|
||||
po.Register("truncate-frames", &truncate_frames, "If true, do not treat it "
|
||||
"as an error when files differ in number of frames, but truncate "
|
||||
"the longest one.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
@@ -53,80 +50,47 @@ int main(int argc, char *argv[]) {
|
|||
std::string rspecifier2 = po.GetArg(2);
|
||||
std::string wspecifier = po.GetArg(3);
|
||||
|
||||
KALDI_ASSERT(feats_offset_in1 >= 0 && feats_offset_in2 >= 0);
|
||||
BaseFloatMatrixWriter feats_writer(wspecifier);
|
||||
SequentialBaseFloatMatrixReader feats_reader1(rspecifier1);
|
||||
RandomAccessBaseFloatMatrixReader feats_reader2(rspecifier2);
|
||||
|
||||
BaseFloatMatrixWriter kaldi_writer(wspecifier);
|
||||
SequentialBaseFloatMatrixReader kaldi_reader1(rspecifier1);
|
||||
RandomAccessBaseFloatMatrixReader kaldi_reader2(rspecifier2);
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
// Peeking in the archives to get the feature dimensions
|
||||
if (kaldi_reader1.Done()) {
|
||||
KALDI_ERR << "Could not read any features from " << rspecifier1
|
||||
<< ". (empty archive?)";
|
||||
}
|
||||
std::string utt = kaldi_reader1.Key();
|
||||
if (!kaldi_reader2.HasKey(utt)) {
|
||||
KALDI_ERR << "Could not read features for key " << utt << " from "
|
||||
<< rspecifier2 << ". (empty archive?)";
|
||||
}
|
||||
|
||||
int32 dim_feats_in1 = kaldi_reader1.Value().NumCols();
|
||||
int32 dim_feats_in2 = kaldi_reader2.Value(utt).NumCols();
|
||||
if (num_feats_in1 == 0)
|
||||
num_feats_in1 = dim_feats_in1 - feats_offset_in1;
|
||||
if (num_feats_in2 == 0)
|
||||
num_feats_in2 = dim_feats_in2 - feats_offset_in2;
|
||||
|
||||
KALDI_LOG << "Reading features from " << rspecifier1 << " and " << rspecifier2;
|
||||
KALDI_LOG << "\tdim1 = " << dim_feats_in1 << "; offset1 = " << feats_offset_in1
|
||||
<< "; num1 = " << num_feats_in1 << "; dim2 = " << dim_feats_in2
|
||||
<< "; offset2 = " << feats_offset_in2 << "; num2 = " << num_feats_in2;
|
||||
|
||||
KALDI_ASSERT((feats_offset_in1 + num_feats_in1) <= dim_feats_in1);
|
||||
KALDI_ASSERT((feats_offset_in2 + num_feats_in2) <= dim_feats_in2);
|
||||
|
||||
for (; !kaldi_reader1.Done(); kaldi_reader1.Next()) {
|
||||
utt = kaldi_reader1.Key();
|
||||
if (!kaldi_reader2.HasKey(utt)) {
|
||||
for (; !feats_reader1.Done(); feats_reader1.Next()) {
|
||||
std::string utt = feats_reader1.Key();
|
||||
if (!feats_reader2.HasKey(utt)) {
|
||||
KALDI_WARN << "Could not find features for " << utt << " in "
|
||||
<< rspecifier2 << ": producing no output for the utterance";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
const Matrix<BaseFloat> &feats1 = kaldi_reader1.Value();
|
||||
const Matrix<BaseFloat> &feats2 = kaldi_reader2.Value(utt);
|
||||
int32 num_frames = feats1.NumRows();
|
||||
KALDI_VLOG(1) << "Utterance : " << utt << ": # of frames = " << num_frames;
|
||||
|
||||
KALDI_ASSERT(feats1.NumCols() == dim_feats_in1 &&
|
||||
feats2.NumCols() == dim_feats_in2);
|
||||
if (num_frames != feats2.NumRows()) {
|
||||
KALDI_WARN << "Utterance " << utt << ": " << num_frames
|
||||
<< " frames read from " << rspecifier1 << " and "
|
||||
<< feats2.NumRows() << " frames read from " << rspecifier2
|
||||
<< ": producing no output for the utterance";
|
||||
const Matrix<BaseFloat> &feats1 = feats_reader1.Value();
|
||||
const Matrix<BaseFloat> &feats2 = feats_reader2.Value(utt);
|
||||
if (feats1.NumRows() != feats2.NumRows() && !truncate_frames) {
|
||||
KALDI_WARN << "For utterance " << utt << ", features have different "
|
||||
<< "#frames " << feats1.NumRows() << " vs. "
|
||||
<< feats2.NumRows() << ", producing no output (use "
|
||||
<< "--truncate-frames=true if you want output)";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
int32 num_frames = std::min(feats1.NumRows(), feats2.NumRows()),
|
||||
dim1 = feats1.NumCols(), dim2 = feats2.NumCols();
|
||||
Matrix<BaseFloat> output(num_frames, dim1 + dim2, kUndefined);
|
||||
output.Range(0, num_frames, 0, dim1).CopyFromMat(
|
||||
feats1.Range(0, num_frames, 0, dim1));
|
||||
output.Range(0, num_frames, dim1, dim2).CopyFromMat(
|
||||
feats2.Range(0, num_frames, 0, dim2));
|
||||
|
||||
SubMatrix<BaseFloat> new_feats1(feats1, 0, num_frames, feats_offset_in1,
|
||||
num_feats_in1);
|
||||
SubMatrix<BaseFloat> new_feats2(feats2, 0, num_frames, feats_offset_in2,
|
||||
num_feats_in2);
|
||||
Matrix<BaseFloat> output_feats(num_frames, new_feats1.NumCols() +
|
||||
new_feats2.NumCols());
|
||||
output_feats.Range(0, num_frames, 0,
|
||||
new_feats1.NumCols()).CopyFromMat(new_feats1);
|
||||
output_feats.Range(0, num_frames, new_feats1.NumCols(),
|
||||
new_feats2.NumCols()).CopyFromMat(new_feats2);
|
||||
kaldi_writer.Write(utt, output_feats);
|
||||
feats_writer.Write(utt, output);
|
||||
num_done++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (const std::exception& e) {
|
||||
KALDI_LOG << "Appended " << num_done << " feats; " << num_err
|
||||
<< " with errors.";
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
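With the old offset/num-feats options gone, append-feats now takes exactly two input rspecifiers and one wspecifier. A typical invocation (the archive names here are made up for illustration) would be

    append-feats --truncate-frames=true ark:feats_a.ark ark:feats_b.ark ark:appended.ark

Per utterance the output has as many columns as the two inputs combined; with --truncate-frames=true a length mismatch is resolved by keeping the shorter number of frames, otherwise the utterance is skipped with a warning.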
|
||||
|
||||
|
||||
|
|
|
@@ -21,7 +21,6 @@
|
|||
#include "feat/feature-mfcc.h"
|
||||
#include "feat/wave-reader.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
|
|
|
@@ -0,0 +1,96 @@
|
|||
// featbin/select-feats.cc
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <utility>
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "matrix/kaldi-matrix.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
using namespace std;
|
||||
|
||||
const char *usage =
|
||||
"Sub-samples features by taking every n'th frame"
|
||||
"\n"
|
||||
"Usage: subsample-feats [options] in-rspecifier out-wspecifier\n"
|
||||
" e.g. subsample-feats --n=2 ark:- ark:-\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
int32 n = 1, offset = 0;
|
||||
|
||||
po.Register("n", &n, "Take every n'th feature, for this value of n");
|
||||
po.Register("offset", &offset, "Start with the feature with this offset, "
|
||||
"then take every n'th feature.");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
string rspecifier = po.GetArg(1);
|
||||
string wspecifier = po.GetArg(2);
|
||||
|
||||
SequentialBaseFloatMatrixReader feat_reader(rspecifier);
|
||||
BaseFloatMatrixWriter feat_writer(wspecifier);
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
// process all keys
|
||||
for (; !feat_reader.Done(); feat_reader.Next()) {
|
||||
std::string utt = feat_reader.Key();
|
||||
const Matrix<BaseFloat> feats(feat_reader.Value());
|
||||
|
||||
// This code could, of course, be much more efficient; I'm just
|
||||
// keeping it simple.
|
||||
int32 num_indexes = 0;
|
||||
for (int32 k = offset; k < feats.NumRows(); k += n)
|
||||
num_indexes++; // k is the index.
|
||||
|
||||
if (num_indexes == 0) {
|
||||
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
|
||||
<< "producing no output.";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
Matrix<BaseFloat> output(num_indexes, feats.NumCols());
|
||||
int32 i = 0;
|
||||
for (int32 k = offset; k < feats.NumRows(); k += n, i++) {
|
||||
SubVector<BaseFloat> src(feats, k), dest(output, i);
|
||||
dest.CopyFromVec(src);
|
||||
}
|
||||
KALDI_ASSERT(i == num_indexes);
|
||||
feat_writer.Write(utt, output);
|
||||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
|
||||
<< " with errors.";
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
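Sketching the arithmetic of the selection loop above: the frames kept are k = offset, offset + n, offset + 2n, ..., so an utterance with T input frames (T > offset) yields ceil((T - offset) / n) output rows. For example, --n=3 --offset=1 on a 10-frame utterance keeps frames 1, 4 and 7.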
@@ -25,7 +25,7 @@ int main(int argc, char *argv[]) {
|
|||
using namespace kaldi;
|
||||
|
||||
const char *usage =
|
||||
"Copy a subset of features\n"
|
||||
"Copy a subset of features (the first n features)\n"
|
||||
"Usage: subset-feats [options] in-rspecifier out-wspecifier\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
|
|
@ -160,11 +160,9 @@ int main(int argc, char *argv[]) {
|
|||
KALDI_LOG << "Applied transform to " << num_done << " utterances; " << num_error
|
||||
<< " had errors.";
|
||||
|
||||
return 0;
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@@ -27,9 +27,24 @@
|
|||
|
||||
namespace kaldi {
|
||||
|
||||
inline void cblas_Xscal(const int N, float *X, const int incX, float *Y,
|
||||
const int incY, const float c, const float s) {
|
||||
cblas_srot(N, X, incX, Y, incY, c, s);
|
||||
|
||||
inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
|
||||
const int incY) {
|
||||
cblas_scopy(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
|
||||
const int incY) {
|
||||
cblas_dcopy(N, X, incX, Y, incY);
|
||||
}
|
||||
|
||||
|
||||
inline float cblas_Xasum(const int N, const float *X, const int incX) {
|
||||
return cblas_sasum(N, X, incX);
|
||||
}
|
||||
|
||||
inline double cblas_Xasum(const int N, const double *X, const int incX) {
|
||||
return cblas_dasum(N, X, incX);
|
||||
}
|
||||
|
||||
inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
|
||||
|
@@ -226,6 +241,78 @@ inline void cblas_Xsyrk(
|
|||
cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
|
||||
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
|
||||
}
|
||||
|
||||
/// matrix-vector multiply using a banded matrix; we always call this
|
||||
/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for
|
||||
/// elementwise multiplication. We miss some of the arguments out of this
|
||||
/// wrapper.
|
||||
inline void cblas_Xsbmv1(
|
||||
const MatrixIndexT dim,
|
||||
const double *A,
|
||||
const double alpha,
|
||||
const double *x,
|
||||
const double beta,
|
||||
double *y) {
|
||||
cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
|
||||
1, x, 1, beta, y, 1);
|
||||
}
|
||||
|
||||
inline void cblas_Xsbmv1(
|
||||
const MatrixIndexT dim,
|
||||
const float *A,
|
||||
const float alpha,
|
||||
const float *x,
|
||||
const float beta,
|
||||
float *y) {
|
||||
cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
|
||||
1, x, 1, beta, y, 1);
|
||||
}
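Because the bandwidth argument passed to cblas_?sbmv above is 0, the call collapses to y_i = beta * y_i + alpha * A_i * x_i for every i, i.e. multiplication by diag(A); that is what makes it suitable for the element-wise multiplies mentioned in the comment.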
/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
|
||||
/// extend this somehow.
|
||||
inline void mul_elements(
|
||||
const MatrixIndexT dim,
|
||||
const double *a,
|
||||
double *b) { // does b *= a, elementwise.
|
||||
double c1, c2, c3, c4;
|
||||
MatrixIndexT i;
|
||||
for (i = 0; i + 4 <= dim; i += 4) {
|
||||
c1 = a[i] * b[i];
|
||||
c2 = a[i+1] * b[i+1];
|
||||
c3 = a[i+2] * b[i+2];
|
||||
c4 = a[i+3] * b[i+3];
|
||||
b[i] = c1;
|
||||
b[i+1] = c2;
|
||||
b[i+2] = c3;
|
||||
b[i+3] = c4;
|
||||
}
|
||||
for (; i < dim; i++)
|
||||
b[i] *= a[i];
|
||||
}
|
||||
|
||||
inline void mul_elements(
|
||||
const MatrixIndexT dim,
|
||||
const float *a,
|
||||
float *b) { // does b *= a, elementwise.
|
||||
float c1, c2, c3, c4;
|
||||
MatrixIndexT i;
|
||||
for (i = 0; i + 4 <= dim; i += 4) {
|
||||
c1 = a[i] * b[i];
|
||||
c2 = a[i+1] * b[i+1];
|
||||
c3 = a[i+2] * b[i+2];
|
||||
c4 = a[i+3] * b[i+3];
|
||||
b[i] = c1;
|
||||
b[i+1] = c2;
|
||||
b[i+2] = c3;
|
||||
b[i+3] = c4;
|
||||
}
|
||||
for (; i < dim; i++)
|
||||
b[i] *= a[i];
|
||||
}
|
||||
|
||||
|
||||
|
||||
// add clapack here
|
||||
#ifndef HAVE_ATLAS
|
||||
inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
|
||||
|
|
|
@ -495,6 +495,41 @@ template
|
|||
void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
|
||||
MatrixTransposeType Trans);
|
||||
|
||||
// Specialize the template for CopyFromSp for float, float.
|
||||
template<>
|
||||
template<>
|
||||
void MatrixBase<float>::CopyFromSp(const SpMatrix<float> & M) {
|
||||
KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
|
||||
MatrixIndexT num_rows = num_rows_, stride = stride_;
|
||||
const float *Mdata = M.Data();
|
||||
float *row_data = data_, *col_data = data_;
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++) {
|
||||
cblas_scopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
|
||||
cblas_scopy(i, Mdata, 1, col_data, stride); // copy to the column.
|
||||
Mdata += i+1;
|
||||
row_data += stride;
|
||||
col_data += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Specialize the template for CopyFromSp for double, double.
|
||||
template<>
|
||||
template<>
|
||||
void MatrixBase<double>::CopyFromSp(const SpMatrix<double> & M) {
|
||||
KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
|
||||
MatrixIndexT num_rows = num_rows_, stride = stride_;
|
||||
const double *Mdata = M.Data();
|
||||
double *row_data = data_, *col_data = data_;
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++) {
|
||||
cblas_dcopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
|
||||
cblas_dcopy(i, Mdata, 1, col_data, stride); // copy to the column.
|
||||
Mdata += i+1;
|
||||
row_data += stride;
|
||||
col_data += 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename Real>
|
||||
template<typename OtherReal>
|
||||
void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) {
|
||||
|
@ -711,12 +746,16 @@ void Matrix<Real>::Destroy() {
|
|||
template<typename Real>
|
||||
void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
|
||||
KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
|
||||
MatrixIndexT i;
|
||||
MatrixIndexT j;
|
||||
|
||||
for (i = 0; i < num_rows_; i++) {
|
||||
for (j = 0; j < num_cols_; j++) {
|
||||
(*this)(i, j) *= a(i, j);
|
||||
if (num_cols_ == stride_ && num_cols_ == a.stride_) {
|
||||
mul_elements(num_rows_ * num_cols_, a.data_, data_);
|
||||
} else {
|
||||
MatrixIndexT a_stride = a.stride_, stride = stride_;
|
||||
Real *data = data_, *a_data = a.data_;
|
||||
for (MatrixIndexT i = 0; i < num_rows_; i++) {
|
||||
mul_elements(num_cols_, a_data, data);
|
||||
a_data += a_stride;
|
||||
data += stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1985,6 +2024,13 @@ Real MatrixBase<Real>::ApplySoftMax() {
|
|||
return max + log(sum);
|
||||
}
|
||||
|
||||
template<typename Real>
|
||||
void MatrixBase<Real>::ApplyTanh() {
|
||||
for (MatrixIndexT r = 0; r < num_rows_; r++) {
|
||||
SubVector<Real> v(*this, r);
|
||||
v.ApplyTanh();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Real>
|
||||
template<class OtherReal>
|
||||
|
|
|
@@ -341,6 +341,9 @@ class MatrixBase {
|
|||
/// matrix and return normalizer (log sum of exponentials).
|
||||
Real ApplySoftMax();
|
||||
|
||||
/// Apply the tanh function to each element of the matrix.
|
||||
void ApplyTanh();
|
||||
|
||||
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
|
||||
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
|
||||
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
|
||||
|
@ -553,7 +556,7 @@ class Matrix : public MatrixBase<Real> {
|
|||
/// It is symmetric, so no option for transpose, and NumRows == Cols
|
||||
template<typename OtherReal>
|
||||
explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
|
||||
Resize(M.NumRows(), M.NumRows());
|
||||
Resize(M.NumRows(), M.NumRows(), kUndefined);
|
||||
this->CopyFromSp(M);
|
||||
}
|
||||
|
||||
|
@ -562,10 +565,10 @@ class Matrix : public MatrixBase<Real> {
|
|||
explicit Matrix(const TpMatrix<OtherReal> & M,
|
||||
MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
|
||||
if (trans == kNoTrans) {
|
||||
Resize(M.NumRows(), M.NumCols());
|
||||
Resize(M.NumRows(), M.NumCols(), kUndefined);
|
||||
this->CopyFromTp(M);
|
||||
} else {
|
||||
Resize(M.NumCols(), M.NumRows());
|
||||
Resize(M.NumCols(), M.NumRows(), kUndefined);
|
||||
this->CopyFromTp(M, kTrans);
|
||||
}
|
||||
}
|
||||
|
@ -584,9 +587,6 @@ class Matrix : public MatrixBase<Real> {
|
|||
/// Destructor to free matrices.
|
||||
~Matrix() { Destroy(); }
|
||||
|
||||
/// Deallocates memory and sets to empty matrix.
|
||||
void Destroy();
|
||||
|
||||
/// Sets matrix to a specified size (zero is OK as long as both r and c are
|
||||
/// zero). The value of the new data depends on resize_type:
|
||||
/// -if kSetZero, the new data will be zero
|
||||
|
@ -601,9 +601,8 @@ class Matrix : public MatrixBase<Real> {
|
|||
/// Assignment operator that takes MatrixBase.
|
||||
Matrix<Real> &operator = (const MatrixBase<Real> &other) {
|
||||
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
|
||||
MatrixBase<Real>::NumCols() != other.NumCols()) {
|
||||
Resize(other.NumRows(), other.NumCols());
|
||||
}
|
||||
MatrixBase<Real>::NumCols() != other.NumCols())
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
MatrixBase<Real>::CopyFromMat(other);
|
||||
return *this;
|
||||
}
|
||||
|
@ -611,15 +610,17 @@ class Matrix : public MatrixBase<Real> {
|
|||
/// Assignment operator. Needed for inclusion in std::vector.
|
||||
Matrix<Real> &operator = (const Matrix<Real> &other) {
|
||||
if (MatrixBase<Real>::NumRows() != other.NumRows() ||
|
||||
MatrixBase<Real>::NumCols() != other.NumCols()) {
|
||||
Resize(other.NumRows(), other.NumCols());
|
||||
}
|
||||
MatrixBase<Real>::NumCols() != other.NumCols())
|
||||
Resize(other.NumRows(), other.NumCols(), kUndefined);
|
||||
MatrixBase<Real>::CopyFromMat(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
/// Deallocates memory and sets to empty matrix (dimension 0, 0).
|
||||
void Destroy();
|
||||
|
||||
/// Init assumes the current class contents are invalid (i.e. junk or have
|
||||
/// already been freed), and it sets the matrix to newly allocated memory with
|
||||
/// the specified number of rows and columns. r == c == 0 is acceptable. The data
|
||||
|
|
|
@@ -375,7 +375,13 @@ template
|
|||
void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row);
|
||||
|
||||
|
||||
// takes elements to a power. Throws exception if could not (but only for power != 1 ad power != 2).
|
||||
#ifdef HAVE_MKL
|
||||
template<>
|
||||
void VectorBase<float>::ApplyPow(float power) { vsPowx(dim_, data_, power, data_); }
|
||||
template<>
|
||||
void VectorBase<double>::ApplyPow(double power) { vdPowx(dim_, data_, power, data_); }
|
||||
#else
|
||||
// takes elements to a power. Throws exception if could not (but only for power != 1 and power != 2).
|
||||
template<typename Real>
|
||||
void VectorBase<Real>::ApplyPow(Real power) {
|
||||
if (power == 1.0) return;
|
||||
|
@ -399,6 +405,7 @@ void VectorBase<Real>::ApplyPow(Real power) {
|
|||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Computes the p-th norm. Throws exception if could not.
|
||||
template<typename Real>
|
||||
|
@@ -534,14 +541,13 @@ template<typename Real>
|
|||
void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) {
|
||||
// note the double accumulator
|
||||
KALDI_ASSERT(dim_ == M.NumCols());
|
||||
MatrixIndexT num_rows = M.NumRows(), stride = M.Stride();
|
||||
for (MatrixIndexT i = 0; i < dim_; i++) {
|
||||
double sum = 0.0;
|
||||
const Real *src = M.Data() + i;
|
||||
for (MatrixIndexT j = 0; j < num_rows; j++)
|
||||
sum += src[j*stride];
|
||||
data_[i] = alpha * sum + beta * data_[i];
|
||||
}
|
||||
MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
|
||||
Real *data = data_;
|
||||
cblas_Xscal(dim, beta, data, 1);
|
||||
const Real *m_data = M.Data();
|
||||
|
||||
for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride)
|
||||
cblas_Xaxpy(dim, alpha, m_data, 1, data, 1);
|
||||
}
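Spelled out, the rewritten AddRowSumMat computes v = beta * v + alpha * sum_i M(i, :): one Xscal call applies the beta term and one Xaxpy per row accumulates the row sum, in place of the old per-column loop with its double-precision accumulator.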
|
||||
|
||||
template<typename Real>
|
||||
|
@@ -651,6 +657,25 @@ Real VectorBase<Real>::ApplySoftMax() {
|
|||
return max + log(sum);
|
||||
}
|
||||
|
||||
#ifdef HAVE_MKL
|
||||
template<>
|
||||
void VectorBase<float>::ApplyTanh() { vsTanh(dim_, data_, data_); }
|
||||
template<>
|
||||
void VectorBase<double>::ApplyTanh() { vdTanh(dim_, data_, data_); }
|
||||
#else
|
||||
template<typename Real>
|
||||
void VectorBase<Real>::ApplyTanh() {
|
||||
for (MatrixIndexT i = 0; i < dim_; i++) {
|
||||
Real x = data_[i];
|
||||
if (x > 0.0) {
|
||||
x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
|
||||
} else {
|
||||
x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
|
||||
}
|
||||
data_[i] = x;
|
||||
}
|
||||
}
|
||||
#endif
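The two branches above are the usual numerically safe way to evaluate tanh(x) = -1 + 2 / (1 + exp(-2x)) = 1 - 2 / (1 + exp(2x)): picking the form whose exponential has a non-positive argument avoids overflow for large |x|.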
|
||||
|
||||
template<typename Real>
|
||||
void VectorBase<Real>::Add(Real c) {
|
||||
|
|
|
@@ -126,6 +126,10 @@ class VectorBase {
|
|||
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
|
||||
Real ApplySoftMax();
|
||||
|
||||
/// Apply the tanh function to each element of a vector. If using MKL, does
|
||||
/// it using the "less accurate" options.
|
||||
void ApplyTanh();
|
||||
|
||||
/// Take all elements of vector to a power.
|
||||
void ApplyPow(Real power);
|
||||
|
||||
|
@ -322,20 +326,20 @@ class Vector: public VectorBase<Real> {
|
|||
|
||||
/// Copy constructor. The need for this is controversial.
|
||||
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
|
||||
Resize(v.Dim());
|
||||
Resize(v.Dim(), kUndefined);
|
||||
this->CopyFromVec(v);
|
||||
}
|
||||
|
||||
/// Copy-constructor from base-class, needed to copy from SubVector.
|
||||
explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
|
||||
Resize(v.Dim());
|
||||
Resize(v.Dim(), kUndefined);
|
||||
this->CopyFromVec(v);
|
||||
}
|
||||
|
||||
/// Type conversion constructor.
|
||||
template<typename OtherReal>
|
||||
explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
|
||||
Resize(v.Dim());
|
||||
Resize(v.Dim(), kUndefined);
|
||||
this->CopyFromVec(v);
|
||||
}
|
||||
|
||||
|
@ -372,14 +376,14 @@ class Vector: public VectorBase<Real> {
|
|||
|
||||
/// Assignment operator, protected so it can only be used by std::vector
|
||||
Vector<Real> &operator = (const Vector<Real> &other) {
|
||||
Resize(other.Dim());
|
||||
Resize(other.Dim(), kUndefined);
|
||||
this->CopyFromVec(other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Assignment operator that takes VectorBase.
|
||||
Vector<Real> &operator = (const VectorBase<Real> &other) {
|
||||
Resize(other.Dim());
|
||||
Resize(other.Dim(), kUndefined);
|
||||
this->CopyFromVec(other);
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@@ -679,6 +679,28 @@ template<class Real> static void UnitTestAxpy() {
|
|||
}
|
||||
}
|
||||
|
||||
template<class Real> static void UnitTestCopySp() {
|
||||
// Checking that the various versions of copying
|
||||
// matrix to SpMatrix work the same in the symmetric case.
|
||||
for (MatrixIndexT iter = 0;iter < 5;iter++) {
|
||||
int32 dim = 5 + rand() % 10;
|
||||
SpMatrix<Real> S(dim), T(dim);
|
||||
S.SetRandn();
|
||||
Matrix<Real> M(S);
|
||||
T.CopyFromMat(M, kTakeMeanAndCheck);
|
||||
AssertEqual(S, T);
|
||||
T.SetZero();
|
||||
T.CopyFromMat(M, kTakeMean);
|
||||
AssertEqual(S, T);
|
||||
T.SetZero();
|
||||
T.CopyFromMat(M, kTakeLower);
|
||||
AssertEqual(S, T);
|
||||
T.SetZero();
|
||||
T.CopyFromMat(M, kTakeUpper);
|
||||
AssertEqual(S, T);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Real> static void UnitTestPower() {
|
||||
for (MatrixIndexT iter = 0;iter < 5;iter++) {
|
||||
|
@ -1430,6 +1452,7 @@ template<class Real> static void UnitTestMulElements() {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Real> static void UnitTestSpLogExp() {
|
||||
for (MatrixIndexT i = 0; i < 5; i++) {
|
||||
MatrixIndexT dimM = 10 + rand() % 10;
|
||||
|
@ -1860,6 +1883,27 @@ template<class Real> static void UnitTestLimitCond() {
|
|||
}
|
||||
}
|
||||
|
||||
template<class Real> static void UnitTestTanh() {
|
||||
for (MatrixIndexT i = 0; i < 10; i++) {
|
||||
MatrixIndexT dimM = 5 + rand() % 10, dimN = 5 + rand() % 10;
|
||||
Matrix<Real> M(dimM, dimN);
|
||||
Matrix<Real> N(M);
|
||||
for(int32 r = 0; r < dimM; r++) {
|
||||
for (int32 c = 0; c < dimN; c++) {
|
||||
Real x = N(r, c);
|
||||
if (x > 0.0) {
|
||||
x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
|
||||
} else {
|
||||
x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
|
||||
}
|
||||
N(r, c) = x;
|
||||
}
|
||||
}
|
||||
M.ApplyTanh();
|
||||
AssertEqual(M, N);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Real> static void UnitTestSimple() {
|
||||
for (MatrixIndexT i = 0;i < 5;i++) {
|
||||
MatrixIndexT dimM = 20 + rand()%10, dimN = 20 + rand()%20;
|
||||
|
@ -3541,6 +3585,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
|
|||
UnitTestDotprod<Real>();
|
||||
// UnitTestSvdVariants<Real>();
|
||||
UnitTestPower<Real>();
|
||||
UnitTestCopySp<Real>();
|
||||
UnitTestDeterminant<Real>();
|
||||
KALDI_LOG << " Point F";
|
||||
UnitTestDeterminantSign<Real>();
|
||||
|
@ -3566,6 +3611,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
|
|||
UnitTestRange<Real>();
|
||||
UnitTestSimpleForVec<Real>();
|
||||
UnitTestSimpleForMat<Real>();
|
||||
UnitTestTanh<Real>();
|
||||
UnitTestNorm<Real>();
|
||||
UnitTestMul<Real>();
|
||||
KALDI_LOG << " Point I";
|
||||
|
|
|
@@ -169,9 +169,17 @@ void SpMatrix<Real>::CopyFromMat(const MatrixBase<Real> &M,
|
|||
break;
|
||||
}
|
||||
case kTakeLower:
|
||||
for (MatrixIndexT i = 0; i < D; i++)
|
||||
{ // making this one a bit more efficient.
|
||||
const Real *src = M.Data();
|
||||
Real *dest = this->data_;
|
||||
MatrixIndexT stride = M.Stride();
|
||||
for (MatrixIndexT i = 0; i < D; i++) {
|
||||
for (MatrixIndexT j = 0; j <= i; j++)
|
||||
(*this)(i, j) = M(i, j);
|
||||
dest[j] = src[j];
|
||||
dest += i + 1;
|
||||
src += stride;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case kTakeUpper:
|
||||
for (MatrixIndexT i = 0; i < D; i++)
|
||||
|
|
|
@@ -10,7 +10,7 @@ OBJFILES = nnet-component.o nnet-nnet.o nnet-update.o train-nnet.o \
|
|||
nnet-randomize.o nnet-compute.o am-nnet.o nnet-functions.o \
|
||||
nnet-precondition.o shrink-nnet.o combine-nnet.o combine-nnet-a.o \
|
||||
mixup-nnet.o nnet-lbfgs.o nnet-update-parallel.o combine-nnet-fast.o \
|
||||
nnet-fix.o
|
||||
nnet-fix.o nnet-stats.o rescale-nnet.o nnet-limit-rank.o
|
||||
|
||||
#nnet-compute.o nnet-train.o
|
||||
# nnet-nnet.o nnet-loss.o nnet-rnnlm.o
|
||||
|
|
|
@@ -183,6 +183,23 @@ void UnitTestSigmoidComponent() {
|
|||
}
|
||||
}
|
||||
|
||||
void UnitTestReduceComponent() {
|
||||
// We're testing that the gradients are computed correctly:
|
||||
// the input gradients and the model gradients.
|
||||
|
||||
int32 input_dim = 10 + rand() % 50, n = 1 + rand() % 3;
|
||||
{
|
||||
ReduceComponent reduce_component(input_dim, n);
|
||||
UnitTestGenericComponentInternal(reduce_component);
|
||||
}
|
||||
{
|
||||
ReduceComponent reduce_component;
|
||||
reduce_component.InitFromString("dim=15 n=3");
|
||||
UnitTestGenericComponentInternal(reduce_component);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
void UnitTestGenericComponent() { // works if it has an initializer from int,
|
||||
// e.g. tanh, sigmoid.
|
||||
|
@ -463,6 +480,8 @@ int main() {
|
|||
UnitTestGenericComponent<TanhComponent>();
|
||||
UnitTestGenericComponent<PermuteComponent>();
|
||||
UnitTestGenericComponent<SoftmaxComponent>();
|
||||
UnitTestSigmoidComponent();
|
||||
UnitTestReduceComponent();
|
||||
UnitTestAffineComponent();
|
||||
UnitTestAffinePreconInputComponent();
|
||||
UnitTestBlockAffineComponent();
|
||||
|
|
|
@@ -47,6 +47,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
|
|||
ans = new TanhComponent();
|
||||
} else if (component_type == "SoftmaxComponent") {
|
||||
ans = new SoftmaxComponent();
|
||||
} else if (component_type == "ReduceComponent") {
|
||||
ans = new ReduceComponent();
|
||||
} else if (component_type == "AffineComponent") {
|
||||
ans = new AffineComponent();
|
||||
} else if (component_type == "AffineComponentA") {
|
||||
|
@ -407,20 +409,8 @@ void TanhComponent::Propagate(const MatrixBase<BaseFloat> &in,
|
|||
// Apply tanh function to each element of the output...
|
||||
// the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})),
|
||||
// which is a scaled and shifted sigmoid.
|
||||
out->Resize(in.NumRows(), in.NumCols());
|
||||
int32 num_rows = in.NumRows(), num_cols = in.NumCols();
|
||||
for(int32 r = 0; r < num_rows; r++) {
|
||||
const BaseFloat *in_data = in.RowData(r),
|
||||
*in_data_end = in_data + num_cols;
|
||||
BaseFloat *out_data = out->RowData(r);
|
||||
for (; in_data != in_data_end; ++in_data, ++out_data) {
|
||||
if (*in_data > 0.0) {
|
||||
*out_data = -1.0 + 2.0 / (1.0 + exp(-2.0 * *in_data));
|
||||
} else {
|
||||
*out_data = 1.0 - 2.0 / (1.0 + exp(2.0 * *in_data));
|
||||
}
|
||||
}
|
||||
}
|
||||
*out = in;
|
||||
out->ApplyTanh();
|
||||
}
|
||||
|
||||
void TanhComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
|
||||
|
@@ -502,6 +492,67 @@ void SoftmaxComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
|
|||
}
|
||||
}
|
||||
|
||||
void ReduceComponent::InitFromString(std::string args) {
|
||||
std::string orig_args(args);
|
||||
int32 dim, n;
|
||||
bool ok = ParseFromString("dim", &args, &dim) &&
|
||||
ParseFromString("n", &args, &n);
|
||||
if (!args.empty())
|
||||
KALDI_ERR << "Could not process these elements in initializer: "
|
||||
<< args;
|
||||
if (!ok)
|
||||
KALDI_ERR << "Bad initializer " << orig_args;
|
||||
Init(dim, n);
|
||||
}
|
||||
|
||||
void ReduceComponent::Read(std::istream &is, bool binary) {
|
||||
ExpectOneOrTwoTokens(is, binary, "<ReduceComponent>", "<Dim>");
|
||||
ReadBasicType(is, binary, &dim_);
|
||||
ExpectToken(is, binary, "<N>");
|
||||
ReadBasicType(is, binary, &n_);
|
||||
ExpectToken(is, binary, "</ReduceComponent>");
|
||||
}
|
||||
|
||||
void ReduceComponent::Write(std::ostream &os, bool binary) const {
|
||||
WriteToken(os, binary, "<ReduceComponent>");
|
||||
WriteToken(os, binary, "<Dim>");
|
||||
WriteBasicType(os, binary, dim_);
|
||||
WriteToken(os, binary, "<N>");
|
||||
WriteBasicType(os, binary, n_);
|
||||
WriteToken(os, binary, "</ReduceComponent>");
|
||||
}
|
||||
|
||||
void ReduceComponent::Propagate(const MatrixBase<BaseFloat> &in,
|
||||
int32 num_chunks,
|
||||
Matrix<BaseFloat> *out) const {
|
||||
KALDI_ASSERT(in.NumRows() > 0 && in.NumCols() == InputDim());
|
||||
out->Resize(in.NumRows(), OutputDim());
|
||||
int32 num_frames = in.NumRows(), input_dim = in.NumCols(), n = n_;
|
||||
for (int32 r = 0; r < num_frames; r++) {
|
||||
const BaseFloat *src = in.RowData(r);
|
||||
BaseFloat *dest = out->RowData(r);
|
||||
for (int32 c = 0; c < input_dim; c++)
|
||||
dest[c / n] += src[c];
|
||||
}
|
||||
}
|
||||
|
||||
void ReduceComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value,
|
||||
const MatrixBase<BaseFloat> &, // out_value,
|
||||
const MatrixBase<BaseFloat> &out_deriv,
|
||||
int32, // num_chunks
|
||||
Component *, // to_update
|
||||
Matrix<BaseFloat> *in_deriv) const {
|
||||
int32 num_frames = out_deriv.NumRows(),
|
||||
input_dim = InputDim(), n = n_;
|
||||
in_deriv->Resize(num_frames, input_dim, kUndefined);
|
||||
for (int32 r = 0; r < num_frames; r++) {
|
||||
const BaseFloat *src = out_deriv.RowData(r);
|
||||
BaseFloat *dest = in_deriv->RowData(r);
|
||||
for (int32 c = 0; c < input_dim; c++)
|
||||
dest[c] = src[c / n];
|
||||
}
|
||||
}
|
||||
|
||||
void AffineComponent::Scale(BaseFloat scale) {
|
||||
linear_params_.Scale(scale);
|
||||
bias_params_.Scale(scale);
|
||||
|
@ -859,9 +910,9 @@ void AffineComponentPreconditioned::Update(
|
|||
in_value_temp(i, in_value.NumCols()) = 1.0;
|
||||
|
||||
Matrix<BaseFloat> in_value_precon(in_value_temp.NumRows(),
|
||||
in_value_temp.NumCols()),
|
||||
in_value_temp.NumCols(), kUndefined),
|
||||
out_deriv_precon(out_deriv.NumRows(),
|
||||
out_deriv.NumCols());
|
||||
out_deriv.NumCols(), kUndefined);
|
||||
// each row of in_value_precon will be that same row of
|
||||
// in_value, but multiplied by the inverse of a Fisher
|
||||
// matrix that has been estimated from all the other rows,
|
||||
|
|
|
@ -225,6 +225,8 @@ class NonlinearComponent: public Component {
|
|||
void Scale(BaseFloat scale);
|
||||
void Add(BaseFloat alpha, const NonlinearComponent &other);
|
||||
|
||||
// The following functions are unique to NonlinearComponent.
|
||||
// They mostly relate to diagnostics.
|
||||
const Vector<double> &ValueSum() const { return value_sum_; }
|
||||
const Vector<double> &DerivSum() const { return deriv_sum_; }
|
||||
double Count() const { return count_; }
|
||||
|
@@ -324,6 +326,37 @@ class SoftmaxComponent: public NonlinearComponent {
|
|||
SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
|
||||
};
|
||||
|
||||
/// This layer just sums up groups of n inputs to produce one output.
|
||||
class ReduceComponent: public Component {
|
||||
public:
|
||||
void Init(int32 dim, int32 n) { KALDI_ASSERT(dim > 0 && n > 0);dim_ = dim; n_ = n; }
|
||||
ReduceComponent(int32 dim, int32 n) { Init(dim, n); }
|
||||
ReduceComponent(): dim_(0), n_(0) { } // e.g. prior to Read()
|
||||
explicit ReduceComponent(const ReduceComponent &other):
|
||||
dim_(other.dim_), n_(other.n_) {}
|
||||
virtual Component* Copy() const { return new ReduceComponent(*this); }
|
||||
virtual std::string Type() const { return "ReduceComponent"; }
|
||||
virtual int32 InputDim() const { return dim_; }
|
||||
virtual int32 OutputDim() const { return (dim_ + n_ - 1) / n_; }
|
||||
virtual void InitFromString(std::string args);
|
||||
virtual void Read(std::istream &is, bool binary);
|
||||
virtual void Write(std::ostream &os, bool binary) const;
|
||||
virtual void Propagate(const MatrixBase<BaseFloat> &in,
|
||||
int32 num_chunks,
|
||||
Matrix<BaseFloat> *out) const;
|
||||
virtual void Backprop(const MatrixBase<BaseFloat> &in_value,
|
||||
const MatrixBase<BaseFloat> &out_value,
|
||||
const MatrixBase<BaseFloat> &out_deriv,
|
||||
int32 num_chunks,
|
||||
Component *to_update, // may be identical to "this".
|
||||
Matrix<BaseFloat> *in_deriv) const;
|
||||
virtual bool BackpropNeedsInput() const { return false; }
|
||||
virtual bool BackpropNeedsOutput() const { return false; }
|
||||
private:
|
||||
int32 dim_;
|
||||
int32 n_;
|
||||
};
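Concretely, ReduceComponent maps an input of dimension dim to ceil(dim / n) outputs, with out(r, j) equal to the sum of in(r, c) over all columns c whose integer quotient c / n is j, and the backprop copies each output derivative back to the inputs that fed it. For example, dim = 5 with n = 2 sums columns {0,1}, {2,3} and {4} into three outputs.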
|
||||
|
||||
|
||||
// Affine means a linear function plus an offset.
|
||||
// Note: although this class can be instantiated, it also
|
||||
|
|
|
@@ -0,0 +1,108 @@
|
|||
// nnet/nnet-limit-rank.cc
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "nnet-cpu/nnet-limit-rank.h"
|
||||
#include "thread/kaldi-task-sequence.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
class LimitRankClass {
|
||||
public:
|
||||
LimitRankClass(const NnetLimitRankOpts &opts,
|
||||
int32 c,
|
||||
Nnet *nnet): opts_(opts), c_(c), nnet_(nnet) { }
|
||||
void operator () () {
|
||||
AffineComponent *ac = dynamic_cast<AffineComponent*>(
|
||||
&(nnet_->GetComponent(c_)));
|
||||
KALDI_ASSERT(ac != NULL);
|
||||
|
||||
// We'll limit the rank of just the linear part, keeping the bias vector full.
|
||||
Matrix<BaseFloat> M (ac->LinearParams());
|
||||
int32 rows = M.NumRows(), cols = M.NumCols(), rc_min = std::min(rows, cols);
|
||||
Vector<BaseFloat> s(rc_min);
|
||||
Matrix<BaseFloat> U(rows, rc_min), Vt(rc_min, cols);
|
||||
// Do the destructive svd M = U diag(s) V^T. It actually outputs the transpose of V.
|
||||
M.DestructiveSvd(&s, &U, &Vt);
|
||||
SortSvd(&s, &U, &Vt); // Sort the singular values from largest to smallest.
|
||||
|
||||
int32 d = GetRetainedDim(rows, cols);
|
||||
BaseFloat old_svd_sum = s.Sum();
|
||||
U.Resize(rows, d, kCopyData);
|
||||
s.Resize(d, kCopyData);
|
||||
Vt.Resize(d, cols, kCopyData);
|
||||
BaseFloat new_svd_sum = s.Sum();
|
||||
KALDI_LOG << "For component " << c_ << " of dimension " << rows
|
||||
<< " x " << cols << ", reduced rank from "
|
||||
<< rc_min << " to " << d << ", SVD sum reduced from "
|
||||
<< old_svd_sum << " to " << new_svd_sum;
|
||||
Vt.MulRowsVec(s); // Vt <-- diag(s) Vt.
|
||||
M.AddMatMat(1.0, U, kNoTrans, Vt, kNoTrans, 0.0); // Reconstruct with reduced
|
||||
// rank.
|
||||
Vector<BaseFloat> bias_params(ac->BiasParams());
|
||||
ac->SetParams(bias_params, M);
|
||||
}
|
||||
|
||||
int32 GetRetainedDim(int32 rows, int32 cols) {
|
||||
if (opts_.parameter_proportion <= 0.0 || opts_.parameter_proportion > 1.0)
|
||||
KALDI_ERR << "bad --parameter-proportion " << opts_.parameter_proportion;
|
||||
// If we do SVD to dimension d, so that it's U diag(s) V^T where
|
||||
// U is rows * d, s is d, and V is cols * d, then the #params is as follows...
|
||||
// the first column of U has free parameters (#rows - 1) [the -1 is due to
|
||||
// the length constraint]; the second has (#rows - 2) [subtract 1 for the
|
||||
// length constraint and one for orthogonality with the previous row], etc.
|
||||
// Total is params(U) = (rows * d) - ((d(d+1))/2),
|
||||
// params(s) = d,
|
||||
// params(V) = (cols * d) - ((d(d+1))/2),
|
||||
// So total is (rows + cols) * d - d * d .
|
||||
// For example, if d = #rows, this equals (#rows * #cols)
|
||||
// We are solving for:
|
||||
// (rows * cols) * parameter_proportion = (rows + cols) * d - d * d, or
|
||||
// d^2 - d * (rows + cols) + (rows*cols)*parameter_proportion
|
||||
// In quadratic equation
|
||||
// a = 1.0,
|
||||
// b = -(rows + cols)
|
||||
// c = rows * cols * parameter_proportion.
|
||||
// Take smaller solution.
|
||||
BaseFloat a = 1.0, b = -(rows + cols),
|
||||
c = rows * cols * opts_.parameter_proportion;
|
||||
BaseFloat x = (-b - sqrt(b * b - 4 * a * c)) / (2.0 * a);
|
||||
int32 ans = static_cast<int32>(x);
|
||||
KALDI_ASSERT(ans > 0 && ans <= std::min(rows, cols));
|
||||
return ans;
|
||||
}
|
||||
|
||||
~LimitRankClass() { }
|
||||
private:
|
||||
const NnetLimitRankOpts &opts_;
|
||||
int32 c_;
|
||||
Nnet *nnet_;
|
||||
};
|
||||
|
||||
|
||||
void LimitRankParallel(const NnetLimitRankOpts &opts,
|
||||
Nnet *nnet) {
|
||||
TaskSequencerConfig task_config;
|
||||
task_config.num_threads = opts.num_threads;
|
||||
TaskSequencer<LimitRankClass> tc(task_config);
|
||||
for (int32 c = 0; c < nnet->NumComponents(); c++) {
|
||||
if (dynamic_cast<AffineComponent*>(&(nnet->GetComponent(c))) != NULL)
|
||||
tc.Run(new LimitRankClass(opts, c, nnet));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace
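To make GetRetainedDim() concrete: keeping d singular values of a rows x cols transform costs roughly (rows + cols) * d - d^2 parameters, so the code takes the smaller root of d^2 - (rows + cols) * d + rows * cols * p = 0, with p the --parameter-proportion, i.e. d = ((rows + cols) - sqrt((rows + cols)^2 - 4 * rows * cols * p)) / 2. For rows = cols = 1000 and the default p = 0.75 this gives d = (2000 - 1000) / 2 = 500, and indeed 2000 * 500 - 500^2 = 750000, which is 0.75 of the original 10^6 parameters.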
@@ -0,0 +1,56 @@
|
|||
// nnet-cpu/nnet-limit-rank.h
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
|
||||
#define KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
|
||||
|
||||
#include "nnet-cpu/nnet-nnet.h"
|
||||
#include "util/table-types.h"
|
||||
#include "thread/kaldi-semaphore.h"
|
||||
#include "thread/kaldi-thread.h"
|
||||
#include "nnet-cpu/nnet-update.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
struct NnetLimitRankOpts {
|
||||
int32 num_threads;
|
||||
BaseFloat parameter_proportion;
|
||||
|
||||
NnetLimitRankOpts(): num_threads(1), parameter_proportion(0.75) { }
|
||||
|
||||
void Register(ParseOptions *po) {
|
||||
po->Register("num-threads", &num_threads, "Number of threads used for "
|
||||
"rank-limiting operation; note, will never use more than "
|
||||
"#layers.");
|
||||
po->Register("parameter-proportion", ¶meter_proportion, "Proportion of "
|
||||
"dimension of each transform to limit the rank to.");
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// This function limits the rank of each affine transform in the
|
||||
/// neural net, by zeroing out the smallest singular values. The number of
|
||||
/// singular values to zero out is determined on a layer by layer basis, using
|
||||
/// "parameter_proportion" to set the proportion of parameters to remove.
|
||||
void LimitRankParallel(const NnetLimitRankOpts &opts,
|
||||
Nnet *nnet);
|
||||
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
|
|
@@ -361,6 +361,17 @@ void Nnet::RemoveDropout() {
|
|||
KALDI_LOG << "Removed " << removed << " dropout components.";
|
||||
}
|
||||
|
||||
void Nnet::RemovePreconditioning() {
|
||||
for (size_t i = 0; i < components_.size(); i++) {
|
||||
if (dynamic_cast<AffineComponentPreconditioned*>(components_[i]) != NULL) {
|
||||
AffineComponent *ac = new AffineComponent(
|
||||
*(dynamic_cast<AffineComponent*>(components_[i])));
|
||||
delete components_[i];
|
||||
components_[i] = ac;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Nnet::AddNnet(const VectorBase<BaseFloat> &scale_params,
|
||||
const Nnet &other) {
|
||||
KALDI_ASSERT(scale_params.Dim() == this->NumUpdatableComponents());
|
||||
|
|
|
@@ -105,6 +105,10 @@ class Nnet {
|
|||
/// Excise any components of type DropoutComponent.
|
||||
void RemoveDropout();
|
||||
|
||||
/// Replace any components of type AffineComponentPreconditioned with
|
||||
/// components of type AffineComponent.
|
||||
void RemovePreconditioning();
|
||||
|
||||
/// For each updatable component, adds to it
|
||||
/// the corresponding element of "other" times the
|
||||
/// appropriate element of "scales" (which has the
|
||||
|
|
|
@@ -25,7 +25,13 @@ void PreconditionDirections(const MatrixBase<BaseFloat> &R,
|
|||
MatrixBase<BaseFloat> *P) {
|
||||
|
||||
int32 N = R.NumRows(), D = R.NumCols();
|
||||
KALDI_ASSERT(SameDim(R, *P) && N > 1);
|
||||
KALDI_ASSERT(SameDim(R, *P) && N > 0);
|
||||
if (N == 1) {
|
||||
KALDI_WARN << "Trying to precondition set of only one frames: returning "
|
||||
<< "unchanged. Ignore this warning if infrequent.";
|
||||
P->CopyFromMat(R);
|
||||
return;
|
||||
}
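// Rationale (inferred from the comment in AffineComponentPreconditioned::Update): each row
// is preconditioned by a Fisher matrix estimated from the other rows, which requires at
// least two rows; with a single frame the only sensible option is to pass the input through.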
|
||||
MatrixBase<BaseFloat> &Q = *P;
|
||||
|
||||
if (N >= D) {
|
||||
|
|
|
@@ -0,0 +1,93 @@
|
|||
// nnet-cpu/nnet-stats.h
|
||||
|
||||
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef KALDI_NNET_CPU_NNET_FIX_H_
|
||||
#define KALDI_NNET_CPU_NNET_FIX_H_
|
||||
|
||||
#include "nnet-cpu/nnet-nnet.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
/* This program computes various statistics from a neural net. These are
|
||||
summaries of certain quantities already present in the network as
|
||||
stored on disk, especially regarding certain average values and
|
||||
derivatives of the sigmoids.
|
||||
*/
|
||||
|
||||
struct NnetStatsConfig {
|
||||
BaseFloat bucket_width;
|
||||
NnetStatsConfig(): bucket_width(0.025) { }
|
||||
|
||||
void Register(ParseOptions *po) {
|
||||
po->Register("bucket-width", &bucket_width, "Width of bucket in average-derivative "
|
||||
"stats for analysis.");
|
||||
}
|
||||
};
|
||||
|
||||
class NnetStats {
|
||||
public:
|
||||
NnetStats(int32 affine_component_index, BaseFloat bucket_width):
|
||||
affine_component_index_(affine_component_index),
|
||||
bucket_width_(bucket_width), global_(0, -1) { }
|
||||
|
||||
// Use default copy constructor and assignment operator.
|
||||
|
||||
void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
|
||||
|
||||
void AddStatsFromNnet(const Nnet &nnet);
|
||||
|
||||
void PrintStats(std::ostream &os);
|
||||
private:
|
||||
|
||||
struct StatsElement {
|
||||
BaseFloat deriv_begin; // avg-deriv, beginning of bucket.
|
||||
BaseFloat deriv_end; // avg-deriv, end of bucket.
|
||||
BaseFloat deriv_sum; // sum of avg-deriv within bucket.
|
||||
BaseFloat deriv_sumsq; // Sum-squared of avg-deriv within bucket.
|
||||
BaseFloat abs_value_sum; // Sum of abs(avg-value). Tells us whether it's
|
||||
// saturating at one or both ends.
|
||||
BaseFloat abs_value_sumsq; // Sum-squared of abs(avg-value).
|
||||
int32 count; // Number of nonlinearities in this bucket.
|
||||
|
||||
StatsElement(BaseFloat deriv_begin,
|
||||
BaseFloat deriv_end):
|
||||
deriv_begin(deriv_begin), deriv_end(deriv_end), deriv_sum(0.0),
|
||||
deriv_sumsq(0.0), abs_value_sum(0.0), abs_value_sumsq(0.0), count(0) { }
|
||||
void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
|
||||
// Outputs stats for this bucket; no newline
|
||||
void PrintStats(std::ostream &os);
|
||||
};
|
||||
int32 BucketFor(BaseFloat avg_deriv); // returns the bucket
|
||||
// for this avg-derivative value, and makes sure it is allocated.
|
||||
|
||||
int32 affine_component_index_; // Component index of the affine component
|
||||
// associated with this nonlinearity.
|
||||
BaseFloat bucket_width_; // width of buckets of stats we store (in derivative values).
|
||||
|
||||
std::vector<StatsElement> buckets_; // Stats divided into buckets by avg_deriv.
|
||||
StatsElement global_; // All the stats.
|
||||
|
||||
};
|
||||
|
||||
void GetNnetStats(const NnetStatsConfig &config,
|
||||
const Nnet &nnet,
|
||||
std::vector<NnetStats> *stats);
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // KALDI_NNET_CPU_NNET_FIX_H_
|
|
@@ -44,7 +44,6 @@ class NnetUpdater {
|
|||
// Possibly splices input together from forward_data_[component].
|
||||
// MatrixBase<BaseFloat> &GetSplicedInput(int32 component, Matrix<BaseFloat> *temp_matrix);
|
||||
|
||||
|
||||
void Propagate();
|
||||
|
||||
/// Computes objective function and derivative at output layer.
|
||||
|
@ -156,7 +155,7 @@ void NnetUpdater::Backprop(const std::vector<NnetTrainingExample> &data,
|
|||
|
||||
component.Backprop(input, output, output_deriv, num_chunks,
|
||||
component_to_update, &input_deriv);
|
||||
*deriv = input_deriv;
|
||||
input_deriv.Swap(deriv);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@@ -0,0 +1,212 @@
// nnet-cpu/rescale-nnet.cc

// Copyright 2012 Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet-cpu/rescale-nnet.h"

namespace kaldi {


class NnetRescaler {
 public:
  NnetRescaler(const NnetRescaleConfig &config,
               const std::vector<NnetTrainingExample> &examples,
               Nnet *nnet):
      config_(config), examples_(examples), nnet_(nnet) {}

  void Rescale();

 private:
  /// takes the input and formats as a single matrix, in forward_data_[0].
  void FormatInput(const std::vector<NnetTrainingExample> &data,
                   Matrix<BaseFloat> *input);
  void RescaleComponent(int32 c, int32 num_chunks,
                        MatrixBase<BaseFloat> *cur_data_in,
                        Matrix<BaseFloat> *next_data);

  void ComputeRelevantIndexes();

  BaseFloat GetTargetAvgDeriv(int32 c);

  const NnetRescaleConfig &config_;
  const std::vector<NnetTrainingExample> &examples_;
  Nnet *nnet_;
  std::set<int32> relevant_indexes_; // values of c with AffineComponent followed
  // by (at c+1) NonlinearComponent that is not SoftmaxComponent.
};


void NnetRescaler::FormatInput(const std::vector<NnetTrainingExample> &data,
                               Matrix<BaseFloat> *input) {
  KALDI_ASSERT(data.size() > 0);
  int32 num_splice = nnet_->LeftContext() + 1 + nnet_->RightContext();
  KALDI_ASSERT(data[0].input_frames.NumRows() == num_splice);

  int32 feat_dim = data[0].input_frames.NumCols(),
      spk_dim = data[0].spk_info.Dim(),
      tot_dim = feat_dim + spk_dim; // we append these at the neural net
  // input... note, spk_dim might be 0.
  KALDI_ASSERT(tot_dim == nnet_->InputDim());
  int32 num_chunks = data.size();

  input->Resize(num_splice * num_chunks,
                tot_dim);
  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
    SubMatrix<BaseFloat> dest(*input,
                              chunk * num_splice, num_splice,
                              0, feat_dim);
    const Matrix<BaseFloat> &src(data[chunk].input_frames);
    dest.CopyFromMat(src);
    if (spk_dim != 0) {
      SubMatrix<BaseFloat> spk_dest(*input,
                                    chunk * num_splice, num_splice,
                                    feat_dim, spk_dim);
      spk_dest.CopyRowsFromVec(data[chunk].spk_info);
    }
  }
}

void NnetRescaler::ComputeRelevantIndexes() {
  for (int32 c = 0; c + 1 < nnet_->NumComponents(); c++)
    if (dynamic_cast<AffineComponent*>(&nnet_->GetComponent(c)) != NULL &&
        (dynamic_cast<NonlinearComponent*>(&nnet_->GetComponent(c+1)) != NULL &&
         dynamic_cast<SoftmaxComponent*>(&nnet_->GetComponent(c+1)) == NULL))
      relevant_indexes_.insert(c);
}


BaseFloat NnetRescaler::GetTargetAvgDeriv(int32 c) {
  KALDI_ASSERT(relevant_indexes_.count(c) == 1);
  BaseFloat factor;
  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
    factor = 0.25;
  else if (dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
    factor = 1.0;
  else
    KALDI_ERR << "This type of nonlinear component is not handled: index " << c;

  int32 last_c = *std::max_element(relevant_indexes_.begin(), relevant_indexes_.end()),
      first_c = *std::min_element(relevant_indexes_.begin(), relevant_indexes_.end());
  if (c == first_c)
    return factor * config_.target_first_layer_avg_deriv;
  else if (c == last_c)
    return factor * config_.target_last_layer_avg_deriv;
  else
    return factor * config_.target_avg_deriv;
}

// Here, c is the index of the affine component, and
// c + 1 is the index of the nonlinear component; *cur_data is the
// output of the affine component.
void NnetRescaler::RescaleComponent(
    int32 c,
    int32 num_chunks,
    MatrixBase<BaseFloat> *cur_data_in,
    Matrix<BaseFloat> *next_data) {
  int32 rows = cur_data_in->NumRows(), cols = cur_data_in->NumCols();
  // Only handle sigmoid or tanh here.
  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) == NULL &&
      dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) == NULL)
    KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
  // the nonlinear component:
  NonlinearComponent &nc =
      *(dynamic_cast<NonlinearComponent*>(&(nnet_->GetComponent(c + 1))));

  BaseFloat orig_avg_deriv, target_avg_deriv = GetTargetAvgDeriv(c);
  BaseFloat cur_scaling = 1.0; // current rescaling factor (on input).
  int32 num_iters = 10;

  Matrix<BaseFloat> cur_data(*cur_data_in),
      ones(rows, cols), in_deriv(rows, cols);

  ones.Set(1.0);
  nc.Propagate(cur_data, num_chunks, next_data);
  nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
  BaseFloat cur_avg_deriv;
  cur_avg_deriv = in_deriv.Sum() / (rows * cols);
  orig_avg_deriv = cur_avg_deriv;
  for (int32 iter = 0; iter < num_iters; iter++) {
    // We already have "cur_avg_deriv"; perturb the scale and compute
    // the next avg_deriv, so we can see how it changes with the scale.
    cur_data.CopyFromMat(*cur_data_in);
    cur_data.Scale(cur_scaling + config_.delta);
    nc.Propagate(cur_data, num_chunks, next_data);
    nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
    BaseFloat next_avg_deriv = in_deriv.Sum() / (rows * cols);
    KALDI_ASSERT(next_avg_deriv < cur_avg_deriv);
    // "gradient" is how avg_deriv changes as we change the scale.
    // should be negative.
    BaseFloat gradient = (next_avg_deriv - cur_avg_deriv) / config_.delta;
    KALDI_ASSERT(gradient < 0.0);
    BaseFloat proposed_change = (target_avg_deriv - cur_avg_deriv) / gradient;
    KALDI_VLOG(2) << "cur_avg_deriv = " << cur_avg_deriv << ", target_avg_deriv = "
                  << target_avg_deriv << ", gradient = " << gradient
                  << ", proposed_change " << proposed_change;
    // Limit size of proposed change in "cur_scaling", to ensure stability.
    if (fabs(proposed_change / cur_scaling) > config_.max_change)
      proposed_change = cur_scaling * config_.max_change *
          (proposed_change > 0.0 ? 1.0 : -1.0);
    cur_scaling += proposed_change;

    cur_data.CopyFromMat(*cur_data_in);
    cur_data.Scale(cur_scaling);
    nc.Propagate(cur_data, num_chunks, next_data);
    nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
    cur_avg_deriv = in_deriv.Sum() / (rows * cols);
    if (fabs(proposed_change) < config_.min_change) break; // Terminate the
    // optimization
  }
  UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(
      &nnet_->GetComponent(c));
  KALDI_ASSERT(uc != NULL);
  uc->Scale(cur_scaling); // scale the parameters of the previous
  // AffineComponent.

  KALDI_LOG << "For component " << c << ", scaling parameters by "
            << cur_scaling << "; average "
            << "derivative changed from " << orig_avg_deriv << " to "
            << cur_avg_deriv << "; target was " << target_avg_deriv;
}



void NnetRescaler::Rescale() {
  ComputeRelevantIndexes(); // set up relevant_indexes_.
  Matrix<BaseFloat> cur_data, next_data;
  FormatInput(examples_, &cur_data);
  int32 num_chunks = examples_.size();
  for (int32 c = 0; c < nnet_->NumComponents(); c++) {
    Component &component = nnet_->GetComponent(c);
    if (relevant_indexes_.count(c - 1) == 1) {
      // the following function call also appropriately sets "next_data"
      // after doing the rescaling
      RescaleComponent(c - 1, num_chunks, &cur_data, &next_data);
    } else {
      component.Propagate(cur_data, num_chunks, &next_data);
    }
    cur_data.Swap(&next_data);
  }
}

void RescaleNnet(const NnetRescaleConfig &rescale_config,
                 const std::vector<NnetTrainingExample> &examples,
                 Nnet *nnet) {
  NnetRescaler rescaler(rescale_config, examples, nnet);
  rescaler.Rescale();
}


} // namespace
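RescaleComponent above is essentially a one-dimensional secant-style search: perturb the input scale by delta, measure how the average derivative of the nonlinearity responds, step toward the target, cap the step at max_change times the current scale, and stop once the step falls below min_change. A minimal, self-contained sketch of the same iteration on a scalar sigmoid (plain C++, no Kaldi; the activation values are made up, and the target is the sigmoid's peak derivative of 0.25 times the default first-layer fraction of 0.3):

#include <cmath>
#include <cstdio>
#include <vector>

// Average sigmoid derivative over some fixed pre-nonlinearity values after
// scaling them by s; stand-in for the Propagate + Backprop pair above.
static double AvgSigmoidDeriv(const std::vector<double> &x, double s) {
  double sum = 0.0;
  for (double v : x) {
    double y = 1.0 / (1.0 + std::exp(-s * v));
    sum += y * (1.0 - y);  // derivative of the sigmoid at s * v
  }
  return sum / x.size();
}

int main() {
  std::vector<double> x = {-3.0, -1.0, 0.5, 2.0, 4.0};  // made-up activations
  const double target = 0.25 * 0.3;  // factor * target_first_layer_avg_deriv
  const double delta = 0.01, max_change = 0.2, min_change = 1.0e-05;
  double scale = 1.0;
  for (int iter = 0; iter < 10; iter++) {
    double cur = AvgSigmoidDeriv(x, scale);
    // Finite-difference slope of avg-deriv w.r.t. the scale; negative, since
    // scaling a sigmoid's input up saturates it further.
    double gradient = (AvgSigmoidDeriv(x, scale + delta) - cur) / delta;
    double change = (target - cur) / gradient;
    if (std::fabs(change / scale) > max_change)  // limit the step for stability
      change = scale * max_change * (change > 0.0 ? 1.0 : -1.0);
    scale += change;
    std::printf("iter %d: avg-deriv %.4f -> scale %.4f\n", iter, cur, scale);
    if (std::fabs(change) < min_change) break;
  }
  return 0;
}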
@@ -0,0 +1,76 @@
// nnet-cpu/rescale-nnet.h

// Copyright 2012 Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET_CPU_RESCALE_NNET_H_
#define KALDI_NNET_CPU_RESCALE_NNET_H_

#include "nnet-cpu/nnet-update.h"
#include "nnet-cpu/nnet-compute.h"
#include "util/parse-options.h"

// Neural net rescaling is a rescaling of the parameters of the various layers
// of a neural net, done so as to match certain specified statistics on the
// average derivative of the sigmoid, measured on sample data. This relates to
// how "saturated" the sigmoid is.

namespace kaldi {


struct NnetRescaleConfig {
  BaseFloat target_avg_deriv;
  BaseFloat target_first_layer_avg_deriv;
  BaseFloat target_last_layer_avg_deriv;

  // These are relatively unimportant; for now they have no
  // command line options.
  BaseFloat num_iters;
  BaseFloat delta;
  BaseFloat max_change; // maximum change on any one iteration (to
  // ensure stability).
  BaseFloat min_change; // minimum change on any one iteration (controls
  // termination).

  NnetRescaleConfig(): target_avg_deriv(0.2),
                       target_first_layer_avg_deriv(0.3),
                       target_last_layer_avg_deriv(0.1),
                       num_iters(10),
                       delta(0.01),
                       max_change(0.2), min_change(1.0e-05) { }

  void Register(ParseOptions *po) {
    po->Register("target-avg-deriv", &target_avg_deriv, "Target average derivative "
                 "for hidden layers that are not the first or last hidden layer "
                 "(as fraction of maximum derivative of the nonlinearity)");
    po->Register("target-first-layer-avg-deriv", &target_first_layer_avg_deriv,
                 "Target average derivative for the first hidden layer "
                 "(as fraction of maximum derivative of the nonlinearity)");
    po->Register("target-last-layer-avg-deriv", &target_last_layer_avg_deriv,
                 "Target average derivative for the last hidden layer, if "
                 "#hid-layers > 1 "
                 "(as fraction of maximum derivative of the nonlinearity)");
  }
};

void RescaleNnet(const NnetRescaleConfig &rescale_config,
                 const std::vector<NnetTrainingExample> &examples,
                 Nnet *nnet);



} // namespace

#endif
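The three targets registered above are fractions of the nonlinearity's maximum derivative; GetTargetAvgDeriv in rescale-nnet.cc turns them into absolute targets by multiplying by 0.25 for a sigmoid and 1.0 for tanh, and by picking the first-, last- or middle-layer fraction depending on the component's position. A tiny sketch of that mapping using the default values above (illustrative only):

#include <cstdio>

// Fraction-of-maximum targets (the defaults above) turned into absolute
// average-derivative targets, as GetTargetAvgDeriv does per layer.
int main() {
  const double sigmoid_max_deriv = 0.25, tanh_max_deriv = 1.0;
  const double first = 0.3, middle = 0.2, last = 0.1;  // config defaults
  std::printf("sigmoid: first %.3f  middle %.3f  last %.3f\n",
              sigmoid_max_deriv * first, sigmoid_max_deriv * middle,
              sigmoid_max_deriv * last);
  std::printf("tanh:    first %.3f  middle %.3f  last %.3f\n",
              tanh_max_deriv * first, tanh_max_deriv * middle,
              tanh_max_deriv * last);
  return 0;
}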
@@ -13,7 +13,8 @@ BINFILES = nnet-randomize-frames nnet-am-info nnet-train nnet-init \
           nnet-train-lbfgs nnet-get-egs nnet-train-parallel nnet-gradient \
           nnet-get-preconditioner nnet-precondition nnet-select-egs nnet-combine-fast \
           nnet-subset-egs nnet-shuffle-egs nnet-am-fix nnet-logprob nnet-logprob2 \
           nnet-logprob2-parallel nnet-logprob-parallel
           nnet-logprob2-parallel nnet-logprob-parallel nnet-am-stats nnet-am-rescale \
           nnet-am-limit-rank

OBJFILES =
@@ -41,6 +41,7 @@ int main(int argc, char *argv[]) {
    int32 truncate = -1;
    bool binary_write = true;
    bool remove_dropout = false;
    bool remove_preconditioning = false;
    BaseFloat learning_rate_factor = 1.0, learning_rate = -1;
    std::string learning_rates = "";
    std::string scales = "";

@@ -64,6 +65,8 @@ int main(int argc, char *argv[]) {
                "to this many components by removing the last components.");
    po.Register("remove-dropout", &remove_dropout, "Set this to true to remove "
                "any dropout components.");
    po.Register("remove-preconditioning", &remove_preconditioning, "Set this to true to replace "
                "components of type AffineComponentPreconditioned with AffineComponent.");
    po.Register("stats-from", &stats_from, "Before copying neural net, copy the "
                "statistics in any layer of type NonlinearComponent, from this "
                "neural network: provide the extended filename.");

@@ -133,6 +136,8 @@ int main(int argc, char *argv[]) {

    if (remove_dropout) am_nnet.GetNnet().RemoveDropout();

    if (remove_preconditioning) am_nnet.GetNnet().RemovePreconditioning();

    if (stats_from != "") {
      // Copy the stats associated with the layers descending from
      // NonlinearComponent.
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
        "e.g.:\n"
        " nnet-am-fix 1.mdl 1_fixed.mdl\n"
        "or:\n"
        " nnet-am-shrink-rows --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n";
        " nnet-am-fix --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n";

    bool binary_write = true;
    NnetFixConfig config;
@@ -0,0 +1,81 @@
// nnet-cpubin/nnet-am-limit-rank.cc

// Copyright 2012 Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-limit-rank.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Copy a (cpu-based) neural net and its associated transition model,\n"
        "but modify it to reduce the effective parameter count by limiting\n"
        "the rank of weight matrices.\n"
        "\n"
        "Usage: nnet-am-limit-rank [options] <nnet-in> <nnet-out>\n"
        "e.g.:\n"
        " nnet-am-limit-rank 1.mdl 1_limited.mdl\n";

    bool binary_write = true;
    NnetLimitRankOpts config;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
    config.Register(&po);

    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_rxfilename = po.GetArg(1),
        nnet_wxfilename = po.GetArg(2);

    TransitionModel trans_model;
    AmNnet am_nnet;
    {
      bool binary;
      Input ki(nnet_rxfilename, &binary);
      trans_model.Read(ki.Stream(), binary);
      am_nnet.Read(ki.Stream(), binary);
    }

    LimitRankParallel(config, &am_nnet.GetNnet());

    {
      Output ko(nnet_wxfilename, binary_write);
      trans_model.Write(ko.Stream(), binary_write);
      am_nnet.Write(ko.Stream(), binary_write);
    }
    KALDI_LOG << "Copied neural net from " << nnet_rxfilename
              << " to " << nnet_wxfilename;
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
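The usage text above describes cutting the effective parameter count by limiting the rank of the weight matrices; the actual method lives in nnet-cpu/nnet-limit-rank.h (presumably a truncated SVD of each AffineComponent's weights) and is not shown in this diff. As a generic illustration of the idea only, a self-contained sketch that approximates a small matrix by its dominant rank-1 term sigma * u * v^T via power iteration (plain C++; made-up matrix, not the Kaldi implementation):

#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int rows = 3, cols = 4;
  const double W[rows][cols] = {{1, 2, 0, 1}, {2, 4, 1, 2}, {0, 1, 3, 0}};
  std::vector<double> v(cols, 1.0), u(rows, 0.0);
  double sigma = 0.0;
  for (int iter = 0; iter < 100; iter++) {
    // u <- normalize(W v)
    double un = 0.0;
    for (int r = 0; r < rows; r++) {
      u[r] = 0.0;
      for (int c = 0; c < cols; c++) u[r] += W[r][c] * v[c];
      un += u[r] * u[r];
    }
    un = std::sqrt(un);
    for (int r = 0; r < rows; r++) u[r] /= un;
    // v <- normalize(W^T u); its norm converges to the top singular value.
    double vn = 0.0;
    for (int c = 0; c < cols; c++) {
      v[c] = 0.0;
      for (int r = 0; r < rows; r++) v[c] += W[r][c] * u[r];
      vn += v[c] * v[c];
    }
    vn = std::sqrt(vn);
    for (int c = 0; c < cols; c++) v[c] /= vn;
    sigma = vn;
  }
  // Rank-1 approximation of W; keeping more such terms gives higher rank.
  for (int r = 0; r < rows; r++) {
    for (int c = 0; c < cols; c++) std::printf("%7.3f", sigma * u[r] * v[c]);
    std::printf("\n");
  }
  return 0;
}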
@@ -0,0 +1,92 @@
// nnet-cpubin/nnet-am-rescale.cc

// Copyright 2012 Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/rescale-nnet.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Rescale the parameters in a neural net to achieve certain target\n"
        "statistics, relating to the average derivative of the sigmoids\n"
        "measured at some supplied data. This relates to how saturated\n"
        "the sigmoids are (we try to match the statistics of `good' neural\n"
        "nets).\n"
        "\n"
        "Usage: nnet-am-rescale [options] <nnet-in> <examples-in> <nnet-out>\n"
        "e.g.:\n"
        " nnet-am-rescale 1.mdl valid.egs 1_rescaled.mdl\n";

    bool binary_write = true;
    NnetRescaleConfig config;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
    config.Register(&po);

    po.Read(argc, argv);

    if (po.NumArgs() != 3) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_rxfilename = po.GetArg(1),
        egs_rspecifier = po.GetArg(2),
        nnet_wxfilename = po.GetArg(3);

    TransitionModel trans_model;
    AmNnet am_nnet;
    {
      bool binary;
      Input ki(nnet_rxfilename, &binary);
      trans_model.Read(ki.Stream(), binary);
      am_nnet.Read(ki.Stream(), binary);
    }

    std::vector<NnetTrainingExample> egs;

    // This block adds samples to "egs".
    SequentialNnetTrainingExampleReader example_reader(
        egs_rspecifier);
    for (; !example_reader.Done(); example_reader.Next())
      egs.push_back(example_reader.Value());
    KALDI_LOG << "Read " << egs.size() << " examples.";
    KALDI_ASSERT(!egs.empty());

    RescaleNnet(config, egs, &am_nnet.GetNnet());

    {
      Output ko(nnet_wxfilename, binary_write);
      trans_model.Write(ko.Stream(), binary_write);
      am_nnet.Write(ko.Stream(), binary_write);
    }
    KALDI_LOG << "Rescaled neural net and wrote it to " << nnet_wxfilename;
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
@@ -0,0 +1,72 @@
// nnet-cpubin/nnet-am-stats.cc

// Copyright 2012 Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-stats.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Print some statistics about the average derivatives of the sigmoid layers\n"
        "of the neural net, that are stored in the net\n"
        "\n"
        "Usage: nnet-am-stats [options] <nnet-in>\n"
        "e.g.:\n"
        " nnet-am-stats 1.mdl\n";

    NnetStatsConfig config;

    ParseOptions po(usage);
    config.Register(&po);

    po.Read(argc, argv);

    if (po.NumArgs() != 1) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_rxfilename = po.GetArg(1);

    TransitionModel trans_model;
    AmNnet am_nnet;
    {
      bool binary;
      Input ki(nnet_rxfilename, &binary);
      trans_model.Read(ki.Stream(), binary);
      am_nnet.Read(ki.Stream(), binary);
    }

    std::vector<NnetStats> stats;
    GetNnetStats(config, am_nnet.GetNnet(), &stats);
    KALDI_ASSERT(!stats.empty());
    for (size_t i = 0; i < stats.size(); i++)
      stats[i].PrintStats(std::cout);
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
@@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
    KALDI_LOG << "Selected a subset of " << egs.size() << " out of " << num_read
              << " neural-network training examples ";

    return (static_cast<size_t>(n) == egs.size() ? 0 : 1);
    return (num_read != 0 ? 0 : 1);
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
@@ -88,8 +88,8 @@ void CacheTgtMat::AddData(const CuMatrix<BaseFloat> &features, const CuMatrix<Ba
    features_.CopyRowsFromMat(leftover, features_leftover_, 0, 0);
    targets_.CopyRowsFromMat(leftover, targets_leftover_, 0, 0);

    features_leftover_.Destroy();
    targets_leftover_.Destroy();
    features_leftover_.Resize(0, 0);
    targets_leftover_.Resize(0, 0);
    filling_pos_ += leftover;
  }
}

@@ -91,7 +91,7 @@ void Cache::AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32
              targets_leftover_.begin()+leftover,
              targets_.begin());

    features_leftover_.Destroy();
    features_leftover_.Resize(0, 0);
    targets_leftover_.resize(0);
    filling_pos_ += leftover;
  }
@@ -32,7 +32,7 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t
  diff->Resize(net_out.NumRows(), net_out.NumCols());

  // compute derivative wrt. activations of last layer of neurons
  diff->CopyFromMat(net_out);
  *diff = net_out;
  diff->AddMat(-1.0, target);

  // we'll not produce per-frame classification accuracy for soft labels

@@ -40,7 +40,8 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t

  // :TODO: reimplement when needed
  // compute xentropy (ON CPU)
  Matrix<BaseFloat> target_host, net_out_host;
  Matrix<BaseFloat> target_host(target.NumRows(), target.NumCols(), kUndefined),
      net_out_host(net_out.NumRows(), net_out.NumCols(), kUndefined);
  target.CopyToMat(&target_host);
  net_out.CopyToMat(&net_out_host);
  BaseFloat val;

@@ -69,7 +70,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
  // get the xentropy and global error
  target_device_.CopyFromVec(target);
  if(&net_out != diff) { //<allow no-copy speedup
    diff->CopyFromMat(net_out);
    *diff = net_out;
  }
  cu::DiffXent(target_device_, diff, &log_post_tgt_);
  //

@@ -84,6 +85,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
  // log(sum_row(net_out.*target_mat)))
  // they now are stored in vector log_post_tgt_
  //
  log_post_tgt_host_.Resize(log_post_tgt_.Dim());
  log_post_tgt_.CopyToVec(&log_post_tgt_host_);
  loss_ -= log_post_tgt_host_.Sum();

@@ -110,9 +112,10 @@ std::string Xent::Report() {
void Mse::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &target, CuMatrix<BaseFloat> *diff) {
  KALDI_ASSERT(net_out.NumCols() == target.NumCols());
  KALDI_ASSERT(net_out.NumRows() == target.NumRows());
  diff->Resize(net_out.NumRows(), net_out.NumCols());

  // compute derivative w.r.t. neural network outputs
  diff->Resize(net_out.NumRows(), net_out.NumCols());
  diff->CopyFromMat(net_out);
  diff->AddMat(-1.0, target);

@@ -147,9 +150,9 @@ std::string Mse::Report() {
void MseProgress::Eval(const CuMatrix<BaseFloat>& net_out, const CuMatrix<BaseFloat>& target, CuMatrix<BaseFloat>* diff) {
  KALDI_ASSERT(net_out.NumCols() == target.NumCols());
  KALDI_ASSERT(net_out.NumRows() == target.NumRows());
  diff->Resize(net_out.NumRows(),net_out.NumCols());

  //compute derivative w.r.t. neural network outputs
  diff->Resize(net_out.NumRows(),net_out.NumCols());
  diff->CopyFromMat(net_out);
  diff->AddMat(-1.0,target);
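The hunks above keep the long-standing identity that, with a softmax output and a cross-entropy objective, the derivative with respect to the last layer's activations is simply net_out minus target (the change only replaces an explicit copy with assignment). A small, self-contained numerical check of that identity (plain C++, made-up activations, not Kaldi code), comparing the analytic y - t against a finite-difference gradient of the loss:

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Cross-entropy loss of a one-hot target class against softmax(a).
static double Xent(const std::vector<double> &a, int target) {
  double mx = *std::max_element(a.begin(), a.end()), denom = 0.0;
  for (double v : a) denom += std::exp(v - mx);
  return -(a[target] - mx - std::log(denom));
}

int main() {
  std::vector<double> a = {0.5, -1.0, 2.0};  // made-up pre-softmax activations
  const int target = 2;
  const double eps = 1.0e-06;

  // Softmax output y (what the net emits after its softmax layer).
  double mx = *std::max_element(a.begin(), a.end()), denom = 0.0;
  std::vector<double> y(a.size());
  for (size_t i = 0; i < a.size(); i++) denom += std::exp(a[i] - mx);
  for (size_t i = 0; i < a.size(); i++) y[i] = std::exp(a[i] - mx) / denom;

  // Analytic derivative w.r.t. the activations is y - t, which is what the
  // "diff" matrix above holds; compare with a finite difference of the loss.
  for (size_t i = 0; i < a.size(); i++) {
    double analytic = y[i] - (static_cast<int>(i) == target ? 1.0 : 0.0);
    std::vector<double> ap = a;
    ap[i] += eps;
    double numeric = (Xent(ap, target) - Xent(a, target)) / eps;
    std::printf("dim %zu: analytic %.6f  finite-diff %.6f\n", i, analytic, numeric);
  }
  return 0;
}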
@@ -94,8 +94,7 @@ int main(int argc, char *argv[]) {
    //the pointer will be given to the nnet, so we don't need to call delete

    //convert Vector to CuVector
    CuVector<BaseFloat> cu_shift;
    cu_shift.CopyFromVec(shift);
    CuVector<BaseFloat> cu_shift(shift);

    //set the weights
    shift_component->SetShiftVec(cu_shift);

@@ -110,8 +109,7 @@ int main(int argc, char *argv[]) {
    //the pointer will be given to the nnet, so we don't need to call delete

    //convert Vector to CuVector
    CuVector<BaseFloat> cu_scale;
    cu_scale.CopyFromVec(scale);
    CuVector<BaseFloat> cu_scale(scale);

    //set the weights
    scale_component->SetScaleVec(cu_scale);

@@ -129,6 +129,7 @@ int main(int argc, char *argv[]) {
      }

      // push priors to GPU
      priors.Resize(tmp_priors.Dim());
      priors.CopyFromVec(tmp_priors);
    }

@@ -150,7 +151,7 @@ int main(int argc, char *argv[]) {
        }
      }
      // push it to gpu
      feats.CopyFromMat(mat);
      feats = mat;
      // fwd-pass
      nnet_transf.Feedforward(feats, &feats_transf);
      nnet.Feedforward(feats_transf, &nnet_out);

@@ -170,6 +171,7 @@ int main(int argc, char *argv[]) {
      }

      //download from GPU
      nnet_out_host.Resize(nnet_out.NumRows(), nnet_out.NumCols());
      nnet_out.CopyToMat(&nnet_out_host);
      //check for NaN/inf
      for(int32 r=0; r<nnet_out_host.NumRows(); r++) {
@@ -223,12 +223,13 @@ int main(int argc, char *argv[]) {

      //3) propagate the feature to get the log-posteriors (nnet w/o softmax)
      // push features to GPU
      feats.CopyFromMat(mat);
      feats = mat;
      // possibly apply transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // propagate through the nnet (assuming w/o softmax)
      nnet.Propagate(feats_transf, &nnet_out);
      // pop it back to the HOST
      // transfer it back to the host
      nnet_out_h.Resize(nnet_out.NumRows(), nnet_out.NumCols(), kUndefined);
      nnet_out.CopyToMat(&nnet_out_h);
      // TODO: possibly divide by priors

@@ -277,7 +278,7 @@ int main(int argc, char *argv[]) {

      //7) backpropagate through the nnet
      if (!crossvalidate) {
        nnet_diff.CopyFromMat(nnet_diff_h);
        nnet_diff = nnet_diff_h;
        nnet.Backpropagate(nnet_diff, NULL);
      }
@@ -139,8 +139,8 @@ int main(int argc, char *argv[]) {
        continue;
      }
      // push features/targets to GPU
      feats.CopyFromMat(fea_mat);
      targets.CopyFromMat(tgt_mat);
      feats = fea_mat;
      targets = tgt_mat;
      // possibly apply feature transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // add to cache

@@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
        continue;
      }
      // push features to GPU
      feats.CopyFromMat(mat);
      feats = mat;
      // possibly apply transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // add to cache

@@ -138,6 +138,7 @@ int main(int argc, char *argv[]) {
        num_other_error++;
      } else { //dimension OK
        // push features to GPU
        feats.Resize(mat.NumRows(), mat.NumCols(), kUndefined);
        feats.CopyFromMat(mat);
        // possibly apply transform
        nnet_transf.Feedforward(feats, &feats_transf);

@@ -132,7 +132,8 @@ int main(int argc, char *argv[]) {
      rbm_transf.Feedforward(feats, &feats_transf);
      // subsample the feats to get faster epochs
      if(drop_data > 0.0) {
        Matrix<BaseFloat> mat2;
        Matrix<BaseFloat> mat2(feats_transf.NumRows(), feats_transf.NumCols(),
                               kUndefined);
        feats_transf.CopyToMat(&mat2);
        for(int32 r=mat2.NumRows()-1; r >= 0; r--) {
          if(RandUniform() < drop_data) {

@@ -62,8 +62,7 @@ int main(int argc, char *argv[]) {
    //the pointer will be given to the nnet, so we don't need to call delete

    //convert Matrix to CuMatrix
    CuMatrix<BaseFloat> cu_transform;
    cu_transform.CopyFromMat(transform);
    CuMatrix<BaseFloat> cu_transform(transform);

    //set the weights
    layer->SetLinearity(cu_transform);