A lot of changes: script changes re neural nets (more efficient I/O, slightly better WERs); various new functionality for nnets, and improved interfaces for some feature-related binaries.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1976 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2013-02-04 21:57:06 +00:00
Parent c1944a7209
Commit f699fd2be1
65 changed files: 1879 additions and 462 deletions

View file

@@ -1,11 +1,5 @@
#!/bin/bash
-# CAUTION: I changed e.g. 1.trans to trans.1 in the scripts. If you ran it
-# part-way through prior to this, to convert to the new naming
-# convention, run:
-# for x in `find . -name '*.trans'`; do mv $x `echo $x | perl -ane 's/(\d+)\.trans/trans.$1/;print;'`; done
-# but be careful as this will not follow soft links.
. cmd.sh
# call the next line with the directory where the RM data is

View file

@@ -1,5 +1,2 @@
beam=11.0 # beam for decoding. Was 13.0 in the scripts.
first_beam=8.0 # beam for 1st-pass decoding in SAT.

View file

@@ -21,8 +21,8 @@
)
# Here are the results (copied from RESULTS file)
-#exp/nnet6a/decode_train_dev/wer_10:%WER 24.87 [ 12053 / 48460, 1590 ins, 3017 del, 7446 sub ]
+#exp/nnet6a/decode_train_dev/wer_11:%WER 24.30 [ 11774 / 48460, 1619 ins, 2877 del, 7278 sub ]
-#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.1 16.0 6.9 2.7 25.6 62.6 |
+#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.8 16.0 6.3 3.0 25.3 62.6 |
# Here are some older results when the system had 2k not 4k leaves and ran from a worse SAT

View file

@@ -191,6 +191,7 @@ exp/tri4a_dnn/decode_bd_tgpr_eval92/wer_10:%WER 4.00 [ 226 / 5643, 34 ins, 12 de
# and for eval92 is 3.79, the same system. (On this setup, discriminative training helped a lot,
# which seems to be the reason we can't beat the SGMM+MMI numbers here.)
-exp/nnet5c1/decode_bd_tgpr_dev93/wer_10:%WER 7.48 [ 616 / 8234, 73 ins, 98 del, 445 sub ]
-exp/nnet5c1/decode_bd_tgpr_eval92/wer_11:%WER 4.41 [ 249 / 5643, 29 ins, 19 del, 201 sub ]
-# Note: my 4.41% result is worse than Karel's 4.00%.
+exp/nnet5c1/decode_bd_tgpr_dev93/wer_14:%WER 7.32 [ 603 / 8234, 61 ins, 101 del, 441 sub ]
+exp/nnet5c1/decode_bd_tgpr_eval92/wer_14:%WER 4.39 [ 248 / 5643, 32 ins, 17 del, 199 sub ]
+# Note: my 4.39% result is worse than Karel's 4.00%.

View file

@@ -47,7 +47,7 @@ cat links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \
grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si84.flist
nl=`cat train_si84.flist | wc -l`
-[ "$nl" -eq 7138 ] || echo "Warning: expected 37416 lines in train_si84.flist, got $nl"
+[ "$nl" -eq 7138 ] || echo "Warning: expected 7138 lines in train_si84.flist, got $nl"
# This version for SI-284
cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \

View file

@@ -281,7 +281,6 @@ steps/train_quick.sh --cmd "$train_cmd" \
exp/tri4b/graph_bd_tgpr data/test_eval92 exp/tri4b/decode_bd_tgpr_eval92 || exit 1;
) &
# Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
# all the data). Use 30 jobs.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \

View file

@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# This script appends the features in two data directories.
# To be run from .. (one directory up from here)
# see ../run.sh for example
# This config creates MFCC features with half the window size and window shift,
# and splices and sub-samples them. We'll use another script append_feats.sh
# to combine (append) the data directories.
# Begin configuration section.
cmd=run.pl
nj=4
# End configuration section.
echo "$0 $@" # Print the command line for logging
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "usage: append_feats.sh [options] <src-data-dir1> <src-data-dir2> <dest-data-dir> <log-dir> <path-to-storage-dir>";
echo "options: "
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data_src1=$1
data_src2=$2
data=$3
logdir=$4
mfccdir=$5
utils/split_data.sh $data_src1 $nj || exit 1;
utils/split_data.sh $data_src2 $nj || exit 1;
mkdir -p $mfccdir $logdir
rm -rf $data
mkdir -p `dirname $data` # Make sure directory one level up exists.
cp -r $data_src1 $data # so we get the other files, such as utt2spk.
rm $data/cmvn.scp
rm -r $data/split* 2>/dev/null
# use "name" as part of name of the archive.
name=`basename $data`
$cmd JOB=1:$nj $logdir/append.JOB.log \
append-feats --truncate-frames=true \
scp:$data_src1/split$nj/JOB/feats.scp scp:$data_src2/split$nj/JOB/feats.scp \
ark,scp:$mfccdir/appended_$name.JOB.ark,$mfccdir/appended_$name.JOB.scp || exit 1;
# concatenate the .scp files together.
for ((n=1; n<=nj; n++)); do
cat $mfccdir/appended_$name.$n.scp || exit 1;
done > $data/feats.scp
nf=`cat $data/feats.scp | wc -l`
nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
fi
echo "Succeeded creating MFCC features for $name"

View file

@@ -18,6 +18,10 @@ max_mem=20000000 # This will stop the processes getting too large.
# This is in bytes, but not "real" bytes-- you have to multiply
# by something like 5 or 10 to get real bytes (not sure why so large)
# End configuration section.
+num_threads=1 # Number of threads used in nnet-logprob computation. If you set
+# this to a different value, make sure to also set the appropriate
+# queue options. If you set this too high it won't use all the
+# threads as most of the time will be taken in the decoder.
echo "$0 $@" # Print the command line for logging
@@ -104,9 +108,10 @@ fi
if [ $sub_split -eq 1 ]; then
$cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
-nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
+nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats" ark:- \| \
+latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
-$dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
+$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
else
for n in `seq $nj`; do
if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then
@@ -120,9 +125,10 @@ else
mkdir -p $dir/part
feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
$cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
-nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
+nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats_subset" ark:- \| \
+latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
--max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
-$dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
+$dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
echo Merging archives for data subset $n
rm $dir/.error 2>/dev/null;
for k in `seq $sub_split`; do
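For illustration, the same two-stage decode can be written as a standalone pipeline (a sketch only; the model, graph and beam values below are placeholders, not from this commit): nnet-logprob-parallel computes the per-frame log-probabilities once, and latgen-faster-mapped consumes them as a matrix archive.

# Hypothetical standalone version of the pipeline above:
nnet-logprob-parallel --num-threads=4 exp/nnet/final.mdl "$feats" ark:- | \
  latgen-faster-mapped --beam=13.0 --lattice-beam=7.0 --acoustic-scale=0.1 \
    --word-symbol-table=data/lang/words.txt exp/nnet/final.mdl \
    exp/nnet/dengraph/HCLG.fst ark:- "ark:|gzip -c > lat.1.gz"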

View file

@@ -14,18 +14,24 @@ num_iters_final=10 # Number of final iterations to give to the
# optimization over the validation set.
initial_learning_rate=0.02 # for RM; or 0.01 is suitable for Swbd.
final_learning_rate=0.004 # for RM; or 0.001 is suitable for Swbd.
-num_valid_utts=300 # held-out utterances, used only for diagnostics.
-num_valid_frames_shrink=2000 # a subset of the frames in "valid_utts", used only
- # for estimating shrinkage parameters and for
- # objective-function reporting.
+num_utts_subset=300 # number of utterances in validation and training
+ # subsets used for shrinkage and diagnostics
+num_valid_frames_shrink=0 # number of validation frames in the subset
+ # used for shrinking
+num_train_frames_shrink=2000 # number of training frames in the subset used
+ # for shrinking (by default we use all training
+ # frames for this.)
shrink_interval=3 # shrink every $shrink_interval iters,
# except at the start of training when we do it every iter.
-num_valid_frames_combine=10000 # combination weights at the very end.
+num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
+num_train_frames_combine=10000 # # train frames for the above.
+num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
minibatch_size=128 # by default use a smallish minibatch size for neural net training; this controls instability
# which would otherwise be a problem with multi-threaded update. Note:
# it also interacts with the "preconditioned" update, so it's not completely cost free.
-samples_per_iteration=400000 # each iteration of training, see this many samples
- # per job.
+samples_per_iter=400000 # each iteration of training, see this many samples
+ # per job. This is just a guideline; it will pick a number
+ # that divides the number of samples in the entire data.
shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
# on each iter. You could set it to 0 or to a large value for complete
# randomization, but this would both consume memory and cause spikes in
@@ -37,13 +43,13 @@ add_layers_period=2 # by default, add new layers every 2 iterations.
num_hidden_layers=2
initial_num_hidden_layers=1 # we'll add the rest one by one.
num_parameters=2000000 # 2 million parameters by default.
-stage=-7
+stage=-9
realign_iters=""
beam=10 # for realignment.
retry_beam=40
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
-shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
+io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
nnet_config_opts=
splice_width=4 # meaning +- 4 frames on each side for second LDA
lda_dim=250
@@ -54,7 +60,11 @@ shrink=true
mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
# specified.)
num_threads=16
-mkl_num_threads=1
+valid_is_heldout=false # For some reason, holding out the validation set from the training set
+ # seems to hurt, so by default we don't do it (i.e. it's included in training)
+random_copy=false
+cleanup=true
# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -72,7 +82,7 @@ if [ $# != 4 ]; then
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --num-epochs <#epochs|15> # Number of epochs of main training"
echo " # while reducing learning rate (determines #iterations, together"
-echo " # with --samples-per-iteration and --num-jobs-nnet)"
+echo " # with --samples-per-iter and --num-jobs-nnet)"
echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training"
echo " # after learning rate fully reduced"
echo " --initial-learning-rate <initial-learning-rate|0.02> # Learning rate at start of training, e.g. 0.02 for small"
@@ -95,21 +105,27 @@ if [ $# != 4 ]; then
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads."
-echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
-echo " # data. (prevents stressing the disk). "
+echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
-echo " --samples-per-iteration <#samples|400000> # Number of samples of data to process per iteration, per"
+echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
echo " # process."
echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
echo " # (note: we splice processed, typically 40-dimensional frames"
echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
echo " # interpolate parameters (the weights are learned with a validation set)"
-echo " --stage <stage|-7> # Used to run a partially-completed training process from somewhere in"
+echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
+echo " # (the validation subset is held out from training)"
+echo " --num-valid-frames-shrink <#frames|2000> # Number of frames from the validation set used for shrinking"
+echo " --num-train-frames-shrink <#frames|0> # Number of frames from the training set used for shrinking"
+echo " # (by default it's included in training, which for some reason helps)."
+echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
+echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
+echo " # very end."
+echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
echo " # the middle."
exit 1;
fi
@@ -144,8 +160,11 @@ cp $alidir/tree $dir
# Get list of validation utterances.
-awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_valid_utts \
+awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
> $dir/valid_uttlist || exit 1;
+awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
+head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;
## Set up features. Note: these are different from the normal features
## because we have one rspecifier that has the features for the entire
@@ -154,33 +173,49 @@ if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
echo "$0: feature type is $feat_type"
case $feat_type in
-delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
-split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
+delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
+train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
;;
-lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
-split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
cp $alidir/final.mat $dir
;;
*) echo "$0: invalid feature type $feat_type" && exit 1;
esac
if [ -f $alidir/trans.1 ]; then
echo "$0: using transforms from $alidir"
-feats="$feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
-split_feats="$split_feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
+feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
+train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
fi
if [ $stage -le -9 ]; then
echo "$0: working out number of frames of training data"
num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;
echo $num_frames > $dir/num_frames
else
num_frames=`cat $dir/num_frames` || exit 1;
fi
# Working out number of iterations per epoch.
iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1;
[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1
samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]
echo "Every epoch, splitting the data up into $iters_per_epoch iterations,"
echo "giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)."
## Do LDA on top of whatever features we already have; store the matrix which
## we'll put into the neural network as a constant.
-if [ $stage -le -7 ]; then
+if [ $stage -le -8 ]; then
-echo "Accumulating LDA statistics."
+echo "$0: Accumulating LDA statistics."
$cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \
-acc-lda --rand-prune=$randprune $alidir/final.mdl "$split_feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
+acc-lda --rand-prune=$randprune $alidir/final.mdl "$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
$dir/lda.JOB.acc || exit 1;
est-lda --dim=$lda_dim $dir/lda.mat $dir/lda.*.acc \
2>$dir/log/lda_est.log || exit 1;
@@ -195,7 +230,7 @@ if [ $initial_num_hidden_layers -gt $num_hidden_layers ]; then
fi
-if [ $stage -le -6 ]; then
+if [ $stage -le -7 ]; then
echo "$0: initializing neural net";
# to hidden.config it will write the part of the config corresponding to a
# single hidden layer; we need this to add new layers.
@@ -219,14 +254,14 @@ if [ $stage -le -6 ]; then
$dir/0.mdl || exit 1;
fi
-if [ $stage -le -5 ]; then
+if [ $stage -le -6 ]; then
echo "Training transition probabilities and setting priors"
$cmd $dir/log/train_trans.log \
nnet-train-transitions $dir/0.mdl "ark:gunzip -c $alidir/ali.*.gz|" $dir/0.mdl \
|| exit 1;
fi
-if [ $stage -le -4 ]; then
+if [ $stage -le -5 ]; then
echo "Compiling graphs of transcripts"
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
@@ -239,118 +274,113 @@ cp $alidir/ali.*.gz $dir
nnet_context_opts="--left-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w left-context | awk '{print $2}'` --right-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w right-context | awk '{print $2}'`" || exit 1;
-if [ $stage -le -3 ]; then
+if [ $stage -le -4 ]; then
-echo "Getting validation examples."
+echo "Getting validation and training subset examples."
-$cmd $dir/log/create_valid_subset_shrink.log \
+rm $dir/.error 2>/dev/null
+$cmd $dir/log/create_valid_subset.log \
nnet-get-egs $nnet_context_opts "$valid_feats" \
"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
-"ark:$dir/valid_all.egs" || exit 1;
+"ark:$dir/valid_all.egs" || touch $dir/.error &
-echo "Getting subsets of validation examples for shrinking and combination."
+$cmd $dir/log/create_train_subset.log \
+nnet-get-egs $nnet_context_opts "$train_subset_feats" \
+"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+"ark:$dir/train_subset_all.egs" || touch $dir/.error &
+wait;
+[ -f $dir/.error ] && exit 1;
+echo "Getting subsets of validation examples for shrinking, diagnostics and combination."
$cmd $dir/log/create_valid_subset_shrink.log \
-nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs ark:$dir/valid_shrink.egs &
+nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs \
+ark:$dir/valid_shrink.egs || touch $dir/.error &
$cmd $dir/log/create_valid_subset_combine.log \
-nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs ark:$dir/valid_combine.egs &
+nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
+ark:$dir/valid_combine.egs || touch $dir/.error &
+$cmd $dir/log/create_valid_subset_diagnostic.log \
+nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
+ark:$dir/valid_diagnostic.egs || touch $dir/.error &
+$cmd $dir/log/create_train_subset_shrink.log \
+nnet-subset-egs --n=$num_train_frames_shrink ark:$dir/train_subset_all.egs \
+ark:$dir/train_shrink.egs || touch $dir/.error &
+$cmd $dir/log/create_train_subset_combine.log \
+nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
+ark:$dir/train_combine.egs || touch $dir/.error &
+$cmd $dir/log/create_train_subset_diagnostic.log \
+nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
+ark:$dir/train_diagnostic.egs || touch $dir/.error &
wait
-[ ! -s $dir/valid_shrink.egs ] && echo "No validation examples for shrinking" && exit 1;
-[ ! -s $dir/valid_combine.egs ] && echo "No validation examples for combination" && exit 1;
-rm $dir/valid_all.egs
+cat $dir/valid_shrink.egs $dir/train_shrink.egs > $dir/shrink.egs
+cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
+for f in $dir/{shrink,combine,train_diagnostic,valid_diagnostic}.egs; do
+[ ! -s $f ] && echo "No examples in file $f" && exit 1;
+done
+rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_{shrink,combine}.egs
fi
-if [ $stage -le -2 ]; then
+if [ $stage -le -3 ]; then
mkdir -p $dir/egs
mkdir -p $dir/temp
echo "Creating training examples";
-# in $dir/egs, create $num_jobs_nnet separate files with training examples,
-# with randomly shuffled order. We shuffle the order of examples in each
-# file. Then on each iteration, for each training process, we'll take a
-# random subset of blocks of examples within that process's file.
-# We take them in blocks, because it avoids the overhead of fseek() while
-# creating the examples.
+# in $dir/egs, create $num_jobs_nnet separate files with training examples.
+# The order is not randomized at this point.
egs_list=
for n in `seq 1 $num_jobs_nnet`; do
-egs_list="$egs_list ark,scp:$dir/egs/egs_orig.$n.ark,$dir/egs/egs_orig.$n.scp"
+egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark"
done
echo "Generating training examples on disk"
# The examples will go round-robin to egs_list.
-$cmd $dir/log/get_egs.log \
+$cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
nnet-get-egs $nnet_context_opts "$feats" \
-"ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
+"ark,cs:gunzip -c $dir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
nnet-copy-egs ark:- $egs_list || exit 1;
fi
if [ $stage -le -2 ]; then
# combine all the "egs_orig.JOB.*.scp" (over the $nj splits of the data) and
# then split into multiple parts egs.JOB.*.scp for different parts of the
# data, 0 .. $iters_per_epoch-1.
if [ $iters_per_epoch -eq 1 ]; then
echo "Since iters-per-epoch == 1, just concatenating the data."
for n in `seq 1 $num_jobs_nnet`; do
cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
rm $dir/egs/egs_orig.$n.*.ark || exit 1;
done
else # We'll have to split it up using nnet-copy-egs.
egs_list=
for n in `seq 0 $[$iters_per_epoch-1]`; do
egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
done
$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
nnet-copy-egs --random=$random_copy --srand=JOB \
"ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
rm $dir/egs/egs_orig.JOB.*.ark || exit 1;
fi
fi
if [ $stage -le -1 ]; then
-# Next, shuffle the order of the examples in each of those files.
-# In order to not use too much memory (in case the size of the files is
-# huge) we do this by randomizing the order of the .scp file and then
-# just call nnet-copy-egs. If the file system is willing to store
-# stuff in memory, it is free to do so. This is not super-optimal in
-# terms of file system performance but it's simple and it won't fail when
-# the data gets large.
+# Each one should not be too large, so we can do this in memory.
echo "Shuffling the order of training examples"
echo "(in order to avoid stressing the disk, these won't all run at once)."
-$cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
-utils/shuffle_list.pl --srand JOB $dir/egs/egs_orig.JOB.scp \| \
-nnet-copy-egs scp:- ark,scp:$dir/egs/egs.JOB.ark,$dir/egs/egs.JOB.scp \
-'&&' rm $dir/egs/egs_orig.JOB.ark $dir/egs/egs_orig.JOB.scp
-smallest_len=`wc -l $dir/egs/egs.*.scp | sort -n -k1 | awk '{print $1}' | head -1`
-# If the $samples_per_iteration is more than each split of the data,
-# append to each .scp file the .scp files from the next one or two
-# splits (or more), so each one is larger...
-rm $dir/egs/egs.*.scp.orig 2>/dev/null
-if [ $samples_per_iteration -gt $smallest_len ]; then
-extra_files=$[($samples_per_iteration-1) / $smallest_len]
-echo Each part of the data has about $smallest_len lines which is less than the
-echo samples per iteration $samples_per_iteration, so appending next $extra_files
-echo files to each scp file
-for n in `seq $num_jobs_nnet`; do mv $dir/egs/egs.$n.scp $dir/egs/egs.$n.scp.orig; done
-for n in `seq $num_jobs_nnet`; do
-for e in `seq 0 $extra_files`; do
-m=$[(($n + $e - 1)%$num_jobs_nnet)+1]
-cat $dir/egs/egs.$m.scp.orig
-done > $dir/egs/egs.$n.scp
-done
-fi
+for n in `seq 0 $[$iters_per_epoch-1]`; do
+$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \
+nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \
+ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \
+rm $dir/egs/egs_tmp.JOB.$n.ark || exit 1;
+done
fi
-num_egs=`grep wrote $dir/log/get_egs.log | tail -1 | awk '{print $NF}'` || exit 1;
-! [ $num_egs -gt 0 ] && echo "bad num_egs $num_egs" && exit 1;
-num_iters_reduce=$[ 1 + (($num_egs * $num_epochs)/($num_jobs_nnet * $samples_per_iteration))]
-num_iters_extra=$[1 + (($num_egs * $num_epochs_extra)/($num_jobs_nnet * $samples_per_iteration))]
+num_iters_reduce=$[$num_epochs * $iters_per_epoch];
+num_iters_extra=$[$num_epochs_extra * $iters_per_epoch];
num_iters=$[$num_iters_reduce+$num_iters_extra]
echo "Will train for $num_epochs + $num_epochs_extra epochs, equalling "
echo " $num_iters_reduce + $num_iters_extra = $num_iters iterations, "
echo " (while reducing learning rate) + (with constant learning rate)."
function get_list {
# usage: get_list <samples-per-iter> <iter> <input-file> >output
#
# Outputs an scp file for this job for this iteration. The
# output will have <samples-per-iter> lines, and will contain lines from
# egs.JOB.scp, possibly with repeats. It will be sorted numerically on its
# first field, so the .ark file is accessed in order (we then pipe to
# nnet-shuffle-egs to randomize the order). The way we do it is, we imagine
# we had concatenated the file $dir/egs/egs.JOB.scp infinite times, and
# taken from the concatenated file, the lines
# <samples-per-iter> * <iter> ... <samples-per-iter> * (<iter> + 1) - 1,
# and then sorted them on the first field (which is a number).
# We don't actually implement it this way, we do it a bit more efficiently.
# We require that samples-per-iter <= (#lines in input-file).
[ $# -ne 3 ] && echo "get_list: bad usage" && exit 1;
samples_per_iter=$1
my_iter=$2
input_file=$3
start=$[$my_iter * $samples_per_iter]; # starting-point in concatenated file.
input_len=`cat $input_file | wc -l`
start=$[$start - $input_len*($start/$input_len)]; # remove whole multiples of input_len
# we have to concatenate the input file to itself.
cat $input_file $input_file | \
head -n $[$start + $samples_per_iter] | tail -n $samples_per_iter | \
sort -k2 -k1n
}
# up till $last_normal_shrink_iter we will shrink the parameters
# in the normal way using the dev set, but after that we will
# only re-compute the shrinkage parameters periodically.
@@ -361,22 +391,19 @@ x=0
while [ $x -lt $num_iters ]; do
if [ $x -ge 0 ] && [ $stage -le $x ]; then
-# Set off a job that does diagnostics, in the background.
+# Set off jobs doing some diagnostics, in the background.
-$cmd $parallel_opts $dir/log/compute_prob.$x.log \
-nnet-compute-prob $dir/$x.mdl ark:$dir/valid_shrink.egs &
+$cmd $dir/log/compute_prob_valid.$x.log \
+nnet-compute-prob $dir/$x.mdl ark:$dir/valid_diagnostic.egs &
+$cmd $dir/log/compute_prob_train.$x.log \
+nnet-compute-prob $dir/$x.mdl ark:$dir/train_diagnostic.egs &
if echo $realign_iters | grep -w $x >/dev/null; then
echo "Realigning data (pass $x)"
$cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
nnet-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$dir/$x.mdl" \
-"ark:gunzip -c $dir/fsts.JOB.gz|" "$split_feats" \
+"ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
"ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
-for n in `seq $num_jobs_nnet`; do
-# the following command gets a subset of the n'th scp file, containing
-# $samples_per_iteration lines.
-get_list $samples_per_iteration $x $dir/egs/egs.$n.scp > $dir/temp/egs.$x.$n.scp
-done
echo "Training neural net (pass $x)"
if [ $x -gt 0 ] && \
@@ -388,9 +415,8 @@ while [ $x -lt $num_iters ]; do
fi
$cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
-MKL_NUM_THREADS=$mkl_num_threads \
nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x \
-scp:$dir/temp/egs.$x.JOB.scp ark:- \| \
+ark:$dir/egs/egs.JOB.$[$x%$iters_per_epoch].ark ark:- \| \
nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
"$mdl" ark:- $dir/$[$x+1].JOB.mdl \
|| exit 1;
@@ -410,10 +436,10 @@ while [ $x -lt $num_iters ]; do
if [ $x -le $last_normal_shrink_iter ] || [ $[$x % $shrink_interval] -eq 0 ]; then
# For earlier iterations (while we've recently been adding layers), or every
# $shrink_interval=3 iters, just do shrinking normally.
+mb=$[($num_valid_frames_shrink+$num_train_frames_shrink+$num_threads-1)/$num_threads]
$cmd $parallel_opts $dir/log/shrink.$x.log \
-MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads --verbose=3 \
---minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
-$dir/$[$x+1].mdl ark:$dir/valid_shrink.egs $dir/$[$x+1].mdl || exit 1;
+nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
+$dir/$[$x+1].mdl ark:$dir/shrink.egs $dir/$[$x+1].mdl || exit 1;
fi
fi
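# (Worked example, not part of the script: with this file's defaults of
# num_valid_frames_shrink=0, num_train_frames_shrink=2000 and num_threads=16,
# the ceiling division above gives mb = (0 + 2000 + 16 - 1) / 16 = 125 frames per minibatch.)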
if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
@ -423,7 +449,7 @@ while [ $x -lt $num_iters ]; do
nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \ nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
$dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1; $dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
fi fi
rm $nnets_list $dir/temp/egs.$x.*.scp rm $nnets_list
fi fi
x=$[$x+1] x=$[$x+1]
done done
@@ -435,15 +461,32 @@ nnets_list=
for x in `seq $[$num_iters-$num_iters_final+1] $num_iters`; do
[ $x -gt $mix_up_iter ] && nnets_list="$nnets_list $dir/$x.mdl"
done
+if [ $stage -le $num_iters ]; then
+mb=$[($num_valid_frames_combine+$num_train_frames_combine+$num_threads-1)/$num_threads]
$cmd $parallel_opts $dir/log/combine.log \
-MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads \
---verbose=3 --minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
-$nnets_list ark:$dir/valid_combine.egs $dir/final.mdl || exit 1;
+nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
+$nnets_list ark:$dir/combine.egs $dir/final.mdl || exit 1;
+fi
# Compute the probability of the final, combined model with
# the same subset we used for the previous compute_probs, as the
# different subsets will lead to different probs.
-$cmd $parallel_opts $dir/log/compute_prob.final.log \
-nnet-compute-prob $dir/final.mdl ark:$dir/valid_shrink.egs || exit 1;
+$cmd $dir/log/compute_prob_valid.final.log \
+nnet-compute-prob $dir/final.mdl ark:$dir/valid_diagnostic.egs &
+$cmd $dir/log/compute_prob_train.final.log \
+nnet-compute-prob $dir/final.mdl ark:$dir/train_diagnostic.egs &
echo Done
if $cleanup; then
echo Cleaning up data
echo Removing training examples
rm -r $dir/egs
echo Removing most of the models
for x in `seq 0 $num_iters`; do
if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then
# delete all but every 10th model; don't delete the ones which combine to form the final model.
rm $dir/$x.mdl
fi
done
fi

View file

@@ -31,16 +31,17 @@ num_jobs_nnet=8 # Number of neural net training jobs to run in parallel.
# not the same as the num-jobs (nj) which will be the same as the
# alignment and denlat directories.
stage=0
-sub_stage=-2 # this can be used to start from a particular sub-iteration of an
+sub_stage=-3 # this can be used to start from a particular sub-iteration of an
# iteration
acwt=0.1
boost=0.0 # boosting for BMMI (you can try 0.1).. this is applied per frame.
transform_dir= # Note: by default any transforms in $alidir will be used.
parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
-shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
+io_opts="-tc 10" # max 5 jobs running at one time (a lot of I/O.)
num_threads=16 # number of threads for neural net trainer..
mkl_num_threads=1
+random_copy=false
# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -71,8 +72,7 @@ if [ $# != 6 ]; then
echo " # this, you may want to decrease the batch size."
echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
echo " # use multiple threads."
-echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
-echo " # data. (prevents stressing the disk). "
+echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for any especially I/O intensive jobs"
echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
echo " # should not get too large, e.g. >2k)."
echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, for each"
@@ -181,34 +181,37 @@ while [ $x -lt $num_epochs ]; do
echo "Epoch $x of $num_epochs"
if [ $stage -le $x ] && $first_iter_of_epoch; then
-if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
+if [ $stage -lt $x ] || [ $sub_stage -le -3 ]; then
# First get the per-frame posteriors, by rescoring the lattices; this
# process also gives us at the same time the posteriors of each state for
# each frame (by default, pruned to 0.01 with a randomized algorithm).
# The matrix-logprob stage produces a diagnostic and passes the pseudo-log-like
-# matrix through unchanged.
-$cmd JOB=1:$nj $dir/log/post.$z.JOB.log \
-nnet-logprob2 $dir/$x.1.mdl "$feats" "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
+# matrix through unchanged. (Note: nnet-logprob2-parallel can use up to
+# $num_threads threads, but in practice it may be limited by the speed of
+# the other elements of the pipe.
+$cmd $parallel_opts JOB=1:$nj $dir/log/post.$z.JOB.log \
+nnet-logprob2-parallel --num-threads=$num_threads $dir/$x.1.mdl "$feats" \
+"ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
matrix-logprob ark:- "ark:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $dir/$x.1.mdl ark:- ark:-|" ark:- \| \
lattice-rescore-mapped $dir/$x.1.mdl "ark:gunzip -c $denlatdir/lat.JOB.gz|" ark:- ark:- \| \
lattice-boost-ali --b=$boost --silence-phones=$silphonelist $dir/$x.1.mdl ark:- "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
post-to-pdf-post $dir/$x.1.mdl ark:- "ark:|gzip -c >$dir/post/den_post.$z.JOB.gz" || exit 1;
fi
-if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
+if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
# run nnet-get-egs for all files, to get the training examples for each frame--
# combines the feature and label/posterior information. The posterior information
# consists of 2 things: the numerator posteriors from the alignments, the denominator
# posteriors from the lattices (times -1), and the smoothing posteriors from the
# neural net log-probs (times E).
# We copy the examples for each job round-robin to multiple archives, one for each
-# of 1...$num_jobs_nnet. We write these along with .scp files, for more convenient
-# and memory-efficient randomization.
+# of 1...$num_jobs_nnet.
egs_out=""
for n in `seq 1 $num_jobs_nnet`; do
-egs_out="$egs_out ark,scp:$dir/egs/egs.$z.$n.JOB.ark,$dir/egs/egs.$z.$n.JOB.scp"
+# indexes are egs_orig.$z.$num_jobs_nnet.$nj
+egs_out="$egs_out ark:$dir/egs/egs_orig.$z.$n.JOB.ark"
done
-$cmd JOB=1:$nj $dir/log/egs.$z.JOB.log \
+$cmd JOB=1:$nj $dir/log/get_egs.$z.JOB.log \
ali-to-pdf $dir/$x.1.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
ali-to-post ark:- ark:- \| \
sum-post --scale2=$E ark:- "ark:gunzip -c $dir/post/smooth_post.$z.JOB.gz|" ark:- \| \
@@ -223,23 +226,33 @@ while [ $x -lt $num_epochs ]; do
tail -n 50 $dir/log/post.$z.*.log | perl -e '$acwt=shift @ARGV; $acwt>0.0 || die "bad acwt"; while(<STDIN>) { if (m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames += $2; } if (m|matrix-logprob.+Average log-prob per frame is (\S+) over (\S+) frames|) { $tot_num_like += $1*$2; $tot_num_frames += $2; } } if (abs($tot_frames - $tot_num_frames) > 0.01*($tot_frames + $tot_num_frames)) { print STDERR "#frames differ $tot_frames vs $tot_num_frames\n"; } $tot_den_lat_like /= $tot_frames; $tot_num_like /= $tot_num_frames; $objf = $acwt * $tot_num_like - $tot_den_lat_like; print $objf."\n"; ' $acwt > $dir/log/objf.$z.log
echo "Objf on EBW iter $z is `cat $dir/log/objf.$z.log`"
fi
-if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
+if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
-echo "Shuffling the order of training examples and splitting them up"
-echo "(in order to avoid stressing the disk, these won't all run at once)."
+echo "Merging training examples across original #jobs ($nj), and "
+echo "splitting across number of nnet jobs $num_jobs_nnet"
egs_out2=""
for n in `seq 1 $iters_per_epoch`; do
-egs_out2="$egs_out2 ark:$dir/egs/egs_split.$z.$n.JOB.ark"
+# indexes of egs_merged are: egs_merged.$z.$iters_per_epoch.$num_jobs_nnet
+egs_out2="$egs_out2 ark:$dir/egs/egs_merged.$z.$n.JOB.ark"
done
# Note: in the following command, JOB goes from 1 to $num_jobs_nnet, so one
# job per parallel training job (different from the previous command).
# We sum up over the index JOB in the previous $cmd, and write to multiple
# archives, this time one for each "sub-iter".
-$cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
-cat $dir/egs/egs.$z.JOB.*.scp \| \
-utils/shuffle_list.pl --srand "\$[($z*$num_jobs_nnet)+JOB]" \| \
-nnet-copy-egs scp:- $egs_out2 || exit 1; ##'&&' \
-##rm $dir/egs/egs.$z.JOB.*.scp $dir/egs/egs.$z.JOB.*.ark || exit 1;
+# indexes of egs_orig are: egs_orig.$z.$num_jobs_nnet.$nj
+$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/merge_and_split.$x.JOB.log \
+cat $dir/egs/egs_orig.$z.JOB.*.ark \| \
+nnet-copy-egs --random=$random_copy "--srand=\$[JOB+($x*$num_jobs_nnet)]" \
+ark:- $egs_out2 '&&' rm $dir/egs/egs_orig.$z.JOB.*.ark || exit 1;
+fi
+if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
+echo "Randomizing order of examples in each job"
+for n in `seq 1 $iters_per_epoch`; do
+s=$[$num_jobs_nnet*($n+($iters_per_epoch*$z))] # for srand
+$cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$z.$n.JOB.log \
+nnet-shuffle-egs "--srand=\$[JOB+$s]" \
+ark:$dir/egs/egs_merged.$z.$n.JOB.ark ark:$dir/egs/egs.$z.$n.JOB.ark '&&' \
+rm $dir/egs/egs_merged.$z.$n.JOB.ark || exit 1;
+done
fi
fi
if [ $stage -le $x ]; then
@@ -250,7 +263,7 @@ while [ $x -lt $num_epochs ]; do
if [ $stage -lt $x ] || [ $sub_stage -le $y ]; then
$cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.$y.JOB.log \
nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
-$dir/$x.$y.mdl ark:$dir/egs/egs_split.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
+$dir/$x.$y.mdl ark:$dir/egs/egs.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
|| exit 1;
nnets_list=
for n in `seq 1 $num_jobs_nnet`; do

View file

@@ -68,7 +68,7 @@ Options:
--input-left-context <n> # #frames of left context for input features; default 0.
--input-right-context <n> # #frames of right context for input features; default 0.
--param-stdddev-factor <f> # Factor which can be used to modify the standard deviation of
-# randomly nitialized features (default, 1. Gets multiplied by
+# randomly initialized features (default, 1. Gets multiplied by
# 1/sqrt of number of inputs).
--initial-num-hidden-layers <n> <config-file> # If >0, number of hidden layers to initialize the network with.
# In this case, the positional parameter <num-hidden-layers> is only

View file

@@ -19,7 +19,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
align-mapped align-compiled-mapped latgen-faster-mapped \
hmm-info pdf-to-counts analyze-counts extract-ctx post-to-phone-post \
post-to-pdf-post duplicate-matrix logprob-to-post prob-to-post copy-post \
-matrix-logprob
+matrix-logprob matrix-sum
OBJFILES =

src/bin/matrix-sum.cc (new file, 87 lines added)
View file

@@ -0,0 +1,87 @@
// bin/matrix-sum.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
const char *usage =
"Sum (and optionally scale) two archives of input matrices\n"
"of the same dimension\n"
"\n"
"Usage: matrix-sum [options] <matrix-rspecifier1> <matrix-rspecifier2> <sum-wspecifier>\n";
BaseFloat scale1 = 1.0, scale2 = 1.0;
ParseOptions po(usage);
po.Register("scale1", &scale1, "Scale applied to first matrix");
po.Register("scale2", &scale2, "Scale applied to second matrix");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
std::string rspecifier1 = po.GetArg(1);
std::string rspecifier2 = po.GetArg(2);
std::string wspecifier = po.GetArg(3);
SequentialBaseFloatMatrixReader mat1_reader(rspecifier1);
RandomAccessBaseFloatMatrixReader mat2_reader(rspecifier2);
BaseFloatMatrixWriter mat_writer(wspecifier);
int32 num_done = 0, num_err = 0;
for (; !mat1_reader.Done(); mat1_reader.Next()) {
std::string key = mat1_reader.Key();
Matrix<BaseFloat> mat1 (mat1_reader.Value());
if (!mat2_reader.HasKey(key)) {
KALDI_WARN << "No such key " << key << " in second table.";
num_err++;
continue;
}
const Matrix<BaseFloat> &mat2 (mat2_reader.Value(key));
if (!SameDim(mat1, mat2)) {
KALDI_WARN << "Matrices for key " << key << " have different dims "
<< mat1.NumRows() << " x " << mat1.NumCols() << " vs. "
<< mat2.NumRows() << " x " << mat2.NumCols();
num_err++;
continue;
}
if (scale1 != 1.0) mat1.Scale(scale1);
mat1.AddMat(scale2, mat2);
mat_writer.Write(key, mat1);
num_done++;
}
KALDI_LOG << "Added " << num_done << " matrices; " << num_err
<< " had errors.";
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}
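As a usage illustration (hypothetical archive names, not from the patch): matrix-sum --scale1=0.5 --scale2=0.5 ark:a.ark ark:b.ark ark:avg.ark would write, for every key present in both inputs, the elementwise average of the two same-sized matrices; keys missing from the second archive, or with mismatched dimensions, are skipped with a warning, as the loop above shows.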

View file

@ -93,10 +93,13 @@ Real* CuMatrix<Real>::RowData(MatrixIndexT r) {
template<typename Real> template<typename Real>
CuMatrix<Real>& CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols) { void CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols,
MatrixResizeType resize_type) {
// This code does not currently support the other resize_type options.
KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
if (num_rows_ == rows && num_cols_ == cols) { if (num_rows_ == rows && num_cols_ == cols) {
// SetZero(); if (resize_type == kSetZero) SetZero();
return *this; return;
} }
Destroy(); Destroy();
@ -108,17 +111,15 @@ CuMatrix<Real>& CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols) {
cuSafeCall(cudaMallocPitch((void**)&data_, &pitch, row_bytes, rows)); cuSafeCall(cudaMallocPitch((void**)&data_, &pitch, row_bytes, rows));
num_rows_ = rows; num_cols_ = cols; num_rows_ = rows; num_cols_ = cols;
stride_ = pitch/sizeof(Real); stride_ = pitch/sizeof(Real);
SetZero(); if (resize_type == kSetZero) SetZero();
} else } else
#endif #endif
{ {
mat_.Resize(rows, cols); mat_.Resize(rows, cols, resize_type);
num_rows_=rows; num_rows_=rows;
num_cols_=cols; num_cols_=cols;
stride_= mat_.Stride(); stride_= mat_.Stride();
} }
return *this;
} }
@ -134,7 +135,7 @@ void CuMatrix<Real>::Destroy() {
} else } else
#endif #endif
{ {
mat_.Destroy(); mat_.Resize(0, 0);
} }
num_rows_ = num_cols_ = stride_ = 0; num_rows_ = num_cols_ = stride_ = 0;
} }
@ -142,9 +143,8 @@ void CuMatrix<Real>::Destroy() {
template<typename Real> template<typename Real>
CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) { void CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
Resize(src.NumRows(), src.NumCols()); KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
Timer tim; Timer tim;
@ -152,7 +152,8 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
MatrixIndexT dst_pitch = stride_*sizeof(Real); MatrixIndexT dst_pitch = stride_*sizeof(Real);
MatrixIndexT src_pitch = src.Stride()*sizeof(Real); MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
MatrixIndexT width = src.NumCols()*sizeof(Real); MatrixIndexT width = src.NumCols()*sizeof(Real);
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyDeviceToDevice)); cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
width, src.NumRows(), cudaMemcpyDeviceToDevice));
CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatD2D",tim.Elapsed()); CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatD2D",tim.Elapsed());
} else } else
@ -160,16 +161,13 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
{ {
mat_.CopyFromMat(src.mat_); mat_.CopyFromMat(src.mat_);
} }
return *this;
} }
template<typename Real> template<typename Real>
CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) { void CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
Resize(src.NumRows(), src.NumCols()); KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
Timer tim; Timer tim;
@ -177,7 +175,8 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
MatrixIndexT dst_pitch = stride_*sizeof(Real); MatrixIndexT dst_pitch = stride_*sizeof(Real);
MatrixIndexT src_pitch = src.Stride()*sizeof(Real); MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
MatrixIndexT width = src.NumCols()*sizeof(Real); MatrixIndexT width = src.NumCols()*sizeof(Real);
cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyHostToDevice)); cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
width, src.NumRows(), cudaMemcpyHostToDevice));
CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatH2D",tim.Elapsed()); CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatH2D",tim.Elapsed());
} else } else
@ -185,17 +184,12 @@ CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
{ {
mat_.CopyFromMat(src); mat_.CopyFromMat(src);
} }
return *this;
} }
template<typename Real> template<typename Real>
void CuMatrix<Real>::CopyToMat(Matrix<Real> *dst) const { void CuMatrix<Real>::CopyToMat(Matrix<Real> *dst) const {
if (dst->NumRows() != NumRows() || dst->NumCols() != NumCols()) { KALDI_ASSERT(dst->NumRows() == NumRows() && dst->NumCols() == NumCols());
dst->Resize(NumRows(), NumCols());
}
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
@ -257,7 +251,7 @@ void CuMatrix<Real>::Read(std::istream &is, bool binary) {
template<typename Real> template<typename Real>
void CuMatrix<Real>::Write(std::ostream &os, bool binary) const { void CuMatrix<Real>::Write(std::ostream &os, bool binary) const {
Matrix<BaseFloat> tmp; Matrix<BaseFloat> tmp(NumRows(), NumCols(), kUndefined);
CopyToMat(&tmp); CopyToMat(&tmp);
tmp.Write(os, binary); tmp.Write(os, binary);
} }

View file

@ -46,15 +46,41 @@ class CuMatrix {
public: public:
/// Default Constructor /// Default Constructor
CuMatrix<Real>() CuMatrix<Real>():
: num_rows_(0), num_cols_(0), stride_(0), data_(NULL) { num_rows_(0), num_cols_(0), stride_(0), data_(NULL) { }
}
/// Constructor with memory initialisation /// Constructor with memory initialisation
CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols) CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols):
: num_rows_(0), num_cols_(0), stride_(0), data_(NULL) { num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
Resize(rows, cols); Resize(rows, cols);
} }
// Note: we had to remove the "explicit" keyword due
// to problems with STL vectors of CuMatrix.
CuMatrix<Real>(const CuMatrix<Real> &other):
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
Resize(other.NumRows(), other.NumCols(), kUndefined);
CopyFromMat(other);
}
explicit CuMatrix<Real>(const Matrix<Real> &other):
num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
Resize(other.NumRows(), other.NumCols(), kUndefined);
CopyFromMat(other);
}
CuMatrix<Real> &operator = (const CuMatrix<Real> &other) {
Resize(other.NumRows(), other.NumCols(), kUndefined);
CopyFromMat(other);
return *this;
}
CuMatrix<Real> &operator = (const Matrix<Real> &other) {
Resize(other.NumRows(), other.NumCols(), kUndefined);
CopyFromMat(other);
return *this;
}
/// Destructor /// Destructor
~CuMatrix() { ~CuMatrix() {
Destroy(); Destroy();
@ -65,14 +91,12 @@ class CuMatrix {
return num_rows_; return num_rows_;
} }
MatrixIndexT NumCols() const { MatrixIndexT NumCols() const { return num_cols_; }
return num_cols_;
}
MatrixIndexT Stride() const { MatrixIndexT Stride() const { return stride_; }
return stride_;
}
// MatrixDim is a struct containing "rows", "cols" and "stride",
// that is an argument of most CUDA kernels.
::MatrixDim Dim() const { ::MatrixDim Dim() const {
::MatrixDim d = { num_rows_, num_cols_, stride_ }; ::MatrixDim d = { num_rows_, num_cols_, stride_ };
return d; return d;
@ -87,29 +111,22 @@ class CuMatrix {
Real* RowData(MatrixIndexT r); Real* RowData(MatrixIndexT r);
/// Get size of matrix in bytes /// Get size of matrix in bytes
MatrixIndexT SizeInBytes() const { MatrixIndexT SizeInBytes() const { return num_rows_*stride_*sizeof(Real); }
return num_rows_*stride_*sizeof(Real);
}
/// Get size of matrix row in bytes /// Get size of matrix row in bytes
MatrixIndexT RowSizeInBytes() const { MatrixIndexT RowSizeInBytes() const { return num_cols_*sizeof(Real); }
return num_cols_*sizeof(Real);
}
/// Get size of matrix stride in bytes /// Get size of matrix stride in bytes
MatrixIndexT StrideSizeInBytes() const { MatrixIndexT StrideSizeInBytes() const { return stride_*sizeof(Real); }
return stride_*sizeof(Real);
}
/// Allocate the memory /// Allocate the memory
ThisType& Resize(MatrixIndexT rows, MatrixIndexT cols); void Resize(MatrixIndexT rows, MatrixIndexT cols,
MatrixResizeType resize_type = kSetZero);
/// Deallocate the memory /// Copy functions (reallocates when needed, but note from Dan: eventually
void Destroy(); /// I'll change it to just die if the sizes don't match, like the Matrix class.)
void CopyFromMat(const CuMatrix<Real> &src);
/// Copy functions (reallocates when needed) void CopyFromMat(const Matrix<Real> &src);
ThisType& CopyFromMat(const CuMatrix<Real> &src);
ThisType& CopyFromMat(const Matrix<Real> &src);
void CopyToMat(Matrix<Real> *dst) const; void CopyToMat(Matrix<Real> *dst) const;
/// Copy row interval from matrix /// Copy row interval from matrix
@ -154,6 +171,8 @@ class CuMatrix {
} }
private: private:
void Destroy();
MatrixIndexT num_rows_; MatrixIndexT num_rows_;
MatrixIndexT num_cols_; MatrixIndexT num_cols_;
MatrixIndexT stride_; MatrixIndexT stride_;
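A minimal usage sketch of the new contract (illustrative only; the header path and API are assumed to be exactly as declared in this patch): CopyFromMat now asserts that the destination already has the right size instead of resizing it, so callers size the destination first, typically with kUndefined to skip the redundant zeroing.

// Sketch, not part of the patch: exercising the new CuMatrix copy semantics.
#include "cudamatrix/cu-matrix.h"   // assumed header location

void CopyExample(const kaldi::Matrix<kaldi::BaseFloat> &host) {
  using namespace kaldi;
  CuMatrix<BaseFloat> a(host);       // new explicit constructor: sizes and copies.
  CuMatrix<BaseFloat> b;
  b.Resize(a.NumRows(), a.NumCols(), kUndefined);  // caller allocates first ...
  b.CopyFromMat(a);                  // ... because CopyFromMat no longer resizes.
}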

View file

@ -44,8 +44,6 @@ const Real* CuVector<Real>::Data() const {
} }
} }
template<typename Real> template<typename Real>
Real* CuVector<Real>::Data() { Real* CuVector<Real>::Data() {
#if HAVE_CUDA==1 #if HAVE_CUDA==1
@ -58,15 +56,12 @@ Real* CuVector<Real>::Data() {
} }
} }
template<typename Real> template<typename Real>
CuVector<Real>& CuVector<Real>::Resize(MatrixIndexT dim) { void CuVector<Real>::Resize(MatrixIndexT dim) {
if (dim_ == dim) { if (dim_ == dim) {
// SetZero(); SetZero();
return *this; return;
} }
Destroy(); Destroy();
#if HAVE_CUDA==1 #if HAVE_CUDA==1
@ -80,8 +75,6 @@ CuVector<Real>& CuVector<Real>::Resize(MatrixIndexT dim) {
dim_ = dim; dim_ = dim;
SetZero(); SetZero();
return *this;
} }
@ -106,9 +99,7 @@ void CuVector<Real>::Destroy() {
template<typename Real> template<typename Real>
CuVector<Real>& CuVector<Real>::CopyFromVec(const CuVector<Real> &src) { void CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
Resize(src.Dim());
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
Timer tim; Timer tim;
@ -119,16 +110,13 @@ CuVector<Real>& CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
{ {
vec_.CopyFromVec(src.vec_); vec_.CopyFromVec(src.vec_);
} }
return *this;
} }
template<typename Real> template<typename Real>
CuVector<Real>& CuVector<Real>::CopyFromVec(const Vector<Real> &src) { void CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
Resize(src.Dim()); KALDI_ASSERT(src.Dim() == dim_);
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
Timer tim; Timer tim;
@ -141,16 +129,14 @@ CuVector<Real>& CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
{ {
vec_.CopyFromVec(src); vec_.CopyFromVec(src);
} }
return *this;
} }
template<typename Real> template<typename Real>
void CuVector<Real>::CopyToVec(Vector<Real> *dst) const { void CuVector<Real>::CopyToVec(Vector<Real> *dst) const {
if (dst->Dim() != dim_) { KALDI_ASSERT(dst->Dim() == dim_);
dst->Resize(dim_);
}
#if HAVE_CUDA==1 #if HAVE_CUDA==1
if (CuDevice::Instantiate().Enabled()) { if (CuDevice::Instantiate().Enabled()) {
@ -177,7 +163,7 @@ void CuVector<Real>::Read(std::istream &is, bool binary) {
template<typename Real> template<typename Real>
void CuVector<Real>::Write(std::ostream &os, bool binary) const { void CuVector<Real>::Write(std::ostream &os, bool binary) const {
Vector<BaseFloat> tmp; Vector<BaseFloat> tmp(Dim());
CopyToVec(&tmp); CopyToVec(&tmp);
tmp.Write(os, binary); tmp.Write(os, binary);
} }

View file

@ -46,6 +46,16 @@ class CuVector {
Resize(dim); Resize(dim);
} }
CuVector<Real>(const CuVector<Real> &v): dim_(0), data_(NULL) {
Resize(v.dim_);
CopyFromVec(v);
}
CuVector<Real>(const Vector<Real> &v): dim_(0), data_(NULL) {
Resize(v.Dim());
CopyFromVec(v);
}
/// Destructor /// Destructor
~CuVector() { ~CuVector() {
Destroy(); Destroy();
@ -61,14 +71,11 @@ class CuVector {
Real* Data(); Real* Data();
/// Allocate the memory /// Allocate the memory
ThisType& Resize(MatrixIndexT dim); void Resize(MatrixIndexT dim);
/// Deallocate the memory
void Destroy();
/// Copy functions (lazy reallocation when needed) /// Copy functions (lazy reallocation when needed)
ThisType& CopyFromVec(const CuVector<Real> &src); void CopyFromVec(const CuVector<Real> &src);
ThisType& CopyFromVec(const Vector<Real> &src); void CopyFromVec(const Vector<Real> &src);
void CopyToVec(Vector<Real> *dst) const; void CopyToVec(Vector<Real> *dst) const;
/// I/O /// I/O
@ -94,6 +101,7 @@ class CuVector {
} }
private: private:
void Destroy();
MatrixIndexT dim_; ///< dimension of the vector MatrixIndexT dim_; ///< dimension of the vector
Real *data_; ///< GPU data pointer Real *data_; ///< GPU data pointer
Vector<Real> vec_; ///< non-GPU vector as back-up Vector<Real> vec_; ///< non-GPU vector as back-up

View file

@ -234,7 +234,7 @@ class SimpleDecoder {
const Arc &arc = aiter.Value(); const Arc &arc = aiter.Value();
if (arc.ilabel == 0) { // propagate nonemitting only... if (arc.ilabel == 0) { // propagate nonemitting only...
Token *new_tok = new Token(arc, tok); Token *new_tok = new Token(arc, tok);
if (new_tok->arc_.weight.Value() > cutoff) { if (new_tok->weight_.Value() > cutoff) {
Token::TokenDelete(new_tok); Token::TokenDelete(new_tok);
} else { } else {
unordered_map<StateId, Token*>::iterator find_iter unordered_map<StateId, Token*>::iterator find_iter

View file

@ -213,6 +213,10 @@ void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
int32 offset = bins_[i].first; int32 offset = bins_[i].first;
const Vector<BaseFloat> &v (bins_[i].second); const Vector<BaseFloat> &v (bins_[i].second);
(*mel_energies_out)(i) = VecVec(v, power_spectrum.Range(offset, v.Dim())); (*mel_energies_out)(i) = VecVec(v, power_spectrum.Range(offset, v.Dim()));
// The following assert was added due to a problem with OpenBlas that
// we had at one point (it was a bug in that library). Just to detect
// it early.
KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
} }
if (debug_) { if (debug_) {

View file

@ -9,7 +9,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
feat-to-len feat-to-dim fmpe-apply-transform fmpe-acc-stats fmpe-init \ feat-to-len feat-to-dim fmpe-apply-transform fmpe-acc-stats fmpe-init \
fmpe-est fmpe-copy fmpe-sum-accs append-feats extend-transform-dim \ fmpe-est fmpe-copy fmpe-sum-accs append-feats extend-transform-dim \
get-full-lda-mat compute-spectrogram-feats extract-feature-segments \ get-full-lda-mat compute-spectrogram-feats extract-feature-segments \
reverse-feats paste-feats select-feats reverse-feats paste-feats select-feats subsample-feats
OBJFILES = OBJFILES =

View file

@ -1,6 +1,7 @@
// featbin/append-feats.cc // featbin/append-feats.cc
// Copyright 2012 Petr Motlicek; Pawel Swietojanski // Copyright 2012 Petr Motlicek Pawel Swietojanski
// Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
@ -32,15 +33,11 @@ int main(int argc, char *argv[]) {
ParseOptions po(usage); ParseOptions po(usage);
int32 feats_offset_in1 = 0; bool truncate_frames = false;
int32 feats_offset_in2 = 0;
int32 num_feats_in1 = 0;
int32 num_feats_in2 = 0;
po.Register("feats-offset-in1", &feats_offset_in1, "Feats 1 offset"); po.Register("truncate-frames", &truncate_frames, "If true, do not treat it "
po.Register("num-feats-in1", &num_feats_in1, "Take num-feats from in1-rspeciifier"); "as an error when files differ in number of frames, but truncate "
po.Register("feats-offset-in2", &feats_offset_in2, "Feats 2 offset"); "the longest one.");
po.Register("num-feats-in2", &num_feats_in2, "Take num-feats from in2-rspeciifier");
po.Read(argc, argv); po.Read(argc, argv);
@ -53,80 +50,47 @@ int main(int argc, char *argv[]) {
std::string rspecifier2 = po.GetArg(2); std::string rspecifier2 = po.GetArg(2);
std::string wspecifier = po.GetArg(3); std::string wspecifier = po.GetArg(3);
KALDI_ASSERT(feats_offset_in1 >= 0 && feats_offset_in2 >= 0); BaseFloatMatrixWriter feats_writer(wspecifier);
SequentialBaseFloatMatrixReader feats_reader1(rspecifier1);
RandomAccessBaseFloatMatrixReader feats_reader2(rspecifier2);
BaseFloatMatrixWriter kaldi_writer(wspecifier); int32 num_done = 0, num_err = 0;
SequentialBaseFloatMatrixReader kaldi_reader1(rspecifier1);
RandomAccessBaseFloatMatrixReader kaldi_reader2(rspecifier2);
// Peeking in the archives to get the feature dimensions for (; !feats_reader1.Done(); feats_reader1.Next()) {
if (kaldi_reader1.Done()) { std::string utt = feats_reader1.Key();
KALDI_ERR << "Could not read any features from " << rspecifier1 if (!feats_reader2.HasKey(utt)) {
<< ". (empty archive?)";
}
std::string utt = kaldi_reader1.Key();
if (!kaldi_reader2.HasKey(utt)) {
KALDI_ERR << "Could not read features for key " << utt << " from "
<< rspecifier2 << ". (empty archive?)";
}
int32 dim_feats_in1 = kaldi_reader1.Value().NumCols();
int32 dim_feats_in2 = kaldi_reader2.Value(utt).NumCols();
if (num_feats_in1 == 0)
num_feats_in1 = dim_feats_in1 - feats_offset_in1;
if (num_feats_in2 == 0)
num_feats_in2 = dim_feats_in2 - feats_offset_in2;
KALDI_LOG << "Reading features from " << rspecifier1 << " and " << rspecifier2;
KALDI_LOG << "\tdim1 = " << dim_feats_in1 << "; offset1 = " << feats_offset_in1
<< "; num1 = " << num_feats_in1 << "; dim2 = " << dim_feats_in2
<< "; offset2 = " << feats_offset_in2 << "; num2 = " << num_feats_in2;
KALDI_ASSERT((feats_offset_in1 + num_feats_in1) <= dim_feats_in1);
KALDI_ASSERT((feats_offset_in2 + num_feats_in2) <= dim_feats_in2);
for (; !kaldi_reader1.Done(); kaldi_reader1.Next()) {
utt = kaldi_reader1.Key();
if (!kaldi_reader2.HasKey(utt)) {
KALDI_WARN << "Could not find features for " << utt << " in " KALDI_WARN << "Could not find features for " << utt << " in "
<< rspecifier2 << ": producing no output for the utterance"; << rspecifier2 << ": producing no output for the utterance";
num_err++;
continue; continue;
} }
const Matrix<BaseFloat> &feats1 = kaldi_reader1.Value(); const Matrix<BaseFloat> &feats1 = feats_reader1.Value();
const Matrix<BaseFloat> &feats2 = kaldi_reader2.Value(utt); const Matrix<BaseFloat> &feats2 = feats_reader2.Value(utt);
int32 num_frames = feats1.NumRows(); if (feats1.NumRows() != feats2.NumRows() && !truncate_frames) {
KALDI_VLOG(1) << "Utterance : " << utt << ": # of frames = " << num_frames; KALDI_WARN << "For utterance " << utt << ", features have different "
<< "#frames " << feats1.NumRows() << " vs. "
KALDI_ASSERT(feats1.NumCols() == dim_feats_in1 && << feats2.NumRows() << ", producing no output (use "
feats2.NumCols() == dim_feats_in2); << "--truncate-frames=true if you want output)";
if (num_frames != feats2.NumRows()) { num_err++;
KALDI_WARN << "Utterance " << utt << ": " << num_frames
<< " frames read from " << rspecifier1 << " and "
<< feats2.NumRows() << " frames read from " << rspecifier2
<< ": producing no output for the utterance";
continue; continue;
} }
int32 num_frames = std::min(feats1.NumRows(), feats2.NumRows()),
dim1 = feats1.NumCols(), dim2 = feats2.NumCols();
Matrix<BaseFloat> output(num_frames, dim1 + dim2, kUndefined);
output.Range(0, num_frames, 0, dim1).CopyFromMat(
feats1.Range(0, num_frames, 0, dim1));
output.Range(0, num_frames, dim1, dim2).CopyFromMat(
feats2.Range(0, num_frames, 0, dim2));
SubMatrix<BaseFloat> new_feats1(feats1, 0, num_frames, feats_offset_in1, feats_writer.Write(utt, output);
num_feats_in1); num_done++;
SubMatrix<BaseFloat> new_feats2(feats2, 0, num_frames, feats_offset_in2,
num_feats_in2);
Matrix<BaseFloat> output_feats(num_frames, new_feats1.NumCols() +
new_feats2.NumCols());
output_feats.Range(0, num_frames, 0,
new_feats1.NumCols()).CopyFromMat(new_feats1);
output_feats.Range(0, num_frames, new_feats1.NumCols(),
new_feats2.NumCols()).CopyFromMat(new_feats2);
kaldi_writer.Write(utt, output_feats);
} }
KALDI_LOG << "Appended " << num_done << " feats; " << num_err
return 0; << " with errors.";
} return (num_done != 0 ? 0 : 1);
catch (const std::exception& e) { } catch (const std::exception& e) {
std::cerr << e.what(); std::cerr << e.what();
return -1; return -1;
} }
} }
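To put numbers on the new --truncate-frames behaviour (hypothetical sizes, chosen only to illustrate the min-and-concatenate logic above): if an utterance has a 300 x 13 matrix in the first archive and a 298 x 40 matrix in the second, the default (truncate_frames = false) skips it with a warning, while --truncate-frames=true produces a 298 x 53 output whose first 13 columns come from the first matrix and last 40 from the second.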

View file

@ -21,7 +21,6 @@
#include "feat/feature-mfcc.h" #include "feat/feature-mfcc.h"
#include "feat/wave-reader.h" #include "feat/wave-reader.h"
int main(int argc, char *argv[]) { int main(int argc, char *argv[]) {
try { try {
using namespace kaldi; using namespace kaldi;

View file

@ -0,0 +1,96 @@
// featbin/subsample-feats.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
#include <algorithm>
#include <iterator>
#include <utility>
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "matrix/kaldi-matrix.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
using namespace std;
const char *usage =
"Sub-samples features by taking every n'th frame"
"\n"
"Usage: subsample-feats [options] in-rspecifier out-wspecifier\n"
" e.g. subsample-feats --n=2 ark:- ark:-\n";
ParseOptions po(usage);
int32 n = 1, offset = 0;
po.Register("n", &n, "Take every n'th feature, for this value of n");
po.Register("offset", &offset, "Start with the feature with this offset, "
"then take every n'th feature.");
po.Read(argc, argv);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
string rspecifier = po.GetArg(1);
string wspecifier = po.GetArg(2);
SequentialBaseFloatMatrixReader feat_reader(rspecifier);
BaseFloatMatrixWriter feat_writer(wspecifier);
int32 num_done = 0, num_err = 0;
// process all keys
for (; !feat_reader.Done(); feat_reader.Next()) {
std::string utt = feat_reader.Key();
const Matrix<BaseFloat> feats(feat_reader.Value());
// This code could, of course, be much more efficient; I'm just
// keeping it simple.
int32 num_indexes = 0;
for (int32 k = offset; k < feats.NumRows(); k += n)
num_indexes++; // k is the index.
if (num_indexes == 0) {
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
<< "producing no output.";
num_err++;
continue;
}
Matrix<BaseFloat> output(num_indexes, feats.NumCols());
int32 i = 0;
for (int32 k = offset; k < feats.NumRows(); k += n, i++) {
SubVector<BaseFloat> src(feats, k), dest(output, i);
dest.CopyFromVec(src);
}
KALDI_ASSERT(i == num_indexes);
feat_writer.Write(utt, output);
num_done++;
}
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
<< " with errors.";
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}
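For a concrete picture of the indexing: with 10 input frames, --offset=1 and --n=3 the kept frames are 1, 4 and 7, giving a 3-row output. A stand-alone sketch of the same arithmetic (plain C++, only an illustration of the loop above, not the binary itself):

#include <cstdint>
#include <iostream>
#include <vector>

// Frame indices kept by the rule above: offset, offset+n, offset+2n, ...
std::vector<int32_t> KeptFrames(int32_t num_frames, int32_t n, int32_t offset) {
  std::vector<int32_t> kept;
  for (int32_t k = offset; k < num_frames; k += n)
    kept.push_back(k);
  return kept;
}

int main() {
  for (int32_t k : KeptFrames(10, 3, 1))
    std::cout << k << ' ';   // prints: 1 4 7
  std::cout << '\n';
  return 0;
}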

View file

@ -25,7 +25,7 @@ int main(int argc, char *argv[]) {
using namespace kaldi; using namespace kaldi;
const char *usage = const char *usage =
"Copy a subset of features\n" "Copy a subset of features (the first n features)\n"
"Usage: subset-feats [options] in-rspecifier out-wspecifier\n"; "Usage: subset-feats [options] in-rspecifier out-wspecifier\n";
ParseOptions po(usage); ParseOptions po(usage);

View file

@ -160,11 +160,9 @@ int main(int argc, char *argv[]) {
KALDI_LOG << "Applied transform to " << num_done << " utterances; " << num_error KALDI_LOG << "Applied transform to " << num_done << " utterances; " << num_error
<< " had errors."; << " had errors.";
return 0; return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) { } catch(const std::exception &e) {
std::cerr << e.what(); std::cerr << e.what();
return -1; return -1;
} }
} }

View file

@ -27,9 +27,24 @@
namespace kaldi { namespace kaldi {
inline void cblas_Xscal(const int N, float *X, const int incX, float *Y,
const int incY, const float c, const float s) { inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
cblas_srot(N, X, incX, Y, incY, c, s); const int incY) {
cblas_scopy(N, X, incX, Y, incY);
}
inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
const int incY) {
cblas_dcopy(N, X, incX, Y, incY);
}
inline float cblas_Xasum(const int N, const float *X, const int incX) {
return cblas_sasum(N, X, incX);
}
inline double cblas_Xasum(const int N, const double *X, const int incX) {
return cblas_dasum(N, X, incX);
} }
inline void cblas_Xrot(const int N, float *X, const int incX, float *Y, inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
@ -226,6 +241,78 @@ inline void cblas_Xsyrk(
cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans), cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride); dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
} }
/// matrix-vector multiply using a banded matrix; we always call this
/// with b = 1 meaning we're multiplying by a diagonal matrix. This is used for
/// elementwise multiplication. We miss some of the arguments out of this
/// wrapper.
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const double *A,
const double alpha,
const double *x,
const double beta,
double *y) {
cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
inline void cblas_Xsbmv1(
const MatrixIndexT dim,
const float *A,
const float alpha,
const float *x,
const float beta,
float *y) {
cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
1, x, 1, beta, y, 1);
}
/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
/// extend this somehow.
inline void mul_elements(
const MatrixIndexT dim,
const double *a,
double *b) { // does b *= a, elementwise.
double c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
inline void mul_elements(
const MatrixIndexT dim,
const float *a,
float *b) { // does b *= a, elementwise.
float c1, c2, c3, c4;
MatrixIndexT i;
for (i = 0; i + 4 <= dim; i += 4) {
c1 = a[i] * b[i];
c2 = a[i+1] * b[i+1];
c3 = a[i+2] * b[i+2];
c4 = a[i+3] * b[i+3];
b[i] = c1;
b[i+1] = c2;
b[i+2] = c3;
b[i+3] = c4;
}
for (; i < dim; i++)
b[i] *= a[i];
}
// add clapack here // add clapack here
#ifndef HAVE_ATLAS #ifndef HAVE_ATLAS
inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) { inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {

View file

@ -495,6 +495,41 @@ template
void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M, void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
MatrixTransposeType Trans); MatrixTransposeType Trans);
// Specialize the template for CopyFromSp for float, float.
template<>
template<>
void MatrixBase<float>::CopyFromSp(const SpMatrix<float> & M) {
KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
MatrixIndexT num_rows = num_rows_, stride = stride_;
const float *Mdata = M.Data();
float *row_data = data_, *col_data = data_;
for (MatrixIndexT i = 0; i < num_rows; i++) {
cblas_scopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
cblas_scopy(i, Mdata, 1, col_data, stride); // copy to the column.
Mdata += i+1;
row_data += stride;
col_data += 1;
}
}
// Specialize the template for CopyFromSp for double, double.
template<>
template<>
void MatrixBase<double>::CopyFromSp(const SpMatrix<double> & M) {
KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
MatrixIndexT num_rows = num_rows_, stride = stride_;
const double *Mdata = M.Data();
double *row_data = data_, *col_data = data_;
for (MatrixIndexT i = 0; i < num_rows; i++) {
cblas_dcopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
cblas_dcopy(i, Mdata, 1, col_data, stride); // copy to the column.
Mdata += i+1;
row_data += stride;
col_data += 1;
}
}
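Both specializations walk SpMatrix's packed lower-triangular storage, in which row i contributes i + 1 consecutive entries, so element (i, j) with j <= i sits at offset i*(i+1)/2 + j. A small illustration of that layout (plain C++, independent of the Kaldi classes):

#include <cstddef>

// Offset of element (i, j), j <= i, in a row-major packed lower triangle.
inline std::size_t PackedLowerIndex(std::size_t i, std::size_t j) {
  return i * (i + 1) / 2 + j;
}

// For a 3 x 3 symmetric matrix the packed data reads
//   [ a00, a10, a11, a20, a21, a22 ]
// so PackedLowerIndex(2, 1) == 4 picks out a21, as expected.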
template<typename Real> template<typename Real>
template<typename OtherReal> template<typename OtherReal>
void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) { void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) {
@ -711,12 +746,16 @@ void Matrix<Real>::Destroy() {
template<typename Real> template<typename Real>
void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) { void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_); KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
MatrixIndexT i;
MatrixIndexT j;
for (i = 0; i < num_rows_; i++) { if (num_cols_ == stride_ && num_cols_ == a.stride_) {
for (j = 0; j < num_cols_; j++) { mul_elements(num_rows_ * num_cols_, a.data_, data_);
(*this)(i, j) *= a(i, j); } else {
MatrixIndexT a_stride = a.stride_, stride = stride_;
Real *data = data_, *a_data = a.data_;
for (MatrixIndexT i = 0; i < num_rows_; i++) {
mul_elements(num_cols_, a_data, data);
a_data += a_stride;
data += stride;
} }
} }
} }
@ -1985,6 +2024,13 @@ Real MatrixBase<Real>::ApplySoftMax() {
return max + log(sum); return max + log(sum);
} }
template<typename Real>
void MatrixBase<Real>::ApplyTanh() {
for (MatrixIndexT r = 0; r < num_rows_; r++) {
SubVector<Real> v(*this, r);
v.ApplyTanh();
}
}
template<class Real> template<class Real>
template<class OtherReal> template<class OtherReal>

View file

@ -341,6 +341,9 @@ class MatrixBase {
/// matrix and return normalizer (log sum of exponentials). /// matrix and return normalizer (log sum of exponentials).
Real ApplySoftMax(); Real ApplySoftMax();
/// Apply the tanh function to each element of the matrix.
void ApplyTanh();
/** Uses Svd to compute the eigenvalue decomposition of a symmetric positive /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
* semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
* orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not * orthogonal matrix so rP^{-1} = rP^T. Throws exception if input was not
@ -553,7 +556,7 @@ class Matrix : public MatrixBase<Real> {
/// It is symmetric, so no option for transpose, and NumRows == Cols /// It is symmetric, so no option for transpose, and NumRows == Cols
template<typename OtherReal> template<typename OtherReal>
explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() { explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
Resize(M.NumRows(), M.NumRows()); Resize(M.NumRows(), M.NumRows(), kUndefined);
this->CopyFromSp(M); this->CopyFromSp(M);
} }
@ -562,10 +565,10 @@ class Matrix : public MatrixBase<Real> {
explicit Matrix(const TpMatrix<OtherReal> & M, explicit Matrix(const TpMatrix<OtherReal> & M,
MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() { MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
if (trans == kNoTrans) { if (trans == kNoTrans) {
Resize(M.NumRows(), M.NumCols()); Resize(M.NumRows(), M.NumCols(), kUndefined);
this->CopyFromTp(M); this->CopyFromTp(M);
} else { } else {
Resize(M.NumCols(), M.NumRows()); Resize(M.NumCols(), M.NumRows(), kUndefined);
this->CopyFromTp(M, kTrans); this->CopyFromTp(M, kTrans);
} }
} }
@ -584,9 +587,6 @@ class Matrix : public MatrixBase<Real> {
/// Destructor to free matrices. /// Destructor to free matrices.
~Matrix() { Destroy(); } ~Matrix() { Destroy(); }
/// Deallocates memory and sets to empty matrix.
void Destroy();
/// Sets matrix to a specified size (zero is OK as long as both r and c are /// Sets matrix to a specified size (zero is OK as long as both r and c are
/// zero). The value of the new data depends on resize_type: /// zero). The value of the new data depends on resize_type:
/// -if kSetZero, the new data will be zero /// -if kSetZero, the new data will be zero
@ -601,9 +601,8 @@ class Matrix : public MatrixBase<Real> {
/// Assignment operator that takes MatrixBase. /// Assignment operator that takes MatrixBase.
Matrix<Real> &operator = (const MatrixBase<Real> &other) { Matrix<Real> &operator = (const MatrixBase<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() || if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols()) { MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols()); Resize(other.NumRows(), other.NumCols(), kUndefined);
}
MatrixBase<Real>::CopyFromMat(other); MatrixBase<Real>::CopyFromMat(other);
return *this; return *this;
} }
@ -611,15 +610,17 @@ class Matrix : public MatrixBase<Real> {
/// Assignment operator. Needed for inclusion in std::vector. /// Assignment operator. Needed for inclusion in std::vector.
Matrix<Real> &operator = (const Matrix<Real> &other) { Matrix<Real> &operator = (const Matrix<Real> &other) {
if (MatrixBase<Real>::NumRows() != other.NumRows() || if (MatrixBase<Real>::NumRows() != other.NumRows() ||
MatrixBase<Real>::NumCols() != other.NumCols()) { MatrixBase<Real>::NumCols() != other.NumCols())
Resize(other.NumRows(), other.NumCols()); Resize(other.NumRows(), other.NumCols(), kUndefined);
}
MatrixBase<Real>::CopyFromMat(other); MatrixBase<Real>::CopyFromMat(other);
return *this; return *this;
} }
private: private:
/// Deallocates memory and sets to empty matrix (dimension 0, 0).
void Destroy();
/// Init assumes the current class contents are invalid (i.e. junk or have /// Init assumes the current class contents are invalid (i.e. junk or have
/// already been freed), and it sets the matrix to newly allocated memory with /// already been freed), and it sets the matrix to newly allocated memory with
/// the specified number of rows and columns. r == c == 0 is acceptable. The data /// the specified number of rows and columns. r == c == 0 is acceptable. The data

View file

@ -375,7 +375,13 @@ template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row); void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row);
// takes elements to a power. Throws exception if could not (but only for power != 1 ad power != 2). #ifdef HAVE_MKL
template<>
void VectorBase<float>::ApplyPow(float power) { vsPowx(dim_, data_, power, data_); }
template<>
void VectorBase<double>::ApplyPow(double power) { vdPowx(dim_, data_, power, data_); }
#else
// takes elements to a power. Throws exception if could not (but only for power != 1 and power != 2).
template<typename Real> template<typename Real>
void VectorBase<Real>::ApplyPow(Real power) { void VectorBase<Real>::ApplyPow(Real power) {
if (power == 1.0) return; if (power == 1.0) return;
@ -399,6 +405,7 @@ void VectorBase<Real>::ApplyPow(Real power) {
} }
} }
} }
#endif
// Computes the p-th norm. Throws exception if could not. // Computes the p-th norm. Throws exception if could not.
template<typename Real> template<typename Real>
@ -534,14 +541,13 @@ template<typename Real>
void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) { void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) {
// note the double accumulator // note the double accumulator
KALDI_ASSERT(dim_ == M.NumCols()); KALDI_ASSERT(dim_ == M.NumCols());
MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(); MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
for (MatrixIndexT i = 0; i < dim_; i++) { Real *data = data_;
double sum = 0.0; cblas_Xscal(dim, beta, data, 1);
const Real *src = M.Data() + i; const Real *m_data = M.Data();
for (MatrixIndexT j = 0; j < num_rows; j++)
sum += src[j*stride]; for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride)
data_[i] = alpha * sum + beta * data_[i]; cblas_Xaxpy(dim, alpha, m_data, 1, data, 1);
}
} }
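The rewrite expresses the same update through two BLAS level-1 calls: Xscal first scales the destination by beta, then one Xaxpy per row of M accumulates that row with weight alpha (the per-element double accumulator of the old loop is gone). A plain reference loop for the quantity being computed (illustration only; the BLAS version above is what actually runs):

#include <cstddef>

// Reference: v <- beta * v + alpha * (sum of the rows of M),
// where M is num_rows x dim with row stride `stride`.
template <typename Real>
void AddRowSumMatReference(Real alpha, const Real *M, std::size_t num_rows,
                           std::size_t dim, std::size_t stride,
                           Real beta, Real *v) {
  for (std::size_t j = 0; j < dim; j++) v[j] *= beta;
  for (std::size_t i = 0; i < num_rows; i++)
    for (std::size_t j = 0; j < dim; j++)
      v[j] += alpha * M[i * stride + j];
}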
template<typename Real> template<typename Real>
@ -651,6 +657,25 @@ Real VectorBase<Real>::ApplySoftMax() {
return max + log(sum); return max + log(sum);
} }
#ifdef HAVE_MKL
template<>
void VectorBase<float>::ApplyTanh() { vsTanh(dim_, data_, data_); }
template<>
void VectorBase<double>::ApplyTanh() { vdTanh(dim_, data_, data_); }
#else
template<typename Real>
void VectorBase<Real>::ApplyTanh() {
for (MatrixIndexT i = 0; i < dim_; i++) {
Real x = data_[i];
if (x > 0.0) {
x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
} else {
x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
}
data_[i] = x;
}
}
#endif
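The two branches above are the same function written in two numerically safe forms; both follow from expressing tanh as a scaled, shifted sigmoid (standard algebra, stated here only for reference):

\[ \tanh(x) \;=\; \frac{e^{x}-e^{-x}}{e^{x}+e^{-x}} \;=\; \frac{2}{1+e^{-2x}} - 1 \;=\; 1 - \frac{2}{1+e^{2x}}. \]

The middle form is used for x > 0 and the last for x <= 0, so the argument of exp() is never positive and cannot overflow.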
template<typename Real> template<typename Real>
void VectorBase<Real>::Add(Real c) { void VectorBase<Real>::Add(Real c) {

View file

@ -126,6 +126,10 @@ class VectorBase {
/// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$ /// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
Real ApplySoftMax(); Real ApplySoftMax();
/// Apply the tanh function to each element of a vector. If using MKL, does
/// it using the "less accurate" options.
void ApplyTanh();
/// Take all elements of vector to a power. /// Take all elements of vector to a power.
void ApplyPow(Real power); void ApplyPow(Real power);
@ -322,20 +326,20 @@ class Vector: public VectorBase<Real> {
/// Copy constructor. The need for this is controversial. /// Copy constructor. The need for this is controversial.
Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit) Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
Resize(v.Dim()); Resize(v.Dim(), kUndefined);
this->CopyFromVec(v); this->CopyFromVec(v);
} }
/// Copy-constructor from base-class, needed to copy from SubVector. /// Copy-constructor from base-class, needed to copy from SubVector.
explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() { explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
Resize(v.Dim()); Resize(v.Dim(), kUndefined);
this->CopyFromVec(v); this->CopyFromVec(v);
} }
/// Type conversion constructor. /// Type conversion constructor.
template<typename OtherReal> template<typename OtherReal>
explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() { explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
Resize(v.Dim()); Resize(v.Dim(), kUndefined);
this->CopyFromVec(v); this->CopyFromVec(v);
} }
@ -372,14 +376,14 @@ class Vector: public VectorBase<Real> {
/// Assignment operator, protected so it can only be used by std::vector /// Assignment operator, protected so it can only be used by std::vector
Vector<Real> &operator = (const Vector<Real> &other) { Vector<Real> &operator = (const Vector<Real> &other) {
Resize(other.Dim()); Resize(other.Dim(), kUndefined);
this->CopyFromVec(other); this->CopyFromVec(other);
return *this; return *this;
} }
/// Assignment operator that takes VectorBase. /// Assignment operator that takes VectorBase.
Vector<Real> &operator = (const VectorBase<Real> &other) { Vector<Real> &operator = (const VectorBase<Real> &other) {
Resize(other.Dim()); Resize(other.Dim(), kUndefined);
this->CopyFromVec(other); this->CopyFromVec(other);
return *this; return *this;
} }

View file

@ -679,6 +679,28 @@ template<class Real> static void UnitTestAxpy() {
} }
} }
template<class Real> static void UnitTestCopySp() {
// Checking that the various versions of copying
// matrix to SpMatrix work the same in the symmetric case.
for (MatrixIndexT iter = 0;iter < 5;iter++) {
int32 dim = 5 + rand() % 10;
SpMatrix<Real> S(dim), T(dim);
S.SetRandn();
Matrix<Real> M(S);
T.CopyFromMat(M, kTakeMeanAndCheck);
AssertEqual(S, T);
T.SetZero();
T.CopyFromMat(M, kTakeMean);
AssertEqual(S, T);
T.SetZero();
T.CopyFromMat(M, kTakeLower);
AssertEqual(S, T);
T.SetZero();
T.CopyFromMat(M, kTakeUpper);
AssertEqual(S, T);
}
}
template<class Real> static void UnitTestPower() { template<class Real> static void UnitTestPower() {
for (MatrixIndexT iter = 0;iter < 5;iter++) { for (MatrixIndexT iter = 0;iter < 5;iter++) {
@ -1430,6 +1452,7 @@ template<class Real> static void UnitTestMulElements() {
} }
} }
template<class Real> static void UnitTestSpLogExp() { template<class Real> static void UnitTestSpLogExp() {
for (MatrixIndexT i = 0; i < 5; i++) { for (MatrixIndexT i = 0; i < 5; i++) {
MatrixIndexT dimM = 10 + rand() % 10; MatrixIndexT dimM = 10 + rand() % 10;
@ -1860,6 +1883,27 @@ template<class Real> static void UnitTestLimitCond() {
} }
} }
template<class Real> static void UnitTestTanh() {
for (MatrixIndexT i = 0; i < 10; i++) {
MatrixIndexT dimM = 5 + rand() % 10, dimN = 5 + rand() % 10;
Matrix<Real> M(dimM, dimN);
Matrix<Real> N(M);
for(int32 r = 0; r < dimM; r++) {
for (int32 c = 0; c < dimN; c++) {
Real x = N(r, c);
if (x > 0.0) {
x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
} else {
x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
}
N(r, c) = x;
}
}
M.ApplyTanh();
AssertEqual(M, N);
}
}
template<class Real> static void UnitTestSimple() { template<class Real> static void UnitTestSimple() {
for (MatrixIndexT i = 0;i < 5;i++) { for (MatrixIndexT i = 0;i < 5;i++) {
MatrixIndexT dimM = 20 + rand()%10, dimN = 20 + rand()%20; MatrixIndexT dimM = 20 + rand()%10, dimN = 20 + rand()%20;
@ -3541,6 +3585,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
UnitTestDotprod<Real>(); UnitTestDotprod<Real>();
// UnitTestSvdVariants<Real>(); // UnitTestSvdVariants<Real>();
UnitTestPower<Real>(); UnitTestPower<Real>();
UnitTestCopySp<Real>();
UnitTestDeterminant<Real>(); UnitTestDeterminant<Real>();
KALDI_LOG << " Point F"; KALDI_LOG << " Point F";
UnitTestDeterminantSign<Real>(); UnitTestDeterminantSign<Real>();
@ -3566,6 +3611,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
UnitTestRange<Real>(); UnitTestRange<Real>();
UnitTestSimpleForVec<Real>(); UnitTestSimpleForVec<Real>();
UnitTestSimpleForMat<Real>(); UnitTestSimpleForMat<Real>();
UnitTestTanh<Real>();
UnitTestNorm<Real>(); UnitTestNorm<Real>();
UnitTestMul<Real>(); UnitTestMul<Real>();
KALDI_LOG << " Point I"; KALDI_LOG << " Point I";

View file

@ -169,9 +169,17 @@ void SpMatrix<Real>::CopyFromMat(const MatrixBase<Real> &M,
break; break;
} }
case kTakeLower: case kTakeLower:
for (MatrixIndexT i = 0; i < D; i++) { // making this one a bit more efficient.
const Real *src = M.Data();
Real *dest = this->data_;
MatrixIndexT stride = M.Stride();
for (MatrixIndexT i = 0; i < D; i++) {
for (MatrixIndexT j = 0; j <= i; j++) for (MatrixIndexT j = 0; j <= i; j++)
(*this)(i, j) = M(i, j); dest[j] = src[j];
dest += i + 1;
src += stride;
}
}
break; break;
case kTakeUpper: case kTakeUpper:
for (MatrixIndexT i = 0; i < D; i++) for (MatrixIndexT i = 0; i < D; i++)

View file

@ -10,7 +10,7 @@ OBJFILES = nnet-component.o nnet-nnet.o nnet-update.o train-nnet.o \
nnet-randomize.o nnet-compute.o am-nnet.o nnet-functions.o \ nnet-randomize.o nnet-compute.o am-nnet.o nnet-functions.o \
nnet-precondition.o shrink-nnet.o combine-nnet.o combine-nnet-a.o \ nnet-precondition.o shrink-nnet.o combine-nnet.o combine-nnet-a.o \
mixup-nnet.o nnet-lbfgs.o nnet-update-parallel.o combine-nnet-fast.o \ mixup-nnet.o nnet-lbfgs.o nnet-update-parallel.o combine-nnet-fast.o \
nnet-fix.o nnet-fix.o nnet-stats.o rescale-nnet.o nnet-limit-rank.o
#nnet-compute.o nnet-train.o #nnet-compute.o nnet-train.o
# nnet-nnet.o nnet-loss.o nnet-rnnlm.o # nnet-nnet.o nnet-loss.o nnet-rnnlm.o

View file

@ -183,6 +183,23 @@ void UnitTestSigmoidComponent() {
} }
} }
void UnitTestReduceComponent() {
// We're testing that the gradients are computed correctly:
// the input gradients and the model gradients.
int32 input_dim = 10 + rand() % 50, n = 1 + rand() % 3;
{
ReduceComponent reduce_component(input_dim, n);
UnitTestGenericComponentInternal(reduce_component);
}
{
ReduceComponent reduce_component;
reduce_component.InitFromString("dim=15 n=3");
UnitTestGenericComponentInternal(reduce_component);
}
}
template<class T> template<class T>
void UnitTestGenericComponent() { // works if it has an initializer from int, void UnitTestGenericComponent() { // works if it has an initializer from int,
// e.g. tanh, sigmoid. // e.g. tanh, sigmoid.
@ -463,6 +480,8 @@ int main() {
UnitTestGenericComponent<TanhComponent>(); UnitTestGenericComponent<TanhComponent>();
UnitTestGenericComponent<PermuteComponent>(); UnitTestGenericComponent<PermuteComponent>();
UnitTestGenericComponent<SoftmaxComponent>(); UnitTestGenericComponent<SoftmaxComponent>();
UnitTestSigmoidComponent();
UnitTestReduceComponent();
UnitTestAffineComponent(); UnitTestAffineComponent();
UnitTestAffinePreconInputComponent(); UnitTestAffinePreconInputComponent();
UnitTestBlockAffineComponent(); UnitTestBlockAffineComponent();

View file

@ -47,6 +47,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
ans = new TanhComponent(); ans = new TanhComponent();
} else if (component_type == "SoftmaxComponent") { } else if (component_type == "SoftmaxComponent") {
ans = new SoftmaxComponent(); ans = new SoftmaxComponent();
} else if (component_type == "ReduceComponent") {
ans = new ReduceComponent();
} else if (component_type == "AffineComponent") { } else if (component_type == "AffineComponent") {
ans = new AffineComponent(); ans = new AffineComponent();
} else if (component_type == "AffineComponentA") { } else if (component_type == "AffineComponentA") {
@ -407,20 +409,8 @@ void TanhComponent::Propagate(const MatrixBase<BaseFloat> &in,
// Apply tanh function to each element of the output... // Apply tanh function to each element of the output...
// the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})), // the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})),
// which is a scaled and shifted sigmoid. // which is a scaled and shifted sigmoid.
out->Resize(in.NumRows(), in.NumCols()); *out = in;
int32 num_rows = in.NumRows(), num_cols = in.NumCols(); out->ApplyTanh();
for(int32 r = 0; r < num_rows; r++) {
const BaseFloat *in_data = in.RowData(r),
*in_data_end = in_data + num_cols;
BaseFloat *out_data = out->RowData(r);
for (; in_data != in_data_end; ++in_data, ++out_data) {
if (*in_data > 0.0) {
*out_data = -1.0 + 2.0 / (1.0 + exp(-2.0 * *in_data));
} else {
*out_data = 1.0 - 2.0 / (1.0 + exp(2.0 * *in_data));
}
}
}
} }
void TanhComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value void TanhComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
@ -502,6 +492,67 @@ void SoftmaxComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
} }
} }
void ReduceComponent::InitFromString(std::string args) {
std::string orig_args(args);
int32 dim, n;
bool ok = ParseFromString("dim", &args, &dim) &&
ParseFromString("n", &args, &n);
if (!args.empty())
KALDI_ERR << "Could not process these elements in initializer: "
<< args;
if (!ok)
KALDI_ERR << "Bad initializer " << orig_args;
Init(dim, n);
}
void ReduceComponent::Read(std::istream &is, bool binary) {
ExpectOneOrTwoTokens(is, binary, "<ReduceComponent>", "<Dim>");
ReadBasicType(is, binary, &dim_);
ExpectToken(is, binary, "<N>");
ReadBasicType(is, binary, &n_);
ExpectToken(is, binary, "</ReduceComponent>");
}
void ReduceComponent::Write(std::ostream &os, bool binary) const {
WriteToken(os, binary, "<ReduceComponent>");
WriteToken(os, binary, "<Dim>");
WriteBasicType(os, binary, dim_);
WriteToken(os, binary, "<N>");
WriteBasicType(os, binary, n_);
WriteToken(os, binary, "</ReduceComponent>");
}
void ReduceComponent::Propagate(const MatrixBase<BaseFloat> &in,
int32 num_chunks,
Matrix<BaseFloat> *out) const {
KALDI_ASSERT(in.NumRows() > 0 && in.NumCols() == InputDim());
out->Resize(in.NumRows(), OutputDim());
int32 num_frames = in.NumRows(), input_dim = in.NumCols(), n = n_;
for (int32 r = 0; r < num_frames; r++) {
const BaseFloat *src = in.RowData(r);
BaseFloat *dest = out->RowData(r);
for (int32 c = 0; c < input_dim; c++)
dest[c / n] += src[c];
}
}
void ReduceComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value,
const MatrixBase<BaseFloat> &, // out_value,
const MatrixBase<BaseFloat> &out_deriv,
int32, // num_chunks
Component *, // to_update
Matrix<BaseFloat> *in_deriv) const {
int32 num_frames = out_deriv.NumRows(),
input_dim = InputDim(), n = n_;
in_deriv->Resize(num_frames, input_dim, kUndefined);
for (int32 r = 0; r < num_frames; r++) {
const BaseFloat *src = out_deriv.RowData(r);
BaseFloat *dest = in_deriv->RowData(r);
for (int32 c = 0; c < input_dim; c++)
dest[c] = src[c / n];
}
}
void AffineComponent::Scale(BaseFloat scale) { void AffineComponent::Scale(BaseFloat scale) {
linear_params_.Scale(scale); linear_params_.Scale(scale);
bias_params_.Scale(scale); bias_params_.Scale(scale);
@ -859,9 +910,9 @@ void AffineComponentPreconditioned::Update(
in_value_temp(i, in_value.NumCols()) = 1.0; in_value_temp(i, in_value.NumCols()) = 1.0;
Matrix<BaseFloat> in_value_precon(in_value_temp.NumRows(), Matrix<BaseFloat> in_value_precon(in_value_temp.NumRows(),
in_value_temp.NumCols()), in_value_temp.NumCols(), kUndefined),
out_deriv_precon(out_deriv.NumRows(), out_deriv_precon(out_deriv.NumRows(),
out_deriv.NumCols()); out_deriv.NumCols(), kUndefined);
// each row of in_value_precon will be that same row of // each row of in_value_precon will be that same row of
// in_value, but multiplied by the inverse of a Fisher // in_value, but multiplied by the inverse of a Fisher
// matrix that has been estimated from all the other rows, // matrix that has been estimated from all the other rows,

View file

@ -225,6 +225,8 @@ class NonlinearComponent: public Component {
void Scale(BaseFloat scale); void Scale(BaseFloat scale);
void Add(BaseFloat alpha, const NonlinearComponent &other); void Add(BaseFloat alpha, const NonlinearComponent &other);
// The following functions are unique to NonlinearComponent.
// They mostly relate to diagnostics.
const Vector<double> &ValueSum() const { return value_sum_; } const Vector<double> &ValueSum() const { return value_sum_; }
const Vector<double> &DerivSum() const { return deriv_sum_; } const Vector<double> &DerivSum() const { return deriv_sum_; }
double Count() const { return count_; } double Count() const { return count_; }
@ -324,6 +326,37 @@ class SoftmaxComponent: public NonlinearComponent {
SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow. SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
}; };
/// This layer just sums up groups of n inputs to produce one output.
class ReduceComponent: public Component {
public:
void Init(int32 dim, int32 n) { KALDI_ASSERT(dim > 0 && n > 0); dim_ = dim; n_ = n; }
ReduceComponent(int32 dim, int32 n) { Init(dim, n); }
ReduceComponent(): dim_(0), n_(0) { } // e.g. prior to Read()
explicit ReduceComponent(const ReduceComponent &other):
dim_(other.dim_), n_(other.n_) {}
virtual Component* Copy() const { return new ReduceComponent(*this); }
virtual std::string Type() const { return "ReduceComponent"; }
virtual int32 InputDim() const { return dim_; }
virtual int32 OutputDim() const { return (dim_ + n_ - 1) / n_; }
virtual void InitFromString(std::string args);
virtual void Read(std::istream &is, bool binary);
virtual void Write(std::ostream &os, bool binary) const;
virtual void Propagate(const MatrixBase<BaseFloat> &in,
int32 num_chunks,
Matrix<BaseFloat> *out) const;
virtual void Backprop(const MatrixBase<BaseFloat> &in_value,
const MatrixBase<BaseFloat> &out_value,
const MatrixBase<BaseFloat> &out_deriv,
int32 num_chunks,
Component *to_update, // may be identical to "this".
Matrix<BaseFloat> *in_deriv) const;
virtual bool BackpropNeedsInput() const { return false; }
virtual bool BackpropNeedsOutput() const { return false; }
private:
int32 dim_;
int32 n_;
};
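To make the grouping concrete: with dim = 5 and n = 2, OutputDim() is (5 + 2 - 1) / 2 = 3 and the three outputs are in[0]+in[1], in[2]+in[3] and in[4] (the last group is allowed to be short). A plain-vector sketch of the same reduction (illustration only, not the component code):

#include <cstddef>
#include <vector>

// Sums consecutive groups of n inputs; the final group may be shorter.
std::vector<float> ReduceGroups(const std::vector<float> &in, int n) {
  std::vector<float> out((in.size() + n - 1) / n, 0.0f);
  for (std::size_t c = 0; c < in.size(); c++)
    out[c / n] += in[c];
  return out;
}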
// Affine means a linear function plus an offset. // Affine means a linear function plus an offset.
// Note: although this class can be instantiated, it also // Note: although this class can be instantiated, it also

View file

@ -0,0 +1,108 @@
// nnet-cpu/nnet-limit-rank.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet-cpu/nnet-limit-rank.h"
#include "thread/kaldi-task-sequence.h"
namespace kaldi {
class LimitRankClass {
public:
LimitRankClass(const NnetLimitRankOpts &opts,
int32 c,
Nnet *nnet): opts_(opts), c_(c), nnet_(nnet) { }
void operator () () {
AffineComponent *ac = dynamic_cast<AffineComponent*>(
&(nnet_->GetComponent(c_)));
KALDI_ASSERT(ac != NULL);
// We'll limit the rank of just the linear part, keeping the bias vector full.
Matrix<BaseFloat> M (ac->LinearParams());
int32 rows = M.NumRows(), cols = M.NumCols(), rc_min = std::min(rows, cols);
Vector<BaseFloat> s(rc_min);
Matrix<BaseFloat> U(rows, rc_min), Vt(rc_min, cols);
// Do the destructive svd M = U diag(s) V^T. It actually outputs the transpose of V.
M.DestructiveSvd(&s, &U, &Vt);
SortSvd(&s, &U, &Vt); // Sort the singular values from largest to smallest.
int32 d = GetRetainedDim(rows, cols);
BaseFloat old_svd_sum = s.Sum();
U.Resize(rows, d, kCopyData);
s.Resize(d, kCopyData);
Vt.Resize(d, cols, kCopyData);
BaseFloat new_svd_sum = s.Sum();
KALDI_LOG << "For component " << c_ << " of dimension " << rows
<< " x " << cols << ", reduced rank from "
<< rc_min << " to " << d << ", SVD sum reduced from "
<< old_svd_sum << " to " << new_svd_sum;
Vt.MulRowsVec(s); // Vt <-- diag(s) Vt.
M.AddMatMat(1.0, U, kNoTrans, Vt, kNoTrans, 0.0); // Reconstruct with reduced
// rank.
Vector<BaseFloat> bias_params(ac->BiasParams());
ac->SetParams(bias_params, M);
}
int32 GetRetainedDim(int32 rows, int32 cols) {
if (opts_.parameter_proportion <= 0.0 || opts_.parameter_proportion > 1.0)
KALDI_ERR << "bad --parameter-proportion " << opts_.parameter_proportion;
// If we do SVD to dimension d, so that it's U diag(s) V^T where
// U is rows * d, s is d, and V is cols * d, then the #params is as follows...
// the first column of U has free parameters (#rows - 1) [the -1 is due to
// the length constraint]; the second has (#rows - 2) [subtract 1 for the
// length constraint and one for orthogonality with the previous row], etc.
// Total is params(U) = (rows * d) - ((d(d+1))/2),
// params(s) = d,
// params(V) = (cols * d) - ((d(d+1))/2),
// So total is (rows + cols) * d - d * d .
// For example, if d = #rows, this equals (#rows * #cols)
// We are solving for:
// (rows * cols) * parameter_proportion = (rows + cols) * d - d * d, or
// d^2 - d * (rows + cols) + (rows*cols)*parameter_proportion
// In quadratic equation
// a = 1.0,
// b = -(rows + cols)
// c = rows * cols * parameter_proportion.
// Take smaller solution.
BaseFloat a = 1.0, b = -(rows + cols),
c = rows * cols * opts_.parameter_proportion;
BaseFloat x = (-b - sqrt(b * b - 4 * a * c)) / (2.0 * a);
int32 ans = static_cast<int32>(x);
KALDI_ASSERT(ans > 0 && ans <= std::min(rows, cols));
return ans;
}
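  // Illustrative example with hypothetical numbers (not in the original code):
  // with rows = 1000, cols = 500 and parameter_proportion = 0.25 we get
  //   b^2 - 4ac = 1500^2 - 4 * 1000 * 500 * 0.25 = 1,750,000,
  //   x = (1500 - sqrt(1,750,000)) / 2 ~= (1500 - 1322.9) / 2 ~= 88.6,
  // so d = 88. The retained parameter count is then
  //   (rows + cols) * d - d * d = 1500 * 88 - 7744 = 124,256,
  // i.e. just under 0.25 * rows * cols = 125,000; the small shortfall comes
  // from rounding d down.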
~LimitRankClass() { }
private:
const NnetLimitRankOpts &opts_;
int32 c_;
Nnet *nnet_;
};
void LimitRankParallel(const NnetLimitRankOpts &opts,
Nnet *nnet) {
TaskSequencerConfig task_config;
task_config.num_threads = opts.num_threads;
TaskSequencer<LimitRankClass> tc(task_config);
for (int32 c = 0; c < nnet->NumComponents(); c++) {
if (dynamic_cast<AffineComponent*>(&(nnet->GetComponent(c))) != NULL)
tc.Run(new LimitRankClass(opts, c, nnet));
}
}
} // namespace

View file

@ -0,0 +1,56 @@
// nnet-cpu/nnet-limit-rank.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
#define KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
#include "nnet-cpu/nnet-nnet.h"
#include "util/table-types.h"
#include "thread/kaldi-semaphore.h"
#include "thread/kaldi-thread.h"
#include "nnet-cpu/nnet-update.h"
namespace kaldi {
struct NnetLimitRankOpts {
int32 num_threads;
BaseFloat parameter_proportion;
NnetLimitRankOpts(): num_threads(1), parameter_proportion(0.75) { }
void Register(ParseOptions *po) {
po->Register("num-threads", &num_threads, "Number of threads used for "
"rank-limiting operation; note, will never use more than "
"#layers.");
po->Register("parameter-proportion", &parameter_proportion, "Proportion of "
"dimension of each transform to limit the rank to.");
}
};
/// This function limits the rank of each affine transform in the
/// neural net, by zeroing out the smallest singular values. The number of
/// singular values to zero out is determined on a layer-by-layer basis, using
/// "parameter_proportion" to set the proportion of parameters to retain.
void LimitRankParallel(const NnetLimitRankOpts &opts,
Nnet *nnet);
} // namespace
#endif // KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
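A minimal usage sketch for this interface (it mirrors the new nnet-am-limit-rank binary added later in this commit; the model filename, the 0.5 proportion and the thread count are placeholders):

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-limit-rank.h"
#include "nnet-cpu/am-nnet.h"

void LimitRankSketch(const std::string &model_rxfilename) {
  using namespace kaldi;
  TransitionModel trans_model;
  AmNnet am_nnet;
  {
    bool binary;
    Input ki(model_rxfilename, &binary);   // model file holds transition model + nnet
    trans_model.Read(ki.Stream(), binary);
    am_nnet.Read(ki.Stream(), binary);
  }
  NnetLimitRankOpts opts;
  opts.parameter_proportion = 0.5;  // keep roughly half of the parameters per layer
  opts.num_threads = 4;             // one SVD job per affine component, run in parallel
  LimitRankParallel(opts, &am_nnet.GetNnet());
}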

View file

@ -361,6 +361,17 @@ void Nnet::RemoveDropout() {
KALDI_LOG << "Removed " << removed << " dropout components."; KALDI_LOG << "Removed " << removed << " dropout components.";
} }
void Nnet::RemovePreconditioning() {
for (size_t i = 0; i < components_.size(); i++) {
if (dynamic_cast<AffineComponentPreconditioned*>(components_[i]) != NULL) {
AffineComponent *ac = new AffineComponent(
*(dynamic_cast<AffineComponent*>(components_[i])));
delete components_[i];
components_[i] = ac;
}
}
}
void Nnet::AddNnet(const VectorBase<BaseFloat> &scale_params, void Nnet::AddNnet(const VectorBase<BaseFloat> &scale_params,
const Nnet &other) { const Nnet &other) {
KALDI_ASSERT(scale_params.Dim() == this->NumUpdatableComponents()); KALDI_ASSERT(scale_params.Dim() == this->NumUpdatableComponents());

View file

@ -105,6 +105,10 @@ class Nnet {
/// Excise any components of type DropoutComponent. /// Excise any components of type DropoutComponent.
void RemoveDropout(); void RemoveDropout();
/// Replace any components of type AffineComponentPreconditioned with
/// components of type AffineComponent.
void RemovePreconditioning();
/// For each updatable component, adds to it /// For each updatable component, adds to it
/// the corresponding element of "other" times the /// the corresponding element of "other" times the
/// appropriate element of "scales" (which has the /// appropriate element of "scales" (which has the

View file

@ -25,7 +25,13 @@ void PreconditionDirections(const MatrixBase<BaseFloat> &R,
MatrixBase<BaseFloat> *P) { MatrixBase<BaseFloat> *P) {
int32 N = R.NumRows(), D = R.NumCols(); int32 N = R.NumRows(), D = R.NumCols();
KALDI_ASSERT(SameDim(R, *P) && N > 1); KALDI_ASSERT(SameDim(R, *P) && N > 0);
if (N == 1) {
KALDI_WARN << "Trying to precondition set of only one frames: returning "
<< "unchanged. Ignore this warning if infrequent.";
P->CopyFromMat(R);
return;
}
MatrixBase<BaseFloat> &Q = *P; MatrixBase<BaseFloat> &Q = *P;
if (N >= D) { if (N >= D) {

src/nnet-cpu/nnet-stats.h (new file, 93 lines added)
View file

@ -0,0 +1,93 @@
// nnet-cpu/nnet-stats.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET_CPU_NNET_STATS_H_
#define KALDI_NNET_CPU_NNET_STATS_H_
#include "nnet-cpu/nnet-nnet.h"
namespace kaldi {
/* This code computes various statistics from a neural net. These are
summaries of certain quantities already present in the network as
stored on disk, especially regarding certain average values and
derivatives of the sigmoids.
*/
struct NnetStatsConfig {
BaseFloat bucket_width;
NnetStatsConfig(): bucket_width(0.025) { }
void Register(ParseOptions *po) {
po->Register("bucket-width", &bucket_width, "Width of bucket in average-derivative "
"stats for analysis.");
}
};
class NnetStats {
public:
NnetStats(int32 affine_component_index, BaseFloat bucket_width):
affine_component_index_(affine_component_index),
bucket_width_(bucket_width), global_(0, -1) { }
// Use default copy constructor and assignment operator.
void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
void AddStatsFromNnet(const Nnet &nnet);
void PrintStats(std::ostream &os);
private:
struct StatsElement {
BaseFloat deriv_begin; // avg-deriv, beginning of bucket.
BaseFloat deriv_end; // avg-deriv, end of bucket.
BaseFloat deriv_sum; // sum of avg-deriv within bucket.
BaseFloat deriv_sumsq; // Sum-squared of avg-deriv within bucket.
BaseFloat abs_value_sum; // Sum of abs(avg-value). Tells us whether it's
// saturating at one or both ends.
BaseFloat abs_value_sumsq; // Sum-squared of abs(avg-value).
int32 count; // Number of nonlinearities in this bucket.
StatsElement(BaseFloat deriv_begin,
BaseFloat deriv_end):
deriv_begin(deriv_begin), deriv_end(deriv_end), deriv_sum(0.0),
deriv_sumsq(0.0), abs_value_sum(0.0), abs_value_sumsq(0.0), count(0) { }
void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
// Outputs stats for this bucket; no newline
void PrintStats(std::ostream &os);
};
int32 BucketFor(BaseFloat avg_deriv); // returns the bucket
// for this avg-derivative value, and makes sure it is allocated.
int32 affine_component_index_; // Component index of the affine component
// associated with this nonlinearity.
BaseFloat bucket_width_; // width of buckets of stats we store (in derivative values).
std::vector<StatsElement> buckets_; // Stats divided into buckets by avg_deriv.
StatsElement global_; // All the stats.
};
void GetNnetStats(const NnetStatsConfig &config,
const Nnet &nnet,
std::vector<NnetStats> *stats);
} // namespace
#endif // KALDI_NNET_CPU_NNET_STATS_H_
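The bucketing code itself is in nnet-stats.cc, which is not part of this diff; the following is only an illustrative guess at how an average-derivative value could map to a bucket index, assuming bucket i covers the interval [i * bucket_width, (i + 1) * bucket_width):

// Illustrative sketch only -- not the actual NnetStats::BucketFor().
#include <cmath>

int BucketIndexSketch(float avg_deriv, float bucket_width) {
  // E.g. avg_deriv = 0.06 with the default bucket_width = 0.025 lands in
  // bucket 2, which covers [0.05, 0.075).
  return static_cast<int>(std::floor(avg_deriv / bucket_width));
}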

View file

@ -44,7 +44,6 @@ class NnetUpdater {
// Possibly splices input together from forward_data_[component]. // Possibly splices input together from forward_data_[component].
// MatrixBase<BaseFloat> &GetSplicedInput(int32 component, Matrix<BaseFloat> *temp_matrix); // MatrixBase<BaseFloat> &GetSplicedInput(int32 component, Matrix<BaseFloat> *temp_matrix);
void Propagate(); void Propagate();
/// Computes objective function and derivative at output layer. /// Computes objective function and derivative at output layer.
@ -156,7 +155,7 @@ void NnetUpdater::Backprop(const std::vector<NnetTrainingExample> &data,
component.Backprop(input, output, output_deriv, num_chunks, component.Backprop(input, output, output_deriv, num_chunks,
component_to_update, &input_deriv); component_to_update, &input_deriv);
*deriv = input_deriv; input_deriv.Swap(deriv);
} }
} }

View file

@ -0,0 +1,212 @@
// nnet-cpu/rescale-nnet.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet-cpu/rescale-nnet.h"
namespace kaldi {
class NnetRescaler {
public:
NnetRescaler(const NnetRescaleConfig &config,
const std::vector<NnetTrainingExample> &examples,
Nnet *nnet):
config_(config), examples_(examples), nnet_(nnet) {}
void Rescale();
private:
/// takes the input and formats as a single matrix, in forward_data_[0].
void FormatInput(const std::vector<NnetTrainingExample> &data,
Matrix<BaseFloat> *input);
void RescaleComponent(int32 c, int32 num_chunks,
MatrixBase<BaseFloat> *cur_data_in,
Matrix<BaseFloat> *next_data);
void ComputeRelevantIndexes();
BaseFloat GetTargetAvgDeriv(int32 c);
const NnetRescaleConfig &config_;
const std::vector<NnetTrainingExample> &examples_;
Nnet *nnet_;
std::set<int32> relevant_indexes_; // values of c with AffineComponent followed
// by (at c+1) NonlinearComponent that is not SoftmaxComponent.
};
void NnetRescaler::FormatInput(const std::vector<NnetTrainingExample> &data,
Matrix<BaseFloat> *input) {
KALDI_ASSERT(data.size() > 0);
int32 num_splice = nnet_->LeftContext() + 1 + nnet_->RightContext();
KALDI_ASSERT(data[0].input_frames.NumRows() == num_splice);
int32 feat_dim = data[0].input_frames.NumCols(),
spk_dim = data[0].spk_info.Dim(),
tot_dim = feat_dim + spk_dim; // we append these at the neural net
// input... note, spk_dim might be 0.
KALDI_ASSERT(tot_dim == nnet_->InputDim());
int32 num_chunks = data.size();
input->Resize(num_splice * num_chunks,
tot_dim);
for (int32 chunk = 0; chunk < num_chunks; chunk++) {
SubMatrix<BaseFloat> dest(*input,
chunk * num_splice, num_splice,
0, feat_dim);
const Matrix<BaseFloat> &src(data[chunk].input_frames);
dest.CopyFromMat(src);
if (spk_dim != 0) {
SubMatrix<BaseFloat> spk_dest(*input,
chunk * num_splice, num_splice,
feat_dim, spk_dim);
spk_dest.CopyRowsFromVec(data[chunk].spk_info);
}
}
}
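// Illustrative sizes (hypothetical, just to make the layout concrete): with
// LeftContext() = 4 and RightContext() = 4 we have num_splice = 9; if each
// example then carries 9 rows of 40-dimensional features, there is no speaker
// vector, and 500 examples are supplied, the matrix built above is
// (9 * 500) x 40 = 4500 x 40, with example k occupying rows [9k, 9k + 9).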
void NnetRescaler::ComputeRelevantIndexes() {
for (int32 c = 0; c + 1 < nnet_->NumComponents(); c++)
if (dynamic_cast<AffineComponent*>(&nnet_->GetComponent(c)) != NULL &&
(dynamic_cast<NonlinearComponent*>(&nnet_->GetComponent(c+1)) != NULL &&
dynamic_cast<SoftmaxComponent*>(&nnet_->GetComponent(c+1)) == NULL))
relevant_indexes_.insert(c);
}
BaseFloat NnetRescaler::GetTargetAvgDeriv(int32 c) {
KALDI_ASSERT(relevant_indexes_.count(c) == 1);
BaseFloat factor;
if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
factor = 0.25;
else if (dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
factor = 1.0;
else
KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
int32 last_c = *std::max_element(relevant_indexes_.begin(), relevant_indexes_.end()),
first_c = *std::min_element(relevant_indexes_.begin(), relevant_indexes_.end());
if (c == first_c)
return factor * config_.target_first_layer_avg_deriv;
else if (c == last_c)
return factor * config_.target_last_layer_avg_deriv;
else
return factor * config_.target_avg_deriv;
}
// Here, c is the index of the affine component, and
// c + 1 is the index of the nonlinear component; *cur_data is the
// output of the affine component.
void NnetRescaler::RescaleComponent(
int32 c,
int32 num_chunks,
MatrixBase<BaseFloat> *cur_data_in,
Matrix<BaseFloat> *next_data) {
int32 rows = cur_data_in->NumRows(), cols = cur_data_in->NumCols();
// Only handle sigmoid or tanh here.
if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) == NULL &&
dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) == NULL)
KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
// the nonlinear component:
NonlinearComponent &nc =
*(dynamic_cast<NonlinearComponent*>(&(nnet_->GetComponent(c + 1))));
BaseFloat orig_avg_deriv, target_avg_deriv = GetTargetAvgDeriv(c);
BaseFloat cur_scaling = 1.0; // current rescaling factor (on input).
int32 num_iters = 10;
Matrix<BaseFloat> cur_data(*cur_data_in),
ones(rows, cols), in_deriv(rows, cols);
ones.Set(1.0);
nc.Propagate(cur_data, num_chunks, next_data);
nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
BaseFloat cur_avg_deriv;
cur_avg_deriv = in_deriv.Sum() / (rows * cols);
orig_avg_deriv = cur_avg_deriv;
for (int32 iter = 0; iter < num_iters; iter++) {
// We already have "cur_avg_deriv"; perturb the scale and compute
// the next avg_deriv, so we can see how it changes with the scale.
cur_data.CopyFromMat(*cur_data_in);
cur_data.Scale(cur_scaling + config_.delta);
nc.Propagate(cur_data, num_chunks, next_data);
nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
BaseFloat next_avg_deriv = in_deriv.Sum() / (rows * cols);
KALDI_ASSERT(next_avg_deriv < cur_avg_deriv);
// "gradient" is how avg_deriv changes as we change the scale.
// should be negative.
BaseFloat gradient = (next_avg_deriv - cur_avg_deriv) / config_.delta;
KALDI_ASSERT(gradient < 0.0);
BaseFloat proposed_change = (target_avg_deriv - cur_avg_deriv) / gradient;
KALDI_VLOG(2) << "cur_avg_deriv = " << cur_avg_deriv << ", target_avg_deriv = "
<< target_avg_deriv << ", gradient = " << gradient
<< ", proposed_change " << proposed_change;
// Limit size of proposed change in "cur_scaling", to ensure stability.
if (fabs(proposed_change / cur_scaling) > config_.max_change)
proposed_change = cur_scaling * config_.max_change *
(proposed_change > 0.0 ? 1.0 : -1.0);
cur_scaling += proposed_change;
cur_data.CopyFromMat(*cur_data_in);
cur_data.Scale(cur_scaling);
nc.Propagate(cur_data, num_chunks, next_data);
nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
cur_avg_deriv = in_deriv.Sum() / (rows * cols);
if (fabs(proposed_change) < config_.min_change) break; // Terminate the
// optimization
}
UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(
&nnet_->GetComponent(c));
KALDI_ASSERT(uc != NULL);
uc->Scale(cur_scaling); // scale the parameters of the previous
// AffineComponent.
KALDI_LOG << "For component " << c << ", scaling parameters by "
<< cur_scaling << "; average "
<< "derivative changed from " << orig_avg_deriv << " to "
<< cur_avg_deriv << "; target was " << target_avg_deriv;
}
void NnetRescaler::Rescale() {
ComputeRelevantIndexes(); // set up relevant_indexes_.
Matrix<BaseFloat> cur_data, next_data;
FormatInput(examples_, &cur_data);
int32 num_chunks = examples_.size();
for (int32 c = 0; c < nnet_->NumComponents(); c++) {
Component &component = nnet_->GetComponent(c);
if (relevant_indexes_.count(c - 1) == 1) {
// the following function call also appropriately sets "next_data"
// after doing the rescaling
RescaleComponent(c - 1, num_chunks, &cur_data, &next_data);
} else {
component.Propagate(cur_data, num_chunks, &next_data);
}
cur_data.Swap(&next_data);
}
}
void RescaleNnet(const NnetRescaleConfig &rescale_config,
const std::vector<NnetTrainingExample> &examples,
Nnet *nnet) {
NnetRescaler rescaler(rescale_config, examples, nnet);
rescaler.Rescale();
}
} // namespace
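To make the update inside RescaleComponent() concrete, here is one iteration of the scale search written out with made-up numbers (a sketch, not part of the commit): a middle sigmoid layer has target 0.25 * 0.2 = 0.05, the measured average derivative is 0.15, and the delta-perturbation gave a slope of -0.3 per unit of scale.

// Single NnetRescaler-style update with hypothetical numbers.
#include <cmath>
#include <iostream>

int main() {
  double cur_scaling = 1.0;
  double cur_avg_deriv = 0.15;           // measured via Propagate()/Backprop()
  double target_avg_deriv = 0.25 * 0.2;  // sigmoid max-derivative * target fraction
  double gradient = -0.30;               // estimated with the config_.delta perturbation
  double max_change = 0.2;               // NnetRescaleConfig default

  double proposed = (target_avg_deriv - cur_avg_deriv) / gradient;  // = 0.333
  if (std::fabs(proposed / cur_scaling) > max_change)               // step too large,
    proposed = cur_scaling * max_change * (proposed > 0.0 ? 1.0 : -1.0);  // clip to +0.2
  cur_scaling += proposed;  // 1.0 -> 1.2: a larger input scale saturates the sigmoid
                            // more, pulling the average derivative down toward 0.05
  std::cout << "new scale: " << cur_scaling << std::endl;
  return 0;
}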

View file

@ -0,0 +1,76 @@
// nnet-cpu/rescale-nnet.h
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET_CPU_RESCALE_NNET_H_
#define KALDI_NNET_CPU_RESCALE_NNET_H_
#include "nnet-cpu/nnet-update.h"
#include "nnet-cpu/nnet-compute.h"
#include "util/parse-options.h"
// Neural net rescaling is a rescaling of the parameters of the various layers
// of a neural net, done so as to match certain specified statistics on the
// average derivative of the sigmoid, measured on sample data. This relates to
// how "saturated" the sigmoid is.
namespace kaldi {
struct NnetRescaleConfig {
BaseFloat target_avg_deriv;
BaseFloat target_first_layer_avg_deriv;
BaseFloat target_last_layer_avg_deriv;
// These are relatively unimportant; for now they have no
// command line options.
BaseFloat num_iters;
BaseFloat delta;
BaseFloat max_change; // maximum change on any one iteration (to
// ensure stability).
BaseFloat min_change; // minimum change on any one iteration (controls
// termination).
NnetRescaleConfig(): target_avg_deriv(0.2),
target_first_layer_avg_deriv(0.3),
target_last_layer_avg_deriv(0.1),
num_iters(10),
delta(0.01),
max_change(0.2), min_change(1.0e-05) { }
void Register(ParseOptions *po) {
po->Register("target-avg-deriv", &target_avg_deriv, "Target average derivative "
"for hidden layers that are the not the first or last hidden layer "
"(as fraction of maximum derivative of the nonlinearity)");
po->Register("target-first-layer-avg-deriv", &target_first_layer_avg_deriv,
"Target average derivative for the first hidden layer"
"(as fraction of maximum derivative of the nonlinearity)");
po->Register("target-last-layer-avg-deriv", &target_last_layer_avg_deriv,
"Target average derivative for the last hidden layer, if "
"#hid-layers > 1"
"(as fraction of maximum derivative of the nonlinearity)");
}
};
void RescaleNnet(const NnetRescaleConfig &rescale_config,
const std::vector<NnetTrainingExample> &examples,
Nnet *nnet);
} // namespace
#endif

View file

@ -13,7 +13,8 @@ BINFILES = nnet-randomize-frames nnet-am-info nnet-train nnet-init \
nnet-train-lbfgs nnet-get-egs nnet-train-parallel nnet-gradient \ nnet-train-lbfgs nnet-get-egs nnet-train-parallel nnet-gradient \
nnet-get-preconditioner nnet-precondition nnet-select-egs nnet-combine-fast \ nnet-get-preconditioner nnet-precondition nnet-select-egs nnet-combine-fast \
nnet-subset-egs nnet-shuffle-egs nnet-am-fix nnet-logprob nnet-logprob2 \ nnet-subset-egs nnet-shuffle-egs nnet-am-fix nnet-logprob nnet-logprob2 \
nnet-logprob2-parallel nnet-logprob-parallel nnet-logprob2-parallel nnet-logprob-parallel nnet-am-stats nnet-am-rescale \
nnet-am-limit-rank
OBJFILES = OBJFILES =

View file

@ -41,6 +41,7 @@ int main(int argc, char *argv[]) {
int32 truncate = -1; int32 truncate = -1;
bool binary_write = true; bool binary_write = true;
bool remove_dropout = false; bool remove_dropout = false;
bool remove_preconditioning = false;
BaseFloat learning_rate_factor = 1.0, learning_rate = -1; BaseFloat learning_rate_factor = 1.0, learning_rate = -1;
std::string learning_rates = ""; std::string learning_rates = "";
std::string scales = ""; std::string scales = "";
@ -64,6 +65,8 @@ int main(int argc, char *argv[]) {
"to this many components by removing the last components."); "to this many components by removing the last components.");
po.Register("remove-dropout", &remove_dropout, "Set this to true to remove " po.Register("remove-dropout", &remove_dropout, "Set this to true to remove "
"any dropout components."); "any dropout components.");
po.Register("remove-preconditioning", &remove_preconditioning, "Set this to true to replace "
"components of type AffineComponentPreconditioned with AffineComponent.");
po.Register("stats-from", &stats_from, "Before copying neural net, copy the " po.Register("stats-from", &stats_from, "Before copying neural net, copy the "
"statistics in any layer of type NonlinearComponent, from this " "statistics in any layer of type NonlinearComponent, from this "
"neural network: provide the extended filename."); "neural network: provide the extended filename.");
@ -133,6 +136,8 @@ int main(int argc, char *argv[]) {
if (remove_dropout) am_nnet.GetNnet().RemoveDropout(); if (remove_dropout) am_nnet.GetNnet().RemoveDropout();
if (remove_preconditioning) am_nnet.GetNnet().RemovePreconditioning();
if (stats_from != "") { if (stats_from != "") {
// Copy the stats associated with the layers descending from // Copy the stats associated with the layers descending from
// NonlinearComponent. // NonlinearComponent.

View file

@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
"e.g.:\n" "e.g.:\n"
" nnet-am-fix 1.mdl 1_fixed.mdl\n" " nnet-am-fix 1.mdl 1_fixed.mdl\n"
"or:\n" "or:\n"
" nnet-am-shrink-rows --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n"; " nnet-am-fix --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n";
bool binary_write = true; bool binary_write = true;
NnetFixConfig config; NnetFixConfig config;

View file

@ -0,0 +1,81 @@
// nnet-cpubin/nnet-am-limit-rank.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-limit-rank.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
typedef kaldi::int32 int32;
const char *usage =
"Copy a (cpu-based) neural net and its associated transition model,\n"
"but modify it to reduce the effective parameter count by limiting\n"
"the rank of weight matrices.\n"
"\n"
"Usage: nnet-am-limit-rank [options] <nnet-in> <nnet-out>\n"
"e.g.:\n"
" nnet-am-limit-rank 1.mdl 1_limited.mdl\n";
bool binary_write = true;
NnetLimitRankOpts config;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
config.Register(&po);
po.Read(argc, argv);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string nnet_rxfilename = po.GetArg(1),
nnet_wxfilename = po.GetArg(2);
TransitionModel trans_model;
AmNnet am_nnet;
{
bool binary;
Input ki(nnet_rxfilename, &binary);
trans_model.Read(ki.Stream(), binary);
am_nnet.Read(ki.Stream(), binary);
}
LimitRankParallel(config, &am_nnet.GetNnet());
{
Output ko(nnet_wxfilename, binary_write);
trans_model.Write(ko.Stream(), binary_write);
am_nnet.Write(ko.Stream(), binary_write);
}
KALDI_LOG << "Copied neural net from " << nnet_rxfilename
<< " to " << nnet_wxfilename;
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}

View file

@ -0,0 +1,92 @@
// nnet-cpubin/nnet-am-rescale.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/rescale-nnet.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
typedef kaldi::int32 int32;
const char *usage =
"Rescale the parameters in a neural net to achieve certain target\n"
"statistics, relating to the average derivative of the sigmoids\n"
"measured at some supplied data. This relates to how saturated\n"
"the sigmoids are (we try to match the statistics of `good' neural\n"
"nets).\n"
"\n"
"Usage: nnet-am-rescale [options] <nnet-in> <examples-in> <nnet-out>\n"
"e.g.:\n"
" nnet-am-rescale 1.mdl valid.egs 1_rescaled.mdl\n";
bool binary_write = true;
NnetRescaleConfig config;
ParseOptions po(usage);
po.Register("binary", &binary_write, "Write output in binary mode");
config.Register(&po);
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
std::string nnet_rxfilename = po.GetArg(1),
egs_rspecifier = po.GetArg(2),
nnet_wxfilename = po.GetArg(3);
TransitionModel trans_model;
AmNnet am_nnet;
{
bool binary;
Input ki(nnet_rxfilename, &binary);
trans_model.Read(ki.Stream(), binary);
am_nnet.Read(ki.Stream(), binary);
}
std::vector<NnetTrainingExample> egs;
// This block adds samples to "egs".
SequentialNnetTrainingExampleReader example_reader(
egs_rspecifier);
for (; !example_reader.Done(); example_reader.Next())
egs.push_back(example_reader.Value());
KALDI_LOG << "Read " << egs.size() << " examples.";
KALDI_ASSERT(!egs.empty());
RescaleNnet(config, egs, &am_nnet.GetNnet());
{
Output ko(nnet_wxfilename, binary_write);
trans_model.Write(ko.Stream(), binary_write);
am_nnet.Write(ko.Stream(), binary_write);
}
KALDI_LOG << "Rescaled neural net and wrote it to " << nnet_wxfilename;
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}

View file

@ -0,0 +1,72 @@
// nnet-cpubin/nnet-am-stats.cc
// Copyright 2012 Johns Hopkins University (author: Daniel Povey)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-stats.h"
#include "nnet-cpu/am-nnet.h"
#include "hmm/transition-model.h"
#include "tree/context-dep.h"
int main(int argc, char *argv[]) {
try {
using namespace kaldi;
typedef kaldi::int32 int32;
const char *usage =
"Print some statistics about the average derivatives of the sigmoid layers\n"
"of the neural net, that are stored in the net\n"
"\n"
"Usage: nnet-am-stats [options] <nnet-in>\n"
"e.g.:\n"
" nnet-am-stats 1.mdl 1_fixed.mdl\n";
NnetStatsConfig config;
ParseOptions po(usage);
config.Register(&po);
po.Read(argc, argv);
if (po.NumArgs() != 1) {
po.PrintUsage();
exit(1);
}
std::string nnet_rxfilename = po.GetArg(1);
TransitionModel trans_model;
AmNnet am_nnet;
{
bool binary;
Input ki(nnet_rxfilename, &binary);
trans_model.Read(ki.Stream(), binary);
am_nnet.Read(ki.Stream(), binary);
}
std::vector<NnetStats> stats;
GetNnetStats(config, am_nnet.GetNnet(), &stats);
KALDI_ASSERT(!stats.empty());
for (size_t i = 0; i < stats.size(); i++)
stats[i].PrintStats(std::cout);
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}

View file

@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
KALDI_LOG << "Selected a subset of " << egs.size() << " out of " << num_read KALDI_LOG << "Selected a subset of " << egs.size() << " out of " << num_read
<< " neural-network training examples "; << " neural-network training examples ";
return (static_cast<size_t>(n) == egs.size() ? 0 : 1); return (num_read != 0 ? 0 : 1);
} catch(const std::exception &e) { } catch(const std::exception &e) {
std::cerr << e.what() << '\n'; std::cerr << e.what() << '\n';
return -1; return -1;

View file

@ -88,8 +88,8 @@ void CacheTgtMat::AddData(const CuMatrix<BaseFloat> &features, const CuMatrix<Ba
features_.CopyRowsFromMat(leftover, features_leftover_, 0, 0); features_.CopyRowsFromMat(leftover, features_leftover_, 0, 0);
targets_.CopyRowsFromMat(leftover, targets_leftover_, 0, 0); targets_.CopyRowsFromMat(leftover, targets_leftover_, 0, 0);
features_leftover_.Destroy(); features_leftover_.Resize(0, 0);
targets_leftover_.Destroy(); targets_leftover_.Resize(0, 0);
filling_pos_ += leftover; filling_pos_ += leftover;
} }
} }

View file

@ -91,7 +91,7 @@ void Cache::AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32
targets_leftover_.begin()+leftover, targets_leftover_.begin()+leftover,
targets_.begin()); targets_.begin());
features_leftover_.Destroy(); features_leftover_.Resize(0, 0);
targets_leftover_.resize(0); targets_leftover_.resize(0);
filling_pos_ += leftover; filling_pos_ += leftover;
} }

View file

@ -32,7 +32,7 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t
diff->Resize(net_out.NumRows(), net_out.NumCols()); diff->Resize(net_out.NumRows(), net_out.NumCols());
// compute derivative wrt. activations of last layer of neurons // compute derivative wrt. activations of last layer of neurons
diff->CopyFromMat(net_out); *diff = net_out;
diff->AddMat(-1.0, target); diff->AddMat(-1.0, target);
// we'll not produce per-frame classification accuracy for soft labels // we'll not produce per-frame classification accuracy for soft labels
@ -40,7 +40,8 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t
// :TODO: reimplement when needed // :TODO: reimplement when needed
// compute xentropy (ON CPU) // compute xentropy (ON CPU)
Matrix<BaseFloat> target_host, net_out_host; Matrix<BaseFloat> target_host(target.NumRows(), target.NumCols(), kUndefined),
net_out_host(net_out.NumRows(), net_out.NumCols(), kUndefined);
target.CopyToMat(&target_host); target.CopyToMat(&target_host);
net_out.CopyToMat(&net_out_host); net_out.CopyToMat(&net_out_host);
BaseFloat val; BaseFloat val;
@ -69,7 +70,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
// get the xentropy and global error // get the xentropy and global error
target_device_.CopyFromVec(target); target_device_.CopyFromVec(target);
if(&net_out != diff) { //<allow no-copy speedup if(&net_out != diff) { //<allow no-copy speedup
diff->CopyFromMat(net_out); *diff = net_out;
} }
cu::DiffXent(target_device_, diff, &log_post_tgt_); cu::DiffXent(target_device_, diff, &log_post_tgt_);
// //
@ -84,6 +85,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
// log(sum_row(net_out.*target_mat))) // log(sum_row(net_out.*target_mat)))
// they now are stored in vector log_post_tgt_ // they now are stored in vector log_post_tgt_
// //
log_post_tgt_host_.Resize(log_post_tgt_.Dim());
log_post_tgt_.CopyToVec(&log_post_tgt_host_); log_post_tgt_.CopyToVec(&log_post_tgt_host_);
loss_ -= log_post_tgt_host_.Sum(); loss_ -= log_post_tgt_host_.Sum();
@ -110,9 +112,10 @@ std::string Xent::Report() {
void Mse::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &target, CuMatrix<BaseFloat> *diff) { void Mse::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &target, CuMatrix<BaseFloat> *diff) {
KALDI_ASSERT(net_out.NumCols() == target.NumCols()); KALDI_ASSERT(net_out.NumCols() == target.NumCols());
KALDI_ASSERT(net_out.NumRows() == target.NumRows()); KALDI_ASSERT(net_out.NumRows() == target.NumRows());
diff->Resize(net_out.NumRows(), net_out.NumCols());
// compute derivative w.r.t. neural network outputs // compute derivative w.r.t. neural network outputs
diff->Resize(net_out.NumRows(), net_out.NumCols());
diff->CopyFromMat(net_out); diff->CopyFromMat(net_out);
diff->AddMat(-1.0, target); diff->AddMat(-1.0, target);
@ -147,9 +150,9 @@ std::string Mse::Report() {
void MseProgress::Eval(const CuMatrix<BaseFloat>& net_out, const CuMatrix<BaseFloat>& target, CuMatrix<BaseFloat>* diff) { void MseProgress::Eval(const CuMatrix<BaseFloat>& net_out, const CuMatrix<BaseFloat>& target, CuMatrix<BaseFloat>* diff) {
KALDI_ASSERT(net_out.NumCols() == target.NumCols()); KALDI_ASSERT(net_out.NumCols() == target.NumCols());
KALDI_ASSERT(net_out.NumRows() == target.NumRows()); KALDI_ASSERT(net_out.NumRows() == target.NumRows());
diff->Resize(net_out.NumRows(),net_out.NumCols());
//compute derivative w.r.t. neural network outputs //compute derivative w.r.t. neural network outputs
diff->Resize(net_out.NumRows(),net_out.NumCols());
diff->CopyFromMat(net_out); diff->CopyFromMat(net_out);
diff->AddMat(-1.0,target); diff->AddMat(-1.0,target);
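A pattern that recurs in the hunks above and below: host-side matrices and vectors are now sized explicitly before data is copied back from the GPU, which suggests CopyToMat()/CopyToVec() no longer resize their destination. A minimal sketch of the new pattern, written as a fragment in the style of the surrounding code (the name DownloadToHost is a placeholder):

// Sketch only: cu_out stands for any CuMatrix produced on the GPU.
void DownloadToHost(const CuMatrix<BaseFloat> &cu_out,
                    Matrix<BaseFloat> *out_host) {
  out_host->Resize(cu_out.NumRows(), cu_out.NumCols(), kUndefined);  // allocate on the host first
  cu_out.CopyToMat(out_host);  // then copy device -> host
}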

View file

@ -94,8 +94,7 @@ int main(int argc, char *argv[]) {
//the pointer will be given to the nnet, so we don't need to call delete //the pointer will be given to the nnet, so we don't need to call delete
//convert Vector to CuVector //convert Vector to CuVector
CuVector<BaseFloat> cu_shift; CuVector<BaseFloat> cu_shift(shift);
cu_shift.CopyFromVec(shift);
//set the weights //set the weights
shift_component->SetShiftVec(cu_shift); shift_component->SetShiftVec(cu_shift);
@ -110,8 +109,7 @@ int main(int argc, char *argv[]) {
//the pointer will be given to the nnet, so we don't need to call delete //the pointer will be given to the nnet, so we don't need to call delete
//convert Vector to CuVector //convert Vector to CuVector
CuVector<BaseFloat> cu_scale; CuVector<BaseFloat> cu_scale(scale);
cu_scale.CopyFromVec(scale);
//set the weights //set the weights
scale_component->SetScaleVec(cu_scale); scale_component->SetScaleVec(cu_scale);

View file

@ -129,6 +129,7 @@ int main(int argc, char *argv[]) {
} }
// push priors to GPU // push priors to GPU
priors.Resize(tmp_priors.Dim());
priors.CopyFromVec(tmp_priors); priors.CopyFromVec(tmp_priors);
} }
@ -150,7 +151,7 @@ int main(int argc, char *argv[]) {
} }
} }
// push it to gpu // push it to gpu
feats.CopyFromMat(mat); feats = mat;
// fwd-pass // fwd-pass
nnet_transf.Feedforward(feats, &feats_transf); nnet_transf.Feedforward(feats, &feats_transf);
nnet.Feedforward(feats_transf, &nnet_out); nnet.Feedforward(feats_transf, &nnet_out);
@ -170,6 +171,7 @@ int main(int argc, char *argv[]) {
} }
//download from GPU //download from GPU
nnet_out_host.Resize(nnet_out.NumRows(), nnet_out.NumCols());
nnet_out.CopyToMat(&nnet_out_host); nnet_out.CopyToMat(&nnet_out_host);
//check for NaN/inf //check for NaN/inf
for(int32 r=0; r<nnet_out_host.NumRows(); r++) { for(int32 r=0; r<nnet_out_host.NumRows(); r++) {

View file

@ -223,12 +223,13 @@ int main(int argc, char *argv[]) {
//3) propagate the feature to get the log-posteriors (nnet w/o softmax) //3) propagate the feature to get the log-posteriors (nnet w/o softmax)
// push features to GPU // push features to GPU
feats.CopyFromMat(mat); feats = mat;
// possibly apply transform // possibly apply transform
nnet_transf.Feedforward(feats, &feats_transf); nnet_transf.Feedforward(feats, &feats_transf);
// propagate through the nnet (assuming w/o softmax) // propagate through the nnet (assuming w/o softmax)
nnet.Propagate(feats_transf, &nnet_out); nnet.Propagate(feats_transf, &nnet_out);
// pop it back to the HOST // transfer it back to the host
nnet_out_h.Resize(nnet_out.NumRows(), nnet_out.NumCols(), kUndefined);
nnet_out.CopyToMat(&nnet_out_h); nnet_out.CopyToMat(&nnet_out_h);
// TODO: possibly divide by priors
@ -277,7 +278,7 @@ int main(int argc, char *argv[]) {
//7) backpropagate through the nnet //7) backpropagate through the nnet
if (!crossvalidate) { if (!crossvalidate) {
nnet_diff.CopyFromMat(nnet_diff_h); nnet_diff = nnet_diff_h;
nnet.Backpropagate(nnet_diff, NULL); nnet.Backpropagate(nnet_diff, NULL);
} }

View file

@ -139,8 +139,8 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
// push features/targets to GPU // push features/targets to GPU
feats.CopyFromMat(fea_mat); feats = fea_mat;
targets.CopyFromMat(tgt_mat); targets = tgt_mat;
// possibly apply feature transform // possibly apply feature transform
nnet_transf.Feedforward(feats, &feats_transf); nnet_transf.Feedforward(feats, &feats_transf);
// add to cache // add to cache

View file

@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
// push features to GPU // push features to GPU
feats.CopyFromMat(mat); feats = mat;
// possibly apply transform // possibly apply transform
nnet_transf.Feedforward(feats, &feats_transf); nnet_transf.Feedforward(feats, &feats_transf);
// add to cache // add to cache

View file

@ -138,6 +138,7 @@ int main(int argc, char *argv[]) {
num_other_error++; num_other_error++;
} else { //dimension OK } else { //dimension OK
// push features to GPU // push features to GPU
feats.Resize(mat.NumRows(), mat.NumCols(), kUndefined);
feats.CopyFromMat(mat); feats.CopyFromMat(mat);
// possibly apply transform // possibly apply transform
nnet_transf.Feedforward(feats, &feats_transf); nnet_transf.Feedforward(feats, &feats_transf);

View file

@ -132,7 +132,8 @@ int main(int argc, char *argv[]) {
rbm_transf.Feedforward(feats, &feats_transf); rbm_transf.Feedforward(feats, &feats_transf);
// subsample the feats to get faster epochs // subsample the feats to get faster epochs
if(drop_data > 0.0) { if(drop_data > 0.0) {
Matrix<BaseFloat> mat2; Matrix<BaseFloat> mat2(feats_transf.NumRows(), feats_transf.NumCols(),
kUndefined);
feats_transf.CopyToMat(&mat2); feats_transf.CopyToMat(&mat2);
for(int32 r=mat2.NumRows()-1; r >= 0; r--) { for(int32 r=mat2.NumRows()-1; r >= 0; r--) {
if(RandUniform() < drop_data) { if(RandUniform() < drop_data) {

View file

@ -62,8 +62,7 @@ int main(int argc, char *argv[]) {
//the pointer will be given to the nnet, so we don't need to call delete //the pointer will be given to the nnet, so we don't need to call delete
//convert Matrix to CuMatrix //convert Matrix to CuMatrix
CuMatrix<BaseFloat> cu_transform; CuMatrix<BaseFloat> cu_transform(transform);
cu_transform.CopyFromMat(transform);
//set the weights //set the weights
layer->SetLinearity(cu_transform); layer->SetLinearity(cu_transform);