Mirror of https://github.com/mozilla/kaldi.git
A lot of changes: script changes re: neural nets (more efficient I/O, slightly better WERs); various new functionality for nnets, and improved interfaces for some feature-related binaries.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@1976 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
Parent: c1944a7209
Commit: f699fd2be1
@@ -1,11 +1,5 @@
 #!/bin/bash

-# CAUTION: I changed e.g. 1.trans to trans.1 in the scripts. If you ran it
-# part-way through prior to this, to convert to the new naming
-# convention, run:
-# for x in `find . -name '*.trans'`; do mv $x `echo $x | perl -ane 's/(\d+)\.trans/trans.$1/;print;'`; done
-# but be careful as this will not follow soft links.
-
 . cmd.sh

 # call the next line with the directory where the RM data is
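
Side note on the rename one-liner removed above: as its own caution says, it does not descend through soft links. A minimal sketch of a variant that does, assuming GNU find (-L is the only change; try it on a copy first):

for x in `find -L . -name '*.trans'`; do
  # same rename as the original one-liner, but find -L also walks symlinked dirs
  mv $x `echo $x | perl -ane 's/(\d+)\.trans/trans.$1/;print;'`
done
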
@@ -1,5 +1,2 @@
 beam=11.0 # beam for decoding. Was 13.0 in the scripts.
 first_beam=8.0 # beam for 1st-pass decoding in SAT.
-
-
-
@@ -21,8 +21,8 @@
 )

 # Here are the results (copied from RESULTS file)
-#exp/nnet6a/decode_train_dev/wer_10:%WER 24.87 [ 12053 / 48460, 1590 ins, 3017 del, 7446 sub ]
-#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.1 16.0 6.9 2.7 25.6 62.6 |
+#exp/nnet6a/decode_train_dev/wer_11:%WER 24.30 [ 11774 / 48460, 1619 ins, 2877 del, 7278 sub ]
+#exp/nnet6a/decode_eval2000/score_10/eval2000.ctm.filt.sys: | Sum/Avg | 4459 42989 | 77.8 16.0 6.3 3.0 25.3 62.6 |


 # Here are some older results when the system had 2k not 4k leaves and ran from a worse SAT
@@ -191,6 +191,7 @@ exp/tri4a_dnn/decode_bd_tgpr_eval92/wer_10:%WER 4.00 [ 226 / 5643, 34 ins, 12 de
 # and for eval92 is 3.79, the same system. (On this setup, discriminative training helped a lot,
 # which seems to be the reason we can't beat the SGMM+MMI numbers here.)

-exp/nnet5c1/decode_bd_tgpr_dev93/wer_10:%WER 7.48 [ 616 / 8234, 73 ins, 98 del, 445 sub ]
-exp/nnet5c1/decode_bd_tgpr_eval92/wer_11:%WER 4.41 [ 249 / 5643, 29 ins, 19 del, 201 sub ]
-# Note: my 4.41% result is worse than Karel's 4.00%.
+exp/nnet5c1/decode_bd_tgpr_dev93/wer_14:%WER 7.32 [ 603 / 8234, 61 ins, 101 del, 441 sub ]
+exp/nnet5c1/decode_bd_tgpr_eval92/wer_14:%WER 4.39 [ 248 / 5643, 32 ins, 17 del, 199 sub ]
+# Note: my 4.39% result is worse than Karel's 4.00%.
+
@@ -47,7 +47,7 @@ cat links/11-13.1/wsj0/doc/indices/train/tr_s_wv1.ndx | \
 grep -v -i 11-2.1/wsj0/si_tr_s/401 > train_si84.flist

 nl=`cat train_si84.flist | wc -l`
-[ "$nl" -eq 7138 ] || echo "Warning: expected 37416 lines in train_si84.flist, got $nl"
+[ "$nl" -eq 7138 ] || echo "Warning: expected 7138 lines in train_si84.flist, got $nl"

 # This version for SI-284
 cat links/13-34.1/wsj1/doc/indices/si_tr_s.ndx \
@@ -281,7 +281,6 @@ steps/train_quick.sh --cmd "$train_cmd" \
   exp/tri4b/graph_bd_tgpr data/test_eval92 exp/tri4b/decode_bd_tgpr_eval92 || exit 1;
 ) &

-
 # Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
 # all the data). Use 30 jobs.
 steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
+# Apache 2.0
+# This script appends the features in two data directories.
+
+# To be run from .. (one directory up from here)
+# see ../run.sh for example
+# This config creates MFCC features with half the window size and window shift,
+# and splices and sub-samples them. We'll use another script append_feats.sh
+# to combine (append) the data directories.
+
+# Begin configuration section.
+cmd=run.pl
+nj=4
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "usage: append_feats.sh [options] <src-data-dir1> <src-data-dir2> <dest-data-dir> <log-dir> <path-to-storage-dir>";
+  echo "options: "
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  exit 1;
+fi
+
+data_src1=$1
+data_src2=$2
+data=$3
+logdir=$4
+mfccdir=$5
+
+utils/split_data.sh $data_src1 $nj || exit 1;
+utils/split_data.sh $data_src2 $nj || exit 1;
+
+mkdir -p $mfccdir $logdir
+
+rm -rf $data
+mkdir -p `dirname $data` # Make sure directory one level up exists.
+cp -r $data_src1 $data # so we get the other files, such as utt2spk.
+rm $data/cmvn.scp
+rm -r $data/split* 2>/dev/null
+
+# use "name" as part of name of the archive.
+name=`basename $data`
+
+$cmd JOB=1:$nj $logdir/append.JOB.log \
+  append-feats --truncate-frames=true \
+    scp:$data_src1/split$nj/JOB/feats.scp scp:$data_src2/split$nj/JOB/feats.scp \
+    ark,scp:$mfccdir/appended_$name.JOB.ark,$mfccdir/appended_$name.JOB.scp || exit 1;
+
+# concatenate the .scp files together.
+for ((n=1; n<=nj; n++)); do
+  cat $mfccdir/appended_$name.$n.scp || exit 1;
+done > $data/feats.scp
+
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
+if [ $nf -ne $nu ]; then
+  echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
+  echo "consider using utils/fix_data_dir.sh $data"
+fi
+
+echo "Succeeded creating MFCC features for $name"
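
For orientation, a hypothetical invocation of the new append_feats.sh (the directory names here are illustrative, not from this commit, and the steps/ location is an assumption based on where the other wrappers live). It appends the features from two data directories utterance-by-utterance into a new one:

steps/append_feats.sh --cmd run.pl --nj 4 \
  data/train_mfcc data/train_mfcc_half data/train_appended \
  exp/make_appended/log mfcc/appended
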
@@ -18,6 +18,10 @@ max_mem=20000000 # This will stop the processes getting too large.
 # This is in bytes, but not "real" bytes-- you have to multiply
 # by something like 5 or 10 to get real bytes (not sure why so large)
 # End configuration section.
+num_threads=1 # Number of threads used in nnet-logprob computation. If you set
+  # this to a different value, make sure to also set the appropriate
+  # queue options. If you set this too high it won't use all the
+  # threads as most of the time will be taken in the decoder.

 echo "$0 $@" # Print the command line for logging
@@ -104,9 +108,10 @@ fi

 if [ $sub_split -eq 1 ]; then
   $cmd JOB=1:$nj $dir/log/decode_den.JOB.log \
-    nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
+    nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats" ark:- \| \
+    latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
      --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
-     $dir/dengraph/HCLG.fst "$feats" "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
+     $dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.JOB.gz" || exit 1;
 else
   for n in `seq $nj`; do
     if [ -f $dir/.done.$n ] && [ $dir/.done.$n -nt $alidir/final.mdl ]; then
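
The change above splits denominator-lattice decoding into a multi-threaded log-prob stage piped into a generic mapped decoder. A minimal standalone sketch of the same pipe, outside $cmd, with illustrative paths and beams (the binaries and flags are the ones used in the diff):

# compute per-frame pseudo-log-likelihoods, then decode them with the
# model-independent mapped decoder
nnet-logprob-parallel --num-threads=4 exp/nnet/final.mdl "$feats" ark:- | \
  latgen-faster-mapped --beam=13.0 --lattice-beam=7.0 --acoustic-scale=0.1 \
    --word-symbol-table=data/lang/words.txt exp/nnet/final.mdl \
    exp/nnet/dengraph/HCLG.fst ark:- "ark:|gzip -c >exp/nnet/lat.1.gz"
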
@@ -120,9 +125,10 @@ else
       mkdir -p $dir/part
       feats_subset=`echo $feats | sed "s/trans.JOB/trans.$n/g" | sed s:JOB/:$n/split$sub_split/JOB/:g`
       $cmd JOB=1:$sub_split $dir/log/$n/decode_den.JOB.log \
-        nnet-latgen-faster --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
+        nnet-logprob-parallel --num-threads=$num_threads $srcdir/final.mdl "$feats_subset" ark:- \| \
+        latgen-faster-mapped --beam=$beam --lattice-beam=$lattice_beam --acoustic-scale=$acwt \
          --max-mem=$max_mem --max-active=$max_active --word-symbol-table=$lang/words.txt $srcdir/final.mdl \
-         $dir/dengraph/HCLG.fst "$feats_subset" "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
+         $dir/dengraph/HCLG.fst ark:- "ark:|gzip -c >$dir/lat.$n.JOB.gz" || exit 1;
       echo Merging archives for data subset $n
       rm $dir/.error 2>/dev/null;
       for k in `seq $sub_split`; do
@@ -14,18 +14,24 @@ num_iters_final=10 # Number of final iterations to give to the
   # optimization over the validation set.
 initial_learning_rate=0.02 # for RM; or 0.01 is suitable for Swbd.
 final_learning_rate=0.004 # for RM; or 0.001 is suitable for Swbd.
-num_valid_utts=300 # held-out utterances, used only for diagnostics.
-num_valid_frames_shrink=2000 # a subset of the frames in "valid_utts", used only
-  # for estimating shrinkage parameters and for
-  # objective-function reporting.
+num_utts_subset=300 # number of utterances in validation and training
+  # subsets used for shrinkage and diagnostics
+num_valid_frames_shrink=0 # number of validation frames in the subset
+  # used for shrinking
+num_train_frames_shrink=2000 # number of training frames in the subset used
+  # for shrinking (by default we use all training
+  # frames for this.)
 shrink_interval=3 # shrink every $shrink_interval iters,
   # except at the start of training when we do it every iter.
-num_valid_frames_combine=10000 # combination weights at the very end.
+num_valid_frames_combine=0 # #valid frames for combination weights at the very end.
+num_train_frames_combine=10000 # # train frames for the above.
+num_frames_diagnostic=4000 # number of frames for "compute_prob" jobs
 minibatch_size=128 # by default use a smallish minibatch size for neural net training; this controls instability
   # which would otherwise be a problem with multi-threaded update. Note:
   # it also interacts with the "preconditioned" update, so it's not completely cost free.
-samples_per_iteration=400000 # each iteration of training, see this many samples
-  # per job.
+samples_per_iter=400000 # each iteration of training, see this many samples
+  # per job. This is just a guideline; it will pick a number
+  # that divides the number of samples in the entire data.
 shuffle_buffer_size=5000 # This "buffer_size" variable controls randomization of the samples
   # on each iter. You could set it to 0 or to a large value for complete
   # randomization, but this would both consume memory and cause spikes in
@@ -37,13 +43,13 @@ add_layers_period=2 # by default, add new layers every 2 iterations.
 num_hidden_layers=2
 initial_num_hidden_layers=1 # we'll add the rest one by one.
 num_parameters=2000000 # 2 million parameters by default.
-stage=-7
+stage=-9
 realign_iters=""
 beam=10 # for realignment.
 retry_beam=40
 scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
 parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
-shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
+io_opts="-tc 5" # for jobs with a lot of I/O, limits the number running at one time.
 nnet_config_opts=
 splice_width=4 # meaning +- 4 frames on each side for second LDA
 lda_dim=250
@@ -54,7 +60,11 @@ shrink=true
 mix_up=0 # Number of components to mix up to (should be > #tree leaves, if
   # specified.)
 num_threads=16
-mkl_num_threads=1
+valid_is_heldout=false # For some reason, holding out the validation set from the training set
+  # seems to hurt, so by default we don't do it (i.e. it's included in training)
+random_copy=false
+cleanup=true
 # End configuration section.

 echo "$0 $@" # Print the command line for logging
@@ -72,7 +82,7 @@ if [ $# != 4 ]; then
 echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
 echo " --num-epochs <#epochs|15> # Number of epochs of main training"
 echo " # while reducing learning rate (determines #iterations, together"
-echo " # with --samples-per-iteration and --num-jobs-nnet)"
+echo " # with --samples-per-iter and --num-jobs-nnet)"
 echo " --num-epochs-extra <#epochs-extra|5> # Number of extra epochs of training"
 echo " # after learning rate fully reduced"
 echo " --initial-learning-rate <initial-learning-rate|0.02> # Learning rate at start of training, e.g. 0.02 for small"
@@ -95,21 +105,27 @@ if [ $# != 4 ]; then
 echo " # this, you may want to decrease the batch size."
 echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
 echo " # use multiple threads."
-echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
-echo " # data. (prevents stressing the disk). "
+echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for jobs that do a lot of I/O."
 echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
 echo " # should not get too large, e.g. >2k)."
-echo " --samples-per-iteration <#samples|400000> # Number of samples of data to process per iteration, per"
+echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, per"
 echo " # process."
 echo " --splice-width <width|4> # Number of frames on each side to append for feature input"
 echo " # (note: we splice processed, typically 40-dimensional frames"
 echo " --lda-dim <dim|250> # Dimension to reduce spliced features to with LDA"
 echo " --num-iters-final <#iters|10> # Number of final iterations to give to nnet-combine-fast to "
 echo " # interpolate parameters (the weights are learned with a validation set)"
-echo " --stage <stage|-7> # Used to run a partially-completed training process from somewhere in"
+echo " --num-utts-subset <#utts|300> # Number of utterances in subsets used for validation and diagnostics"
+echo " # (the validation subset is held out from training)"
+echo " --num-valid-frames-shrink <#frames|2000> # Number of frames from the validation set used for shrinking"
+echo " --num-train-frames-shrink <#frames|0> # Number of frames from the training set used for shrinking"
+echo " # (by default it's included in training, which for some reason helps)."
+echo " --num-frames-diagnostic <#frames|4000> # Number of frames used in computing (train,valid) diagnostics"
+echo " --num-valid-frames-combine <#frames|10000> # Number of frames used in getting combination weights at the"
+echo " # very end."
+echo " --stage <stage|-9> # Used to run a partially-completed training process from somewhere in"
 echo " # the middle."


 exit 1;
 fi

@@ -144,8 +160,11 @@ cp $alidir/tree $dir


 # Get list of validation utterances.
-awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_valid_utts \
+awk '{print $1}' $data/utt2spk | utils/shuffle_list.pl | head -$num_utts_subset \
   > $dir/valid_uttlist || exit 1;
+awk '{print $1}' $data/utt2spk | utils/filter_scp.pl --exclude $dir/valid_uttlist | \
+  head -$num_utts_subset > $dir/train_subset_uttlist || exit 1;
+

 ## Set up features. Note: these are different from the normal features
 ## because we have one rspecifier that has the features for the entire
@@ -154,33 +173,49 @@ if [ -f $alidir/final.mat ]; then feat_type=lda; else feat_type=delta; fi
 echo "$0: feature type is $feat_type"

 case $feat_type in
-  delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
-    split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | add-deltas ark:- ark:- |"
+  delta) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
     valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
+    train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | add-deltas ark:- ark:- |"
   ;;
-  lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
-    split_feats="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+  lda) feats="ark,s,cs:utils/filter_scp.pl --exclude $dir/valid_uttlist $sdata/JOB/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
     valid_feats="ark,s,cs:utils/filter_scp.pl $dir/valid_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
+    train_subset_feats="ark,s,cs:utils/filter_scp.pl $dir/train_subset_uttlist $data/feats.scp | apply-cmvn --norm-vars=false --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp scp:- ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $dir/final.mat ark:- ark:- |"
     cp $alidir/final.mat $dir
   ;;
   *) echo "$0: invalid feature type $feat_type" && exit 1;
 esac
 if [ -f $alidir/trans.1 ]; then
   echo "$0: using transforms from $alidir"
-  feats="$feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
-  split_feats="$split_feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
+  feats="$feats transform-feats --utt2spk=ark:$sdata/JOB/utt2spk ark:$alidir/trans.JOB ark:- ark:- |"
   valid_feats="$valid_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
+  train_subset_feats="$train_subset_feats transform-feats --utt2spk=ark:$data/utt2spk 'ark:cat $alidir/trans.*|' ark:- ark:- |"
 fi

+if [ $stage -le -9 ]; then
+  echo "$0: working out number of frames of training data"
+  num_frames=`feat-to-len scp:$data/feats.scp ark,t:- | awk '{x += $2;} END{print x;}'` || exit 1;
+  echo $num_frames > $dir/num_frames
+else
+  num_frames=`cat $dir/num_frames` || exit 1;
+fi
+
+# Working out number of iterations per epoch.
+iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"` || exit 1;
+[ $iters_per_epoch -eq 0 ] && iters_per_epoch=1
+samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]
+echo "Every epoch, splitting the data up into $iters_per_epoch iterations,"
+echo "giving samples-per-iteration of $samples_per_iter_real (you requested $samples_per_iter)."
+

 ## Do LDA on top of whatever features we already have; store the matrix which
 ## we'll put into the neural network as a constant.

-if [ $stage -le -7 ]; then
-  echo "Accumulating LDA statistics."
+if [ $stage -le -8 ]; then
+  echo "$0: Accumulating LDA statistics."
   $cmd JOB=1:$nj $dir/log/lda_acc.JOB.log \
     ali-to-post "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
     weight-silence-post 0.0 $silphonelist $alidir/final.mdl ark:- ark:- \| \
-    acc-lda --rand-prune=$randprune $alidir/final.mdl "$split_feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
+    acc-lda --rand-prune=$randprune $alidir/final.mdl "$feats splice-feats --left-context=$splice_width --right-context=$splice_width ark:- ark:- |" ark,s,cs:- \
       $dir/lda.JOB.acc || exit 1;
   est-lda --dim=$lda_dim $dir/lda.mat $dir/lda.*.acc \
     2>$dir/log/lda_est.log || exit 1;
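
To make the iterations-per-epoch arithmetic added above concrete, a worked example with assumed values (num_frames=3,600,000, samples_per_iter=400,000, num_jobs_nnet=4):

num_frames=3600000; samples_per_iter=400000; num_jobs_nnet=4
# 3600000/(400000*4) = 2.25, plus 0.5 and truncated gives 2 iterations per epoch
iters_per_epoch=`perl -e "print int($num_frames/($samples_per_iter * $num_jobs_nnet) + 0.5);"`
# 3600000/(4*2) = 450000 actual samples per iteration per job
samples_per_iter_real=$[$num_frames/($num_jobs_nnet*$iters_per_epoch)]
echo "$iters_per_epoch iters/epoch, $samples_per_iter_real samples per iter per job"
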
@@ -195,7 +230,7 @@ if [ $initial_num_hidden_layers -gt $num_hidden_layers ]; then
 fi


-if [ $stage -le -6 ]; then
+if [ $stage -le -7 ]; then
   echo "$0: initializing neural net";
   # to hidden.config it will write the part of the config corresponding to a
   # single hidden layer; we need this to add new layers.
@@ -219,14 +254,14 @@ if [ $stage -le -6 ]; then
     $dir/0.mdl || exit 1;
 fi

-if [ $stage -le -5 ]; then
+if [ $stage -le -6 ]; then
   echo "Training transition probabilities and setting priors"
   $cmd $dir/log/train_trans.log \
     nnet-train-transitions $dir/0.mdl "ark:gunzip -c $alidir/ali.*.gz|" $dir/0.mdl \
     || exit 1;
 fi

-if [ $stage -le -4 ]; then
+if [ $stage -le -5 ]; then
   echo "Compiling graphs of transcripts"
   $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
     compile-train-graphs $dir/tree $dir/0.mdl $lang/L.fst \
@@ -239,118 +274,113 @@ cp $alidir/ali.*.gz $dir

 nnet_context_opts="--left-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w left-context | awk '{print $2}'` --right-context=`nnet-am-info $dir/0.mdl 2>/dev/null | grep -w right-context | awk '{print $2}'`" || exit 1;

-if [ $stage -le -3 ]; then
-  echo "Getting validation examples."
-  $cmd $dir/log/create_valid_subset_shrink.log \
+if [ $stage -le -4 ]; then
+  echo "Getting validation and training subset examples."
+  rm $dir/.error 2>/dev/null
+  $cmd $dir/log/create_valid_subset.log \
     nnet-get-egs $nnet_context_opts "$valid_feats" \
     "ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
-    "ark:$dir/valid_all.egs" || exit 1;
-  echo "Getting subsets of validation examples for shrinking and combination."
+    "ark:$dir/valid_all.egs" || touch $dir/.error &
+  $cmd $dir/log/create_train_subset.log \
+    nnet-get-egs $nnet_context_opts "$train_subset_feats" \
+    "ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $dir/0.mdl ark:- ark:- | ali-to-post ark:- ark:- |" \
+    "ark:$dir/train_subset_all.egs" || touch $dir/.error &
+  wait;
+  [ -f $dir/.error ] && exit 1;
+  echo "Getting subsets of validation examples for shrinking, diagnostics and combination."
   $cmd $dir/log/create_valid_subset_shrink.log \
-    nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs ark:$dir/valid_shrink.egs &
+    nnet-subset-egs --n=$num_valid_frames_shrink ark:$dir/valid_all.egs \
+    ark:$dir/valid_shrink.egs || touch $dir/.error &
   $cmd $dir/log/create_valid_subset_combine.log \
-    nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs ark:$dir/valid_combine.egs &
+    nnet-subset-egs --n=$num_valid_frames_combine ark:$dir/valid_all.egs \
+    ark:$dir/valid_combine.egs || touch $dir/.error &
+  $cmd $dir/log/create_valid_subset_diagnostic.log \
+    nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/valid_all.egs \
+    ark:$dir/valid_diagnostic.egs || touch $dir/.error &
+
+  $cmd $dir/log/create_train_subset_shrink.log \
+    nnet-subset-egs --n=$num_train_frames_shrink ark:$dir/train_subset_all.egs \
+    ark:$dir/train_shrink.egs || touch $dir/.error &
+  $cmd $dir/log/create_train_subset_combine.log \
+    nnet-subset-egs --n=$num_train_frames_combine ark:$dir/train_subset_all.egs \
+    ark:$dir/train_combine.egs || touch $dir/.error &
+  $cmd $dir/log/create_train_subset_diagnostic.log \
+    nnet-subset-egs --n=$num_frames_diagnostic ark:$dir/train_subset_all.egs \
+    ark:$dir/train_diagnostic.egs || touch $dir/.error &
   wait
-  [ ! -s $dir/valid_shrink.egs ] && echo "No validation examples for shrinking" && exit 1;
-  [ ! -s $dir/valid_combine.egs ] && echo "No validation examples for combination" && exit 1;
-  rm $dir/valid_all.egs
+  cat $dir/valid_shrink.egs $dir/train_shrink.egs > $dir/shrink.egs
+  cat $dir/valid_combine.egs $dir/train_combine.egs > $dir/combine.egs
+  for f in $dir/{shrink,combine,train_diagnostic,valid_diagnostic}.egs; do
+    [ ! -s $f ] && echo "No examples in file $f" && exit 1;
+  done
+  rm $dir/valid_all.egs $dir/train_subset_all.egs $dir/{train,valid}_{shrink,combine}.egs
 fi

-if [ $stage -le -2 ]; then
+if [ $stage -le -3 ]; then
   mkdir -p $dir/egs
   mkdir -p $dir/temp
   echo "Creating training examples";
-  # in $dir/egs, create $num_jobs_nnet separate files with training examples,
-  # with randomly shuffled order. We shuffle the order of examples in each
-  # file. Then on each iteration, for each training process, we'll take a
-  # random subset of blocks of examples within that process's file.
-  # We take them in blocks, because it avoids the overhead of fseek() while
-  # creating the examples.
+  # in $dir/egs, create $num_jobs_nnet separate files with training examples.
+  # The order is not randomized at this point.

   egs_list=
   for n in `seq 1 $num_jobs_nnet`; do
-    egs_list="$egs_list ark,scp:$dir/egs/egs_orig.$n.ark,$dir/egs/egs_orig.$n.scp"
+    egs_list="$egs_list ark:$dir/egs/egs_orig.$n.JOB.ark"
   done
   echo "Generating training examples on disk"
   # The examples will go round-robin to egs_list.
-  $cmd $dir/log/get_egs.log \
+  $cmd $io_opts JOB=1:$nj $dir/log/get_egs.JOB.log \
     nnet-get-egs $nnet_context_opts "$feats" \
-    "ark,cs:gunzip -c $dir/ali.*.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
+    "ark,cs:gunzip -c $dir/ali.JOB.gz | ali-to-pdf $alidir/final.mdl ark:- ark:- | ali-to-post ark:- ark:- |" ark:- \| \
     nnet-copy-egs ark:- $egs_list || exit 1;
 fi

+if [ $stage -le -2 ]; then
+  # combine all the "egs_orig.JOB.*.scp" (over the $nj splits of the data) and
+  # then split into multiple parts egs.JOB.*.scp for different parts of the
+  # data, 0 .. $iters_per_epoch-1.
+
+  if [ $iters_per_epoch -eq 1 ]; then
+    echo "Since iters-per-epoch == 1, just concatenating the data."
+    for n in `seq 1 $num_jobs_nnet`; do
+      cat $dir/egs/egs_orig.$n.*.ark > $dir/egs/egs_tmp.$n.0.ark || exit 1;
+      rm $dir/egs/egs_orig.$n.*.ark || exit 1;
+    done
+  else # We'll have to split it up using nnet-copy-egs.
+    egs_list=
+    for n in `seq 0 $[$iters_per_epoch-1]`; do
+      egs_list="$egs_list ark:$dir/egs/egs_tmp.JOB.$n.ark"
+    done
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/split_egs.JOB.log \
+      nnet-copy-egs --random=$random_copy --srand=JOB \
+      "ark:cat $dir/egs/egs_orig.JOB.*.ark|" $egs_list '&&' \
+      rm $dir/egs/egs_orig.JOB.*.ark || exit 1;
+  fi
+fi
+
 if [ $stage -le -1 ]; then
   # Next, shuffle the order of the examples in each of those files.
-  # In order to not use too much memory (in case the size of the files is
-  # huge) we do this by randomizing the order of the .scp file and then
-  # just call nnet-copy-egs. If the file system is willing to store
-  # stuff in memory, it is free to do so. This is not super-optimal in
-  # terms of file system performance but it's simple and it won't fail when
-  # the data gets large.
+  # Each one should not be too large, so we can do this in memory.
   echo "Shuffling the order of training examples"
   echo "(in order to avoid stressing the disk, these won't all run at once)."
-  $cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
-    utils/shuffle_list.pl --srand JOB $dir/egs/egs_orig.JOB.scp \| \
-    nnet-copy-egs scp:- ark,scp:$dir/egs/egs.JOB.ark,$dir/egs/egs.JOB.scp \
-    '&&' rm $dir/egs/egs_orig.JOB.ark $dir/egs/egs_orig.JOB.scp
-  smallest_len=`wc -l $dir/egs/egs.*.scp | sort -n -k1 | awk '{print $1}' | head -1`
-  # If the $samples_per_iteration is more than each split of the data,
-  # append to each .scp file the .scp files from the next one or two
-  # splits (or more), so each one is larger...
-  rm $dir/egs/egs.*.scp.orig 2>/dev/null
-  if [ $samples_per_iteration -gt $smallest_len ]; then
-    extra_files=$[($samples_per_iteration-1) / $smallest_len]
-    echo Each part of the data has about $smallest_len lines which is less than the
-    echo samples per iteration $samples_per_iteration, so appending next $extra_files
-    echo files to each scp file
-    for n in `seq $num_jobs_nnet`; do mv $dir/egs/egs.$n.scp $dir/egs/egs.$n.scp.orig; done
-    for n in `seq $num_jobs_nnet`; do
-      for e in `seq 0 $extra_files`; do
-        m=$[(($n + $e - 1)%$num_jobs_nnet)+1]
-        cat $dir/egs/egs.$m.scp.orig
-      done > $dir/egs/egs.$n.scp
-    done
+  for n in `seq 0 $[$iters_per_epoch-1]`; do
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$n.JOB.log \
+      nnet-shuffle-egs "--srand=\$[JOB+($num_jobs_nnet*$n)]" \
+      ark:$dir/egs/egs_tmp.JOB.$n.ark ark:$dir/egs/egs.JOB.$n.ark '&&' \
+      rm $dir/egs/egs_tmp.JOB.$n.ark || exit 1;
   done
 fi
-fi

-num_egs=`grep wrote $dir/log/get_egs.log | tail -1 | awk '{print $NF}'` || exit 1;
-! [ $num_egs -gt 0 ] && echo "bad num_egs $num_egs" && exit 1;
-num_iters_reduce=$[ 1 + (($num_egs * $num_epochs)/($num_jobs_nnet * $samples_per_iteration))]
-num_iters_extra=$[1 + (($num_egs * $num_epochs_extra)/($num_jobs_nnet * $samples_per_iteration))]
+num_iters_reduce=$[$num_epochs * $iters_per_epoch];
+num_iters_extra=$[$num_epochs_extra * $iters_per_epoch];
 num_iters=$[$num_iters_reduce+$num_iters_extra]

 echo "Will train for $num_epochs + $num_epochs_extra epochs, equalling "
 echo " $num_iters_reduce + $num_iters_extra = $num_iters iterations, "
 echo " (while reducing learning rate) + (with constant learning rate)."

-function get_list {
-  # usage: get_list <samples-per-iter> <iter> <input-file> >output
-  #
-  # Outputs an scp file for this job for this iteration. The
-  # output will have <samples-per-iter> lines, and will contain lines from
-  # egs.JOB.scp, possibly with repeats. It will be sorted numerically on its
-  # first field, so the .ark file is accessed in order (we then pipe to
-  # nnet-shuffle-egs to randomize the order). The way we do it is, we imagine
-  # we had concatenated the file $dir/egs/egs.JOB.scp infinite times, and
-  # taken from the concatenated file, the lines
-  # <samples-per-iter> * <iter> ... <samples-per-iter> * (<iter> + 1) - 1,
-  # and then sorted them on the first field (which is a number).
-  # We don't actually implement it this way, we do it a bit more efficiently.
-  # We require that samples-per-iter <= (#lines in input-file).
-  [ $# -ne 3 ] && echo "get_list: bad usage" && exit 1;
-  samples_per_iter=$1
-  my_iter=$2
-  input_file=$3
-  start=$[$my_iter * $samples_per_iter]; # starting-point in concatenated file.
-  input_len=`cat $input_file | wc -l`
-  start=$[$start - $input_len*($start/$input_len)]; # remove whole multiples of input_len
-  # we have to concatenate the input file to itself.
-  cat $input_file $input_file | \
-    head -n $[$start + $samples_per_iter] | tail -n $samples_per_iter | \
-    sort -k2 -k1n
-}
-

 # up till $last_normal_shrink_iter we will shrink the parameters
 # in the normal way using the dev set, but after that we will
 # only re-compute the shrinkage parameters periodically.
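
The shuffle stage above seeds each nnet-shuffle-egs job with JOB+(num_jobs_nnet*n), which gives every (job, sub-iteration) archive a distinct seed. A quick runnable illustration with assumed values:

num_jobs_nnet=4; iters_per_epoch=3
for n in `seq 0 $[$iters_per_epoch-1]`; do
  for JOB in `seq $num_jobs_nnet`; do
    # prints seeds 1..12, one per archive, with no repeats
    echo "sub-iter $n, job $JOB: srand=$[$JOB+($num_jobs_nnet*$n)]"
  done
done
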
@@ -361,22 +391,19 @@ x=0
 while [ $x -lt $num_iters ]; do
   if [ $x -ge 0 ] && [ $stage -le $x ]; then

-    # Set off a job that does diagnostics, in the background.
-    $cmd $parallel_opts $dir/log/compute_prob.$x.log \
-      nnet-compute-prob $dir/$x.mdl ark:$dir/valid_shrink.egs &
+    # Set off jobs doing some diagnostics, in the background.
+    $cmd $dir/log/compute_prob_valid.$x.log \
+      nnet-compute-prob $dir/$x.mdl ark:$dir/valid_diagnostic.egs &
+    $cmd $dir/log/compute_prob_train.$x.log \
+      nnet-compute-prob $dir/$x.mdl ark:$dir/train_diagnostic.egs &

     if echo $realign_iters | grep -w $x >/dev/null; then
       echo "Realigning data (pass $x)"
       $cmd JOB=1:$nj $dir/log/align.$x.JOB.log \
         nnet-align-compiled $scale_opts --beam=$beam --retry-beam=$retry_beam "$dir/$x.mdl" \
-        "ark:gunzip -c $dir/fsts.JOB.gz|" "$split_feats" \
+        "ark:gunzip -c $dir/fsts.JOB.gz|" "$feats" \
         "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
     fi
-    for n in `seq $num_jobs_nnet`; do
-      # the following command gets a subset of the n'th scp file, containing
-      # $samples_per_iteration lines.
-      get_list $samples_per_iteration $x $dir/egs/egs.$n.scp > $dir/temp/egs.$x.$n.scp
-    done

     echo "Training neural net (pass $x)"
     if [ $x -gt 0 ] && \
@@ -388,9 +415,8 @@ while [ $x -lt $num_iters ]; do
     fi

     $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.JOB.log \
-      MKL_NUM_THREADS=$mkl_num_threads \
       nnet-shuffle-egs --buffer-size=$shuffle_buffer_size --srand=$x \
-      scp:$dir/temp/egs.$x.JOB.scp ark:- \| \
+      ark:$dir/egs/egs.JOB.$[$x%$iters_per_epoch].ark ark:- \| \
       nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
       "$mdl" ark:- $dir/$[$x+1].JOB.mdl \
       || exit 1;
@@ -410,10 +436,10 @@ while [ $x -lt $num_iters ]; do
       if [ $x -le $last_normal_shrink_iter ] || [ $[$x % $shrink_interval] -eq 0 ]; then
         # For earlier iterations (while we've recently been adding layers), or every
         # $shrink_interval=3 iters, just do shrinking normally.
+        mb=$[($num_valid_frames_shrink+$num_train_frames_shrink+$num_threads-1)/$num_threads]
         $cmd $parallel_opts $dir/log/shrink.$x.log \
-          MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads --verbose=3 \
-          --minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
-          $dir/$[$x+1].mdl ark:$dir/valid_shrink.egs $dir/$[$x+1].mdl || exit 1;
+          nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
+          $dir/$[$x+1].mdl ark:$dir/shrink.egs $dir/$[$x+1].mdl || exit 1;
       fi
     fi
     if [ "$mix_up" -gt 0 ] && [ $x -eq $mix_up_iter ]; then
@@ -423,7 +449,7 @@ while [ $x -lt $num_iters ]; do
         nnet-am-mixup --min-count=10 --num-mixtures=$mix_up \
         $dir/$[$x+1].mdl $dir/$[$x+1].mdl || exit 1;
     fi
-    rm $nnets_list $dir/temp/egs.$x.*.scp
+    rm $nnets_list
   fi
   x=$[$x+1]
 done
@@ -435,15 +461,32 @@ nnets_list=
 for x in `seq $[$num_iters-$num_iters_final+1] $num_iters`; do
   [ $x -gt $mix_up_iter ] && nnets_list="$nnets_list $dir/$x.mdl"
 done
+if [ $stage -le $num_iters ]; then
+  mb=$[($num_valid_frames_combine+$num_train_frames_combine+$num_threads-1)/$num_threads]
   $cmd $parallel_opts $dir/log/combine.log \
-    MKL_NUM_THREADS=$mkl_num_threads nnet-combine-fast --num-threads=$num_threads \
-    --verbose=3 --minibatch-size=$[($num_valid_frames_shrink+$num_threads-1)/$num_threads] \
-    $nnets_list ark:$dir/valid_combine.egs $dir/final.mdl || exit 1;
+    nnet-combine-fast --num-threads=$num_threads --verbose=3 --minibatch-size=$mb \
+    $nnets_list ark:$dir/combine.egs $dir/final.mdl || exit 1;
+fi

 # Compute the probability of the final, combined model with
 # the same subset we used for the previous compute_probs, as the
 # different subsets will lead to different probs.
-$cmd $parallel_opts $dir/log/compute_prob.final.log \
-  nnet-compute-prob $dir/final.mdl ark:$dir/valid_shrink.egs || exit 1;
+$cmd $dir/log/compute_prob_valid.final.log \
+  nnet-compute-prob $dir/final.mdl ark:$dir/valid_diagnostic.egs &
+$cmd $dir/log/compute_prob_train.final.log \
+  nnet-compute-prob $dir/final.mdl ark:$dir/train_diagnostic.egs &

 echo Done

+if $cleanup; then
+  echo Cleaning up data
+  echo Removing training examples
+  rm -r $dir/egs
+  echo Removing most of the models
+  for x in `seq 0 $num_iters`; do
+    if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then
+      # delete all but every 10th model; don't delete the ones which combine to form the final model.
+      rm $dir/$x.mdl
+    fi
+  done
+fi
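
The cleanup block added above keeps every 10th model plus the final num_iters_final models that feed nnet-combine-fast. A dry-run sketch with assumed values (num_iters=25, num_iters_final=10) that only prints what the loop would delete:

dir=exp/nnet_example; num_iters=25; num_iters_final=10
for x in `seq 0 $num_iters`; do
  if [ $[$x%10] -ne 0 ] && [ $x -lt $[$num_iters-$num_iters_final+1] ]; then
    # deletes 1-9 and 11-15; keeps 0, 10, and 16-25
    echo "would remove $dir/$x.mdl"
  fi
done
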
@@ -31,16 +31,17 @@ num_jobs_nnet=8 # Number of neural net training jobs to run in parallel.
   # not the same as the num-jobs (nj) which will be the same as the
   # alignment and denlat directories.
 stage=0
-sub_stage=-2 # this can be used to start from a particular sub-iteration of an
+sub_stage=-3 # this can be used to start from a particular sub-iteration of an
   # iteration
 acwt=0.1
 boost=0.0 # boosting for BMMI (you can try 0.1).. this is applied per frame.
 transform_dir= # Note: by default any transforms in $alidir will be used.

 parallel_opts="-pe smp 16" # by default we use 16 threads; this lets the queue know.
-shuffle_opts="-tc 5" # max 5 jobs running at one time (a lot of I/O.)
+io_opts="-tc 10" # max 10 jobs running at one time (a lot of I/O.)
 num_threads=16 # number of threads for neural net trainer..
 mkl_num_threads=1
+random_copy=false
 # End configuration section.

 echo "$0 $@" # Print the command line for logging
@@ -71,8 +72,7 @@ if [ $# != 6 ]; then
 echo " # this, you may want to decrease the batch size."
 echo " --parallel-opts <opts|\"-pe smp 16\"> # extra options to pass to e.g. queue.pl for processes that"
 echo " # use multiple threads."
-echo " --shuffle-opts <opts|\"-tc 5\"> # Options given to e.g. queue.pl for the job that shuffles the "
-echo " # data. (prevents stressing the disk). "
+echo " --io-opts <opts|\"-tc 10\"> # Options given to e.g. queue.pl for any especially I/O intensive jobs"
 echo " --minibatch-size <minibatch-size|128> # Size of minibatch to process (note: product with --num-threads"
 echo " # should not get too large, e.g. >2k)."
 echo " --samples-per-iter <#samples|400000> # Number of samples of data to process per iteration, for each"
@@ -181,34 +181,37 @@ while [ $x -lt $num_epochs ]; do
   echo "Epoch $x of $num_epochs"

   if [ $stage -le $x ] && $first_iter_of_epoch; then
-    if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
+    if [ $stage -lt $x ] || [ $sub_stage -le -3 ]; then
       # First get the per-frame posteriors, by rescoring the lattices; this
       # process also gives us at the same time the posteriors of each state for
       # each frame (by default, pruned to 0.01 with a randomized algorithm).
       # The matrix-logprob stage produces a diagnostic and passes the pseudo-log-like
-      # matrix through unchanged.
-      $cmd JOB=1:$nj $dir/log/post.$z.JOB.log \
-        nnet-logprob2 $dir/$x.1.mdl "$feats" "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
+      # matrix through unchanged. (Note: nnet-logprob2-parallel can use up to
+      # $num_threads threads, but in practice it may be limited by the speed of
+      # the other elements of the pipe.
+      $cmd $parallel_opts JOB=1:$nj $dir/log/post.$z.JOB.log \
+        nnet-logprob2-parallel --num-threads=$num_threads $dir/$x.1.mdl "$feats" \
+        "ark:|prob-to-post ark:- ark:- | gzip -c >$dir/post/smooth_post.$z.JOB.gz" ark:- \| \
         matrix-logprob ark:- "ark:gunzip -c $alidir/ali.JOB.gz | ali-to-pdf $dir/$x.1.mdl ark:- ark:-|" ark:- \| \
         lattice-rescore-mapped $dir/$x.1.mdl "ark:gunzip -c $denlatdir/lat.JOB.gz|" ark:- ark:- \| \
         lattice-boost-ali --b=$boost --silence-phones=$silphonelist $dir/$x.1.mdl ark:- "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
         lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
         post-to-pdf-post $dir/$x.1.mdl ark:- "ark:|gzip -c >$dir/post/den_post.$z.JOB.gz" || exit 1;
     fi
-    if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
+    if [ $stage -lt $x ] || [ $sub_stage -le -2 ]; then
       # run nnet-get-egs for all files, to get the training examples for each frame--
       # combines the feature and label/posterior information. The posterior information
       # consists of 2 things: the numerator posteriors from the alignments, the denominator
       # posteriors from the lattices (times -1), and the smoothing posteriors from the
       # neural net log-probs (times E).
       # We copy the examples for each job round-robin to multiple archives, one for each
-      # of 1...$num_jobs_nnet. We write these along with .scp files, for more convenient
-      # and memory-efficient randomization.
+      # of 1...$num_jobs_nnet.
       egs_out=""
       for n in `seq 1 $num_jobs_nnet`; do
-        egs_out="$egs_out ark,scp:$dir/egs/egs.$z.$n.JOB.ark,$dir/egs/egs.$z.$n.JOB.scp"
+        # indexes are egs_orig.$z.$num_jobs_nnet.$nj
+        egs_out="$egs_out ark:$dir/egs/egs_orig.$z.$n.JOB.ark"
       done
-      $cmd JOB=1:$nj $dir/log/egs.$z.JOB.log \
+      $cmd JOB=1:$nj $dir/log/get_egs.$z.JOB.log \
         ali-to-pdf $dir/$x.1.mdl "ark:gunzip -c $alidir/ali.JOB.gz|" ark:- \| \
         ali-to-post ark:- ark:- \| \
         sum-post --scale2=$E ark:- "ark:gunzip -c $dir/post/smooth_post.$z.JOB.gz|" ark:- \| \
@@ -223,23 +226,33 @@ while [ $x -lt $num_epochs ]; do
     tail -n 50 $dir/log/post.$z.*.log | perl -e '$acwt=shift @ARGV; $acwt>0.0 || die "bad acwt"; while(<STDIN>) { if (m|lattice-to-post.+Overall average log-like/frame is (\S+) over (\S+) frames. Average acoustic like/frame is (\S+)|) { $tot_den_lat_like += $1*$2; $tot_frames += $2; } if (m|matrix-logprob.+Average log-prob per frame is (\S+) over (\S+) frames|) { $tot_num_like += $1*$2; $tot_num_frames += $2; } } if (abs($tot_frames - $tot_num_frames) > 0.01*($tot_frames + $tot_num_frames)) { print STDERR "#frames differ $tot_frames vs $tot_num_frames\n"; } $tot_den_lat_like /= $tot_frames; $tot_num_like /= $tot_num_frames; $objf = $acwt * $tot_num_like - $tot_den_lat_like; print $objf."\n"; ' $acwt > $dir/log/objf.$z.log
     echo "Objf on EBW iter $z is `cat $dir/log/objf.$z.log`"
   fi

-  if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
-    echo "Shuffling the order of training examples and splitting them up"
-    echo "(in order to avoid stressing the disk, these won't all run at once)."
+  if [ $stage -lt $x ] || [ $sub_stage -le -1 ]; then
+    echo "Merging training examples across original #jobs ($nj), and "
+    echo "splitting across number of nnet jobs $num_jobs_nnet"

     egs_out2=""
     for n in `seq 1 $iters_per_epoch`; do
-      egs_out2="$egs_out2 ark:$dir/egs/egs_split.$z.$n.JOB.ark"
+      # indexes of egs_merged are: egs_merged.$z.$iters_per_epoch.$num_jobs_nnet
+      egs_out2="$egs_out2 ark:$dir/egs/egs_merged.$z.$n.JOB.ark"
     done
     # Note: in the following command, JOB goes from 1 to $num_jobs_nnet, so one
     # job per parallel training job (different from the previous command).
     # We sum up over the index JOB in the previous $cmd, and write to multiple
     # archives, this time one for each "sub-iter".
-    $cmd $shuffle_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.JOB.log \
-      cat $dir/egs/egs.$z.JOB.*.scp \| \
-      utils/shuffle_list.pl --srand "\$[($z*$num_jobs_nnet)+JOB]" \| \
-      nnet-copy-egs scp:- $egs_out2 || exit 1; ##'&&' \
-      ##rm $dir/egs/egs.$z.JOB.*.scp $dir/egs/egs.$z.JOB.*.ark || exit 1;
+    # indexes of egs_orig are: egs_orig.$z.$num_jobs_nnet.$nj
+    $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/merge_and_split.$x.JOB.log \
+      cat $dir/egs/egs_orig.$z.JOB.*.ark \| \
+      nnet-copy-egs --random=$random_copy "--srand=\$[JOB+($x*$num_jobs_nnet)]" \
+        ark:- $egs_out2 '&&' rm $dir/egs/egs_orig.$z.JOB.*.ark || exit 1;
+  fi
+
+  if [ $stage -lt $x ] || [ $sub_stage -le 0 ]; then
+    echo "Randomizing order of examples in each job"
+    for n in `seq 1 $iters_per_epoch`; do
+      s=$[$num_jobs_nnet*($n+($iters_per_epoch*$z))] # for srand
+      $cmd $io_opts JOB=1:$num_jobs_nnet $dir/log/shuffle.$z.$n.JOB.log \
+        nnet-shuffle-egs "--srand=\$[JOB+$s]" \
+          ark:$dir/egs/egs_merged.$z.$n.JOB.ark ark:$dir/egs/egs.$z.$n.JOB.ark '&&' \
+          rm $dir/egs/egs_merged.$z.$n.JOB.ark || exit 1;
+    done
   fi
   fi
   if [ $stage -le $x ]; then
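A simplified picture of the round-robin distribution that nnet-copy-egs performs over its output archives in the merge-and-split stage above (the writer type here is a stand-in, not Kaldi's Table API):

    // Illustrative only: distribute examples cyclically over N output
    // archives, one per parallel nnet-training job.
    #include <cstddef>
    #include <vector>

    template <typename Writer, typename Example>
    void CopyEgsRoundRobin(const std::vector<Example> &egs,
                           std::vector<Writer> *writers) {
      const std::size_t n = writers->size();
      for (std::size_t i = 0; i < egs.size(); i++)
        (*writers)[i % n].Write(egs[i]);  // example i -> archive i mod n
    }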
@@ -250,7 +263,7 @@ while [ $x -lt $num_epochs ]; do
     if [ $stage -lt $x ] || [ $sub_stage -le $y ]; then
       $cmd $parallel_opts JOB=1:$num_jobs_nnet $dir/log/train.$x.$y.JOB.log \
         nnet-train-parallel --num-threads=$num_threads --minibatch-size=$minibatch_size \
-          $dir/$x.$y.mdl ark:$dir/egs/egs_split.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
+          $dir/$x.$y.mdl ark:$dir/egs/egs.$z.$y.JOB.ark $dir/$x.$y.JOB.mdl \
           || exit 1;
       nnets_list=
       for n in `seq 1 $num_jobs_nnet`; do
@@ -68,7 +68,7 @@ Options:
   --input-left-context <n>   # #frames of left context for input features; default 0.
   --input-right-context <n>  # #frames of right context for input features; default 0.
   --param-stdddev-factor <f> # Factor which can be used to modify the standard deviation of
-                             # randomly nitialized features (default, 1. Gets multiplied by
+                             # randomly initialized features (default, 1. Gets multiplied by
                              # 1/sqrt of number of inputs).
   --initial-num-hidden-layers <n> <config-file> # If >0, number of hidden layers to initialize the network with.
                              # In this case, the positional parameter <num-hidden-layers> is only
@@ -19,7 +19,7 @@ BINFILES = align-equal align-equal-compiled acc-tree-stats \
            align-mapped align-compiled-mapped latgen-faster-mapped \
            hmm-info pdf-to-counts analyze-counts extract-ctx post-to-phone-post \
            post-to-pdf-post duplicate-matrix logprob-to-post prob-to-post copy-post \
-           matrix-logprob
+           matrix-logprob matrix-sum

 OBJFILES =
@@ -0,0 +1,87 @@
+// bin/matrix-sum.cc
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+
+    const char *usage =
+        "Sum (and optionally scale) two archives of input matrices\n"
+        "of the same dimension\n"
+        "\n"
+        "Usage: matrix-sum [options] <matrix-rspecifier1> <matrix-rspecifier2> <sum-wspecifier>\n";
+
+    BaseFloat scale1 = 1.0, scale2 = 1.0;
+
+    ParseOptions po(usage);
+
+    po.Register("scale1", &scale1, "Scale applied to first matrix");
+    po.Register("scale2", &scale2, "Scale applied to second matrix");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 3) {
+      po.PrintUsage();
+      exit(1);
+    }
+    std::string rspecifier1 = po.GetArg(1);
+    std::string rspecifier2 = po.GetArg(2);
+    std::string wspecifier = po.GetArg(3);
+
+    SequentialBaseFloatMatrixReader mat1_reader(rspecifier1);
+    RandomAccessBaseFloatMatrixReader mat2_reader(rspecifier2);
+    BaseFloatMatrixWriter mat_writer(wspecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    for (; !mat1_reader.Done(); mat1_reader.Next()) {
+      std::string key = mat1_reader.Key();
+      Matrix<BaseFloat> mat1(mat1_reader.Value());
+      if (!mat2_reader.HasKey(key)) {
+        KALDI_WARN << "No such key " << key << " in second table.";
+        num_err++;
+        continue;
+      }
+      const Matrix<BaseFloat> &mat2(mat2_reader.Value(key));
+      if (!SameDim(mat1, mat2)) {
+        KALDI_WARN << "Matrices for key " << key << " have different dims "
+                   << mat1.NumRows() << " x " << mat1.NumCols() << " vs. "
+                   << mat2.NumRows() << " x " << mat2.NumCols();
+        num_err++;
+        continue;
+      }
+      if (scale1 != 1.0) mat1.Scale(scale1);
+      mat1.AddMat(scale2, mat2);
+      mat_writer.Write(key, mat1);
+      num_done++;
+    }
+    KALDI_LOG << "Added " << num_done << " matrices; " << num_err
+              << " had errors.";
+
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
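Going by the options and positional arguments registered above, an invocation along the lines of matrix-sum --scale2=-1.0 ark:a.ark ark:b.ark ark:diff.ark should write scale1*A + scale2*B (here A - B) for each key present in both archives; keys missing from the second archive, or with mismatched dimensions, are skipped with a warning. (Illustrative invocation, not taken from the patch.)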
@@ -93,10 +93,13 @@ Real* CuMatrix<Real>::RowData(MatrixIndexT r) {

 template<typename Real>
-CuMatrix<Real>& CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols) {
+void CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols,
+                            MatrixResizeType resize_type) {
+  // This code does not currently support the other resize_type options.
+  KALDI_ASSERT(resize_type == kSetZero || resize_type == kUndefined);
   if (num_rows_ == rows && num_cols_ == cols) {
-    // SetZero();
-    return *this;
+    if (resize_type == kSetZero) SetZero();
+    return;
   }

   Destroy();
@@ -108,17 +111,15 @@ void CuMatrix<Real>::Resize(MatrixIndexT rows, MatrixIndexT cols,
     cuSafeCall(cudaMallocPitch((void**)&data_, &pitch, row_bytes, rows));
     num_rows_ = rows; num_cols_ = cols;
     stride_ = pitch/sizeof(Real);
-    SetZero();
+    if (resize_type == kSetZero) SetZero();
   } else
 #endif
   {
-    mat_.Resize(rows, cols);
+    mat_.Resize(rows, cols, resize_type);
     num_rows_=rows;
     num_cols_=cols;
     stride_= mat_.Stride();
   }
-
-  return *this;
 }
@@ -134,7 +135,7 @@ void CuMatrix<Real>::Destroy() {
   } else
 #endif
   {
-    mat_.Destroy();
+    mat_.Resize(0, 0);
   }
   num_rows_ = num_cols_ = stride_ = 0;
 }
@@ -142,9 +143,8 @@ void CuMatrix<Real>::Destroy() {

 template<typename Real>
-CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
-  Resize(src.NumRows(), src.NumCols());
+void CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
+  KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);

 #if HAVE_CUDA==1
 if (CuDevice::Instantiate().Enabled()) {
   Timer tim;
@@ -152,7 +152,8 @@ void CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
   MatrixIndexT dst_pitch = stride_*sizeof(Real);
   MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
   MatrixIndexT width = src.NumCols()*sizeof(Real);
-  cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyDeviceToDevice));
+  cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
+                          width, src.NumRows(), cudaMemcpyDeviceToDevice));

   CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatD2D",tim.Elapsed());
 } else
@@ -160,16 +161,13 @@ void CuMatrix<Real>::CopyFromMat(const CuMatrix<Real> &src) {
 {
   mat_.CopyFromMat(src.mat_);
 }
-
- return *this;
}

 template<typename Real>
-CuMatrix<Real>& CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
-  Resize(src.NumRows(), src.NumCols());
+void CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
+  KALDI_ASSERT(src.NumRows() == num_rows_ && src.NumCols() == num_cols_);

 #if HAVE_CUDA==1
 if (CuDevice::Instantiate().Enabled()) {
   Timer tim;
@@ -177,7 +175,8 @@ void CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
   MatrixIndexT dst_pitch = stride_*sizeof(Real);
   MatrixIndexT src_pitch = src.Stride()*sizeof(Real);
   MatrixIndexT width = src.NumCols()*sizeof(Real);
-  cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch, width, src.NumRows(), cudaMemcpyHostToDevice));
+  cuSafeCall(cudaMemcpy2D(data_, dst_pitch, src.Data(), src_pitch,
+                          width, src.NumRows(), cudaMemcpyHostToDevice));

   CuDevice::Instantiate().AccuProfile("CuMatrix::CopyFromMatH2D",tim.Elapsed());
 } else
@@ -185,17 +184,12 @@ void CuMatrix<Real>::CopyFromMat(const Matrix<Real> &src) {
 {
   mat_.CopyFromMat(src);
 }
-
- return *this;
}

 template<typename Real>
 void CuMatrix<Real>::CopyToMat(Matrix<Real> *dst) const {
-  if (dst->NumRows() != NumRows() || dst->NumCols() != NumCols()) {
-    dst->Resize(NumRows(), NumCols());
-  }
+  KALDI_ASSERT(dst->NumRows() == NumRows() && dst->NumCols() == NumCols());

 #if HAVE_CUDA==1
 if (CuDevice::Instantiate().Enabled()) {
@@ -257,7 +251,7 @@ void CuMatrix<Real>::Read(std::istream &is, bool binary) {

 template<typename Real>
 void CuMatrix<Real>::Write(std::ostream &os, bool binary) const {
-  Matrix<BaseFloat> tmp;
+  Matrix<BaseFloat> tmp(NumRows(), NumCols(), kUndefined);
   CopyToMat(&tmp);
   tmp.Write(os, binary);
 }
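Since CopyFromMat/CopyToMat now assert on size rather than resizing, callers size the destination up front, as Write() does above. A minimal sketch of the new calling pattern (the DownloadToHost helper is hypothetical, not part of the patch):

    #include "cudamatrix/cu-matrix.h"

    namespace kaldi {

    // Hypothetical helper illustrating the "copies never resize" contract:
    // size the destination first, then copy.  kUndefined skips the zeroing
    // pass, which is safe because CopyToMat overwrites every element.
    Matrix<BaseFloat> DownloadToHost(const CuMatrix<BaseFloat> &gpu_mat) {
      Matrix<BaseFloat> host_mat(gpu_mat.NumRows(), gpu_mat.NumCols(),
                                 kUndefined);
      gpu_mat.CopyToMat(&host_mat);  // asserts the dimensions already match
      return host_mat;
    }

    }  // namespace kaldi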
@@ -46,15 +46,41 @@ class CuMatrix {
  public:

   /// Default Constructor
-  CuMatrix<Real>()
-   : num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
-  }
+  CuMatrix<Real>():
+   num_rows_(0), num_cols_(0), stride_(0), data_(NULL) { }

   /// Constructor with memory initialisation
-  CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols)
-   : num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
-    Resize(rows, cols);
-  }
+  CuMatrix<Real>(MatrixIndexT rows, MatrixIndexT cols):
+   num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
+    Resize(rows, cols);
+  }
+
+  // Note: we had to remove the "explicit" keyword due
+  // to problems with STL vectors of CuMatrix.
+  CuMatrix<Real>(const CuMatrix<Real> &other):
+   num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
+    Resize(other.NumRows(), other.NumCols(), kUndefined);
+    CopyFromMat(other);
+  }
+
+  explicit CuMatrix<Real>(const Matrix<Real> &other):
+   num_rows_(0), num_cols_(0), stride_(0), data_(NULL) {
+    Resize(other.NumRows(), other.NumCols(), kUndefined);
+    CopyFromMat(other);
+  }
+
+  CuMatrix<Real> &operator = (const CuMatrix<Real> &other) {
+    Resize(other.NumRows(), other.NumCols(), kUndefined);
+    CopyFromMat(other);
+    return *this;
+  }
+
+  CuMatrix<Real> &operator = (const Matrix<Real> &other) {
+    Resize(other.NumRows(), other.NumCols(), kUndefined);
+    CopyFromMat(other);
+    return *this;
+  }

   /// Destructor
   ~CuMatrix() {
     Destroy();
@@ -65,14 +91,12 @@ class CuMatrix {
     return num_rows_;
   }

-  MatrixIndexT NumCols() const {
-    return num_cols_;
-  }
+  MatrixIndexT NumCols() const { return num_cols_; }

-  MatrixIndexT Stride() const {
-    return stride_;
-  }
+  MatrixIndexT Stride() const { return stride_; }

+  // MatrixDim is a struct containing "rows", "cols" and "stride",
+  // that is an argument of most CUDA kernels.
   ::MatrixDim Dim() const {
     ::MatrixDim d = { num_rows_, num_cols_, stride_ };
     return d;
@@ -87,29 +111,22 @@ class CuMatrix {
   Real* RowData(MatrixIndexT r);

   /// Get size of matrix in bytes
-  MatrixIndexT SizeInBytes() const {
-    return num_rows_*stride_*sizeof(Real);
-  }
+  MatrixIndexT SizeInBytes() const { return num_rows_*stride_*sizeof(Real); }

   /// Get size of matrix row in bytes
-  MatrixIndexT RowSizeInBytes() const {
-    return num_cols_*sizeof(Real);
-  }
+  MatrixIndexT RowSizeInBytes() const { return num_cols_*sizeof(Real); }

   /// Get size of matrix stride in bytes
-  MatrixIndexT StrideSizeInBytes() const {
-    return stride_*sizeof(Real);
-  }
+  MatrixIndexT StrideSizeInBytes() const { return stride_*sizeof(Real); }

   /// Allocate the memory
-  ThisType& Resize(MatrixIndexT rows, MatrixIndexT cols);
+  void Resize(MatrixIndexT rows, MatrixIndexT cols,
+              MatrixResizeType resize_type = kSetZero);

-  /// Deallocate the memory
-  void Destroy();
-
-  /// Copy functions (reallocates when needed)
-  ThisType& CopyFromMat(const CuMatrix<Real> &src);
-  ThisType& CopyFromMat(const Matrix<Real> &src);
+  /// Copy functions (reallocates when needed, but note from Dan: eventually
+  /// I'll change it to just die if the sizes don't match, like the Matrix class.)
+  void CopyFromMat(const CuMatrix<Real> &src);
+  void CopyFromMat(const Matrix<Real> &src);
   void CopyToMat(Matrix<Real> *dst) const;

   /// Copy row interval from matrix
@@ -154,6 +171,8 @@ class CuMatrix {
   }

  private:
+  void Destroy();
+
   MatrixIndexT num_rows_;
   MatrixIndexT num_cols_;
   MatrixIndexT stride_;
@@ -44,8 +44,6 @@ const Real* CuVector<Real>::Data() const {
   }
 }

-
-
 template<typename Real>
 Real* CuVector<Real>::Data() {
 #if HAVE_CUDA==1
@@ -58,15 +56,12 @@ Real* CuVector<Real>::Data() {
   }
 }

 template<typename Real>
-CuVector<Real>& CuVector<Real>::Resize(MatrixIndexT dim) {
+void CuVector<Real>::Resize(MatrixIndexT dim) {
   if (dim_ == dim) {
-    // SetZero();
-    return *this;
+    SetZero();
+    return;
   }

   Destroy();

 #if HAVE_CUDA==1
@@ -80,8 +75,6 @@ void CuVector<Real>::Resize(MatrixIndexT dim) {

   dim_ = dim;
   SetZero();
-
-  return *this;
 }
@@ -106,9 +99,7 @@ void CuVector<Real>::Destroy() {

 template<typename Real>
-CuVector<Real>& CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
-  Resize(src.Dim());
+void CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {

 #if HAVE_CUDA==1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
@@ -119,16 +110,13 @@ void CuVector<Real>::CopyFromVec(const CuVector<Real> &src) {
   {
     vec_.CopyFromVec(src.vec_);
   }
-
-  return *this;
 }

 template<typename Real>
-CuVector<Real>& CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
-  Resize(src.Dim());
+void CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
+  KALDI_ASSERT(src.Dim() == dim_);

 #if HAVE_CUDA==1
   if (CuDevice::Instantiate().Enabled()) {
     Timer tim;
@@ -141,16 +129,14 @@ void CuVector<Real>::CopyFromVec(const Vector<Real> &src) {
   {
     vec_.CopyFromVec(src);
   }
-  return *this;
 }

 template<typename Real>
 void CuVector<Real>::CopyToVec(Vector<Real> *dst) const {
-  if (dst->Dim() != dim_) {
-    dst->Resize(dim_);
-  }
+  KALDI_ASSERT(dst->Dim() == dim_);

 #if HAVE_CUDA==1
   if (CuDevice::Instantiate().Enabled()) {
@@ -177,7 +163,7 @@ void CuVector<Real>::Read(std::istream &is, bool binary) {

 template<typename Real>
 void CuVector<Real>::Write(std::ostream &os, bool binary) const {
-  Vector<BaseFloat> tmp;
+  Vector<BaseFloat> tmp(Dim());
   CopyToVec(&tmp);
   tmp.Write(os, binary);
 }
@@ -46,6 +46,16 @@ class CuVector {
     Resize(dim);
   }

+  CuVector<Real>(const CuVector<Real> &v): dim_(0), data_(NULL) {
+    Resize(v.dim_);
+    CopyFromVec(v);
+  }
+
+  CuVector<Real>(const Vector<Real> &v): dim_(0), data_(NULL) {
+    Resize(v.Dim());
+    CopyFromVec(v);
+  }
+
   /// Destructor
   ~CuVector() {
     Destroy();
@@ -61,14 +71,11 @@ class CuVector {
   Real* Data();

   /// Allocate the memory
-  ThisType& Resize(MatrixIndexT dim);
-
-  /// Deallocate the memory
-  void Destroy();
+  void Resize(MatrixIndexT dim);

   /// Copy functions (lazy reallocation when needed)
-  ThisType& CopyFromVec(const CuVector<Real> &src);
-  ThisType& CopyFromVec(const Vector<Real> &src);
+  void CopyFromVec(const CuVector<Real> &src);
+  void CopyFromVec(const Vector<Real> &src);
   void CopyToVec(Vector<Real> *dst) const;

   /// I/O
@@ -94,6 +101,7 @@ class CuVector {
   }

  private:
+  void Destroy();
   MatrixIndexT dim_;    ///< dimension of the vector
   Real *data_;          ///< GPU data pointer
   Vector<Real> vec_;    ///< non-GPU vector as back-up
@@ -234,7 +234,7 @@ class SimpleDecoder {
       const Arc &arc = aiter.Value();
       if (arc.ilabel == 0) {  // propagate nonemitting only...
         Token *new_tok = new Token(arc, tok);
-        if (new_tok->arc_.weight.Value() > cutoff) {
+        if (new_tok->weight_.Value() > cutoff) {
           Token::TokenDelete(new_tok);
         } else {
           unordered_map<StateId, Token*>::iterator find_iter
@@ -213,6 +213,10 @@ void MelBanks::Compute(const VectorBase<BaseFloat> &power_spectrum,
     int32 offset = bins_[i].first;
     const Vector<BaseFloat> &v (bins_[i].second);
     (*mel_energies_out)(i) = VecVec(v, power_spectrum.Range(offset, v.Dim()));
+    // The following assert was added due to a problem with OpenBlas that
+    // we had at one point (it was a bug in that library).  Just to detect
+    // it early.
+    KALDI_ASSERT(!KALDI_ISNAN((*mel_energies_out)(i)));
   }

   if (debug_) {
@@ -9,7 +9,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
            feat-to-len feat-to-dim fmpe-apply-transform fmpe-acc-stats fmpe-init \
            fmpe-est fmpe-copy fmpe-sum-accs append-feats extend-transform-dim \
            get-full-lda-mat compute-spectrogram-feats extract-feature-segments \
-           reverse-feats paste-feats select-feats
+           reverse-feats paste-feats select-feats subsample-feats

 OBJFILES =
@@ -1,6 +1,7 @@
 // featbin/append-feats.cc

-// Copyright 2012 Petr Motlicek; Pawel Swietojanski
+// Copyright 2012  Petr Motlicek  Pawel Swietojanski
+//                 Johns Hopkins University (author: Daniel Povey)

 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,15 +33,11 @@ int main(int argc, char *argv[]) {

     ParseOptions po(usage);

-    int32 feats_offset_in1 = 0;
-    int32 feats_offset_in2 = 0;
-    int32 num_feats_in1 = 0;
-    int32 num_feats_in2 = 0;
+    bool truncate_frames = false;

-    po.Register("feats-offset-in1", &feats_offset_in1, "Feats 1 offset");
-    po.Register("num-feats-in1", &num_feats_in1, "Take num-feats from in1-rspeciifier");
-    po.Register("feats-offset-in2", &feats_offset_in2, "Feats 2 offset");
-    po.Register("num-feats-in2", &num_feats_in2, "Take num-feats from in2-rspeciifier");
+    po.Register("truncate-frames", &truncate_frames, "If true, do not treat it "
+                "as an error when files differ in number of frames, but truncate "
+                "the longest one.");

     po.Read(argc, argv);

@@ -53,80 +50,47 @@ int main(int argc, char *argv[]) {
     std::string rspecifier2 = po.GetArg(2);
     std::string wspecifier = po.GetArg(3);

-    KALDI_ASSERT(feats_offset_in1 >= 0 && feats_offset_in2 >= 0);
-
-    BaseFloatMatrixWriter kaldi_writer(wspecifier);
-    SequentialBaseFloatMatrixReader kaldi_reader1(rspecifier1);
-    RandomAccessBaseFloatMatrixReader kaldi_reader2(rspecifier2);
-
-    // Peeking in the archives to get the feature dimensions
-    if (kaldi_reader1.Done()) {
-      KALDI_ERR << "Could not read any features from " << rspecifier1
-                << ". (empty archive?)";
-    }
-    std::string utt = kaldi_reader1.Key();
-    if (!kaldi_reader2.HasKey(utt)) {
-      KALDI_ERR << "Could not read features for key " << utt << " from "
-                << rspecifier2 << ". (empty archive?)";
-    }
-
-    int32 dim_feats_in1 = kaldi_reader1.Value().NumCols();
-    int32 dim_feats_in2 = kaldi_reader2.Value(utt).NumCols();
-    if (num_feats_in1 == 0)
-      num_feats_in1 = dim_feats_in1 - feats_offset_in1;
-    if (num_feats_in2 == 0)
-      num_feats_in2 = dim_feats_in2 - feats_offset_in2;
-
-    KALDI_LOG << "Reading features from " << rspecifier1 << " and " << rspecifier2;
-    KALDI_LOG << "\tdim1 = " << dim_feats_in1 << "; offset1 = " << feats_offset_in1
-              << "; num1 = " << num_feats_in1 << "; dim2 = " << dim_feats_in2
-              << "; offset2 = " << feats_offset_in2 << "; num2 = " << num_feats_in2;
-
-    KALDI_ASSERT((feats_offset_in1 + num_feats_in1) <= dim_feats_in1);
-    KALDI_ASSERT((feats_offset_in2 + num_feats_in2) <= dim_feats_in2);
-
-    for (; !kaldi_reader1.Done(); kaldi_reader1.Next()) {
-      utt = kaldi_reader1.Key();
-      if (!kaldi_reader2.HasKey(utt)) {
+    BaseFloatMatrixWriter feats_writer(wspecifier);
+    SequentialBaseFloatMatrixReader feats_reader1(rspecifier1);
+    RandomAccessBaseFloatMatrixReader feats_reader2(rspecifier2);
+
+    int32 num_done = 0, num_err = 0;
+
+    for (; !feats_reader1.Done(); feats_reader1.Next()) {
+      std::string utt = feats_reader1.Key();
+      if (!feats_reader2.HasKey(utt)) {
         KALDI_WARN << "Could not find features for " << utt << " in "
                    << rspecifier2 << ": producing no output for the utterance";
+        num_err++;
         continue;
       }

-      const Matrix<BaseFloat> &feats1 = kaldi_reader1.Value();
-      const Matrix<BaseFloat> &feats2 = kaldi_reader2.Value(utt);
-      int32 num_frames = feats1.NumRows();
-      KALDI_VLOG(1) << "Utterance : " << utt << ": # of frames = " << num_frames;
-
-      KALDI_ASSERT(feats1.NumCols() == dim_feats_in1 &&
-                   feats2.NumCols() == dim_feats_in2);
-      if (num_frames != feats2.NumRows()) {
-        KALDI_WARN << "Utterance " << utt << ": " << num_frames
-                   << " frames read from " << rspecifier1 << " and "
-                   << feats2.NumRows() << " frames read from " << rspecifier2
-                   << ": producing no output for the utterance";
+      const Matrix<BaseFloat> &feats1 = feats_reader1.Value();
+      const Matrix<BaseFloat> &feats2 = feats_reader2.Value(utt);
+      if (feats1.NumRows() != feats2.NumRows() && !truncate_frames) {
+        KALDI_WARN << "For utterance " << utt << ", features have different "
+                   << "#frames " << feats1.NumRows() << " vs. "
+                   << feats2.NumRows() << ", producing no output (use "
+                   << "--truncate-frames=true if you want output)";
+        num_err++;
         continue;
       }

-      SubMatrix<BaseFloat> new_feats1(feats1, 0, num_frames, feats_offset_in1,
-                                      num_feats_in1);
-      SubMatrix<BaseFloat> new_feats2(feats2, 0, num_frames, feats_offset_in2,
-                                      num_feats_in2);
-      Matrix<BaseFloat> output_feats(num_frames, new_feats1.NumCols() +
-                                     new_feats2.NumCols());
-      output_feats.Range(0, num_frames, 0,
-                         new_feats1.NumCols()).CopyFromMat(new_feats1);
-      output_feats.Range(0, num_frames, new_feats1.NumCols(),
-                         new_feats2.NumCols()).CopyFromMat(new_feats2);
-      kaldi_writer.Write(utt, output_feats);
+      int32 num_frames = std::min(feats1.NumRows(), feats2.NumRows()),
+          dim1 = feats1.NumCols(), dim2 = feats2.NumCols();
+      Matrix<BaseFloat> output(num_frames, dim1 + dim2, kUndefined);
+      output.Range(0, num_frames, 0, dim1).CopyFromMat(
+          feats1.Range(0, num_frames, 0, dim1));
+      output.Range(0, num_frames, dim1, dim2).CopyFromMat(
+          feats2.Range(0, num_frames, 0, dim2));
+
+      feats_writer.Write(utt, output);
+      num_done++;
     }
-    return 0;
-  }
-  catch (const std::exception& e) {
+    KALDI_LOG << "Appended " << num_done << " feats; " << num_err
+              << " with errors.";
+    return (num_done != 0 ? 0 : 1);
+  } catch (const std::exception& e) {
     std::cerr << e.what();
     return -1;
   }
 }
@@ -21,7 +21,6 @@
 #include "feat/feature-mfcc.h"
 #include "feat/wave-reader.h"

 int main(int argc, char *argv[]) {
   try {
     using namespace kaldi;
@@ -0,0 +1,96 @@
+// featbin/subsample-feats.cc
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+#include <algorithm>
+#include <iterator>
+#include <utility>
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    using namespace std;
+
+    const char *usage =
+        "Sub-samples features by taking every n'th frame\n"
+        "\n"
+        "Usage: subsample-feats [options] in-rspecifier out-wspecifier\n"
+        " e.g. subsample-feats --n=2 ark:- ark:-\n";
+
+    ParseOptions po(usage);
+
+    int32 n = 1, offset = 0;
+
+    po.Register("n", &n, "Take every n'th feature, for this value of n");
+    po.Register("offset", &offset, "Start with the feature with this offset, "
+                "then take every n'th feature.");
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    string rspecifier = po.GetArg(1);
+    string wspecifier = po.GetArg(2);
+
+    SequentialBaseFloatMatrixReader feat_reader(rspecifier);
+    BaseFloatMatrixWriter feat_writer(wspecifier);
+
+    int32 num_done = 0, num_err = 0;
+
+    // process all keys
+    for (; !feat_reader.Done(); feat_reader.Next()) {
+      std::string utt = feat_reader.Key();
+      const Matrix<BaseFloat> feats(feat_reader.Value());
+
+      // This code could, of course, be much more efficient; I'm just
+      // keeping it simple.
+      int32 num_indexes = 0;
+      for (int32 k = offset; k < feats.NumRows(); k += n)
+        num_indexes++;  // k is the index.
+
+      if (num_indexes == 0) {
+        KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
+                   << "producing no output.";
+        num_err++;
+        continue;
+      }
+      Matrix<BaseFloat> output(num_indexes, feats.NumCols());
+      int32 i = 0;
+      for (int32 k = offset; k < feats.NumRows(); k += n, i++) {
+        SubVector<BaseFloat> src(feats, k), dest(output, i);
+        dest.CopyFromVec(src);
+      }
+      KALDI_ASSERT(i == num_indexes);
+      feat_writer.Write(utt, output);
+      num_done++;
+    }
+    KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
+              << " with errors.";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
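From the counting loop above, for an utterance with T input frames (and offset < T), the number of output rows is

    \text{num\_indexes} \;=\; \left\lceil \frac{T - \text{offset}}{n} \right\rceil

so with the example invocation --n=2 and the default offset 0, an utterance of 100 frames yields 50 rows.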
@@ -25,7 +25,7 @@ int main(int argc, char *argv[]) {
     using namespace kaldi;

     const char *usage =
-        "Copy a subset of features\n"
+        "Copy a subset of features (the first n features)\n"
         "Usage: subset-feats [options] in-rspecifier out-wspecifier\n";

     ParseOptions po(usage);
@@ -160,11 +160,9 @@ int main(int argc, char *argv[]) {
     KALDI_LOG << "Applied transform to " << num_done << " utterances; " << num_error
               << " had errors.";
-
-    return 0;
+    return (num_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
   }
 }
@@ -27,9 +27,24 @@

 namespace kaldi {

-inline void cblas_Xscal(const int N, float *X, const int incX, float *Y,
-                        const int incY, const float c, const float s) {
-  cblas_srot(N, X, incX, Y, incY, c, s);
+inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
+                        const int incY) {
+  cblas_scopy(N, X, incX, Y, incY);
+}
+
+inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
+                        const int incY) {
+  cblas_dcopy(N, X, incX, Y, incY);
+}
+
+inline float cblas_Xasum(const int N, const float *X, const int incX) {
+  return cblas_sasum(N, X, incX);
+}
+
+inline double cblas_Xasum(const int N, const double *X, const int incX) {
+  return cblas_dasum(N, X, incX);
 }

 inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
@@ -226,6 +241,78 @@ inline void cblas_Xsyrk(
   cblas_dsyrk(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
               dim_c, other_dim_a, alpha, A, a_stride, beta, C, c_stride);
 }

+/// matrix-vector multiply using a banded matrix; we always call this
+/// with b = 1 meaning we're multiplying by a diagonal matrix.  This is used for
+/// elementwise multiplication.  We miss some of the arguments out of this
+/// wrapper.
+inline void cblas_Xsbmv1(
+    const MatrixIndexT dim,
+    const double *A,
+    const double alpha,
+    const double *x,
+    const double beta,
+    double *y) {
+  cblas_dsbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
+              1, x, 1, beta, y, 1);
+}
+
+inline void cblas_Xsbmv1(
+    const MatrixIndexT dim,
+    const float *A,
+    const float alpha,
+    const float *x,
+    const float beta,
+    float *y) {
+  cblas_ssbmv(CblasRowMajor, CblasLower, dim, 0, alpha, A,
+              1, x, 1, beta, y, 1);
+}
+
+/// This is not really a wrapper for CBLAS as CBLAS does not have this; in future we could
+/// extend this somehow.
+inline void mul_elements(
+    const MatrixIndexT dim,
+    const double *a,
+    double *b) {  // does b *= a, elementwise.
+  double c1, c2, c3, c4;
+  MatrixIndexT i;
+  for (i = 0; i + 4 <= dim; i += 4) {
+    c1 = a[i] * b[i];
+    c2 = a[i+1] * b[i+1];
+    c3 = a[i+2] * b[i+2];
+    c4 = a[i+3] * b[i+3];
+    b[i] = c1;
+    b[i+1] = c2;
+    b[i+2] = c3;
+    b[i+3] = c4;
+  }
+  for (; i < dim; i++)
+    b[i] *= a[i];
+}
+
+inline void mul_elements(
+    const MatrixIndexT dim,
+    const float *a,
+    float *b) {  // does b *= a, elementwise.
+  float c1, c2, c3, c4;
+  MatrixIndexT i;
+  for (i = 0; i + 4 <= dim; i += 4) {
+    c1 = a[i] * b[i];
+    c2 = a[i+1] * b[i+1];
+    c3 = a[i+2] * b[i+2];
+    c4 = a[i+3] * b[i+3];
+    b[i] = c1;
+    b[i+1] = c2;
+    b[i+2] = c3;
+    b[i+3] = c4;
+  }
+  for (; i < dim; i++)
+    b[i] *= a[i];
+}
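A small sketch of how the new wrappers fit together (assuming the declarations above are in scope). With the band count passed as 0, sbmv treats A as a diagonal matrix, so the call computes y := alpha*(a .* x) + beta*y:

    // Illustrative only; assumes cblas_Xsbmv1 / mul_elements from above.
    void DiagMulAdd(MatrixIndexT dim, const float *a, const float *x,
                    float alpha, float beta, float *y) {
      // y := alpha * diag(a) * x + beta * y, i.e. an elementwise
      // multiply-accumulate done by the BLAS sbmv kernel.
      cblas_Xsbmv1(dim, a, alpha, x, beta, y);
    }

    void InPlaceMul(MatrixIndexT dim, const float *a, float *b) {
      mul_elements(dim, a, b);  // b *= a elementwise, with 4-way unrolling
    }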

 // add clapack here
 #ifndef HAVE_ATLAS
 inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata, KaldiBlasInt *result) {
@@ -495,6 +495,41 @@ template
 void MatrixBase<double>::CopyFromMat(const MatrixBase<double> & M,
                                      MatrixTransposeType Trans);

+// Specialize the template for CopyFromSp for float, float.
+template<>
+template<>
+void MatrixBase<float>::CopyFromSp(const SpMatrix<float> & M) {
+  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
+  MatrixIndexT num_rows = num_rows_, stride = stride_;
+  const float *Mdata = M.Data();
+  float *row_data = data_, *col_data = data_;
+  for (MatrixIndexT i = 0; i < num_rows; i++) {
+    cblas_scopy(i+1, Mdata, 1, row_data, 1);     // copy to the row.
+    cblas_scopy(i, Mdata, 1, col_data, stride);  // copy to the column.
+    Mdata += i+1;
+    row_data += stride;
+    col_data += 1;
+  }
+}
+
+// Specialize the template for CopyFromSp for double, double.
+template<>
+template<>
+void MatrixBase<double>::CopyFromSp(const SpMatrix<double> & M) {
+  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
+  MatrixIndexT num_rows = num_rows_, stride = stride_;
+  const double *Mdata = M.Data();
+  double *row_data = data_, *col_data = data_;
+  for (MatrixIndexT i = 0; i < num_rows; i++) {
+    cblas_dcopy(i+1, Mdata, 1, row_data, 1);     // copy to the row.
+    cblas_dcopy(i, Mdata, 1, col_data, stride);  // copy to the column.
+    Mdata += i+1;
+    row_data += stride;
+    col_data += 1;
+  }
+}
+
 template<typename Real>
 template<typename OtherReal>
 void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) {
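These specializations rely on SpMatrix's packed lower-triangular layout, where row i of the triangle holds i+1 consecutive elements, so element (i, j) with j <= i sits at offset

    \text{offset}(i, j) \;=\; \frac{i(i+1)}{2} + j

Each loop iteration therefore copies one packed row of length i+1 into row i of the full matrix, and mirrors its first i entries (with the scopy/dcopy stride equal to the matrix stride) into column i, filling the upper triangle by symmetry.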
@@ -711,12 +746,16 @@ void Matrix<Real>::Destroy() {
 template<typename Real>
 void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
   KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
-  MatrixIndexT i;
-  MatrixIndexT j;
-
-  for (i = 0; i < num_rows_; i++) {
-    for (j = 0; j < num_cols_; j++) {
-      (*this)(i, j) *= a(i, j);
+
+  if (num_cols_ == stride_ && num_cols_ == a.stride_) {
+    mul_elements(num_rows_ * num_cols_, a.data_, data_);
+  } else {
+    MatrixIndexT a_stride = a.stride_, stride = stride_;
+    Real *data = data_, *a_data = a.data_;
+    for (MatrixIndexT i = 0; i < num_rows_; i++) {
+      mul_elements(num_cols_, a_data, data);
+      a_data += a_stride;
+      data += stride;
     }
   }
 }
@@ -1985,6 +2024,13 @@ Real MatrixBase<Real>::ApplySoftMax() {
   return max + log(sum);
 }

+template<typename Real>
+void MatrixBase<Real>::ApplyTanh() {
+  for (MatrixIndexT r = 0; r < num_rows_; r++) {
+    SubVector<Real> v(*this, r);
+    v.ApplyTanh();
+  }
+}
+
 template<class Real>
 template<class OtherReal>
@@ -341,6 +341,9 @@ class MatrixBase {
   /// matrix and return normalizer (log sum of exponentials).
   Real ApplySoftMax();

+  /// Apply the tanh function to each element of the matrix.
+  void ApplyTanh();
+
   /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
    * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
    * orthogonal matrix so rP^{-1} = rP^T.  Throws exception if input was not
@@ -553,7 +556,7 @@ class Matrix : public MatrixBase<Real> {
   /// It is symmetric, so no option for transpose, and NumRows == Cols
   template<typename OtherReal>
   explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
-    Resize(M.NumRows(), M.NumRows());
+    Resize(M.NumRows(), M.NumRows(), kUndefined);
     this->CopyFromSp(M);
   }
@@ -562,10 +565,10 @@ class Matrix : public MatrixBase<Real> {
   explicit Matrix(const TpMatrix<OtherReal> & M,
                   MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
     if (trans == kNoTrans) {
-      Resize(M.NumRows(), M.NumCols());
+      Resize(M.NumRows(), M.NumCols(), kUndefined);
       this->CopyFromTp(M);
     } else {
-      Resize(M.NumCols(), M.NumRows());
+      Resize(M.NumCols(), M.NumRows(), kUndefined);
       this->CopyFromTp(M, kTrans);
     }
   }
@@ -584,9 +587,6 @@ class Matrix : public MatrixBase<Real> {
   /// Destructor to free matrices.
   ~Matrix() { Destroy(); }

-  /// Deallocates memory and sets to empty matrix.
-  void Destroy();
-
   /// Sets matrix to a specified size (zero is OK as long as both r and c are
   /// zero).  The value of the new data depends on resize_type:
   ///   -if kSetZero, the new data will be zero
@@ -601,9 +601,8 @@ class Matrix : public MatrixBase<Real> {
   /// Assignment operator that takes MatrixBase.
   Matrix<Real> &operator = (const MatrixBase<Real> &other) {
     if (MatrixBase<Real>::NumRows() != other.NumRows() ||
-        MatrixBase<Real>::NumCols() != other.NumCols()) {
-      Resize(other.NumRows(), other.NumCols());
-    }
+        MatrixBase<Real>::NumCols() != other.NumCols())
+      Resize(other.NumRows(), other.NumCols(), kUndefined);
     MatrixBase<Real>::CopyFromMat(other);
     return *this;
   }
@@ -611,15 +610,17 @@ class Matrix : public MatrixBase<Real> {
   /// Assignment operator.  Needed for inclusion in std::vector.
   Matrix<Real> &operator = (const Matrix<Real> &other) {
     if (MatrixBase<Real>::NumRows() != other.NumRows() ||
-        MatrixBase<Real>::NumCols() != other.NumCols()) {
-      Resize(other.NumRows(), other.NumCols());
-    }
+        MatrixBase<Real>::NumCols() != other.NumCols())
+      Resize(other.NumRows(), other.NumCols(), kUndefined);
     MatrixBase<Real>::CopyFromMat(other);
     return *this;
   }

  private:
+  /// Deallocates memory and sets to empty matrix (dimension 0, 0).
+  void Destroy();
+
   /// Init assumes the current class contents are invalid (i.e. junk or have
   /// already been freed), and it sets the matrix to newly allocated memory with
   /// the specified number of rows and columns.  r == c == 0 is acceptable.  The data
@@ -375,7 +375,13 @@ template
 void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT row);

-// takes elements to a power. Throws exception if could not (but only for power != 1 ad power != 2).
+#ifdef HAVE_MKL
+template<>
+void VectorBase<float>::ApplyPow(float power) { vsPowx(dim_, data_, power, data_); }
+template<>
+void VectorBase<double>::ApplyPow(double power) { vdPowx(dim_, data_, power, data_); }
+#else
+// takes elements to a power. Throws exception if could not (but only for power != 1 and power != 2).
 template<typename Real>
 void VectorBase<Real>::ApplyPow(Real power) {
   if (power == 1.0) return;
@@ -399,6 +405,7 @@ void VectorBase<Real>::ApplyPow(Real power) {
     }
   }
 }
+#endif

 // Computes the p-th norm. Throws exception if could not.
 template<typename Real>
@@ -534,14 +541,13 @@ template<typename Real>
 void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta) {
   // note the double accumulator
   KALDI_ASSERT(dim_ == M.NumCols());
-  MatrixIndexT num_rows = M.NumRows(), stride = M.Stride();
-  for (MatrixIndexT i = 0; i < dim_; i++) {
-    double sum = 0.0;
-    const Real *src = M.Data() + i;
-    for (MatrixIndexT j = 0; j < num_rows; j++)
-      sum += src[j*stride];
-    data_[i] = alpha * sum + beta * data_[i];
-  }
+  MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
+  Real *data = data_;
+  cblas_Xscal(dim, beta, data, 1);
+  const Real *m_data = M.Data();
+  for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride)
+    cblas_Xaxpy(dim, alpha, m_data, 1, data, 1);
 }
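In effect the rewritten AddRowSumMat computes

    y \;\leftarrow\; \beta\, y \;+\; \alpha \sum_{i=0}^{R-1} M_{i,\cdot}

with one scal over the destination plus one axpy per (contiguous) matrix row, instead of the old strided walk down each column, which is far friendlier to the cache and to BLAS.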

 template<typename Real>
@@ -651,6 +657,25 @@ Real VectorBase<Real>::ApplySoftMax() {
   return max + log(sum);
 }

+#ifdef HAVE_MKL
+template<>
+void VectorBase<float>::ApplyTanh() { vsTanh(dim_, data_, data_); }
+template<>
+void VectorBase<double>::ApplyTanh() { vdTanh(dim_, data_, data_); }
+#else
+template<typename Real>
+void VectorBase<Real>::ApplyTanh() {
+  for (MatrixIndexT i = 0; i < dim_; i++) {
+    Real x = data_[i];
+    if (x > 0.0) {
+      x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
+    } else {
+      x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
+    }
+    data_[i] = x;
+  }
+}
+#endif
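The branch on the sign of x in the generic version uses the identity

    \tanh(x) \;=\; \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
             \;=\; -1 + \frac{2}{1 + e^{-2x}}
             \;=\; 1 - \frac{2}{1 + e^{2x}}

picking in each case the form whose exponent is non-positive, so exp() can at worst underflow to 0 but never overflow; for large |x| the result then saturates cleanly at +/-1.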

 template<typename Real>
 void VectorBase<Real>::Add(Real c) {
@@ -126,6 +126,10 @@ class VectorBase {
   /// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
   Real ApplySoftMax();

+  /// Apply the tanh function to each element of a vector.  If using MKL, does
+  /// it using the "less accurate" options.
+  void ApplyTanh();
+
   /// Take all elements of vector to a power.
   void ApplyPow(Real power);
@@ -322,20 +326,20 @@ class Vector: public VectorBase<Real> {

   /// Copy constructor. The need for this is controversial.
   Vector(const Vector<Real> &v) : VectorBase<Real>() { // (cannot be explicit)
-    Resize(v.Dim());
+    Resize(v.Dim(), kUndefined);
     this->CopyFromVec(v);
   }

   /// Copy-constructor from base-class, needed to copy from SubVector.
   explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
-    Resize(v.Dim());
+    Resize(v.Dim(), kUndefined);
     this->CopyFromVec(v);
   }

   /// Type conversion constructor.
   template<typename OtherReal>
   explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
-    Resize(v.Dim());
+    Resize(v.Dim(), kUndefined);
     this->CopyFromVec(v);
   }
@@ -372,14 +376,14 @@ class Vector: public VectorBase<Real> {

   /// Assignment operator, protected so it can only be used by std::vector
   Vector<Real> &operator = (const Vector<Real> &other) {
-    Resize(other.Dim());
+    Resize(other.Dim(), kUndefined);
     this->CopyFromVec(other);
     return *this;
   }

   /// Assignment operator that takes VectorBase.
   Vector<Real> &operator = (const VectorBase<Real> &other) {
-    Resize(other.Dim());
+    Resize(other.Dim(), kUndefined);
     this->CopyFromVec(other);
     return *this;
   }
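The kUndefined change in these constructors and assignment operators skips zero-filling memory that CopyFromVec immediately overwrites. A rough std::vector analogue of why that saves a pass over the data (illustrative only; kUndefined is the Kaldi mechanism):

    #include <algorithm>
    #include <vector>

    // Since the copy overwrites every element, allocating without
    // value-initialization avoids touching the memory twice, which is
    // what Resize(dim, kUndefined) buys in Kaldi.
    int main() {
      std::vector<float> src(1000, 1.5f);

      // Before: resize() zero-fills, then the copy overwrites everything.
      std::vector<float> a(src.size());            // zero-initialized
      std::copy(src.begin(), src.end(), a.begin());

      // After: allocate raw capacity and write each element exactly once.
      std::vector<float> b;
      b.reserve(src.size());
      b.insert(b.end(), src.begin(), src.end());
      return 0;
    }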
@@ -679,6 +679,28 @@ template<class Real> static void UnitTestAxpy() {
   }
 }

+template<class Real> static void UnitTestCopySp() {
+  // Checking that the various versions of copying
+  // matrix to SpMatrix work the same in the symmetric case.
+  for (MatrixIndexT iter = 0;iter < 5;iter++) {
+    int32 dim = 5 + rand() % 10;
+    SpMatrix<Real> S(dim), T(dim);
+    S.SetRandn();
+    Matrix<Real> M(S);
+    T.CopyFromMat(M, kTakeMeanAndCheck);
+    AssertEqual(S, T);
+    T.SetZero();
+    T.CopyFromMat(M, kTakeMean);
+    AssertEqual(S, T);
+    T.SetZero();
+    T.CopyFromMat(M, kTakeLower);
+    AssertEqual(S, T);
+    T.SetZero();
+    T.CopyFromMat(M, kTakeUpper);
+    AssertEqual(S, T);
+  }
+}
+
+
 template<class Real> static void UnitTestPower() {
   for (MatrixIndexT iter = 0;iter < 5;iter++) {
@@ -1430,6 +1452,7 @@ template<class Real> static void UnitTestMulElements() {
   }
 }

+
 template<class Real> static void UnitTestSpLogExp() {
   for (MatrixIndexT i = 0; i < 5; i++) {
     MatrixIndexT dimM = 10 + rand() % 10;
@@ -1860,6 +1883,27 @@ template<class Real> static void UnitTestLimitCond() {
   }
 }

+template<class Real> static void UnitTestTanh() {
+  for (MatrixIndexT i = 0; i < 10; i++) {
+    MatrixIndexT dimM = 5 + rand() % 10, dimN = 5 + rand() % 10;
+    Matrix<Real> M(dimM, dimN);
+    Matrix<Real> N(M);
+    for(int32 r = 0; r < dimM; r++) {
+      for (int32 c = 0; c < dimN; c++) {
+        Real x = N(r, c);
+        if (x > 0.0) {
+          x = -1.0 + 2.0 / (1.0 + exp(-2.0 * x));
+        } else {
+          x = 1.0 - 2.0 / (1.0 + exp(2.0 * x));
+        }
+        N(r, c) = x;
+      }
+    }
+    M.ApplyTanh();
+    AssertEqual(M, N);
+  }
+}
+
 template<class Real> static void UnitTestSimple() {
   for (MatrixIndexT i = 0;i < 5;i++) {
     MatrixIndexT dimM = 20 + rand()%10, dimN = 20 + rand()%20;
@@ -3541,6 +3585,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
   UnitTestDotprod<Real>();
   // UnitTestSvdVariants<Real>();
   UnitTestPower<Real>();
+  UnitTestCopySp<Real>();
   UnitTestDeterminant<Real>();
   KALDI_LOG << " Point F";
   UnitTestDeterminantSign<Real>();
@@ -3566,6 +3611,7 @@ template<class Real> static void MatrixUnitTest(bool full_test) {
   UnitTestRange<Real>();
   UnitTestSimpleForVec<Real>();
   UnitTestSimpleForMat<Real>();
+  UnitTestTanh<Real>();
   UnitTestNorm<Real>();
   UnitTestMul<Real>();
   KALDI_LOG << " Point I";
@@ -169,9 +169,17 @@ void SpMatrix<Real>::CopyFromMat(const MatrixBase<Real> &M,
       break;
     }
     case kTakeLower:
-      for (MatrixIndexT i = 0; i < D; i++)
-        for (MatrixIndexT j = 0; j <= i; j++)
-          (*this)(i, j) = M(i, j);
+      { // making this one a bit more efficient.
+        const Real *src = M.Data();
+        Real *dest = this->data_;
+        MatrixIndexT stride = M.Stride();
+        for (MatrixIndexT i = 0; i < D; i++) {
+          for (MatrixIndexT j = 0; j <= i; j++)
+            dest[j] = src[j];
+          dest += i + 1;
+          src += stride;
+        }
+      }
      break;
     case kTakeUpper:
       for (MatrixIndexT i = 0; i < D; i++)
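The new kTakeLower branch walks the packed triangular storage directly: row i of an SpMatrix occupies i + 1 consecutive elements, so the destination pointer advances by i + 1 per row while the source pointer advances by the full matrix stride. A standalone sketch of that copy on plain arrays (the packed layout matches Kaldi's SpMatrix; the rest is illustrative):

    #include <cassert>
    #include <vector>

    int main() {
      const int D = 3, stride = 3;
      std::vector<double> M = {1, 9, 9,
                               2, 3, 9,
                               4, 5, 6};     // upper entries (9) are ignored
      std::vector<double> packed(D * (D + 1) / 2);
      const double *src = M.data();
      double *dest = packed.data();
      for (int i = 0; i < D; i++) {
        for (int j = 0; j <= i; j++) dest[j] = src[j];
        dest += i + 1;   // packed row i holds i + 1 elements
        src += stride;   // source advances a full matrix row
      }
      std::vector<double> expected = {1, 2, 3, 4, 5, 6};
      assert(packed == expected);
      return 0;
    }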
@@ -10,7 +10,7 @@ OBJFILES = nnet-component.o nnet-nnet.o nnet-update.o train-nnet.o \
            nnet-randomize.o nnet-compute.o am-nnet.o nnet-functions.o \
            nnet-precondition.o shrink-nnet.o combine-nnet.o combine-nnet-a.o \
            mixup-nnet.o nnet-lbfgs.o nnet-update-parallel.o combine-nnet-fast.o \
-           nnet-fix.o
+           nnet-fix.o nnet-stats.o rescale-nnet.o nnet-limit-rank.o

 #nnet-compute.o nnet-train.o
 # nnet-nnet.o nnet-loss.o nnet-rnnlm.o
@@ -183,6 +183,23 @@ void UnitTestSigmoidComponent() {
   }
 }

+void UnitTestReduceComponent() {
+  // We're testing that the gradients are computed correctly:
+  // the input gradients and the model gradients.
+
+  int32 input_dim = 10 + rand() % 50, n = 1 + rand() % 3;
+  {
+    ReduceComponent reduce_component(input_dim, n);
+    UnitTestGenericComponentInternal(reduce_component);
+  }
+  {
+    ReduceComponent reduce_component;
+    reduce_component.InitFromString("dim=15 n=3");
+    UnitTestGenericComponentInternal(reduce_component);
+  }
+}
+
+
 template<class T>
 void UnitTestGenericComponent() { // works if it has an initializer from int,
                                   // e.g. tanh, sigmoid.
@@ -463,6 +480,8 @@ int main() {
   UnitTestGenericComponent<TanhComponent>();
   UnitTestGenericComponent<PermuteComponent>();
   UnitTestGenericComponent<SoftmaxComponent>();
+  UnitTestSigmoidComponent();
+  UnitTestReduceComponent();
   UnitTestAffineComponent();
   UnitTestAffinePreconInputComponent();
   UnitTestBlockAffineComponent();
@@ -47,6 +47,8 @@ Component* Component::NewComponentOfType(const std::string &component_type) {
     ans = new TanhComponent();
   } else if (component_type == "SoftmaxComponent") {
     ans = new SoftmaxComponent();
+  } else if (component_type == "ReduceComponent") {
+    ans = new ReduceComponent();
   } else if (component_type == "AffineComponent") {
     ans = new AffineComponent();
   } else if (component_type == "AffineComponentA") {
@@ -407,20 +409,8 @@ void TanhComponent::Propagate(const MatrixBase<BaseFloat> &in,
   // Apply tanh function to each element of the output...
   // the tanh function may be written as -1 + ( 2 / (1 + e^{-2 x})),
   // which is a scaled and shifted sigmoid.
-  out->Resize(in.NumRows(), in.NumCols());
-  int32 num_rows = in.NumRows(), num_cols = in.NumCols();
-  for(int32 r = 0; r < num_rows; r++) {
-    const BaseFloat *in_data = in.RowData(r),
-        *in_data_end = in_data + num_cols;
-    BaseFloat *out_data = out->RowData(r);
-    for (; in_data != in_data_end; ++in_data, ++out_data) {
-      if (*in_data > 0.0) {
-        *out_data = -1.0 + 2.0 / (1.0 + exp(-2.0 * *in_data));
-      } else {
-        *out_data = 1.0 - 2.0 / (1.0 + exp(2.0 * *in_data));
-      }
-    }
-  }
+  *out = in;
+  out->ApplyTanh();
 }

 void TanhComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
@@ -502,6 +492,67 @@ void SoftmaxComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value
   }
 }

+void ReduceComponent::InitFromString(std::string args) {
+  std::string orig_args(args);
+  int32 dim, n;
+  bool ok = ParseFromString("dim", &args, &dim) &&
+      ParseFromString("n", &args, &n);
+  if (!args.empty())
+    KALDI_ERR << "Could not process these elements in initializer: "
+              << args;
+  if (!ok)
+    KALDI_ERR << "Bad initializer " << orig_args;
+  Init(dim, n);
+}
+
+void ReduceComponent::Read(std::istream &is, bool binary) {
+  ExpectOneOrTwoTokens(is, binary, "<ReduceComponent>", "<Dim>");
+  ReadBasicType(is, binary, &dim_);
+  ExpectToken(is, binary, "<N>");
+  ReadBasicType(is, binary, &n_);
+  ExpectToken(is, binary, "</ReduceComponent>");
+}
+
+void ReduceComponent::Write(std::ostream &os, bool binary) const {
+  WriteToken(os, binary, "<ReduceComponent>");
+  WriteToken(os, binary, "<Dim>");
+  WriteBasicType(os, binary, dim_);
+  WriteToken(os, binary, "<N>");
+  WriteBasicType(os, binary, n_);
+  WriteToken(os, binary, "</ReduceComponent>");
+}
+
+void ReduceComponent::Propagate(const MatrixBase<BaseFloat> &in,
+                                int32 num_chunks,
+                                Matrix<BaseFloat> *out) const {
+  KALDI_ASSERT(in.NumRows() > 0 && in.NumCols() == InputDim());
+  out->Resize(in.NumRows(), OutputDim());
+  int32 num_frames = in.NumRows(), input_dim = in.NumCols(), n = n_;
+  for (int32 r = 0; r < num_frames; r++) {
+    const BaseFloat *src = in.RowData(r);
+    BaseFloat *dest = out->RowData(r);
+    for (int32 c = 0; c < input_dim; c++)
+      dest[c / n] += src[c];
+  }
+}
+
+void ReduceComponent::Backprop(const MatrixBase<BaseFloat> &, // in_value,
+                               const MatrixBase<BaseFloat> &, // out_value,
+                               const MatrixBase<BaseFloat> &out_deriv,
+                               int32, // num_chunks
+                               Component *, // to_update
+                               Matrix<BaseFloat> *in_deriv) const {
+  int32 num_frames = out_deriv.NumRows(),
+      input_dim = InputDim(), n = n_;
+  in_deriv->Resize(num_frames, input_dim, kUndefined);
+  for (int32 r = 0; r < num_frames; r++) {
+    const BaseFloat *src = out_deriv.RowData(r);
+    BaseFloat *dest = in_deriv->RowData(r);
+    for (int32 c = 0; c < input_dim; c++)
+      dest[c] = src[c / n];
+  }
+}
+
 void AffineComponent::Scale(BaseFloat scale) {
   linear_params_.Scale(scale);
   bias_params_.Scale(scale);
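ReduceComponent's forward and backward maps are pure index arithmetic: output c/n accumulates input c, and each input derivative is a copy of its group's output derivative, which is why the component reports that backprop needs neither the input nor the output values. A standalone single-frame sketch mirroring the loops above (plain C++, not Kaldi code):

    #include <cassert>
    #include <vector>

    int main() {
      const int input_dim = 5, n = 2, output_dim = (input_dim + n - 1) / n;  // = 3
      // Forward: sum each group of n inputs into one output.
      std::vector<double> in = {1, 2, 3, 4, 5}, out(output_dim, 0.0);
      for (int c = 0; c < input_dim; c++) out[c / n] += in[c];
      assert(out[0] == 3 && out[1] == 7 && out[2] == 5);

      // Backward: broadcast each output-derivative to its n inputs.
      std::vector<double> out_deriv = {0.1, 0.2, 0.3}, in_deriv(input_dim);
      for (int c = 0; c < input_dim; c++) in_deriv[c] = out_deriv[c / n];
      assert(in_deriv[0] == 0.1 && in_deriv[1] == 0.1 && in_deriv[4] == 0.3);
      return 0;
    }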
@@ -859,9 +910,9 @@ void AffineComponentPreconditioned::Update(
     in_value_temp(i, in_value.NumCols()) = 1.0;

   Matrix<BaseFloat> in_value_precon(in_value_temp.NumRows(),
-                                    in_value_temp.NumCols()),
+                                    in_value_temp.NumCols(), kUndefined),
       out_deriv_precon(out_deriv.NumRows(),
-                       out_deriv.NumCols());
+                       out_deriv.NumCols(), kUndefined);
   // each row of in_value_precon will be that same row of
   // in_value, but multiplied by the inverse of a Fisher
   // matrix that has been estimated from all the other rows,
@@ -225,6 +225,8 @@ class NonlinearComponent: public Component {
   void Scale(BaseFloat scale);
   void Add(BaseFloat alpha, const NonlinearComponent &other);

+  // The following functions are unique to NonlinearComponent.
+  // They mostly relate to diagnostics.
   const Vector<double> &ValueSum() const { return value_sum_; }
   const Vector<double> &DerivSum() const { return deriv_sum_; }
   double Count() const { return count_; }
@@ -324,6 +326,37 @@ class SoftmaxComponent: public NonlinearComponent {
   SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
 };

+/// This layer just sums up groups of n inputs to produce one output.
+class ReduceComponent: public Component {
+ public:
+  void Init(int32 dim, int32 n) { KALDI_ASSERT(dim > 0 && n > 0); dim_ = dim; n_ = n; }
+  ReduceComponent(int32 dim, int32 n) { Init(dim, n); }
+  ReduceComponent(): dim_(0), n_(0) { } // e.g. prior to Read()
+  explicit ReduceComponent(const ReduceComponent &other):
+      dim_(other.dim_), n_(other.n_) {}
+  virtual Component* Copy() const { return new ReduceComponent(*this); }
+  virtual std::string Type() const { return "ReduceComponent"; }
+  virtual int32 InputDim() const { return dim_; }
+  virtual int32 OutputDim() const { return (dim_ + n_ - 1) / n_; }
+  virtual void InitFromString(std::string args);
+  virtual void Read(std::istream &is, bool binary);
+  virtual void Write(std::ostream &os, bool binary) const;
+  virtual void Propagate(const MatrixBase<BaseFloat> &in,
+                         int32 num_chunks,
+                         Matrix<BaseFloat> *out) const;
+  virtual void Backprop(const MatrixBase<BaseFloat> &in_value,
+                        const MatrixBase<BaseFloat> &out_value,
+                        const MatrixBase<BaseFloat> &out_deriv,
+                        int32 num_chunks,
+                        Component *to_update, // may be identical to "this".
+                        Matrix<BaseFloat> *in_deriv) const;
+  virtual bool BackpropNeedsInput() const { return false; }
+  virtual bool BackpropNeedsOutput() const { return false; }
+ private:
+  int32 dim_;
+  int32 n_;
+};
+
+
 // Affine means a linear function plus an offset.
 // Note: although this class can be instantiated, it also
@@ -0,0 +1,108 @@
+// nnet/nnet-limit-rank.cc
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "nnet-cpu/nnet-limit-rank.h"
+#include "thread/kaldi-task-sequence.h"
+
+namespace kaldi {
+
+class LimitRankClass {
+ public:
+  LimitRankClass(const NnetLimitRankOpts &opts,
+                 int32 c,
+                 Nnet *nnet): opts_(opts), c_(c), nnet_(nnet) { }
+  void operator () () {
+    AffineComponent *ac = dynamic_cast<AffineComponent*>(
+        &(nnet_->GetComponent(c_)));
+    KALDI_ASSERT(ac != NULL);
+
+    // We'll limit the rank of just the linear part, keeping the bias vector full.
+    Matrix<BaseFloat> M(ac->LinearParams());
+    int32 rows = M.NumRows(), cols = M.NumCols(), rc_min = std::min(rows, cols);
+    Vector<BaseFloat> s(rc_min);
+    Matrix<BaseFloat> U(rows, rc_min), Vt(rc_min, cols);
+    // Do the destructive svd M = U diag(s) V^T.  It actually outputs the transpose of V.
+    M.DestructiveSvd(&s, &U, &Vt);
+    SortSvd(&s, &U, &Vt); // Sort the singular values from largest to smallest.
+
+    int32 d = GetRetainedDim(rows, cols);
+    BaseFloat old_svd_sum = s.Sum();
+    U.Resize(rows, d, kCopyData);
+    s.Resize(d, kCopyData);
+    Vt.Resize(d, cols, kCopyData);
+    BaseFloat new_svd_sum = s.Sum();
+    KALDI_LOG << "For component " << c_ << " of dimension " << rows
+              << " x " << cols << ", reduced rank from "
+              << rc_min << " to " << d << ", SVD sum reduced from "
+              << old_svd_sum << " to " << new_svd_sum;
+    Vt.MulRowsVec(s); // Vt <-- diag(s) Vt.
+    M.AddMatMat(1.0, U, kNoTrans, Vt, kNoTrans, 0.0); // Reconstruct with reduced
+    // rank.
+    Vector<BaseFloat> bias_params(ac->BiasParams());
+    ac->SetParams(bias_params, M);
+  }
+
+  int32 GetRetainedDim(int32 rows, int32 cols) {
+    if (opts_.parameter_proportion <= 0.0 || opts_.parameter_proportion > 1.0)
+      KALDI_ERR << "bad --parameter-proportion " << opts_.parameter_proportion;
+    // If we do SVD to dimension d, so that it's U diag(s) V^T where
+    // U is rows * d, s is d, and V is cols * d, then the #params is as follows...
+    // the first column of U has free parameters (#rows - 1) [the -1 is due to
+    // the length constraint]; the second has (#rows - 2) [subtract 1 for the
+    // length constraint and one for orthogonality with the previous row], etc.
+    // Total is params(U) = (rows * d) - ((d(d+1))/2),
+    // params(s) = d,
+    // params(V) = (cols * d) - ((d(d+1))/2),
+    // So total is (rows + cols) * d - d * d .
+    // For example, if d = #rows, this equals (#rows * #cols)
+    // We are solving for:
+    // (rows * cols) * parameter_proportion = (rows + cols) * d - d * d, or
+    // d^2 - d * (rows + cols) + (rows*cols)*parameter_proportion
+    // In quadratic equation
+    // a = 1.0,
+    // b = -(rows + cols)
+    // c = rows * cols * parameter_proportion.
+    // Take smaller solution.
+    BaseFloat a = 1.0, b = -(rows + cols),
+        c = rows * cols * opts_.parameter_proportion;
+    BaseFloat x = (-b - sqrt(b * b - 4 * a * c)) / (2.0 * a);
+    int32 ans = static_cast<int32>(x);
+    KALDI_ASSERT(ans > 0 && ans <= std::min(rows, cols));
+    return ans;
+  }
+
+  ~LimitRankClass() { }
+ private:
+  const NnetLimitRankOpts &opts_;
+  int32 c_;
+  Nnet *nnet_;
+};
+
+void LimitRankParallel(const NnetLimitRankOpts &opts,
+                       Nnet *nnet) {
+  TaskSequencerConfig task_config;
+  task_config.num_threads = opts.num_threads;
+  TaskSequencer<LimitRankClass> tc(task_config);
+  for (int32 c = 0; c < nnet->NumComponents(); c++) {
+    if (dynamic_cast<AffineComponent*>(&(nnet->GetComponent(c))) != NULL)
+      tc.Run(new LimitRankClass(opts, c, nnet));
+  }
+}
+
+} // namespace
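GetRetainedDim's comment block is the whole derivation: a rank-d factorization of a rows x cols matrix costs about (rows + cols)*d - d*d free parameters, so the retained dimension is the smaller root of d^2 - (rows + cols)*d + rows*cols*p = 0. A standalone check of that arithmetic (the dimensions and proportion below are made up for illustration):

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    // Smaller root of d^2 - (rows + cols) d + rows * cols * p = 0,
    // mirroring GetRetainedDim above.
    int RetainedDim(int rows, int cols, double p) {
      double b = -(rows + cols), c = static_cast<double>(rows) * cols * p;
      double d = (-b - std::sqrt(b * b - 4.0 * c)) / 2.0;  // smaller root
      return static_cast<int>(d);
    }

    int main() {
      int rows = 1000, cols = 500, d = RetainedDim(rows, cols, 0.75);
      double params = (rows + cols) * static_cast<double>(d)
                      - static_cast<double>(d) * d;
      // The retained parameter count should come out near 75% of rows * cols.
      std::printf("d = %d, param proportion = %.3f\n",
                  d, params / (static_cast<double>(rows) * cols));
      assert(d > 0 && d <= 500);
      return 0;
    }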
@@ -0,0 +1,56 @@
+// nnet-cpu/nnet-limit-rank.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
+#define KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
+
+#include "nnet-cpu/nnet-nnet.h"
+#include "util/table-types.h"
+#include "thread/kaldi-semaphore.h"
+#include "thread/kaldi-thread.h"
+#include "nnet-cpu/nnet-update.h"
+
+namespace kaldi {
+
+struct NnetLimitRankOpts {
+  int32 num_threads;
+  BaseFloat parameter_proportion;
+
+  NnetLimitRankOpts(): num_threads(1), parameter_proportion(0.75) { }
+
+  void Register(ParseOptions *po) {
+    po->Register("num-threads", &num_threads, "Number of threads used for "
+                 "rank-limiting operation; note, will never use more than "
+                 "#layers.");
+    po->Register("parameter-proportion", &parameter_proportion, "Proportion of "
+                 "dimension of each transform to limit the rank to.");
+  }
+};
+
+/// This function limits the rank of each affine transform in the
+/// neural net, by zeroing out the smallest singular values.  The number of
+/// singular values to zero out is determined on a layer by layer basis, using
+/// "parameter_proportion" to set the proportion of parameters to remove.
+void LimitRankParallel(const NnetLimitRankOpts &opts,
+                       Nnet *nnet);
+
+} // namespace
+
+#endif // KALDI_NNET_CPU_NNET_LIMIT_RANK_H_
@@ -361,6 +361,17 @@ void Nnet::RemoveDropout() {
   KALDI_LOG << "Removed " << removed << " dropout components.";
 }

+void Nnet::RemovePreconditioning() {
+  for (size_t i = 0; i < components_.size(); i++) {
+    if (dynamic_cast<AffineComponentPreconditioned*>(components_[i]) != NULL) {
+      AffineComponent *ac = new AffineComponent(
+          *(dynamic_cast<AffineComponent*>(components_[i])));
+      delete components_[i];
+      components_[i] = ac;
+    }
+  }
+}
+
 void Nnet::AddNnet(const VectorBase<BaseFloat> &scale_params,
                    const Nnet &other) {
   KALDI_ASSERT(scale_params.Dim() == this->NumUpdatableComponents());
@@ -105,6 +105,10 @@ class Nnet {
   /// Excise any components of type DropoutComponent.
   void RemoveDropout();

+  /// Replace any components of type AffineComponentPreconditioned with
+  /// components of type AffineComponent.
+  void RemovePreconditioning();
+
   /// For each updatatable component, adds to it
   /// the corresponding element of "other" times the
   /// appropriate element of "scales" (which has the
@@ -25,7 +25,13 @@ void PreconditionDirections(const MatrixBase<BaseFloat> &R,
                             MatrixBase<BaseFloat> *P) {

   int32 N = R.NumRows(), D = R.NumCols();
-  KALDI_ASSERT(SameDim(R, *P) && N > 1);
+  KALDI_ASSERT(SameDim(R, *P) && N > 0);
+  if (N == 1) {
+    KALDI_WARN << "Trying to precondition set of only one frame: returning "
+               << "unchanged.  Ignore this warning if infrequent.";
+    P->CopyFromMat(R);
+    return;
+  }
   MatrixBase<BaseFloat> &Q = *P;

   if (N >= D) {
@@ -0,0 +1,93 @@
+// nnet-cpu/nnet-stats.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_NNET_CPU_NNET_STATS_H_
+#define KALDI_NNET_CPU_NNET_STATS_H_
+
+#include "nnet-cpu/nnet-nnet.h"
+
+namespace kaldi {
+
+/* This program computes various statistics from a neural net.  These are
+   summaries of certain quantities already present in the network as
+   stored on disk, especially regarding certain average values and
+   derivatives of the sigmoids.
+*/
+
+struct NnetStatsConfig {
+  BaseFloat bucket_width;
+  NnetStatsConfig(): bucket_width(0.025) { }
+
+  void Register(ParseOptions *po) {
+    po->Register("bucket-width", &bucket_width, "Width of bucket in average-derivative "
+                 "stats for analysis.");
+  }
+};
+
+class NnetStats {
+ public:
+  NnetStats(int32 affine_component_index, BaseFloat bucket_width):
+      affine_component_index_(affine_component_index),
+      bucket_width_(bucket_width), global_(0, -1) { }
+
+  // Use default copy constructor and assignment operator.
+
+  void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
+
+  void AddStatsFromNnet(const Nnet &nnet);
+
+  void PrintStats(std::ostream &os);
+ private:
+
+  struct StatsElement {
+    BaseFloat deriv_begin; // avg-deriv, beginning of bucket.
+    BaseFloat deriv_end; // avg-deriv, end of bucket.
+    BaseFloat deriv_sum; // sum of avg-deriv within bucket.
+    BaseFloat deriv_sumsq; // Sum-squared of avg-deriv within bucket.
+    BaseFloat abs_value_sum; // Sum of abs(avg-value).  Tells us whether it's
+    // saturating at one or both ends.
+    BaseFloat abs_value_sumsq; // Sum-squared of abs(avg-value).
+    int32 count; // Number of nonlinearities in this bucket.
+
+    StatsElement(BaseFloat deriv_begin,
+                 BaseFloat deriv_end):
+        deriv_begin(deriv_begin), deriv_end(deriv_end), deriv_sum(0.0),
+        deriv_sumsq(0.0), abs_value_sum(0.0), abs_value_sumsq(0.0), count(0) { }
+    void AddStats(BaseFloat avg_deriv, BaseFloat avg_value);
+    // Outputs stats for this bucket; no newline
+    void PrintStats(std::ostream &os);
+  };
+  int32 BucketFor(BaseFloat avg_deriv); // returns the bucket
+  // for this avg-derivative value, and makes sure it is allocated.
+
+  int32 affine_component_index_; // Component index of the affine component
+  // associated with this nonlinearity.
+  BaseFloat bucket_width_; // width of buckets of stats we store (in derivative values).
+
+  std::vector<StatsElement> buckets_; // Stats divided into buckets by avg_deriv.
+  StatsElement global_; // All the stats.
+};
+
+void GetNnetStats(const NnetStatsConfig &config,
+                  const Nnet &nnet,
+                  std::vector<NnetStats> *stats);
+
+} // namespace
+
+#endif // KALDI_NNET_CPU_NNET_STATS_H_
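The diff declares BucketFor but never shows its body; one plausible reading, given the fixed bucket_width default of 0.025, is plain fixed-width bucketing of the average-derivative value. A hypothetical sketch under that assumption (the real BucketFor also allocates buckets on demand, which is omitted here):

    #include <cassert>
    #include <cmath>

    // Hypothetical fixed-width bucketing of an average-derivative value;
    // not the actual Kaldi implementation, whose body is not in this diff.
    int BucketFor(double avg_deriv, double bucket_width) {
      assert(avg_deriv >= 0.0);
      return static_cast<int>(std::floor(avg_deriv / bucket_width));
    }

    int main() {
      const double width = 0.025;             // default bucket_width above
      assert(BucketFor(0.0, width) == 0);
      assert(BucketFor(0.26, width) == 10);   // falls in [0.25, 0.275)
      return 0;
    }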
@@ -44,7 +44,6 @@ class NnetUpdater {
   // Possibly splices input together from forward_data_[component].
   // MatrixBase<BaseFloat> &GetSplicedInput(int32 component, Matrix<BaseFloat> *temp_matrix);
-
   void Propagate();

   /// Computes objective function and derivative at output layer.
@@ -156,7 +155,7 @@ void NnetUpdater::Backprop(const std::vector<NnetTrainingExample> &data,

     component.Backprop(input, output, output_deriv, num_chunks,
                        component_to_update, &input_deriv);
-    *deriv = input_deriv;
+    input_deriv.Swap(deriv);
   }
 }
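The one-line change from *deriv = input_deriv to input_deriv.Swap(deriv) turns a full matrix copy into an O(1) exchange of the underlying buffers; input_deriv is about to go out of scope, so its old contents are never needed. The std::vector analogue (a sketch, not Kaldi code):

    #include <vector>

    int main() {
      std::vector<float> input_deriv(1 << 20, 0.5f);  // large scratch result
      std::vector<float> deriv;
      // swap() exchanges internal pointers in constant time;
      // "deriv = input_deriv" would copy every element instead.
      deriv.swap(input_deriv);
      return 0;
    }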
@@ -0,0 +1,212 @@
+// nnet/rescale-nnet.cc
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "nnet-cpu/rescale-nnet.h"
+
+namespace kaldi {
+
+class NnetRescaler {
+ public:
+  NnetRescaler(const NnetRescaleConfig &config,
+               const std::vector<NnetTrainingExample> &examples,
+               Nnet *nnet):
+      config_(config), examples_(examples), nnet_(nnet) {}
+
+  void Rescale();
+
+ private:
+  /// takes the input and formats as a single matrix, in forward_data_[0].
+  void FormatInput(const std::vector<NnetTrainingExample> &data,
+                   Matrix<BaseFloat> *input);
+  void RescaleComponent(int32 c, int32 num_chunks,
+                        MatrixBase<BaseFloat> *cur_data_in,
+                        Matrix<BaseFloat> *next_data);
+
+  void ComputeRelevantIndexes();
+
+  BaseFloat GetTargetAvgDeriv(int32 c);
+
+  const NnetRescaleConfig &config_;
+  const std::vector<NnetTrainingExample> &examples_;
+  Nnet *nnet_;
+  std::set<int32> relevant_indexes_; // values of c with AffineComponent followed
+  // by (at c+1) NonlinearComponent that is not SoftmaxComponent.
+};
+
+void NnetRescaler::FormatInput(const std::vector<NnetTrainingExample> &data,
+                               Matrix<BaseFloat> *input) {
+  KALDI_ASSERT(data.size() > 0);
+  int32 num_splice = nnet_->LeftContext() + 1 + nnet_->RightContext();
+  KALDI_ASSERT(data[0].input_frames.NumRows() == num_splice);
+
+  int32 feat_dim = data[0].input_frames.NumCols(),
+      spk_dim = data[0].spk_info.Dim(),
+      tot_dim = feat_dim + spk_dim; // we append these at the neural net
+  // input... note, spk_dim might be 0.
+  KALDI_ASSERT(tot_dim == nnet_->InputDim());
+  int32 num_chunks = data.size();
+
+  input->Resize(num_splice * num_chunks,
+                tot_dim);
+  for (int32 chunk = 0; chunk < num_chunks; chunk++) {
+    SubMatrix<BaseFloat> dest(*input,
+                              chunk * num_splice, num_splice,
+                              0, feat_dim);
+    const Matrix<BaseFloat> &src(data[chunk].input_frames);
+    dest.CopyFromMat(src);
+    if (spk_dim != 0) {
+      SubMatrix<BaseFloat> spk_dest(*input,
+                                    chunk * num_splice, num_splice,
+                                    feat_dim, spk_dim);
+      spk_dest.CopyRowsFromVec(data[chunk].spk_info);
+    }
+  }
+}
+
+void NnetRescaler::ComputeRelevantIndexes() {
+  for (int32 c = 0; c + 1 < nnet_->NumComponents(); c++)
+    if (dynamic_cast<AffineComponent*>(&nnet_->GetComponent(c)) != NULL &&
+        (dynamic_cast<NonlinearComponent*>(&nnet_->GetComponent(c+1)) != NULL &&
+         dynamic_cast<SoftmaxComponent*>(&nnet_->GetComponent(c+1)) == NULL))
+      relevant_indexes_.insert(c);
+}
+
+BaseFloat NnetRescaler::GetTargetAvgDeriv(int32 c) {
+  KALDI_ASSERT(relevant_indexes_.count(c) == 1);
+  BaseFloat factor;
+  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
+    factor = 0.25;
+  else if (dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) != NULL)
+    factor = 1.0;
+  else
+    KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
+
+  int32 last_c = *std::max_element(relevant_indexes_.begin(), relevant_indexes_.end()),
+      first_c = *std::min_element(relevant_indexes_.begin(), relevant_indexes_.end());
+  if (c == first_c)
+    return factor * config_.target_first_layer_avg_deriv;
+  else if (c == last_c)
+    return factor * config_.target_last_layer_avg_deriv;
+  else
+    return factor * config_.target_avg_deriv;
+}
+
+// Here, c is the index of the affine component, and
+// c + 1 is the index of the nonlinear component; *cur_data is the
+// output of the affine component.
+void NnetRescaler::RescaleComponent(
+    int32 c,
+    int32 num_chunks,
+    MatrixBase<BaseFloat> *cur_data_in,
+    Matrix<BaseFloat> *next_data) {
+  int32 rows = cur_data_in->NumRows(), cols = cur_data_in->NumCols();
+  // Only handle sigmoid or tanh here.
+  if (dynamic_cast<SigmoidComponent*>(&(nnet_->GetComponent(c + 1))) == NULL &&
+      dynamic_cast<TanhComponent*>(&(nnet_->GetComponent(c + 1))) == NULL)
+    KALDI_ERR << "This type of nonlinear component is not handled: index " << c;
+  // the nonlinear component:
+  NonlinearComponent &nc =
+      *(dynamic_cast<NonlinearComponent*>(&(nnet_->GetComponent(c + 1))));
+
+  BaseFloat orig_avg_deriv, target_avg_deriv = GetTargetAvgDeriv(c);
+  BaseFloat cur_scaling = 1.0; // current rescaling factor (on input).
+  int32 num_iters = 10;
+
+  Matrix<BaseFloat> cur_data(*cur_data_in),
+      ones(rows, cols), in_deriv(rows, cols);
+
+  ones.Set(1.0);
+  nc.Propagate(cur_data, num_chunks, next_data);
+  nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
+  BaseFloat cur_avg_deriv;
+  cur_avg_deriv = in_deriv.Sum() / (rows * cols);
+  orig_avg_deriv = cur_avg_deriv;
+  for (int32 iter = 0; iter < num_iters; iter++) {
+    // We already have "cur_avg_deriv"; perturb the scale and compute
+    // the next avg_deriv, so we can see how it changes with the scale.
+    cur_data.CopyFromMat(*cur_data_in);
+    cur_data.Scale(cur_scaling + config_.delta);
+    nc.Propagate(cur_data, num_chunks, next_data);
+    nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
+    BaseFloat next_avg_deriv = in_deriv.Sum() / (rows * cols);
+    KALDI_ASSERT(next_avg_deriv < cur_avg_deriv);
+    // "gradient" is how avg_deriv changes as we change the scale.
+    // should be negative.
+    BaseFloat gradient = (next_avg_deriv - cur_avg_deriv) / config_.delta;
+    KALDI_ASSERT(gradient < 0.0);
+    BaseFloat proposed_change = (target_avg_deriv - cur_avg_deriv) / gradient;
+    KALDI_VLOG(2) << "cur_avg_deriv = " << cur_avg_deriv << ", target_avg_deriv = "
+                  << target_avg_deriv << ", gradient = " << gradient
+                  << ", proposed_change " << proposed_change;
+    // Limit size of proposed change in "cur_scaling", to ensure stability.
+    if (fabs(proposed_change / cur_scaling) > config_.max_change)
+      proposed_change = cur_scaling * config_.max_change *
+          (proposed_change > 0.0 ? 1.0 : -1.0);
+    cur_scaling += proposed_change;
+
+    cur_data.CopyFromMat(*cur_data_in);
+    cur_data.Scale(cur_scaling);
+    nc.Propagate(cur_data, num_chunks, next_data);
+    nc.Backprop(cur_data, *next_data, ones, num_chunks, NULL, &in_deriv);
+    cur_avg_deriv = in_deriv.Sum() / (rows * cols);
+    if (fabs(proposed_change) < config_.min_change) break; // Terminate the
+    // optimization
+  }
+  UpdatableComponent *uc = dynamic_cast<UpdatableComponent*>(
+      &nnet_->GetComponent(c));
+  KALDI_ASSERT(uc != NULL);
+  uc->Scale(cur_scaling); // scale the parameters of the previous
+  // AffineComponent.
+
+  KALDI_LOG << "For component " << c << ", scaling parameters by "
+            << cur_scaling << "; average "
+            << "derivative changed from " << orig_avg_deriv << " to "
+            << cur_avg_deriv << "; target was " << target_avg_deriv;
+}
+
+void NnetRescaler::Rescale() {
+  ComputeRelevantIndexes(); // set up relevant_indexes_.
+  Matrix<BaseFloat> cur_data, next_data;
+  FormatInput(examples_, &cur_data);
+  int32 num_chunks = examples_.size();
+  for (int32 c = 0; c < nnet_->NumComponents(); c++) {
+    Component &component = nnet_->GetComponent(c);
+    if (relevant_indexes_.count(c - 1) == 1) {
+      // the following function call also appropriately sets "next_data"
+      // after doing the rescaling
+      RescaleComponent(c - 1, num_chunks, &cur_data, &next_data);
+    } else {
+      component.Propagate(cur_data, num_chunks, &next_data);
+    }
+    cur_data.Swap(&next_data);
+  }
+}
+
+void RescaleNnet(const NnetRescaleConfig &rescale_config,
+                 const std::vector<NnetTrainingExample> &examples,
+                 Nnet *nnet) {
+  NnetRescaler rescaler(rescale_config, examples, nnet);
+  rescaler.Rescale();
+}
+
+} // namespace
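RescaleComponent is essentially a one-dimensional root-finder: treat the measured average derivative as a function of the input scale, estimate its slope with a forward difference of width delta, then take a Newton-style step toward the target, clipped by max_change and terminated once steps fall below min_change. A standalone sketch of that loop on a made-up stand-in for the measured avg-deriv (the real code measures it via Propagate/Backprop on sample data):

    #include <cmath>
    #include <cstdio>

    int main() {
      // Crude stand-in for the measured average derivative: pretend the
      // typical pre-nonlinearity value is 2 * scale and the nonlinearity
      // is tanh, whose derivative is 1 - tanh^2.
      auto f = [](double s) {
        double t = std::tanh(2.0 * s);
        return 1.0 - t * t;
      };
      double target = 0.2, scale = 1.0, delta = 0.01, max_change = 0.2;
      for (int iter = 0; iter < 10; iter++) {
        double cur = f(scale);
        std::printf("iter %d: scale = %.4f, avg-deriv = %.4f\n", iter, scale, cur);
        double grad = (f(scale + delta) - cur) / delta;  // negative: deriv falls as scale grows
        double change = (target - cur) / grad;           // Newton-style proposal
        if (std::fabs(change / scale) > max_change)      // clip, as in the real code
          change = scale * max_change * (change > 0.0 ? 1.0 : -1.0);
        scale += change;
        if (std::fabs(change) < 1.0e-5) break;           // min_change termination
      }
      return 0;
    }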
@@ -0,0 +1,76 @@
+// nnet-cpu/rescale-nnet.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_NNET_CPU_RESCALE_NNET_H_
+#define KALDI_NNET_CPU_RESCALE_NNET_H_
+
+#include "nnet-cpu/nnet-update.h"
+#include "nnet-cpu/nnet-compute.h"
+#include "util/parse-options.h"
+
+// Neural net rescaling is a rescaling of the parameters of the various layers
+// of a neural net, done so as to match certain specified statistics on the
+// average derivative of the sigmoid, measured on sample data.  This relates to
+// how "saturated" the sigmoid is.
+
+namespace kaldi {
+
+struct NnetRescaleConfig {
+  BaseFloat target_avg_deriv;
+  BaseFloat target_first_layer_avg_deriv;
+  BaseFloat target_last_layer_avg_deriv;
+
+  // These are relatively unimportant; for now they have no
+  // command line options.
+  BaseFloat num_iters;
+  BaseFloat delta;
+  BaseFloat max_change; // maximum change on any one iteration (to
+  // ensure stability).
+  BaseFloat min_change; // minimum change on any one iteration (controls
+  // termination)
+
+  NnetRescaleConfig(): target_avg_deriv(0.2),
+                       target_first_layer_avg_deriv(0.3),
+                       target_last_layer_avg_deriv(0.1),
+                       num_iters(10),
+                       delta(0.01),
+                       max_change(0.2), min_change(1.0e-05) { }
+
+  void Register(ParseOptions *po) {
+    po->Register("target-avg-deriv", &target_avg_deriv, "Target average derivative "
+                 "for hidden layers that are not the first or last hidden layer "
+                 "(as fraction of maximum derivative of the nonlinearity)");
+    po->Register("target-first-layer-avg-deriv", &target_first_layer_avg_deriv,
+                 "Target average derivative for the first hidden layer "
+                 "(as fraction of maximum derivative of the nonlinearity)");
+    po->Register("target-last-layer-avg-deriv", &target_last_layer_avg_deriv,
+                 "Target average derivative for the last hidden layer, if "
+                 "#hid-layers > 1 "
+                 "(as fraction of maximum derivative of the nonlinearity)");
+  }
+};
+
+void RescaleNnet(const NnetRescaleConfig &rescale_config,
+                 const std::vector<NnetTrainingExample> &examples,
+                 Nnet *nnet);
+
+} // namespace
+
+#endif
@@ -13,7 +13,8 @@ BINFILES = nnet-randomize-frames nnet-am-info nnet-train nnet-init \
            nnet-train-lbfgs nnet-get-egs nnet-train-parallel nnet-gradient \
            nnet-get-preconditioner nnet-precondition nnet-select-egs nnet-combine-fast \
            nnet-subset-egs nnet-shuffle-egs nnet-am-fix nnet-logprob nnet-logprob2 \
-           nnet-logprob2-parallel nnet-logprob-parallel
+           nnet-logprob2-parallel nnet-logprob-parallel nnet-am-stats nnet-am-rescale \
+           nnet-am-limit-rank

 OBJFILES =
@@ -41,6 +41,7 @@ int main(int argc, char *argv[]) {
     int32 truncate = -1;
     bool binary_write = true;
    bool remove_dropout = false;
+    bool remove_preconditioning = false;
     BaseFloat learning_rate_factor = 1.0, learning_rate = -1;
     std::string learning_rates = "";
     std::string scales = "";
@@ -64,6 +65,8 @@ int main(int argc, char *argv[]) {
                 "to this many components by removing the last components.");
     po.Register("remove-dropout", &remove_dropout, "Set this to true to remove "
                 "any dropout components.");
+    po.Register("remove-preconditioning", &remove_preconditioning, "Set this to true to replace "
+                "components of type AffineComponentPreconditioned with AffineComponent.");
     po.Register("stats-from", &stats_from, "Before copying neural net, copy the "
                 "statistics in any layer of type NonlinearComponent, from this "
                 "neural network: provide the extended filename.");
@@ -133,6 +136,8 @@ int main(int argc, char *argv[]) {

     if (remove_dropout) am_nnet.GetNnet().RemoveDropout();

+    if (remove_preconditioning) am_nnet.GetNnet().RemovePreconditioning();
+
     if (stats_from != "") {
       // Copy the stats associated with the layers descending from
       // NonlinearComponent.
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
         "e.g.:\n"
         " nnet-am-fix 1.mdl 1_fixed.mdl\n"
         "or:\n"
-        " nnet-am-shrink-rows --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n";
+        " nnet-am-fix --get-counts-from=1.gradient 1.mdl 1_shrunk.mdl\n";

     bool binary_write = true;
     NnetFixConfig config;
@@ -0,0 +1,81 @@
+// nnet-cpubin/nnet-am-limit-rank.cc
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "nnet-cpu/nnet-limit-rank.h"
+#include "nnet-cpu/am-nnet.h"
+#include "hmm/transition-model.h"
+#include "tree/context-dep.h"
+
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    typedef kaldi::int32 int32;
+
+    const char *usage =
+        "Copy a (cpu-based) neural net and its associated transition model,\n"
+        "but modify it to reduce the effective parameter count by limiting\n"
+        "the rank of weight matrices.\n"
+        "\n"
+        "Usage: nnet-am-limit-rank [options] <nnet-in> <nnet-out>\n"
+        "e.g.:\n"
+        " nnet-am-limit-rank 1.mdl 1_limited.mdl\n";
+
+    bool binary_write = true;
+    NnetLimitRankOpts config;
+
+    ParseOptions po(usage);
+    po.Register("binary", &binary_write, "Write output in binary mode");
+    config.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string nnet_rxfilename = po.GetArg(1),
+        nnet_wxfilename = po.GetArg(2);
+
+    TransitionModel trans_model;
+    AmNnet am_nnet;
+    {
+      bool binary;
+      Input ki(nnet_rxfilename, &binary);
+      trans_model.Read(ki.Stream(), binary);
+      am_nnet.Read(ki.Stream(), binary);
+    }
+
+    LimitRankParallel(config, &am_nnet.GetNnet());
+
+    {
+      Output ko(nnet_wxfilename, binary_write);
+      trans_model.Write(ko.Stream(), binary_write);
+      am_nnet.Write(ko.Stream(), binary_write);
+    }
+    KALDI_LOG << "Copied neural net from " << nnet_rxfilename
+              << " to " << nnet_wxfilename;
+    return 0;
+  } catch(const std::exception &e) {
+    std::cerr << e.what() << '\n';
+    return -1;
+  }
+}
@@ -0,0 +1,92 @@
// nnet-cpubin/nnet-am-rescale.cc

// Copyright 2012  Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/rescale-nnet.h"
#include "nnet-cpu/am-nnet.h"
#include "tree/context-dep.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Rescale the parameters in a neural net to achieve certain target\n"
        "statistics, relating to the average derivative of the sigmoids\n"
        "measured at some supplied data.  This relates to how saturated\n"
        "the sigmoids are (we try to match the statistics of `good' neural\n"
        "nets).\n"
        "\n"
        "Usage: nnet-am-rescale [options] <nnet-in> <examples-in> <nnet-out>\n"
        "e.g.:\n"
        " nnet-am-rescale 1.mdl valid.egs 1_rescaled.mdl\n";

    bool binary_write = true;
    NnetRescaleConfig config;

    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
    config.Register(&po);

    po.Read(argc, argv);

    if (po.NumArgs() != 3) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_rxfilename = po.GetArg(1),
        egs_rspecifier = po.GetArg(2),
        nnet_wxfilename = po.GetArg(3);

    TransitionModel trans_model;
    AmNnet am_nnet;
    {
      bool binary;
      Input ki(nnet_rxfilename, &binary);
      trans_model.Read(ki.Stream(), binary);
      am_nnet.Read(ki.Stream(), binary);
    }

    std::vector<NnetTrainingExample> egs;

    // This block adds samples to "egs".
    SequentialNnetTrainingExampleReader example_reader(egs_rspecifier);
    for (; !example_reader.Done(); example_reader.Next())
      egs.push_back(example_reader.Value());
    KALDI_LOG << "Read " << egs.size() << " examples.";
    KALDI_ASSERT(!egs.empty());

    RescaleNnet(config, egs, &am_nnet.GetNnet());

    {
      Output ko(nnet_wxfilename, binary_write);
      trans_model.Write(ko.Stream(), binary_write);
      am_nnet.Write(ko.Stream(), binary_write);
    }
    KALDI_LOG << "Rescaled neural net and wrote it to " << nnet_wxfilename;
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
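As background for the usage message above: the "average derivative of the sigmoids" is the mean of s(x)(1 - s(x)) over the supplied data; it sits near 0.25 when units operate in their linear range and falls toward 0 as they saturate, which is the statistic the tool tries to match against "good" nets. A self-contained sketch of the quantity follows; MeanSigmoidDeriv is an illustrative helper, not a Kaldi function.

#include <cmath>
#include <cstdio>
#include <vector>

// Illustrative only: mean sigmoid derivative over a set of pre-activation
// values.  Values near 0.25 mean the units operate in their linear range;
// values near 0 mean they are saturated.
static double MeanSigmoidDeriv(const std::vector<double> &pre_activations) {
  if (pre_activations.empty()) return 0.0;
  double sum = 0.0;
  for (size_t i = 0; i < pre_activations.size(); i++) {
    double s = 1.0 / (1.0 + std::exp(-pre_activations[i]));
    sum += s * (1.0 - s);  // derivative of the sigmoid at this input
  }
  return sum / pre_activations.size();
}

int main() {
  std::vector<double> near_linear = {-0.5, 0.1, 0.3, -0.2};
  std::vector<double> saturated = {8.0, -9.0, 7.5, -10.0};
  std::printf("near-linear: %.3f  saturated: %.3f\n",
              MeanSigmoidDeriv(near_linear), MeanSigmoidDeriv(saturated));
  return 0;
}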
@@ -0,0 +1,72 @@
// nnet-cpubin/nnet-am-stats.cc

// Copyright 2012  Johns Hopkins University (author: Daniel Povey)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "hmm/transition-model.h"
#include "nnet-cpu/nnet-stats.h"
#include "nnet-cpu/am-nnet.h"
#include "tree/context-dep.h"

int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    const char *usage =
        "Print some statistics about the average derivatives of the sigmoid\n"
        "layers of the neural net, as stored in the net.\n"
        "\n"
        "Usage: nnet-am-stats [options] <nnet-in>\n"
        "e.g.:\n"
        " nnet-am-stats 1.mdl\n";

    NnetStatsConfig config;

    ParseOptions po(usage);
    config.Register(&po);

    po.Read(argc, argv);

    if (po.NumArgs() != 1) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_rxfilename = po.GetArg(1);

    TransitionModel trans_model;
    AmNnet am_nnet;
    {
      bool binary;
      Input ki(nnet_rxfilename, &binary);
      trans_model.Read(ki.Stream(), binary);
      am_nnet.Read(ki.Stream(), binary);
    }

    std::vector<NnetStats> stats;
    GetNnetStats(config, am_nnet.GetNnet(), &stats);
    KALDI_ASSERT(!stats.empty());
    for (size_t i = 0; i < stats.size(); i++)
      stats[i].PrintStats(std::cout);
    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
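The loop above prints one NnetStats object per layer; the actual fields live in nnet-cpu/nnet-stats.h, which this diff does not show. As a rough mental model only, a per-layer accumulator of sigmoid-derivative statistics might look like this hypothetical sketch:

#include <cmath>
#include <ostream>

// Illustrative only -- not the real kaldi::NnetStats layout.
struct SigmoidDerivStats {
  double count, deriv_sum, deriv_sumsq;
  SigmoidDerivStats() : count(0.0), deriv_sum(0.0), deriv_sumsq(0.0) {}

  void Accumulate(double deriv) {  // deriv = s(x) * (1 - s(x)) for one frame
    count += 1.0;
    deriv_sum += deriv;
    deriv_sumsq += deriv * deriv;
  }
  void PrintStats(std::ostream &os) const {
    if (count == 0.0) { os << "no data\n"; return; }
    double mean = deriv_sum / count;
    double var = deriv_sumsq / count - mean * mean;
    os << "mean-deriv=" << mean << " stddev=" << std::sqrt(var) << '\n';
  }
};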
@@ -85,7 +85,7 @@ int main(int argc, char *argv[]) {
     KALDI_LOG << "Selected a subset of " << egs.size() << " out of " << num_read
               << " neural-network training examples ";

-    return (static_cast<size_t>(n) == egs.size() ? 0 : 1);
+    return (num_read != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what() << '\n';
     return -1;
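The return-statement change above fixes the exit-status semantics: the tool now succeeds whenever it read at least one example, instead of only when the selected subset size equalled n (apparently the requested count), which made it fail spuriously when the archive held fewer examples than requested.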
@@ -88,8 +88,8 @@ void CacheTgtMat::AddData(const CuMatrix<BaseFloat> &features, const CuMatrix<Ba
       features_.CopyRowsFromMat(leftover, features_leftover_, 0, 0);
       targets_.CopyRowsFromMat(leftover, targets_leftover_, 0, 0);

-      features_leftover_.Destroy();
-      targets_leftover_.Destroy();
+      features_leftover_.Resize(0, 0);
+      targets_leftover_.Resize(0, 0);
       filling_pos_ += leftover;
     }
   }
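This hunk and the next replace Destroy() with Resize(0, 0): resizing to zero rows and columns releases the storage, so a separate destroy method becomes redundant. A minimal sketch of the idiom, assuming Resize frees the old buffer the way Kaldi's matrix classes do (SimpleMatrix is illustrative, not Kaldi code):

#include <cstdlib>

template <typename Real>
class SimpleMatrix {
 public:
  SimpleMatrix() : data_(NULL), rows_(0), cols_(0) {}
  ~SimpleMatrix() { Resize(0, 0); }  // the destructor reuses the same path
  // Resize frees whatever was held; Resize(0, 0) therefore acts as Destroy().
  void Resize(int rows, int cols) {
    std::free(data_);
    data_ = (rows > 0 && cols > 0)
        ? static_cast<Real*>(std::calloc(rows * cols, sizeof(Real)))
        : NULL;
    rows_ = rows;
    cols_ = cols;
  }
 private:
  SimpleMatrix(const SimpleMatrix&);             // non-copyable in this sketch
  SimpleMatrix &operator=(const SimpleMatrix&);
  Real *data_;
  int rows_, cols_;
};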
@@ -91,7 +91,7 @@ void Cache::AddData(const CuMatrix<BaseFloat> &features, const std::vector<int32
                 targets_leftover_.begin()+leftover,
                 targets_.begin());

-      features_leftover_.Destroy();
+      features_leftover_.Resize(0, 0);
       targets_leftover_.resize(0);
       filling_pos_ += leftover;
     }
@@ -32,7 +32,7 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t
   diff->Resize(net_out.NumRows(), net_out.NumCols());

   // compute derivative wrt. activations of last layer of neurons
-  diff->CopyFromMat(net_out);
+  *diff = net_out;
   diff->AddMat(-1.0, target);

   // we'll not produce per-frame classification accuracy for soft labels
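Several hunks in this commit collapse a Resize followed by CopyFromMat into plain assignment, so operator= evidently sizes the destination to match the source before copying. A small sketch of those semantics with a hypothetical Mat class (not the real CuMatrix):

#include <vector>

struct Mat {
  int rows, cols;
  std::vector<float> data;

  Mat() : rows(0), cols(0) {}
  void Resize(int r, int c) { rows = r; cols = c; data.assign(r * c, 0.0f); }
  // Assignment does what the old call sites spelled out by hand:
  // size the destination, then copy the payload.
  Mat &operator=(const Mat &other) {
    if (this != &other) {
      Resize(other.rows, other.cols);
      data = other.data;
    }
    return *this;
  }
  Mat(const Mat &other) : rows(0), cols(0) { *this = other; }
};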
@@ -40,7 +40,8 @@ void Xent::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &t

   // :TODO: reimplement when needed
   // compute xentropy (ON CPU)
-  Matrix<BaseFloat> target_host, net_out_host;
+  Matrix<BaseFloat> target_host(target.NumRows(), target.NumCols(), kUndefined),
+      net_out_host(net_out.NumRows(), net_out.NumCols(), kUndefined);
   target.CopyToMat(&target_host);
   net_out.CopyToMat(&net_out_host);
   BaseFloat val;
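The complementary change shows up here: CopyToMat apparently no longer resizes its destination, so the host matrices are constructed at the right dimensions up front, and kUndefined skips the zero-fill because every element is overwritten by the copy that follows. A sketch of why that flag is the cheap choice (illustrative types, not the real Matrix interface):

// kSetZero/kUndefined mirror the Kaldi resize flags in spirit only.
enum ResizeType { kSetZero, kUndefined };

struct HostMat {
  float *data;
  int rows, cols;
  HostMat(int r, int c, ResizeType t) : rows(r), cols(c) {
    data = new float[r * c];
    if (t == kSetZero)                 // skipped for kUndefined: the caller
      for (int i = 0; i < r * c; i++)  // is about to overwrite every element
        data[i] = 0.0f;                // anyway, so zeroing is wasted work
  }
  ~HostMat() { delete[] data; }
 private:
  HostMat(const HostMat&);
  HostMat &operator=(const HostMat&);
};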
@@ -69,7 +70,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
   // get the xentropy and global error
   target_device_.CopyFromVec(target);
   if(&net_out != diff) { //<allow no-copy speedup
-    diff->CopyFromMat(net_out);
+    *diff = net_out;
   }
   cu::DiffXent(target_device_, diff, &log_post_tgt_);
   //
@@ -84,6 +85,7 @@ void Xent::EvalVec(const CuMatrix<BaseFloat> &net_out, const std::vector<int32>
   // log(sum_row(net_out.*target_mat)))
   // they now are stored in vector log_post_tgt_
   //
+  log_post_tgt_host_.Resize(log_post_tgt_.Dim());
   log_post_tgt_.CopyToVec(&log_post_tgt_host_);
   loss_ -= log_post_tgt_host_.Sum();
@@ -110,9 +112,10 @@ std::string Xent::Report() {
 void Mse::Eval(const CuMatrix<BaseFloat> &net_out, const CuMatrix<BaseFloat> &target, CuMatrix<BaseFloat> *diff) {
   KALDI_ASSERT(net_out.NumCols() == target.NumCols());
   KALDI_ASSERT(net_out.NumRows() == target.NumRows());
-  diff->Resize(net_out.NumRows(), net_out.NumCols());

   // compute derivative w.r.t. neural network outputs
+  diff->Resize(net_out.NumRows(), net_out.NumCols());
   diff->CopyFromMat(net_out);
   diff->AddMat(-1.0, target);
@@ -147,9 +150,9 @@ std::string Mse::Report() {
 void MseProgress::Eval(const CuMatrix<BaseFloat>& net_out, const CuMatrix<BaseFloat>& target, CuMatrix<BaseFloat>* diff) {
   KALDI_ASSERT(net_out.NumCols() == target.NumCols());
   KALDI_ASSERT(net_out.NumRows() == target.NumRows());
-  diff->Resize(net_out.NumRows(),net_out.NumCols());

   //compute derivative w.r.t. neural network outputs
+  diff->Resize(net_out.NumRows(),net_out.NumCols());
   diff->CopyFromMat(net_out);
   diff->AddMat(-1.0,target);
@@ -94,8 +94,7 @@ int main(int argc, char *argv[]) {
       //the pointer will be given to the nnet, so we don't need to call delete

       //convert Vector to CuVector
-      CuVector<BaseFloat> cu_shift;
-      cu_shift.CopyFromVec(shift);
+      CuVector<BaseFloat> cu_shift(shift);

       //set the weights
       shift_component->SetShiftVec(cu_shift);
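The same simplification for vectors: the declare-then-CopyFromVec pair becomes a single converting construction. Roughly the constructor shape being relied on, with stand-in types rather than the real Vector/CuVector:

#include <vector>

struct HostVec { std::vector<float> data; };

// Hypothetical device vector: the converting constructor does the sizing
// and the upload in one step, like CuVector<BaseFloat> cu_shift(shift).
struct DeviceVec {
  std::vector<float> data;  // stands in for device memory
  explicit DeviceVec(const HostVec &v) : data(v.data) {}
};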
@@ -110,8 +109,7 @@ int main(int argc, char *argv[]) {
       //the pointer will be given to the nnet, so we don't need to call delete

       //convert Vector to CuVector
-      CuVector<BaseFloat> cu_scale;
-      cu_scale.CopyFromVec(scale);
+      CuVector<BaseFloat> cu_scale(scale);

       //set the weights
       scale_component->SetScaleVec(cu_scale);
@@ -129,6 +129,7 @@ int main(int argc, char *argv[]) {
     }

     // push priors to GPU
+    priors.Resize(tmp_priors.Dim());
     priors.CopyFromVec(tmp_priors);
   }
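The added Resize is the same interface tightening seen in the other hunks: CopyFromVec evidently no longer sizes its destination, so the caller must resize the device vector to the source dimension before copying.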
@@ -150,7 +151,7 @@ int main(int argc, char *argv[]) {
         }
       }
       // push it to gpu
-      feats.CopyFromMat(mat);
+      feats = mat;
       // fwd-pass
       nnet_transf.Feedforward(feats, &feats_transf);
       nnet.Feedforward(feats_transf, &nnet_out);
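The change from CopyFromMat(mat) to plain assignment moves the host-to-device upload into operator=; the `// push it to gpu` comment still holds because the copy to the device still happens, just behind the assignment syntax, which presumably also handles the sizing that CopyFromMat used to do.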
@@ -170,6 +171,7 @@ int main(int argc, char *argv[]) {
       }

       //download from GPU
+      nnet_out_host.Resize(nnet_out.NumRows(), nnet_out.NumCols());
       nnet_out.CopyToMat(&nnet_out_host);
       //check for NaN/inf
       for(int32 r=0; r<nnet_out_host.NumRows(); r++) {
@@ -223,12 +223,13 @@ int main(int argc, char *argv[]) {

       //3) propagate the feature to get the log-posteriors (nnet w/o softmax)
       // push features to GPU
-      feats.CopyFromMat(mat);
+      feats = mat;
       // possibly apply transform
       nnet_transf.Feedforward(feats, &feats_transf);
       // propagate through the nnet (assuming w/o softmax)
       nnet.Propagate(feats_transf, &nnet_out);
-      // pop it back to the HOST
+      // transfer it back to the host
+      nnet_out_h.Resize(nnet_out.NumRows(), nnet_out.NumCols(), kUndefined);
       nnet_out.CopyToMat(&nnet_out_h);
       // TODO: possibly divide by priors
@@ -277,7 +278,7 @@ int main(int argc, char *argv[]) {

       //7) backpropagate through the nnet
       if (!crossvalidate) {
-        nnet_diff.CopyFromMat(nnet_diff_h);
+        nnet_diff = nnet_diff_h;
         nnet.Backpropagate(nnet_diff, NULL);
       }
@@ -139,8 +139,8 @@ int main(int argc, char *argv[]) {
         continue;
       }
       // push features/targets to GPU
-      feats.CopyFromMat(fea_mat);
-      targets.CopyFromMat(tgt_mat);
+      feats = fea_mat;
+      targets = tgt_mat;
       // possibly apply feature transform
       nnet_transf.Feedforward(feats, &feats_transf);
       // add to cache
@@ -142,7 +142,7 @@ int main(int argc, char *argv[]) {
        continue;
      }
      // push features to GPU
-      feats.CopyFromMat(mat);
+      feats = mat;
      // possibly apply transform
      nnet_transf.Feedforward(feats, &feats_transf);
      // add to cache
@@ -138,6 +138,7 @@ int main(int argc, char *argv[]) {
         num_other_error++;
       } else { //dimension OK
         // push features to GPU
+        feats.Resize(mat.NumRows(), mat.NumCols(), kUndefined);
         feats.CopyFromMat(mat);
         // possibly apply transform
         nnet_transf.Feedforward(feats, &feats_transf);
@@ -132,7 +132,8 @@ int main(int argc, char *argv[]) {
       rbm_transf.Feedforward(feats, &feats_transf);
       // subsample the feats to get faster epochs
       if(drop_data > 0.0) {
-        Matrix<BaseFloat> mat2;
+        Matrix<BaseFloat> mat2(feats_transf.NumRows(), feats_transf.NumCols(),
+                               kUndefined);
         feats_transf.CopyToMat(&mat2);
         for(int32 r=mat2.NumRows()-1; r >= 0; r--) {
           if(RandUniform() < drop_data) {
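Besides pre-sizing mat2 before the download, note the surrounding frame-dropping loop: it walks the rows from the end backwards, presumably so that removing row r never disturbs the indices of rows not yet examined. A self-contained illustration of that trick (DropRows is hypothetical, not the Kaldi API):

#include <cstdlib>
#include <vector>

// Drop each row independently with probability drop_prob.  Iterating from
// the last row down means an erase never shifts the rows still to visit.
static void DropRows(std::vector<std::vector<float> > *rows, float drop_prob) {
  for (int r = static_cast<int>(rows->size()) - 1; r >= 0; r--) {
    float u = static_cast<float>(std::rand()) / RAND_MAX;  // stand-in for RandUniform()
    if (u < drop_prob)
      rows->erase(rows->begin() + r);
  }
}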
@@ -62,8 +62,7 @@ int main(int argc, char *argv[]) {
       //the pointer will be given to the nnet, so we don't need to call delete

       //convert Matrix to CuMatrix
-      CuMatrix<BaseFloat> cu_transform;
-      cu_transform.CopyFromMat(transform);
+      CuMatrix<BaseFloat> cu_transform(transform);

       //set the weights
       layer->SetLinearity(cu_transform);