Mirror of https://github.com/mozilla/kaldi.git
Added KL-HMM
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3241 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Parent: 3bf9adc11e
Commit: cdd493df56
cmd.sh:
@@ -9,8 +9,9 @@
 export train_cmd="queue.pl -l arch=*64"
 export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
 export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
+export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
 export cuda_cmd="queue.pl -l gpu=1"
 #export cuda_cmd="..."


 #b) BUT cluster options
local/run_kl_hmm.sh (new file):
@@ -0,0 +1,24 @@
#!/bin/bash

# Copyright 2013 Idiap Research Institute (Author: David Imseng)
# Apache 2.0

. cmd.sh

states=20000
dir=exp/tri4b_pretrain-dbn_dnn/

steps/kl_hmm/build_tree.sh --cmd "$big_memory_cmd" --thresh -1 --nnet_dir exp/tri4b_pretrain-dbn_dnn/ \
  ${states} data-fmllr-tri4b/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b-${states} || exit 1;

utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri4b-${states} exp/tri4b-${states}/graph_bd_tgpr || exit 1;

steps/kl_hmm/train_kl_hmm.sh --nj 30 --cmd "$big_memory_cmd" --model exp/tri4b-${states}/final.mdl \
  data-fmllr-tri4b/train_si284 exp/tri4b-${states} $dir/kl-hmm-${states}

steps/kl_hmm/decode_kl_hmm.sh --nj 10 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \
  --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_dev93 $dir/decode_dev93_kl-hmm-bd-${states}_tst

steps/kl_hmm/decode_kl_hmm.sh --nj 8 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \
  --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_eval92 $dir/decode_eval92_kl-hmm-bd-${states}_tst
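Once both decodes finish, WERs can be summarized with the usual loop from run.sh; a minimal sketch, assuming only the decode directory names created by the script above:

for x in exp/tri4b_pretrain-dbn_dnn/decode_*kl-hmm*; do
  [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh
done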
run.sh:
@@ -324,6 +324,8 @@ local/run_sgmm2.sh
 # You probably want to run the hybrid recipe as it is complementary:
 local/run_dnn.sh

+# You probably want to try KL-HMM
+#local/run_kl_hmm.sh

 # Getting results [see RESULTS file]
 # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
steps/kl_hmm/build_tree.sh (new file):
@@ -0,0 +1,152 @@
#!/bin/bash

# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey),
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Begin configuration.
stage=-4        # This allows restarting partway through, if something went wrong.
config=
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
num_iters=35    # Number of iterations of training
max_iter_inc=25 # Last iter to increase #Gauss on.
beam=10
retry_beam=40
boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
power=0.25        # Exponent for number of Gaussians according to occurrence counts
cluster_thresh=-1 # Controls the final bottom-up clustering of leaves in build-tree
thresh=20
use_gpu="no"
nnet_dir=
context_opts=   # e.g. set this to "--context-width 5 --central-position 2" for quinphone.
tmpdir=
no_softmax=true
# End configuration.

echo "$0 $@"  # Print the command line for logging

[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# != 5 ]; then
  echo "Usage: steps/kl_hmm/build_tree.sh <num-leaves> <data-dir> <lang-dir> <alignment-dir> <exp-dir>"
  echo "e.g.: steps/kl_hmm/build_tree.sh 2000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
  echo "main options (for others, see top of script file)"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --config <config-file>                           # config containing options"
  echo "  --stage <stage>                                  # stage to do partial re-run from."
  echo "  --thresh"
  echo "  --cluster_thresh"
  echo "  --nnet_dir"
  echo "  --context_opts"
  echo "  --tmpdir"
  echo "  --no-softmax"
  exit 1;
fi

numleaves=$1
data=$2
lang=$3
alidir=$4
dir=$5

for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

numgauss=$numleaves
# Note: $totgauss is never set in this script and no GMM training happens here,
# so the per-iter #Gauss increment is unused; kept (commented) from the original recipe:
#incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss
ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
nj=`cat $alidir/num_jobs` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs

sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

nnet=${nnet_dir}/final.nnet
feature_transform=${nnet_dir}/final.feature_transform

featsdim="ark:copy-feats scp:$data/feats.scp ark:- |"
nnetfeats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f ${nnet_dir}/norm_vars ]; then
  norm_vars=$(cat ${nnet_dir}/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  nnetfeats="$nnetfeats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
  featsdim="$featsdim apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f ${nnet_dir}/delta_order ]; then
  delta_order=$(cat ${nnet_dir}/delta_order)
  nnetfeats="$nnetfeats add-deltas --delta-order=$delta_order ark:- ark:- |"
  featsdim="$featsdim add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

feats="ark,s,cs:nnet-forward "
if [[ ! -z $feature_transform ]]; then
  feats=${feats}"--feature-transform=$feature_transform "
fi
feats=${feats}"--no-softmax=$no_softmax --use-gpu=$use_gpu $nnet \"$nnetfeats\" ark:- |"

feat_dim=$(feat-to-dim --print-args=false "$featsdim" -)
rm $dir/.error 2>/dev/null

if [[ ! -z $tmpdir ]]; then
  mkdir -p $tmpdir
else
  tmpdir=$dir
fi

if [ $stage -le -3 ]; then
  echo "$0: accumulating tree stats"
  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
    acc-tree-stats $context_opts --var-floor=1.0 --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
    "ark:gunzip -c $alidir/ali.JOB.gz|" $tmpdir/JOB.treeacc || exit 1;
  sum-tree-stats $dir/treeacc $tmpdir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
  rm $tmpdir/*.treeacc
fi

if [ $stage -le -2 ]; then
  echo "$0: getting questions for tree-building, via clustering"
  # preparing questions, roots file...
  cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
  cat $lang/phones/extra_questions.int >> $dir/questions.int
  compile-questions $context_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;

  echo "$0: building the tree"
  # $cmd $dir/log/build_tree.log \
  build-tree $context_opts --verbose=1 --max-leaves=$numleaves --cluster-thresh=$cluster_thresh --thresh=$thresh $dir/treeacc $lang/phones/roots.int \
    $dir/questions.qst $lang/topo $dir/tree &> $dir/log/build_tree.log || exit 1;

  gmm-init-model-flat --dim=$feat_dim $dir/tree $lang/topo $dir/1.mdl

  rm $dir/treeacc
fi

if [ $stage -le -1 ]; then
  # Convert the alignments.
  echo "$0: converting alignments from $alidir to use current tree"
  $cmd JOB=1:$nj $dir/log/convert.JOB.log \
    convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
    "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi

if [ $stage -le 0 ]; then
  echo "$0: compiling graphs of transcripts"
  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
    compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
    "ark:utils/sym2int.pl -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \
    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi

rm $dir/final.mdl 2>/dev/null
ln -s 1.mdl $dir/final.mdl

# Summarize warning messages...
utils/summarize_warnings.pl $dir/log

echo "$0: Done building the tree in $dir"
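A hedged sketch of what the nested $feats pipeline expands to for JOB=1 when both optional blocks (CMVN and deltas) are active; single quotes stand in for the escaped inner quotes, and all variables are as defined in the script:

acc-tree-stats --var-floor=1.0 --ci-phones=$ciphonelist $alidir/final.mdl \
  "ark,s,cs:nnet-forward --feature-transform=$nnet_dir/final.feature_transform --no-softmax=true --use-gpu=no $nnet_dir/final.nnet 'ark,s,cs:copy-feats scp:$sdata/1/feats.scp ark:- | apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/1/utt2spk scp:$sdata/1/cmvn.scp ark:- ark:- | add-deltas --delta-order=$delta_order ark:- ark:- |' ark:- |" \
  "ark:gunzip -c $alidir/ali.1.gz|" $tmpdir/1.treeacc

So the tree stats are accumulated on network outputs (pre-softmax, since --no-softmax=true), while $featsdim measures the dimension of the plain feature pipeline without nnet-forward.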
steps/kl_hmm/decode_kl_hmm.sh (new file):
@@ -0,0 +1,121 @@
#!/bin/bash

# Copyright 2012-2013 Karel Vesely,
#                     Daniel Povey,
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Begin configuration section.
nnet=               # Optionally pre-select network to use for getting state-likelihoods
feature_transform=  # Optionally pre-select feature transform (in front of nnet)
model=              # Optionally pre-select transition model

stage=0             # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000     # maximum number of active tokens
max_mem=50000000    # limit the fst size to 50MB (larger fsts are minimized)
beam=13.0           # GMM:13.0
latbeam=8.0         # GMM:6.0
acwt=0.1            # GMM:0.0833; note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 1 --max-lmwt 12"
skip_scoring=false
use_gpu="no"        # disable gpu
parallel_opts=""
# End configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
  echo "    where the DNN + transition model is."
  echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
  echo ""
  echo "This script works on plain or modified features (CMN, delta+delta-delta),"
  echo "which are then sent through the feature-transform. It works out what type"
  echo "of features you used from the content of srcdir."
  echo ""
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --nj <nj>                                        # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo "  --nnet <nnet>                                    # which nnet to use (opt.)"
  echo "  --feature-transform <nnet>                       # select transform in front of nnet (opt.)"
  echo "  --model <model>                                  # which transition model to use (opt.)"
  echo ""
  echo "  --acwt <float>                                   # select acoustic scale for decoding"
  echo "  --scoring-opts <opts>                            # options forwarded to local/score.sh"
  exit 1;
fi

graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from the decoding directory.
sdata=$data/split$nj;

mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
[ ! -f "$nnet" ] && echo "Error: nnet '$nnet' does not exist!" && exit 1;

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet $model $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Run the decoding in the queue
if [ $stage -le 0 ]; then
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi

# Run the scoring
if ! $skip_scoring ; then
  [ ! -x local/score.sh ] && \
    echo "Not scoring because local/score.sh does not exist or is not executable." && exit 1;
  local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1;
fi

exit 0;
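For orientation, a hedged sketch of what one queued job runs (JOB=1, default options, CMVN and deltas disabled; all paths come from the variables above):

nnet-forward --feature-transform=$srcdir/final.feature_transform --use-gpu=no \
  $srcdir/final.nnet "ark,s,cs:copy-feats scp:$sdata/1/feats.scp ark:- |" ark:- | \
latgen-faster-mapped --max-active=7000 --max-mem=50000000 --beam=13.0 --lattice-beam=8.0 \
  --acoustic-scale=0.1 --allow-partial=true --word-symbol-table=$graphdir/words.txt \
  $srcdir/final.mdl $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.1.gz"

Because final.nnet here ends in the KlHmm component appended by train_kl_hmm.sh, the "posteriors" fed to latgen-faster-mapped are negative KL-divergences rather than scaled log-likelihoods, which is why this script can reuse the standard lattice generator unchanged.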
steps/kl_hmm/train_kl_hmm.sh (new file):
@@ -0,0 +1,121 @@
#!/bin/bash

# Copyright 2012-2013 Karel Vesely,
#                     Daniel Povey,
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Begin configuration section.
nnet=               # Optionally pre-select network to use for getting state-likelihoods
feature_transform=  # Optionally pre-select feature transform (in front of nnet)
model=              # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors

stage=0             # stage=1 skips the statistics accumulation
nj=32
cmd=$decode_cmd
max_active=7000     # maximum number of active tokens
max_mem=50000000    # limit the fst size to 50MB (larger fsts are minimized)
use_gpu="no"        # disable gpu
parallel_opts=""
tmpdir=
# End configuration section.

echo "$0 $@"  # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <data-dir> <align-dir> <kl-hmm-dir>"
  echo "... where <kl-hmm-dir> is assumed to be a sub-directory of the directory"
  echo "    where the DNN + transition model is."
  echo "e.g.: $0 data/train exp/tri4b_ali exp/dnn1/kl-hmm-train"
  echo ""
  echo "This script works on plain or modified features (CMN, delta+delta-delta),"
  echo "which are then sent through the feature-transform. It works out what type"
  echo "of features you used from the content of srcdir."
  echo ""
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --nj <nj>                                        # number of parallel jobs"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo "  --nnet <nnet>                                    # which nnet to use (opt.)"
  echo "  --feature-transform <nnet>                       # select transform in front of nnet (opt.)"
  echo "  --model <model>                                  # which transition model to use (opt.)"
  echo "  --tmpdir <dir>                                   # temp directory for the statistics, because they can get big (opt.)"
  exit 1;
fi

data=$1
alidir=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from the KL-HMM directory.
sdata=$data/split$nj;

mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
[ ! -f "$nnet" ] && echo "Error: nnet '$nnet' does not exist!" && exit 1;

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet $model; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

ali="ark:gunzip -c $alidir/ali.*.gz |"

if [[ ! -z $tmpdir ]]; then
  mkdir -p $tmpdir
else
  tmpdir=$dir
fi

nkl_states=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
if [ $stage -le 0 ]; then
  $cmd $parallel_opts JOB=1:$nj $dir/log/acc-stats.JOB.log \
    nnet-kl-hmm-acc --nkl-states=${nkl_states} "ark:nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet \"$feats\" ark:- |" "ark:ali-to-pdf --print-args=false $alidir/final.mdl \"$ali\" ark:- |" $tmpdir/kl-hmm-stats.JOB
fi

sum-matrices $dir/accumulated-kl-hmm-stats $tmpdir/kl-hmm-stats.*

rm $tmpdir/kl-hmm-stats.*

nnet-kl-hmm-mat-to-component $dir/kl-hmm.nnet $dir/accumulated-kl-hmm-stats

nnet-concat $dir/../final.nnet $dir/kl-hmm.nnet $dir/final.nnet

exit 0;
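The statistics summed above have a simple interpretation: for every tied state, the accumulator holds the sum of network posterior vectors over the frames aligned to that state. A sketch of the resulting KL-HMM state distribution (the row normalization itself happens later, inside the KlHmm component):

$$\hat{Q}_{s,d} \;=\; \frac{\sum_{t:\,a_t=s} P_{t,d}}{\sum_{d'} \sum_{t:\,a_t=s} P_{t,d'}}$$

where $P_t$ is the posterior vector that nnet-forward emits at frame $t$ and $a_t$ is the pdf-level alignment produced by ali-to-pdf.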
nnet/nnet-component.cc:
@@ -21,9 +21,11 @@
 #include "nnet/nnet-nnet.h"
 #include "nnet/nnet-activation.h"
+#include "nnet/nnet-kl-hmm.h"
 #include "nnet/nnet-affine-transform.h"
 #include "nnet/nnet-rbm.h"
 #include "nnet/nnet-various.h"
+#include "nnet/nnet-kl-hmm.h"  // duplicate include; harmless thanks to the header's include guard

 namespace kaldi {
 namespace nnet1 {
@@ -40,6 +42,7 @@ const struct Component::key_value Component::kMarkerMap[] = {
   { Component::kCopy,"<copy>" },
   { Component::kAddShift,"<addshift>" },
   { Component::kRescale,"<rescale>" },
+  { Component::kKlHmm,"<klhmm>" }
 };


@@ -119,6 +122,9 @@ Component* Component::Read(std::istream &is, bool binary) {
     case Component::kRescale :
       p_comp = new Rescale(dim_in, dim_out);
       break;
+    case Component::kKlHmm :
+      p_comp = new KlHmm(dim_in, dim_out);
+      break;
     case Component::kUnknown :
     default :
       KALDI_ERR << "Missing type: " << token;
nnet/nnet-component.h:
@@ -64,7 +64,8 @@ class Component {
     kTranspose,
     kBlockLinearity,
     kAddShift,
-    kRescale
+    kRescale,
+    kKlHmm
   } ComponentType;
   /// A pair of type and marker
   struct key_value {
nnet/nnet-kl-hmm.h (new file):
@@ -0,0 +1,150 @@
// nnet/nnet-kl-hmm.h

// Copyright 2013  Idiap Research Institute (Author: David Imseng)
//                 Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
//                 Brno University of Technology (Author: Karel Vesely)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_NNET_NNET_KL_HMM_H_
#define KALDI_NNET_NNET_KL_HMM_H_

#include "nnet/nnet-component.h"
#include "cudamatrix/cu-math.h"
#include "cudamatrix/cu-rand.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"

namespace kaldi {
namespace nnet1 {

class KlHmm : public Component {
 public:
  KlHmm(int32 dim_in, int32 dim_out)
    : Component(dim_in, dim_out), kl_stats_(dim_out, dim_in, kSetZero)
  { }
  ~KlHmm()
  { }

  Component* Copy() const { return new KlHmm(*this); }
  ComponentType GetType() const {
    return kKlHmm;
  }

  void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
    if (kl_inv_q_.NumRows() == 0) {
      // Copy the CuMatrix to a host Matrix
      Matrix<BaseFloat> in_tmp(in.NumRows(), in.NumCols());
      in.CopyToMat(&in_tmp);
      // Check that the inputs are posteriors (just check the first row)
      BaseFloat post_sum = in_tmp.Row(0).Sum();
      KALDI_ASSERT(ApproxEqual(post_sum, 1.0));
      // Get a temporary matrix of the stats
      Matrix<BaseFloat> kl_stats_tmp(kl_stats_);
      // Vector to hold the row sums (for normalization)
      Vector<BaseFloat> row_sum(kl_stats_.NumRows(), kSetZero);
      // Sum of the accumulated posteriors, for normalization
      row_sum.AddColSumMat(1, kl_stats_tmp);
      // Apply a floor to make sure there is no zero
      row_sum.ApplyFloor(1e-20);
      // Invert the sums (to normalize)
      row_sum.InvertElements();
      // Normalize the statistics: each row becomes a state distribution Q
      kl_stats_tmp.MulRowsVec(row_sum);
      // Apply a floor before inversion and logarithm
      kl_stats_tmp.ApplyFloor(1e-20);
      // Apply inversion
      kl_stats_tmp.InvertElements();
      // Apply logarithm
      kl_stats_tmp.ApplyLog();
      // Inverted and logged values
      kl_inv_q_.Resize(kl_stats_.NumRows(), kl_stats_.NumCols());
      // Now holds log(1/Q)
      kl_inv_q_.CopyFromMat(kl_stats_tmp);
    }
    // Take the logarithm of the features for the entropy calculation:
    // copy the CuMatrix to a host Matrix
    Matrix<BaseFloat> in_log_tmp(in.NumRows(), in.NumCols());
    in.CopyToMat(&in_log_tmp);
    // Flooring and log
    in_log_tmp.ApplyFloor(1e-20);
    in_log_tmp.ApplyLog();
    CuMatrix<BaseFloat> log_in(in.NumRows(), in.NumCols());
    log_in.CopyFromMat(in_log_tmp);
    // P * logP
    CuMatrix<BaseFloat> tmp_entropy(in);
    tmp_entropy.MulElements(log_in);
    // Get the entropy term (sum P*logP)
    CuVector<BaseFloat> in_entropy(in.NumRows(), kSetZero);
    in_entropy.AddColSumMat(1, tmp_entropy);
    // sum P*log(1/Q)
    out->AddMatMat(1, in, kNoTrans, kl_inv_q_, kTrans, 0);
    // (sum P*logP) + (sum P*log(1/Q))
    out->AddVecToCols(1, in_entropy);
    // Return the negative KL-divergence
    out->Scale(-1);
  }

  void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
                        const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
    KALDI_ERR << "Unimplemented";
  }

  /// Reads the component content
  void ReadData(std::istream &is, bool binary) {
    kl_stats_.Read(is, binary);
    KALDI_ASSERT(kl_stats_.NumRows() == output_dim_);
    KALDI_ASSERT(kl_stats_.NumCols() == input_dim_);
  }

  /// Writes the component content
  void WriteData(std::ostream &os, bool binary) const {
    kl_stats_.Write(os, binary);
  }

  /// Set the statistics matrix
  void SetStats(const Matrix<BaseFloat> &mat) {
    KALDI_ASSERT(mat.NumRows() == output_dim_);
    KALDI_ASSERT(mat.NumCols() == input_dim_);
    kl_stats_.Resize(mat.NumRows(), mat.NumCols());
    kl_stats_.CopyFromMat(mat);
  }

  /// Accumulate the statistics for KL-HMM parameter estimation
  void Accumulate(const Matrix<BaseFloat> &posteriors, const std::vector<int32> &alignment) {
    KALDI_ASSERT(posteriors.NumRows() == alignment.size());
    KALDI_ASSERT(posteriors.NumCols() == kl_stats_.NumCols());
    int32 num_frames = alignment.size();
    for (int32 i = 0; i < num_frames; i++) {
      // Convert the float posteriors to double (higher precision during accumulation)
      Vector<double> temp(posteriors.Row(i));
      // Sum all the posteriors associated with a particular state
      kl_stats_.Row(alignment[i]).AddVec(1.0, temp);
    }
  }

 private:
  Matrix<double> kl_stats_;
  CuMatrix<BaseFloat> kl_inv_q_;
};

}  // namespace nnet1
}  // namespace kaldi

#endif
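To make the forward pass above explicit: each row of kl_stats_ is normalized into a state distribution $Q_s$, and every frame posterior $P$ is scored against every state. Matching the in_entropy and AddMatMat terms in PropagateFnc:

$$\mathrm{out}(s) \;=\; -\,\mathrm{KL}(P \,\|\, Q_s) \;=\; -\Big(\sum_d P_d \log P_d \;+\; \sum_d P_d \log \frac{1}{Q_{s,d}}\Big)$$

These negative divergences are what the decoder consumes as pseudo log-likelihoods.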
nnetbin/Makefile:
@@ -9,12 +9,13 @@ LDLIBS += $(CUDA_LDLIBS)
 BINFILES = nnet-train-frmshuff \
 	nnet-train-xent-hardlab-perutt \
 	nnet-train-xent-hardlab-frmshuff \
-	nnet-train-mse-tgtmat-frmshuff \
-	nnet-train-mmi-sequential \
-	nnet-train-mpe-sequential \
-	rbm-train-cd1-frmshuff rbm-convert-to-nnet \
-	nnet-forward nnet-copy nnet-info nnet-concat \
-	transf-to-nnet cmvn-to-nnet
+	nnet-train-mse-tgtmat-frmshuff \
+	nnet-train-mmi-sequential \
+	nnet-train-mpe-sequential \
+	rbm-train-cd1-frmshuff rbm-convert-to-nnet \
+	nnet-forward nnet-copy nnet-info nnet-concat \
+	transf-to-nnet cmvn-to-nnet \
+	nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component

 OBJFILES =

@@ -24,6 +25,6 @@ TESTFILES =

 ADDLIBS = ../nnet/kaldi-nnet.a ../cudamatrix/kaldi-cudamatrix.a ../lat/kaldi-lat.a \
 	../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
-	../util/kaldi-util.a ../base/kaldi-base.a
+	../util/kaldi-util.a ../base/kaldi-base.a

 include ../makefiles/default_rules.mk
nnetbin/nnet-kl-hmm-acc.cc (new file):
@@ -0,0 +1,107 @@
// nnetbin/nnet-kl-hmm-acc.cc

// Copyright 2013  Idiap Research Institute (Author: David Imseng)
//                 Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
//                 Brno University of Technology (Author: Karel Vesely)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"

int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;
  try {
    const char *usage =
        "Collect the statistics for KL-HMM training.\n"
        "Usage: nnet-kl-hmm-acc [options] <feature-rspecifier> <alignments-rspecifier> <kl-hmm-accumulator>\n"
        "e.g.:\n"
        " nnet-kl-hmm-acc scp:train.scp ark:train.ali kl-hmm.acc\n";

    ParseOptions po(usage);

    bool binary = false;
    int32 n_kl_states = 0;
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("nkl-states", &n_kl_states, "Number of states in the KL-HMM");

    po.Read(argc, argv);

    if (po.NumArgs() != 3) {
      po.PrintUsage();
      exit(1);
    }

    std::string feature_rspecifier = po.GetArg(1),
        alignments_rspecifier = po.GetArg(2),
        kl_hmm_accumulator = po.GetArg(3);

    typedef kaldi::int32 int32;

    kaldi::int64 total_frames = 0;

    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
    RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier);
    // Guard against an empty archive before peeking at the dimension.
    if (feature_reader.Done())
      KALDI_ERR << "Empty feature archive " << feature_rspecifier;
    int32 posterior_dim = feature_reader.Value().NumCols();
    KlHmm kl_hmm(posterior_dim, n_kl_states);

    int32 num_done = 0, num_no_alignment = 0, num_other_error = 0;
    for (; !feature_reader.Done(); feature_reader.Next()) {
      std::string utt = feature_reader.Key();

      if (!alignments_reader.HasKey(utt)) {
        num_no_alignment++;
      } else {
        const Matrix<BaseFloat> &mat = feature_reader.Value();
        const std::vector<int32> &alignment = alignments_reader.Value(utt);

        if ((int32)alignment.size() != mat.NumRows()) {
          KALDI_WARN << "Alignment has wrong size " << (alignment.size()) << " vs. " << (mat.NumRows());
          num_other_error++;
          continue;
        }

        // Accumulate the statistics
        kl_hmm.Accumulate(mat, alignment);
        // log
        KALDI_VLOG(2) << "utt " << utt << ", frames " << alignment.size();
        total_frames += mat.NumRows();
        num_done++;
      }
    }
    KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment
              << " with no alignments, " << num_other_error
              << " with other errors.";

    // Store the accumulator
    {
      Output out(kl_hmm_accumulator, binary);
      kl_hmm.WriteData(out.Stream(), binary);
    }

    return 0;
  } catch(const std::exception &e) {
    std::cerr << e.what();
    return -1;
  }
}
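In the recipe this binary never sees plain archives; train_kl_hmm.sh feeds it two pipelines. A hedged single-job sketch (single quotes stand in for the escaped inner quotes; variables as in that script):

nnet-kl-hmm-acc --nkl-states=$nkl_states \
  "ark:nnet-forward --feature-transform=$feature_transform --use-gpu=no $nnet 'ark,s,cs:copy-feats scp:$sdata/1/feats.scp ark:- |' ark:- |" \
  "ark:ali-to-pdf --print-args=false $alidir/final.mdl 'ark:gunzip -c $alidir/ali.*.gz |' ark:- |" \
  $tmpdir/kl-hmm-stats.1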
nnetbin/nnet-kl-hmm-mat-to-component.cc (new file):
@@ -0,0 +1,77 @@
// nnetbin/nnet-kl-hmm-mat-to-component.cc

// Copyright 2013  Idiap Research Institute (Author: David Imseng)
//                 Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
//                 Brno University of Technology (Author: Karel Vesely)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"

int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;

  try {
    typedef kaldi::int32 int32;
    const char *usage =
        "Convert the accumulated matrix of KL-HMM training to a nnet component.\n"
        "Usage: nnet-kl-hmm-mat-to-component [options] <nnet-component> <matrix>\n";

    bool binary = true;
    int32 n_kl_states = 0;
    int32 n_posterior_dim = 0;
    ParseOptions po(usage);
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("nkl-states", &n_kl_states, "Number of states in the KL-HMM");
    po.Register("posterior-dim", &n_posterior_dim, "Dimensionality of posterior features");
    po.Read(argc, argv);

    if (po.NumArgs() != 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_component_filename = po.GetArg(1);
    std::string mat_filename = po.GetArg(2);

    Matrix<BaseFloat> kl_stats;
    {
      bool binary_read;
      Input ki(mat_filename, &binary_read);
      kl_stats.Read(ki.Stream(), binary_read);
    }

    KlHmm kl_hmm(kl_stats.NumCols(), kl_stats.NumRows());
    kl_hmm.SetStats(kl_stats);

    // Write out the component
    {
      Output ko(nnet_component_filename, binary);
      kl_hmm.Write(ko.Stream(), binary);
    }

    KALDI_LOG << "Written nnet component to " << nnet_component_filename;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}
nnetbin/nnet-kl-hmm-sum-accs.cc (new file):
@@ -0,0 +1,74 @@
// nnetbin/nnet-kl-hmm-sum-accs.cc

// Copyright 2013  Idiap Research Institute (Author: David Imseng)
//                 Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
//                 Brno University of Technology (Author: Karel Vesely)

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"

int main(int argc, char *argv[]) {
  using namespace kaldi;
  using namespace kaldi::nnet1;

  try {
    typedef kaldi::int32 int32;
    const char *usage =
        "Sum multiple accumulated stats files for KL-HMM training.\n"
        "Usage: nnet-kl-hmm-sum-accs [options] <nnet-component> <stats-in1> <stats-in2> ...\n";

    bool binary = true;
    int32 n_kl_states = 0;
    int32 n_posterior_dim = 0;
    ParseOptions po(usage);
    po.Register("binary", &binary, "Write output in binary mode");
    po.Register("nkl-states", &n_kl_states, "Number of states in the KL-HMM");
    po.Register("posterior-dim", &n_posterior_dim, "Dimensionality of posterior features");
    po.Read(argc, argv);

    if (po.NumArgs() < 2) {
      po.PrintUsage();
      exit(1);
    }

    std::string nnet_component_filename = po.GetArg(1);
    KlHmm kl_hmm(n_posterior_dim, n_kl_states);

    int num_accs = po.NumArgs() - 1;
    for (int i = 2, max = po.NumArgs(); i <= max; i++) {
      std::string stats_in_filename = po.GetArg(i);
      bool binary_read;
      Input ki(stats_in_filename, &binary_read);
      kl_hmm.AddStats(ki.Stream(), binary_read);
    }

    // Write out the accs
    {
      Output ko(nnet_component_filename, binary);
      kl_hmm.Write(ko.Stream(), binary);
    }

    KALDI_LOG << "Summed " << num_accs << " stats.";
    KALDI_LOG << "Written nnet component to " << nnet_component_filename;
  } catch(const std::exception &e) {
    std::cerr << e.what() << '\n';
    return -1;
  }
}