git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3241 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
David Imseng 2013-12-01 18:58:22 +00:00
Parent 3bf9adc11e
Commit cdd493df56
13 changed files with 846 additions and 9 deletions

View file

@@ -9,8 +9,9 @@
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
export cuda_cmd="queue.pl -l gpu=1"
#export cuda_cmd="..."
#b) BUT cluster options
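# For local runs without a grid engine, the commands above can instead point at
# run.pl (a minimal sketch; the queue memory flags then have no effect):
#export train_cmd=run.pl
#export decode_cmd=run.pl
#export big_memory_cmd=run.pl
#export cuda_cmd=run.pl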

View file

@@ -0,0 +1,24 @@
#!/bin/bash
# Copyright 2013 Idiap Research Institute (Author: David Imseng)
# Apache 2.0
. cmd.sh
states=20000
dir=exp/tri4b_pretrain-dbn_dnn/
steps/kl_hmm/build_tree.sh --cmd "$big_memory_cmd" --thresh -1 --nnet_dir exp/tri4b_pretrain-dbn_dnn/ \
${states} data-fmllr-tri4b/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b-${states} || exit 1;
utils/mkgraph.sh data/lang_test_bd_tgpr exp/tri4b-${states} exp/tri4b-${states}/graph_bd_tgpr || exit 1;
steps/kl_hmm/train_kl_hmm.sh --nj 30 --cmd "$big_memory_cmd" --model exp/tri4b-${states}/final.mdl data-fmllr-tri4b/train_si284 exp/tri4b-${states} $dir/kl-hmm-${states}
steps/kl_hmm/decode_kl_hmm.sh --nj 10 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \
--config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_dev93 $dir/decode_dev93_kl-hmm-bd-${states}_tst
steps/kl_hmm/decode_kl_hmm.sh --nj 8 --cmd "$big_memory_cmd" --acwt 0.1 --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \
--config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr/ data-fmllr-tri4b/test_eval92 $dir/decode_eval92_kl-hmm-bd-${states}_tst
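# Collect the KL-HMM WERs afterwards (same pattern as in run.sh):
# for x in $dir/decode_*kl-hmm*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done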

View file

@@ -324,6 +324,8 @@ local/run_sgmm2.sh
# You probably want to run the hybrid recipe as it is complementary:
local/run_dnn.sh
# You probably want to try KL-HMM
#local/run_kl_hmm.sh
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

View file

@@ -0,0 +1,152 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey),
# Idiap Research Institute (Author: David Imseng)
# Apache 2.0
# Begin configuration.
stage=-4 # This allows restarting partway through, when something went wrong.
config=
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
num_iters=35 # Number of iterations of training
max_iter_inc=25 # Last iter to increase #Gauss on.
beam=10
retry_beam=40
boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
power=0.25 # Exponent for number of Gaussians according to occurrence counts
cluster_thresh=-1 # controls the final bottom-up clustering of leaves in build-tree
thresh=20
use_gpu="no"
nnet_dir=
context_opts= # e.g. set this to "--context-width=5 --central-position=2" for quinphone.
tmpdir=
no_softmax=true
# End configuration.
echo "$0 $@" # Print the command line for logging
[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;
if [ $# != 5 ]; then
echo "Usage: steps/train_deltas.sh <num-leaves> <data-dir> <lang-dir> <alignment-dir> <exp-dir>"
echo "e.g.: steps/train_deltas.sh 2000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
echo "main options (for others, see top of script file)"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo " --config <config-file> # config containing options"
echo " --stage <stage> # stage to do partial re-run from."
echo " --thresh "
echo " --cluster_thresh "
echo " --nnet_dir "
echo " --context_opts "
echo " --tmpdir "
echo " --no-softmax "
exit 1;
fi
numleaves=$1
data=$2
lang=$3
alidir=$4
dir=$5
for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
[ ! -f $f ] && echo "train_deltas.sh: no such file $f" && exit 1;
done
numgauss=$numleaves
ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
nj=`cat $alidir/num_jobs` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs
sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
nnet=${nnet_dir}/final.nnet
feature_transform=${nnet_dir}/final.feature_transform
featsdim="ark:copy-feats scp:$data/feats.scp ark:- |"
nnetfeats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f ${nnet_dir}/norm_vars ]; then
norm_vars=$(cat ${nnet_dir}/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
nnetfeats="$nnetfeats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
featsdim="$featsdim apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f ${nnet_dir}/delta_order ]; then
delta_order=$(cat ${nnet_dir}/delta_order)
nnetfeats="$nnetfeats add-deltas --delta-order=$delta_order ark:- ark:- |"
featsdim="$featsdim add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
feats="ark,s,cs:nnet-forward "
if [[ ! -z $feature_transform ]]; then
feats=${feats}" --feature-transform=$feature_transform "
fi
feats=${feats}"--no-softmax=$no_softmax --use-gpu=$use_gpu $nnet \"$nnetfeats\" ark:- |"
feat_dim=$(feat-to-dim --print-args=false "$featsdim" -)
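# For illustration only (paths as in local/run_kl_hmm.sh, nj assumed 30), the
# resulting rspecifier expands to roughly:
#   ark,s,cs:nnet-forward --feature-transform=exp/tri4b_pretrain-dbn_dnn/final.feature_transform \
#     --no-softmax=true --use-gpu=no exp/tri4b_pretrain-dbn_dnn/final.nnet \
#     "ark,s,cs:copy-feats scp:data-fmllr-tri4b/train_si284/split30/JOB/feats.scp ark:- |" ark:- |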
rm $dir/.error 2>/dev/null
if [[ ! -z $tmpdir ]]; then
mkdir -p $tmpdir
else
tmpdir=$dir
fi
if [ $stage -le -3 ]; then
echo "$0: accumulating tree stats"
$cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
acc-tree-stats $context_opts --var-floor=1.0 --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
"ark:gunzip -c $alidir/ali.JOB.gz|" $tmpdir/JOB.treeacc || exit 1;
sum-tree-stats $dir/treeacc $tmpdir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
rm $tmpdir/*.treeacc
fi
if [ $stage -le -2 ]; then
echo "$0: getting questions for tree-building, via clustering"
# preparing questions, roots file...
cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
cat $lang/phones/extra_questions.int >> $dir/questions.int
compile-questions $context_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
echo "$0: building the tree"
build-tree $context_opts --verbose=1 --max-leaves=$numleaves --cluster-thresh=$cluster_thresh --thresh=$thresh $dir/treeacc $lang/phones/roots.int \
$dir/questions.qst $lang/topo $dir/tree &> $dir/log/build_tree.log || exit 1;
gmm-init-model-flat --dim=$feat_dim $dir/tree $lang/topo $dir/1.mdl
rm $dir/treeacc
fi
if [ $stage -le -1 ]; then
# Convert the alignments.
echo "$0: converting alignments from $alidir to use current tree"
$cmd JOB=1:$nj $dir/log/convert.JOB.log \
convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
"ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi
if [ $stage -le 0 ]; then
echo "$0: compiling graphs of transcripts"
$cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
"ark:utils/sym2int.pl -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \
"ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi
rm $dir/final.mdl 2>/dev/null
ln -s 1.mdl $dir/final.mdl
# Summarize warning messages...
utils/summarize_warnings.pl $dir/log
echo "$0: Done building the tree in $dir"

View file

@@ -0,0 +1,121 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely,
# Daniel Povey,
# Idiap Research Institute (Author: David Imseng)
# Apache 2.0
# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
stage=0 # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000 # maximum number of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
beam=13.0 # GMM:13.0
latbeam=8.0 # GMM:6.0
acwt=0.1 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 1 --max-lmwt 12"
skip_scoring=false
use_gpu="no" # disable gpu
parallel_opts=""
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
echo " where the DNN + transition model is."
echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
echo ""
echo "This script works on plain or modified features (CMN,delta+delta-delta),"
echo "which are then sent through feature-transform. It works out what type"
echo "of features you used from content of srcdir."
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo ""
echo " --nnet <nnet> # which nnet to use (opt.)"
echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
echo " --model <model> # which transition model to use (opt.)"
echo ""
echo " --acwt <float> # select acoustic scale for decoding"
echo " --scoring-opts <opts> # options forwarded to local/score.sh"
exit 1;
fi
graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
nnet=$srcdir/final.nnet;
fi
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
model=$srcdir/final.mdl;
fi
# find the feature_transform to use
if [ -z "$feature_transform" ]; then
feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
echo "Missing feature_transform '$feature_transform'"
exit 1
fi
# check that files exist
for f in $sdata/1/feats.scp $nnet $model $graphdir/HCLG.fst; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
delta_order=$(cat $srcdir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
# Run the decoding in the queue
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
# Run the scoring
if ! $skip_scoring ; then
[ ! -x local/score.sh ] && \
echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1;
fi
exit 0;
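# Example (illustrative values): widen the LM-weight sweep at scoring time via
# the script's own --scoring-opts flag; the other arguments are as in
# local/run_kl_hmm.sh.
#   steps/kl_hmm/decode_kl_hmm.sh --scoring-opts "--min-lmwt 8 --max-lmwt 16" \
#     --nnet $dir/kl-hmm-${states}/final.nnet --model exp/tri4b-${states}/final.mdl \
#     --config conf/decode_dnn.config exp/tri4b-${states}/graph_bd_tgpr \
#     data-fmllr-tri4b/test_dev93 $dir/decode_dev93_kl-hmm-bd-${states}_tst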

View file

@@ -0,0 +1,121 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely,
# Daniel Povey,
# Idiap Research Institute (Author: David Imseng)
# Apache 2.0
# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
stage=0 # stage=1 skips lattice generation
nj=32
cmd=$decode_cmd
max_active=7000 # maximum number of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
use_gpu="no" # disable gpu
parallel_opts=""
tmpdir=
# End configuration section.
echo "$0 $@" # Print the command line for logging
[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;
if [ $# != 3 ]; then
echo "Usage: $0 [options] <data-dir> <align-dir> <kl-hmm-dir>"
echo "... where <kl-hmm-dir> is assumed to be a sub-directory of the directory"
echo " where the DNN + transition model is."
echo "e.g.: $0 data/train exp/dnn1/kl-hmm-train"
echo ""
echo "This script works on plain or modified features (CMN,delta+delta-delta),"
echo "which are then sent through feature-transform. It works out what type"
echo "of features you used from content of srcdir."
echo ""
echo "main options (for others, see top of script file)"
echo " --config <config-file> # config containing options"
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
echo ""
echo " --nnet <nnet> # which nnet to use (opt.)"
echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
echo " --model <model> # which transition model to use (opt.)"
echo " --tmpdir >dir> # Temp directory to store the statistics, becuase they can get big (opt.)"
exit 1;
fi
data=$1
alidir=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;
mkdir -p $dir/log
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs
if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
nnet=$srcdir/final.nnet;
fi
[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
model=$srcdir/final.mdl;
fi
# find the feature_transform to use
if [ -z "$feature_transform" ]; then
feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
echo "Missing feature_transform '$feature_transform'"
exit 1
fi
# check that files exist
for f in $sdata/1/feats.scp $nnet $model; do
[ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done
# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
[ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
delta_order=$(cat $srcdir/delta_order)
feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi
ali="ark:gunzip -c $alidir/ali.*.gz |"
if [[ ! -z $tmpdir ]]; then
mkdir -p $tmpdir
else
tmpdir=$dir
fi
nkl_states=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
if [ $stage -le 0 ]; then
$cmd $parallel_opts JOB=1:$nj $dir/log/acc-stats.JOB.log \
nnet-kl-hmm-acc --nkl-states=${nkl_states} "ark:nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet \"$feats\" ark:- |" "ark:ali-to-pdf --print-args=false $alidir/final.mdl \"$ali\" ark:- |" $tmpdir/kl-hmm-stats.JOB
fi
sum-matrices $dir/accumulated-kl-hmm-stats $tmpdir/kl-hmm-stats.*
rm $tmpdir/kl-hmm-stats.*
nnet-kl-hmm-mat-to-component $dir/kl-hmm.nnet $dir/accumulated-kl-hmm-stats
nnet-concat $dir/../final.nnet $dir/kl-hmm.nnet $dir/final.nnet
exit 0;
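# Optional sanity check: the last component of the concatenated network should
# now be the KL-HMM layer (marker <klhmm>), e.g.:
#   nnet-info $dir/final.nnet | tail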

View file

@@ -21,9 +21,11 @@
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-activation.h"
#include "nnet/nnet-kl-hmm.h"
#include "nnet/nnet-affine-transform.h"
#include "nnet/nnet-rbm.h"
#include "nnet/nnet-various.h"
#include "nnet/nnet-kl-hmm.h"
namespace kaldi {
namespace nnet1 {
@@ -40,6 +42,7 @@ const struct Component::key_value Component::kMarkerMap[] = {
{ Component::kCopy,"<copy>" },
{ Component::kAddShift,"<addshift>" },
{ Component::kRescale,"<rescale>" },
{ Component::kKlHmm,"<klhmm>" }
};
@@ -119,6 +122,9 @@ Component* Component::Read(std::istream &is, bool binary) {
case Component::kRescale :
p_comp = new Rescale(dim_in, dim_out);
break;
case Component::kKlHmm :
p_comp = new KlHmm(dim_in, dim_out);
break;
case Component::kUnknown :
default :
KALDI_ERR << "Missing type: " << token;

View file

@@ -64,7 +64,8 @@ class Component {
kTranspose,
kBlockLinearity,
kAddShift,
kRescale
kRescale,
kKlHmm
} ComponentType;
/// A pair of type and marker
struct key_value {

src/nnet/nnet-kl-hmm.h (new file, 150 lines)
View file

@@ -0,0 +1,150 @@
// nnet/nnet-kl-hmm.h
// Copyright 2013 Idiap Research Institute (Author: David Imseng)
// Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
// Brno University of Technology (Author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_NNET_NNET_KL_HMM_H_
#define KALDI_NNET_NNET_KL_HMM_H_
#include "nnet/nnet-component.h"
#include "cudamatrix/cu-math.h"
#include "cudamatrix/cu-rand.h"
#include "matrix/kaldi-vector.h"
#include "matrix/kaldi-matrix.h"
namespace kaldi {
namespace nnet1 {
class KlHmm : public Component {
public:
KlHmm(int32 dim_in, int32 dim_out)
: Component(dim_in, dim_out), kl_stats_(dim_out, dim_in, kSetZero)
{ }
~KlHmm()
{ }
Component* Copy() const { return new KlHmm(*this); }
ComponentType GetType() const {
return kKlHmm;
}
void PropagateFnc(const CuMatrix<BaseFloat> &in, CuMatrix<BaseFloat> *out) {
if (kl_inv_q_.NumRows() == 0) {
// Copy the CudaMatrix to a Matrix
Matrix<BaseFloat> in_tmp(in.NumRows(), in.NumCols());
in.CopyToMat(&in_tmp);
// Check that the input contains posteriors (it suffices to check the first row)
BaseFloat post_sum = in_tmp.Row(0).Sum();
KALDI_ASSERT(ApproxEqual(post_sum, 1.0));
// Get a tmp Matrix of the stats
Matrix<BaseFloat> kl_stats_tmp(kl_stats_);
// Init a vector to get the sum of the rows (for normalization)
Vector<BaseFloat> row_sum(kl_stats_.NumRows(), kSetZero);
// Get the sum of the posteriors for normalization
row_sum.AddColSumMat(1, kl_stats_tmp);
// Apply floor to make sure there is no zero
row_sum.ApplyFloor(1e-20);
// Invert the sum (to normalize)
row_sum.InvertElements();
// Normalizing the statistics vector
kl_stats_tmp.MulRowsVec(row_sum);
// Apply floor before inversion and logarithm
kl_stats_tmp.ApplyFloor(1e-20);
// Apply inversion
kl_stats_tmp.InvertElements();
// Apply logarithm
kl_stats_tmp.ApplyLog();
// Inverted and logged values
kl_inv_q_.Resize(kl_stats_.NumRows(), kl_stats_.NumCols());
// Now holds log(1/Q)
kl_inv_q_.CopyFromMat(kl_stats_tmp);
}
// Get the logarithm of the features for the Entropy calculation
// Copy the CudaMatrix to a Matrix
Matrix<BaseFloat> in_log_tmp(in.NumRows(), in.NumCols());
in.CopyToMat(&in_log_tmp);
// Flooring and log
in_log_tmp.ApplyFloor(1e-20);
in_log_tmp.ApplyLog();
CuMatrix<BaseFloat> log_in(in.NumRows(), in.NumCols());
log_in.CopyFromMat(in_log_tmp);
// P*logP
CuMatrix<BaseFloat> tmp_entropy(in);
tmp_entropy.MulElements(log_in);
// Getting the entropy (sum P*logP)
CuVector<BaseFloat> in_entropy(in.NumRows(), kSetZero);
in_entropy.AddColSumMat(1,tmp_entropy);
// sum P*log (1/Q)
out->AddMatMat(1, in, kNoTrans, kl_inv_q_, kTrans, 0);
// (sum P*logP) + (sum P*log(1/Q))
out->AddVecToCols(1, in_entropy);
// return the negative KL-divergence
out->Scale(-1);
}
void BackpropagateFnc(const CuMatrix<BaseFloat> &in, const CuMatrix<BaseFloat> &out,
const CuMatrix<BaseFloat> &out_diff, CuMatrix<BaseFloat> *in_diff) {
KALDI_ERR << "Unimplemented";
}
/// Reads the component content
void ReadData(std::istream &is, bool binary) {
kl_stats_.Read(is, binary);
KALDI_ASSERT(kl_stats_.NumRows() == output_dim_);
KALDI_ASSERT(kl_stats_.NumCols() == input_dim_);
}
/// Writes the component content
void WriteData(std::ostream &os, bool binary) const {
kl_stats_.Write(os, binary);
}
/// Set the statistics matrix
void SetStats(const Matrix<BaseFloat> &mat) {
KALDI_ASSERT(mat.NumRows() == output_dim_);
KALDI_ASSERT(mat.NumCols() == input_dim_);
kl_stats_.Resize(mat.NumRows(), mat.NumCols());
kl_stats_.CopyFromMat(mat);
}
/// Accumulate the statistics for KL-HMM parameter estimation
void Accumulate(const Matrix<BaseFloat> &posteriors, const std::vector<int32> &alignment) {
KALDI_ASSERT(posteriors.NumRows() == alignment.size());
KALDI_ASSERT(posteriors.NumCols() == kl_stats_.NumCols());
int32 num_frames = alignment.size();
for(int32 i = 0; i < num_frames; i++) {
// Convert the float posteriors to double (for higher precision during accumulation)
Vector<double> temp(posteriors.Row(i));
// Sum all the posteriors associated with a particular state
kl_stats_.Row(alignment[i]).AddVec(1,temp);
}
}
private:
Matrix<double> kl_stats_;
CuMatrix<BaseFloat> kl_inv_q_;
};
} // namespace nnet1
} // namespace kaldi
#endif
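// Note on PropagateFnc() above: with P a row of network posteriors and Q_k the
// normalized statistics of KL-HMM state k, the output is the negative KL
// divergence per state,
//   out_k = -KL(P||Q_k) = -sum_i P_i log(P_i/Q_{k,i})
//         = -( sum_i P_i log P_i + sum_i P_i log(1/Q_{k,i}) ),
// i.e. the entropy term (AddVecToCols) plus the cross term (AddMatMat), negated.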

View file

@@ -9,12 +9,13 @@ LDLIBS += $(CUDA_LDLIBS)
BINFILES = nnet-train-frmshuff \
nnet-train-xent-hardlab-perutt \
nnet-train-xent-hardlab-frmshuff \
nnet-train-mse-tgtmat-frmshuff \
nnet-train-mmi-sequential \
nnet-train-mpe-sequential \
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet \
nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component
OBJFILES =
@@ -24,6 +25,6 @@ TESTFILES =
ADDLIBS = ../nnet/kaldi-nnet.a ../cudamatrix/kaldi-cudamatrix.a ../lat/kaldi-lat.a \
../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
../util/kaldi-util.a ../base/kaldi-base.a
include ../makefiles/default_rules.mk
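# With the default rules included above, the two new binaries build as ordinary
# per-target goals (assuming a configured Kaldi source tree):
#   cd src/nnetbin && make nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component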

View file

@@ -0,0 +1,107 @@
// nnetbin/nnet-kl-hmm-acc.cc
// Copyright 2013 Idiap Research Institute (Author: David Imseng)
// Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
// Brno University of Technology (Author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
int main(int argc, char *argv[]) {
using namespace kaldi;
using namespace kaldi::nnet1;
try {
const char *usage =
"Collect the statistics for the Kl-HMM trainign.\n"
"Usage: nnet-kl-hmm-acc [options] <feature-rspecifier> <alignments-rspecifier> <kl-hmm-accumulator>\n"
"e.g.: \n"
" nnet-kl-hmm-acc scp:train.scp ark:train.ali kl-hmm.acc\n";
ParseOptions po(usage);
bool binary = false;
int32 n_kl_states = 0;
po.Register("binary", &binary, "Write output in binary mode");
po.Register("nkl-states", &n_kl_states, "Number of states in Kl-HMM");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
po.PrintUsage();
exit(1);
}
std::string feature_rspecifier = po.GetArg(1),
alignments_rspecifier = po.GetArg(2),
kl_hmm_accumulator = po.GetArg(3);
typedef kaldi::int32 int32;
kaldi::int64 total_frames = 0;
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessInt32VectorReader alignments_reader(alignments_rspecifier);
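// Peek at the first utterance to obtain the posterior dimensionality
// (assumes the feature stream is non-empty).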
int32 posterior_dim = feature_reader.Value().NumCols();
KlHmm kl_hmm(posterior_dim,n_kl_states);
int32 num_done = 0, num_no_alignment = 0, num_other_error = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
std::string utt = feature_reader.Key();
if (!alignments_reader.HasKey(utt)) {
num_no_alignment++;
} else {
const Matrix<BaseFloat> &mat = feature_reader.Value();
const std::vector<int32> &alignment = alignments_reader.Value(utt);
if ((int32)alignment.size() != mat.NumRows()) {
KALDI_WARN << "Alignment has wrong size "<< (alignment.size()) << " vs. "<< (mat.NumRows());
num_other_error++;
continue;
}
// Accumulate the statistics
kl_hmm.Accumulate(mat, alignment);
// log
KALDI_VLOG(2) << "utt " << utt << ", frames " << alignment.size();
total_frames += mat.NumRows();
num_done++;
}
}
KALDI_LOG << "Done " << num_done << " files, " << num_no_alignment
<< " with no alignments, " << num_other_error
<< " with other errors.";
// Store the accumulator
{
Output out(kl_hmm_accumulator, binary);
kl_hmm.WriteData(out.Stream(), binary);
}
return 0;
} catch(const std::exception &e) {
std::cerr << e.what();
return -1;
}
}

View file

@@ -0,0 +1,77 @@
// nnetbin/nnet-kl-hmm-mat-to-component.cc
// Copyright 2013 Idiap Research Institute (Author: David Imseng)
// Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
// Brno University of Technology (Author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
int main(int argc, char *argv[]) {
using namespace kaldi;
using namespace kaldi::nnet1;
try {
typedef kaldi::int32 int32;
const char *usage =
"Convert matrix of KL-HMM training to nnet component.\n"
"Usage: nnet-kl-hmm-mat-to-component [options] nnet-component matrix\n";
bool binary = true;
int32 n_kl_states = 0;
int32 n_posterior_dim = 0;
ParseOptions po(usage);
po.Register("binary", &binary, "Write output in binary mode");
po.Register("nkl-states", &n_kl_states, "Number of states in Kl-HMM");
po.Register("posterior-dim", &n_posterior_dim, "Dimensionality of posterior features");
po.Read(argc, argv);
if (po.NumArgs() != 2) {
po.PrintUsage();
exit(1);
}
std::string nnet_component_filename = po.GetArg(1);
std::string mat_filename = po.GetArg(2);
Matrix<BaseFloat> kl_stats;
{
bool binary_read;
Input ki(mat_filename, &binary_read);
kl_stats.Read(ki.Stream(), binary_read);
}
KlHmm kl_hmm(kl_stats.NumCols(), kl_stats.NumRows());
kl_hmm.SetStats(kl_stats);
// Write out the nnet component
{
Output ko(nnet_component_filename, binary);
kl_hmm.Write(ko.Stream(), binary);
}
KALDI_LOG << "Written nnet component to " << nnet_component_filename;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}
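// Typical use (a sketch mirroring steps/kl_hmm/train_kl_hmm.sh; $posteriors and
// $pdf_ali stand for the nnet-forward and ali-to-pdf pipes used there):
//   nnet-kl-hmm-acc --nkl-states=$nkl_states "$posteriors" "$pdf_ali" stats.JOB
//   sum-matrices accumulated-stats stats.*
//   nnet-kl-hmm-mat-to-component kl-hmm.nnet accumulated-stats
//   nnet-concat dnn/final.nnet kl-hmm.nnet final.nnet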

View file

@@ -0,0 +1,74 @@
// nnetbin/nnet-kl-hmm-sum-accs.cc
// Copyright 2013 Idiap Research Institute (Author: David Imseng)
// Karlsruhe Institute of Technology (Author: Ngoc Thang Vu)
// Brno University of Technology (Author: Karel Vesely)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-kl-hmm.h"
#include "base/kaldi-common.h"
#include "util/common-utils.h"
int main(int argc, char *argv[]) {
using namespace kaldi;
using namespace kaldi::nnet1;
try {
typedef kaldi::int32 int32;
const char *usage =
"Sum multiple accumulated stats files for KL-HMM training.\n"
"Usage: nnet-kl-hmm-sum-accs [options] nnet-component stats-in1 stats-in2 ...\n";
bool binary = true;
int32 n_kl_states = 0;
int32 n_posterior_dim = 0;
ParseOptions po(usage);
po.Register("binary", &binary, "Write output in binary mode");
po.Register("nkl-states", &n_kl_states, "Number of states in Kl-HMM");
po.Register("posterior-dim", &n_posterior_dim, "Dimensionality of posterior features");
po.Read(argc, argv);
if (po.NumArgs() < 2) {
po.PrintUsage();
exit(1);
}
std::string nnet_component_filename = po.GetArg(1);
KlHmm kl_hmm(n_posterior_dim, n_kl_states);
int num_accs = po.NumArgs() - 1;
for (int i = 2, max = po.NumArgs(); i <= max; i++) {
std::string stats_in_filename = po.GetArg(i);
bool binary_read;
Input ki(stats_in_filename, &binary_read);
kl_hmm.AddStats(ki.Stream(), binary_read);
}
// Write out the nnet component
{
Output ko(nnet_component_filename, binary);
kl_hmm.Write(ko.Stream(), binary);
}
KALDI_LOG << "Summed " << num_accs << " stats ";
KALDI_LOG << "Written nnet component to " << nnet_component_filename;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;
}
}