(sandbox/pitch) Merging of trunk to sandbox/pitch to perform reintegration merge afterwards

git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/pitch@3251 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Jan Trmal 2013-12-03 17:28:32 +00:00
Родитель f7e4e7de6b d65f58d275
Коммит 7afa80a24d
50 изменённых файлов: 786 добавлений и 128 удалений

Просмотреть файл

@ -47,7 +47,7 @@ export PATH=$PATH:/export/babel/sanjeev/kaldi-trunk/tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -29,7 +29,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -53,7 +53,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm
@ -87,7 +87,7 @@ cat $trans_file | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1
! merge_ngrams </dev/null >&/dev/null && \
echo merge_ngrams not found in kaldi_lm. You need to have kaldi_lm on your path OR && \
echo You can do the following: && \
echo 1. Install the latest version from http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz && \
echo 1. Install the latest version from http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz && \
echo 2. you delete kaldi_lm, and kaldi_lm.tar.gz in the tools folder. This script will automatically install it. && \
exit 1;

Просмотреть файл

@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -9,8 +9,9 @@
export train_cmd="queue.pl -l arch=*64"
export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
export cuda_cmd="queue.pl -l gpu=1"
#export cuda_cmd="..."
#b) BUT cluster options

Просмотреть файл

@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
else
echo Downloading and installing the kaldi_lm tools
if [ ! -f kaldi_lm.tar.gz ]; then
wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
fi
tar -xvzf kaldi_lm.tar.gz || exit 1;
cd kaldi_lm

Просмотреть файл

@ -324,6 +324,8 @@ local/run_sgmm2.sh
# You probably want to run the hybrid recipe as it is complementary:
local/run_dnn.sh
# You probably want to try KL-HMM
#local/run_kl_hmm.sh
# Getting results [see RESULTS file]
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

Просмотреть файл

@ -0,0 +1,152 @@
#!/bin/bash
# Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey),
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Accumulates tree statistics on features that are piped through nnet-forward,
# builds a decision tree from them, initializes <exp-dir>/1.mdl with
# gmm-init-model-flat, converts the existing alignments to the new tree and
# compiles the training graphs.  <exp-dir>/final.mdl ends up as a symlink to
# 1.mdl.

# Begin configuration.
# NOTE(review): scale_opts, num_iters, max_iter_inc, beam, retry_beam,
# boost_silence and power are not referenced further down in this script; they
# are kept so that the corresponding command-line options stay accepted.
stage=-4 # This allows restarting after partway, when something went wrong.
config=
cmd=run.pl
scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
num_iters=35 # Number of iterations of training
max_iter_inc=25 # Last iter to increase #Gauss on.
beam=10
retry_beam=40
boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
power=0.25 # Exponent for number of gaussians according to occurrence counts
cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
thresh=20
use_gpu="no"
nnet_dir=
context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone.
tmpdir=
no_softmax=true
# End configuration.

echo "$0 $@" # Print the command line for logging

[ -f path.sh ] && . ./path.sh;
. parse_options.sh || exit 1;

if [ $# != 5 ]; then
  echo "Usage: $0 <num-leaves> <data-dir> <lang-dir> <alignment-dir> <exp-dir>"
  echo "e.g.: $0 2000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
  echo "main options (for others, see top of script file)"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo " --config <config-file> # config containing options"
  echo " --stage <stage> # stage to do partial re-run from."
  echo " --thresh "
  echo " --cluster_thresh "
  echo " --nnet_dir "
  echo " --context_opts "
  echo " --tmpdir "
  echo " --no-softmax "
  exit 1;
fi

numleaves=$1
data=$2
lang=$3
alidir=$4
dir=$5

for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
nj=`cat $alidir/num_jobs` || exit 1;
mkdir -p $dir/log
echo $nj > $dir/num_jobs

# Re-split the data only if the split directory is missing or older than feats.scp.
sdata=$data/split$nj;
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;

nnet=${nnet_dir}/final.nnet
feature_transform=${nnet_dir}/final.feature_transform

# Two parallel pipelines are assembled below:
#   featsdim  - unsplit features, only used to query the dimension via feat-to-dim;
#   nnetfeats - per-job features that are fed to nnet-forward.
featsdim="ark:copy-feats scp:$data/feats.scp ark:- |"
nnetfeats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"

# Optionally add cmvn
if [ -f ${nnet_dir}/norm_vars ]; then
  norm_vars=$(cat ${nnet_dir}/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  nnetfeats="$nnetfeats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
  featsdim="$featsdim apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
fi

# Optionally add deltas
if [ -f ${nnet_dir}/delta_order ]; then
  delta_order=$(cat ${nnet_dir}/delta_order)
  nnetfeats="$nnetfeats add-deltas --delta-order=$delta_order ark:- ark:- |"
  featsdim="$featsdim add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

feats="ark,s,cs:nnet-forward "
# NOTE(review): $feature_transform is always a non-empty string at this point
# (it is built from $nnet_dir above), so this branch is always taken; confirm
# whether a file-existence test was intended.
if [[ ! -z $feature_transform ]]; then
  feats=${feats}" --feature-transform=$feature_transform "
fi
feats=${feats}"--no-softmax=$no_softmax --use-gpu=$use_gpu $nnet \"$nnetfeats\" ark:- |"

feat_dim=$(feat-to-dim --print-args=false "$featsdim" -)

rm $dir/.error 2>/dev/null

# Per-job tree accumulators go to $tmpdir if given, otherwise to $dir.
if [[ ! -z $tmpdir ]]; then
  mkdir -p $tmpdir
else
  tmpdir=$dir
fi

if [ $stage -le -3 ]; then
  # nnet-forward is only run in this stage, so the network is only required
  # here; fail early with a clear message instead of inside every job.
  [ ! -f $nnet ] && echo "$0: no such file $nnet" && exit 1;
  echo "$0: accumulating tree stats"
  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
    acc-tree-stats $context_opts --var-floor=1.0 --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
    "ark:gunzip -c $alidir/ali.JOB.gz|" $tmpdir/JOB.treeacc || exit 1;
  sum-tree-stats $dir/treeacc $tmpdir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
  rm $tmpdir/*.treeacc
fi

if [ $stage -le -2 ]; then
  echo "$0: getting questions for tree-building, via clustering"
  # preparing questions, roots file...
  cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
  cat $lang/phones/extra_questions.int >> $dir/questions.int
  compile-questions $context_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;

  echo "$0: building the tree"
  # $cmd $dir/log/build_tree.log \
  build-tree $context_opts --verbose=1 --max-leaves=$numleaves --cluster-thresh=$cluster_thresh --thresh=$thresh $dir/treeacc $lang/phones/roots.int \
    $dir/questions.qst $lang/topo $dir/tree &> $dir/log/build_tree.log || exit 1;

  # Stop here on failure: the later stages all read $dir/1.mdl.
  gmm-init-model-flat --dim=$feat_dim $dir/tree $lang/topo $dir/1.mdl || exit 1;

  rm $dir/treeacc
fi

if [ $stage -le -1 ]; then
  # Convert the alignments.
  echo "$0: converting alignments from $alidir to use current tree"
  $cmd JOB=1:$nj $dir/log/convert.JOB.log \
    convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
    "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
fi

if [ $stage -le 0 ]; then
  echo "$0: compiling graphs of transcripts"
  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
    compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
    "ark:utils/sym2int.pl -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \
    "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
fi

# final.mdl is (re)created as a symlink to the initial model.
rm $dir/final.mdl 2>/dev/null
ln -s 1.mdl $dir/final.mdl

# Summarize warning messages...
utils/summarize_warnings.pl $dir/log

echo "$0: Done building the tree in $dir"

Просмотреть файл

@ -0,0 +1,121 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely,
#                     Daniel Povey,
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Decodes <data-dir> with a neural network: features are piped through
# nnet-forward into latgen-faster-mapped, lattices are written to
# <decode-dir>/lat.JOB.gz, and local/score.sh is run afterwards unless
# --skip-scoring true is given.

# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
stage=0 # stage=1 skips lattice generation
nj=4
cmd=run.pl
max_active=7000 # maximum of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
beam=13.0 # GMM:13.0
latbeam=8.0 # GMM:6.0
acwt=0.1 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
scoring_opts="--min-lmwt 1 --max-lmwt 12"
skip_scoring=false
use_gpu="no" # disable gpu
parallel_opts=""
# End configuration section.

echo "$0 $@" # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
  echo " where the DNN + transition model is."
  echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
  echo ""
  echo "This script works on plain or modified features (CMN,delta+delta-delta),"
  echo "which are then sent through feature-transform. It works out what type"
  echo "of features you used from content of srcdir."
  echo ""
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo " --nnet <nnet> # which nnet to use (opt.)"
  echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
  echo " --model <model> # which transition model to use (opt.)"
  echo ""
  echo " --acwt <float> # select acoustic scale for decoding"
  echo " --scoring-opts <opts> # options forwarded to local/score.sh"
  exit 1;
fi

graphdir=$1
data=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
sdata=$data/split$nj;

mkdir -p $dir/log
# Re-split the data only if the split directory is missing or older than feats.scp.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
# $nnet is never empty here, so test for the file itself; an emptiness test
# could never produce the "does not exist" message.
[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet $model $graphdir/HCLG.fst; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Run the decoding in the queue
if [ $stage -le 0 ]; then
  # The escaped pipe (\|) is handed to $cmd as an argument, so the pipeline
  # is formed inside each job rather than by this shell.
  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
    nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
    $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi

# Run the scoring
if ! $skip_scoring ; then
  [ ! -x local/score.sh ] && \
    echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
  local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1;
fi

exit 0;

Просмотреть файл

@ -0,0 +1,121 @@
#!/bin/bash
# Copyright 2012-2013 Karel Vesely,
#                     Daniel Povey,
#                     Idiap Research Institute (Author: David Imseng)
# Apache 2.0

# Accumulates KL-HMM statistics: per-job nnet-forward outputs are paired with
# pdf-ids derived from the alignments (nnet-kl-hmm-acc), the per-job statistics
# are summed, converted into a component (nnet-kl-hmm-mat-to-component) and
# concatenated onto <kl-hmm-dir>/../final.nnet to give <kl-hmm-dir>/final.nnet.

# Begin configuration section.
nnet= # Optionally pre-select network to use for getting state-likelihoods
feature_transform= # Optionally pre-select feature transform (in front of nnet)
model= # Optionally pre-select transition model
class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
stage=0 # stage=1 skips the statistics accumulation
nj=32
cmd=$decode_cmd
max_active=7000 # maximum of active tokens
max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
use_gpu="no" # disable gpu
parallel_opts=""
tmpdir=
# End configuration section.

echo "$0 $@" # Print the command line for logging

[ -f ./path.sh ] && . ./path.sh; # source the path.
. parse_options.sh || exit 1;

if [ $# != 3 ]; then
  echo "Usage: $0 [options] <data-dir> <align-dir> <kl-hmm-dir>"
  echo "... where <kl-hmm-dir> is assumed to be a sub-directory of the directory"
  echo " where the DNN + transition model is."
  echo "e.g.: $0 data/train exp/dnn1_ali exp/dnn1/kl-hmm-train"
  echo ""
  echo "This script works on plain or modified features (CMN,delta+delta-delta),"
  echo "which are then sent through feature-transform. It works out what type"
  echo "of features you used from content of srcdir."
  echo ""
  echo "main options (for others, see top of script file)"
  echo " --config <config-file> # config containing options"
  echo " --nj <nj> # number of parallel jobs"
  echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo ""
  echo " --nnet <nnet> # which nnet to use (opt.)"
  echo " --feature-transform <nnet> # select transform in front of nnet (opt.)"
  echo " --model <model> # which transition model to use (opt.)"
  echo " --tmpdir <dir> # Temp directory to store the statistics, because they can get big (opt.)"
  exit 1;
fi

data=$1
alidir=$2
dir=$3
srcdir=`dirname $dir`; # The model directory is one level up from the kl-hmm directory.
sdata=$data/split$nj;

mkdir -p $dir/log
# Re-split the data only if the split directory is missing or older than feats.scp.
[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
echo $nj > $dir/num_jobs

if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
  nnet=$srcdir/final.nnet;
fi
# $nnet is never empty here, so test for the file itself; an emptiness test
# could never produce the "does not exist" message.
[ ! -f "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;

if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
  model=$srcdir/final.mdl;
fi

# find the feature_transform to use
if [ -z "$feature_transform" ]; then
  feature_transform=$srcdir/final.feature_transform
fi
if [ ! -f $feature_transform ]; then
  echo "Missing feature_transform '$feature_transform'"
  exit 1
fi

# check that files exist
for f in $sdata/1/feats.scp $nnet $model; do
  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
done

# Create the feature stream:
feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
# Optionally add cmvn
if [ -f $srcdir/norm_vars ]; then
  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
fi
# Optionally add deltas
if [ -f $srcdir/delta_order ]; then
  delta_order=$(cat $srcdir/delta_order)
  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
fi

# Every job reads all alignment archives (ali.*.gz), not just its own split.
ali="ark:gunzip -c $alidir/ali.*.gz |"

# Per-job statistics go to $tmpdir if given, otherwise to $dir.
if [[ ! -z $tmpdir ]]; then
  mkdir -p $tmpdir
else
  tmpdir=$dir
fi

# Last field of the "pdfs" line printed by hmm-info.
nkl_states=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')

if [ $stage -le 0 ]; then
  $cmd $parallel_opts JOB=1:$nj $dir/log/acc-stats.JOB.log \
    nnet-kl-hmm-acc --nkl-states=${nkl_states} "ark:nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet \"$feats\" ark:- |" "ark:ali-to-pdf --print-args=false $alidir/final.mdl \"$ali\" ark:- |" $tmpdir/kl-hmm-stats.JOB || exit 1;
fi

# Abort on any failure below: otherwise the per-job statistics would be deleted
# and the script would still exit with status 0.
sum-matrices $dir/accumulated-kl-hmm-stats $tmpdir/kl-hmm-stats.* || exit 1;
rm $tmpdir/kl-hmm-stats.*

nnet-kl-hmm-mat-to-component $dir/kl-hmm.nnet $dir/accumulated-kl-hmm-stats || exit 1;
nnet-concat $dir/../final.nnet $dir/kl-hmm.nnet $dir/final.nnet || exit 1;

exit 0;

Просмотреть файл

@ -43,7 +43,7 @@ texts=""
nu=`cat $data/utt2spk | wc -l`
nf=`cat $data/feats.scp | wc -l`
nt=`cat $data/text | wc -l`
nt=`cat $data/text 2>/dev/null | wc -l` # take it as zero if no such file
if [ $nu -ne $nf ]; then
echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf); this script "
echo " may produce incorrectly split data."
@ -61,7 +61,7 @@ if [ ! -d $s1 ]; then
else
need_to_split=false
for f in utt2spk spk2utt feats.scp text wav.scp cmvn.scp spk2gender \
segments reco2file_and_channel; do
vad.scp segments reco2file_and_channel; do
if [[ -f $data/$f && ( ! -f $s1/$f || $s1/$f -ot $data/$f ) ]]; then
need_to_split=true
fi
@ -75,6 +75,7 @@ fi
for n in `seq $numsplit`; do
mkdir -p $data/split$numsplit/$n
feats="$feats $data/split$numsplit/$n/feats.scp"
vads="$vads $data/split$numsplit/$n/vad.scp"
texts="$texts $data/split$numsplit/$n/text"
utt2spks="$utt2spks $data/split$numsplit/$n/utt2spk"
done
@ -88,8 +89,10 @@ fi
utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1
utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats || exit 1
[ -f $data/text ] && \
utils/split_scp.pl $utt2spk_opt $data/text $texts
[ -f $data/text ] && utils/split_scp.pl $utt2spk_opt $data/text $texts
[ -f $data/vad.scp ] && utils/split_scp.pl $utt2spk_opt $data/vad.scp $vads
# If lockfile is not installed, just don't lock it. It's not a big deal.
which lockfile >&/dev/null && lockfile -l 60 $data/.split_lock

Просмотреть файл

@ -1,5 +1,6 @@
#!/bin/bash
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Copyright 2010-2011 Microsoft Corporation
# 2012-2013 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
@ -89,6 +90,7 @@ fi
function do_filtering {
# assumes the utt2spk and spk2utt files already exist.
[ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
[ -f $srcdir/vad.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp
[ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
[ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
[ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender

Просмотреть файл

@ -58,7 +58,9 @@ int main(int argc, char *argv[]) {
po.Register("thresh", &thresh, "Log-likelihood change threshold for "
"tree-building");
po.Register("cluster-thresh", &cluster_thresh, "Log-likelihood change "
"threshold for clustering after tree-building");
"threshold for clustering after tree-building. 0 means "
"no clustering; -1 means use as a clustering threshold the "
"likelihood change of the final split.");
po.Read(argc, argv);

Просмотреть файл

@ -103,7 +103,7 @@ void CuDevice::SelectGpuId(std::string use_gpu) {
KALDI_WARN << "Will try again to get a GPU after " << sec_sleep
<< " seconds.";
sleep(sec_sleep);
//
cudaGetLastError(); // reset the error state
e = cudaThreadSynchronize(); //<< 2nd trial to get CUDA context.
if (e != cudaSuccess) {
if (use_gpu == "yes") {

Просмотреть файл

@ -835,15 +835,19 @@ static void UnitTestCuMatrixSymInvertPosDef() {
Real alpha = 0.3, beta = 1.75432;
M.SymAddMat2(alpha, N, trans, beta);
// M.AddMatMat(alpha, N, trans, N, other_trans, beta);
SpMatrix<Real> S(CuSpMatrix<Real>(M, kTakeLower));
CuSpMatrix<Real> spTemp(M, kTakeLower);
SpMatrix<Real> S(spTemp);
S.Invert();
CuMatrix<Real> M_orig(CuSpMatrix<Real>(M, kTakeLower));
CuSpMatrix<Real> spTemp2(M, kTakeLower);
CuMatrix<Real> M_orig(spTemp2);
M.SymInvertPosDef();
CuMatrix<Real> M_inverted(CuSpMatrix<Real>(M, kTakeLower));
CuSpMatrix<Real> spTemp3(M, kTakeLower);
CuMatrix<Real> M_inverted(spTemp3);
CuMatrix<Real> M_prod(dimM, dimM);
M_prod.AddMatMat(Real(1.0), M_orig, kNoTrans, M_inverted, kNoTrans, Real(0.0));
KALDI_ASSERT(M_prod.IsUnit());
SpMatrix<Real> S2(CuSpMatrix<Real>(M, kTakeLower));
CuSpMatrix<Real> spTemp4(M, kTakeLower);
SpMatrix<Real> S2(spTemp4);
KALDI_ASSERT(ApproxEqual(S, S2, (Real)0.1));
KALDI_ASSERT(dimM == 0 || S.Trace() != 0);
}

Просмотреть файл

@ -144,6 +144,7 @@ preprocessor variables, setting compile options, linking with libraries, and so
\section build_setup_platforms Which platforms has Kaldi been compiled on?
We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including
Ubuntu, CentOS, Debian and SUSE), and Darwin.
Ubuntu, CentOS, Debian and SUSE), and Darwin. We recommend you use g++ version
4.4 or above for the source to compile.
*/

Просмотреть файл

@ -21,8 +21,6 @@
#ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
#define KALDI_FEAT_FEATURE_FUNCTIONS_H_
#include <cassert>
#include <cstdlib>
#include <string>
#include <vector>

Просмотреть файл

@ -21,8 +21,8 @@ OBJFILES =
TESTFILES =
ADDLIBS = ../feat/kaldi-feat.a ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
../tree/kaldi-tree.a ../matrix/kaldi-matrix.a ../util/kaldi-util.a \
../base/kaldi-base.a
../thread/kaldi-thread.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
../util/kaldi-util.a ../base/kaldi-base.a
include ../makefiles/default_rules.mk

Просмотреть файл

@ -60,6 +60,7 @@ int main(int argc, char *argv[]) {
BaseFloatMatrixWriter feat_writer(wspecifier);
int32 num_done = 0, num_err = 0;
int64 frames_in = 0, frames_out = 0;
// process all keys
for (; !feat_reader.Done(); feat_reader.Next()) {
@ -71,6 +72,9 @@ int main(int argc, char *argv[]) {
int32 num_indexes = 0;
for (int32 k = offset; k < feats.NumRows(); k += n)
num_indexes++; // k is the index.
frames_in += feats.NumRows();
frames_out += num_indexes;
if (num_indexes == 0) {
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
@ -88,8 +92,9 @@ int main(int argc, char *argv[]) {
feat_writer.Write(utt, output);
num_done++;
}
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
KALDI_LOG << "Sub-sampled " << num_done << " feature matrices; " << num_err
<< " with errors.";
KALDI_LOG << "Reduced " << frames_in << " frames to " << frames_out;
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();

Просмотреть файл

@ -6,7 +6,7 @@ include ../kaldi.mk
BINFILES = fgmm-global-acc-stats fgmm-global-sum-accs fgmm-global-est \
fgmm-global-merge fgmm-global-to-gmm fgmm-gselect fgmm-global-get-frame-likes \
fgmm-global-acc-stats-twofeats fgmm-global-copy fgmm-global-mixdown \
fgmm-global-gselect-to-post
fgmm-global-gselect-to-post fgmm-global-info
OBJFILES =
@ -16,7 +16,7 @@ OBJFILES =
TESTFILES =
ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
../transform/kaldi-transform.a ../gmm/kaldi-gmm.a ../thread/kaldi-thread.a \
../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
../util/kaldi-util.a ../base/kaldi-base.a

Просмотреть файл

@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
const char *usage =
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
"of vectors of floats.\n"
"of vectors of floats. If --average=true, prints out the average per-frame\n"
"log-likelihood for each utterance, as a single float.\n"
"Usage: fgmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
"<likes-out-wspecifier>\n"
"e.g.: fgmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
ParseOptions po(usage);
bool average = false;
std::string gselect_rspecifier;
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
"to limit the #Gaussians accessed on each frame.");
po.Register("average", &average, "If true, print out the average per-frame "
"log-likelihood as a single float per utterance.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
BaseFloatVectorWriter likes_writer(likes_wspecifier);
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
int32 num_done = 0, num_err = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
tot_like += likes.Sum();
tot_frames += file_frames;
likes_writer.Write(key, likes);
if (average)
average_likes_writer.Write(key, likes.Sum() / file_frames);
else
likes_writer.Write(key, likes);
num_done++;
}
KALDI_LOG << "Done " << num_done << " files; " << num_err

Просмотреть файл

@ -14,8 +14,8 @@ OBJFILES = diag-gmm.o diag-gmm-normal.o mle-diag-gmm.o am-diag-gmm.o \
LIBNAME = kaldi-gmm
ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \
../base/kaldi-base.a
ADDLIBS = ../tree/kaldi-tree.a ../thread/kaldi-thread.a ../util/kaldi-util.a \
../matrix/kaldi-matrix.a ../base/kaldi-base.a

Просмотреть файл

@ -528,9 +528,9 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
Vector<BaseFloat> *loglikes) const {
loglikes->Resize(gconsts_.Dim(), kUndefined);
loglikes->CopyFromVec(gconsts_);
if (static_cast<int32>(data.Dim()) != Dim()) {
if (data.Dim() != Dim()) {
KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
<< "mismatch " << (data.Dim()) << " vs. "<< (Dim());
<< "mismatch " << data.Dim() << " vs. "<< Dim();
}
Vector<BaseFloat> data_sq(data);
data_sq.ApplyPow(2.0);
@ -542,6 +542,26 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
}
void DiagGmm::LogLikelihoods(const MatrixBase<BaseFloat> &data,
Matrix<BaseFloat> *loglikes) const {
KALDI_ASSERT(data.NumRows() != 0);
loglikes->Resize(data.NumRows(), gconsts_.Dim(), kUndefined);
loglikes->CopyRowsFromVec(gconsts_);
if (data.NumCols() != Dim()) {
KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
<< "mismatch " << data.NumCols() << " vs. "<< Dim();
}
Matrix<BaseFloat> data_sq(data);
data_sq.ApplyPow(2.0);
// loglikes += means * inv(vars) * data.
loglikes->AddMatMat(1.0, data, kNoTrans, means_invvars_, kTrans, 1.0);
// loglikes += -0.5 * inv(vars) * data_sq.
loglikes->AddMatMat(-0.5, data_sq, kNoTrans, inv_vars_, kTrans, 1.0);
}
void DiagGmm::LogLikelihoodsPreselect(const VectorBase<BaseFloat> &data,
const std::vector<int32> &indices,
Vector<BaseFloat> *loglikes) const {
@ -777,6 +797,79 @@ BaseFloat DiagGmm::GaussianSelection(const VectorBase<BaseFloat> &data,
return tot_loglike;
}
// Gaussian selection for a sequence of frames (the rows of "data").
// For each frame, (*output)[t] receives up to "num_gselect" component indices,
// ordered from highest to lowest log-likelihood.  Returns the sum over frames
// of the log-sum of the selected components' log-likelihoods.
// "data" must have at least one row.  To bound the size of the temporary
// log-likelihood matrix, long inputs are processed in row ranges by recursion.
BaseFloat DiagGmm::GaussianSelection(const MatrixBase<BaseFloat> &data,
                                     int32 num_gselect,
                                     std::vector<std::vector<int32> > *output) const {
  double ans = 0.0;
  int32 num_frames = data.NumRows(), num_gauss = NumGauss();

  // Don't devote more than 10Mb to loglikes_mat; break up the utterance if
  // needed.
  const size_t max_mem = 10000000;
  // Evaluated in size_t: the product of two int32 values can exceed the int32
  // range for long inputs combined with large models.
  const size_t mem_needed = static_cast<size_t>(num_frames) *
      static_cast<size_t>(num_gauss) * sizeof(BaseFloat);

  // A single frame cannot be split any further, so it is always processed
  // directly below (recursing on it would never terminate).
  if (mem_needed > max_mem && num_frames > 1) {
    // Break into parts and recurse, we don't want to consume too
    // much memory.
    const size_t num_parts = (mem_needed + max_mem - 1) / max_mem;
    // Ceiling division; the result is between 1 and num_frames - 1 because
    // num_parts >= 2 and num_frames >= 2 here.
    const int32 part_frames = static_cast<int32>(
        (static_cast<size_t>(num_frames) + num_parts - 1) / num_parts);
    double tot_ans = 0.0;
    std::vector<std::vector<int32> > part_output;
    output->clear();
    output->resize(num_frames);
    // Step by start frame rather than by part index: with ceiling-sized parts
    // fewer than num_parts ranges may be enough to cover all frames, and a
    // range starting at or beyond num_frames must never be requested.
    for (int32 start_frame = 0; start_frame < num_frames;
         start_frame += part_frames) {
      int32 this_num_frames = std::min(num_frames - start_frame, part_frames);
      SubMatrix<BaseFloat> data_part(data, start_frame, this_num_frames,
                                     0, data.NumCols());
      tot_ans += GaussianSelection(data_part, num_gselect, &part_output);
      for (int32 t = 0; t < this_num_frames; t++)
        (*output)[start_frame + t].swap(part_output[t]);
    }
    KALDI_ASSERT(!output->back().empty());
    return tot_ans;
  }

  KALDI_ASSERT(num_frames != 0);
  Matrix<BaseFloat> loglikes_mat(num_frames, num_gauss, kUndefined);
  this->LogLikelihoods(data, &loglikes_mat);

  output->clear();
  output->resize(num_frames);

  for (int32 i = 0; i < num_frames; i++) {
    SubVector<BaseFloat> loglikes(loglikes_mat, i);

    // "thresh" is the num_gselect'th largest log-likelihood of this frame
    // (found on a scratch copy, since nth_element reorders its input), or
    // -infinity when all components are to be kept.
    BaseFloat thresh;
    if (num_gselect < num_gauss) {
      Vector<BaseFloat> loglikes_copy(loglikes);
      BaseFloat *ptr = loglikes_copy.Data();
      std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
      thresh = ptr[num_gauss-num_gselect];
    } else {
      thresh = -std::numeric_limits<BaseFloat>::infinity();
    }

    BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
    // Candidates at or above the threshold, as (log-likelihood, index) pairs.
    std::vector<std::pair<BaseFloat, int32> > pairs;
    for (int32 p = 0; p < num_gauss; p++) {
      if (loglikes(p) >= thresh) {
        pairs.push_back(std::make_pair(loglikes(p), p));
      }
    }
    // Best first.
    std::sort(pairs.begin(), pairs.end(),
              std::greater<std::pair<BaseFloat, int32> >());

    // Ties at the threshold can yield more than num_gselect candidates, so
    // the output is capped here.
    std::vector<int32> &this_output = (*output)[i];
    for (int32 j = 0;
         j < num_gselect && j < static_cast<int32>(pairs.size());
         j++) {
      this_output.push_back(pairs[j].second);
      tot_loglike = LogAdd(tot_loglike, pairs[j].first);
    }
    KALDI_ASSERT(!this_output.empty());
    ans += tot_loglike;
  }
  return ans;
}
BaseFloat DiagGmm::GaussianSelectionPreselect(
const VectorBase<BaseFloat> &data,

Просмотреть файл

@ -81,6 +81,13 @@ class DiagGmm {
void LogLikelihoods(const VectorBase<BaseFloat> &data,
Vector<BaseFloat> *loglikes) const;
/// This version of the LogLikelihoods function operates on
/// a sequence of frames simultaneously; the row index of both "data" and
/// "loglikes" is the frame index.
void LogLikelihoods(const MatrixBase<BaseFloat> &data,
Matrix<BaseFloat> *loglikes) const;
/// Outputs the per-component log-likelihoods of a subset of mixture
/// components. Note: at output, loglikes->Dim() will equal indices.size().
/// loglikes[i] will correspond to the log-likelihood of the Gaussian
@ -89,13 +96,20 @@ class DiagGmm {
const std::vector<int32> &indices,
Vector<BaseFloat> *loglikes) const;
/// Get gaussian selection information for one frame. Returns log-like for
/// Get gaussian selection information for one frame. Returns log-like for
/// this frame. Output is the best "num_gselect" indices, sorted from best to
/// worst likelihood. If "num_gselect" > NumGauss(), sets it to NumGauss().
BaseFloat GaussianSelection(const VectorBase<BaseFloat> &data,
int32 num_gselect,
std::vector<int32> *output) const;
/// This version of the Gaussian selection function works for a sequence
/// of frames rather than just a single frame. Returns sum of the log-likes
/// over all frames.
BaseFloat GaussianSelection(const MatrixBase<BaseFloat> &data,
int32 num_gselect,
std::vector<std::vector<int32> > *output) const;
/// Get gaussian selection information for one frame. Returns log-like for
/// this frame. Output is the best "num_gselect" indices that were
/// preselected, sorted from best to worst likelihood. If "num_gselect" >

Просмотреть файл

@ -372,6 +372,31 @@ UnitTestEstimateDiagGmm() {
test_io(*gmm, est_gmm, true, feats); // Binary mode
}
{ // Test multi-threaded update.
GmmFlagsType flags_all = kGmmAll;
est_gmm.Resize(gmm->NumGauss(),
gmm->Dim(), flags_all);
est_gmm.SetZero(flags_all);
Vector<BaseFloat> weights(counter);
for (size_t i = 0; i < counter; i++)
weights(i) = 0.5 + 0.1 * (rand() % 10);
float loglike = 0.0;
for (size_t i = 0; i < counter; i++) {
loglike += weights(i) *
est_gmm.AccumulateFromDiag(*gmm, feats.Row(i), weights(i));
}
AccumDiagGmm est_gmm2(*gmm, flags_all);
int32 num_threads = 2;
float loglike2 =
est_gmm2.AccumulateFromDiagMultiThreaded(*gmm, feats, weights, num_threads);
AssertEqual(loglike, loglike2);
est_gmm.AssertEqual(est_gmm2);
}
delete gmm;
}

Просмотреть файл

@ -1,6 +1,6 @@
// gmm/mle-diag-gmm.cc
// Copyright 2009-2012 Saarland University; Georg Stemmer; Jan Silovsky;
// Copyright 2009-2013 Saarland University; Georg Stemmer; Jan Silovsky;
// Microsoft Corporation; Yanmin Qian;
// Johns Hopkins University (author: Daniel Povey);
// Cisco Systems (author: Neha Agrawal)
@ -26,6 +26,7 @@
#include "gmm/diag-gmm.h"
#include "gmm/mle-diag-gmm.h"
#include "thread/kaldi-thread.h"
namespace kaldi {
@ -202,7 +203,6 @@ BaseFloat AccumDiagGmm::AccumulateFromDiag(const DiagGmm &gmm,
return log_like;
}
// Careful: this wouldn't be valid if it were used to update the
// Gaussian weights.
void AccumDiagGmm::SmoothStats(BaseFloat tau) {
@ -478,5 +478,84 @@ void MapDiagGmmUpdate(const MapDiagGmmOptions &config,
}
class AccumulateMultiThreadedClass: public MultiThreadable {
public:
AccumulateMultiThreadedClass(const DiagGmm &diag_gmm,
const MatrixBase<BaseFloat> &data,
const VectorBase<BaseFloat> &frame_weights,
AccumDiagGmm *accum,
double *tot_like):
diag_gmm_(diag_gmm), data_(data),
frame_weights_(frame_weights), dest_accum_(accum),
tot_like_ptr_(tot_like), tot_like_(0.0) { }
AccumulateMultiThreadedClass(const AccumulateMultiThreadedClass &other):
diag_gmm_(other.diag_gmm_), data_(other.data_),
frame_weights_(other.frame_weights_), dest_accum_(other.dest_accum_),
accum_(diag_gmm_, dest_accum_->Flags()), tot_like_ptr_(other.tot_like_ptr_),
tot_like_(0.0) {
KALDI_ASSERT(data_.NumRows() == frame_weights_.Dim());
}
void operator () () {
int32 num_frames = data_.NumRows(), num_threads = num_threads_,
block_size = (num_frames + num_threads - 1) / num_threads,
block_start = block_size * thread_id_,
block_end = std::min(num_frames, block_start + block_size);
tot_like_ = 0.0;
double tot_weight = 0.0;
for (int32 t = block_start; t < block_end; t++) {
tot_like_ += frame_weights_(t) *
accum_.AccumulateFromDiag(diag_gmm_, data_.Row(t), frame_weights_(t));
tot_weight += frame_weights_(t);
}
KALDI_VLOG(3) << "Thread " << thread_id_ << " saw average likeliood/frame "
<< (tot_like_ / tot_weight) << " over " << tot_weight
<< " (weighted) frames.";
}
~AccumulateMultiThreadedClass() {
if (accum_.Dim() != 0) { // if our accumulator is set up (this is not true
// for the single object we use to initialize the others)
dest_accum_->Add(1.0, accum_);
*tot_like_ptr_ += tot_like_;
}
}
private:
const DiagGmm &diag_gmm_;
const MatrixBase<BaseFloat> &data_;
const VectorBase<BaseFloat> &frame_weights_;
AccumDiagGmm *dest_accum_;
AccumDiagGmm accum_;
double *tot_like_ptr_;
double tot_like_;
};
// Multi-threaded accumulation over all rows of "data".  Returns the value
// that the worker objects add up into the local total, i.e. the sum over
// frames of (frame weight times the value returned by AccumulateFromDiag).
BaseFloat AccumDiagGmm::AccumulateFromDiagMultiThreaded(
    const DiagGmm &gmm,
    const MatrixBase<BaseFloat> &data,
    const VectorBase<BaseFloat> &frame_weights,
    int32 num_threads) {
  double total_weighted_like = 0.0;
  // Prototype worker.  Its own accumulator is never set up, so it adds
  // nothing to *this when it goes out of scope; it only serves as the
  // object the threader is initialized from.
  AccumulateMultiThreadedClass prototype(gmm, data, frame_weights,
                                         this, &total_weighted_like);
  {
    // All the work is done by the constructor and destructor of the
    // threader.  The enclosing braces force it to be destroyed here, so
    // that total_weighted_like is complete before we read it below.
    MultiThreader<AccumulateMultiThreadedClass> workers(num_threads,
                                                        prototype);
  }
  return total_weighted_like;
}
// Testing helper: asserts (via KALDI_ASSERT) that this accumulator and
// "other" hold the same statistics.  Returns nothing; a mismatch triggers
// the assertion failure.
void AccumDiagGmm::AssertEqual(const AccumDiagGmm &other) {
// The feature dimension, number of components and update flags must match
// exactly.
KALDI_ASSERT(dim_ == other.dim_ && num_comp_ == other.num_comp_ &&
flags_ == other.flags_);
// The accumulated statistics are compared with ApproxEqual rather than
// exact equality, using whatever tolerance ApproxEqual applies by default.
KALDI_ASSERT(occupancy_.ApproxEqual(other.occupancy_));
KALDI_ASSERT(mean_accumulator_.ApproxEqual(other.mean_accumulator_));
KALDI_ASSERT(variance_accumulator_.ApproxEqual(other.variance_accumulator_));
}
} // End of namespace kaldi

Просмотреть файл

@ -142,6 +142,16 @@ class AccumDiagGmm {
const VectorBase<BaseFloat> &data,
BaseFloat frame_posterior);
/// This does the same job as AccumulateFromDiag, but using
/// multiple threads. Returns sum of (log-likelihood times
/// frame weight) over all frames.
BaseFloat AccumulateFromDiagMultiThreaded(
const DiagGmm &gmm,
const MatrixBase<BaseFloat> &data,
const VectorBase<BaseFloat> &frame_weights,
int32 num_threads);
/// Increment the stats for this component by the specified amount
/// (not all parts may be taken, depending on flags).
/// Note: x_stats and x2_stats are assumed to already be multiplied by "occ"
@ -173,7 +183,9 @@ class AccumDiagGmm {
const VectorBase<double> &occupancy() const { return occupancy_; }
const MatrixBase<double> &mean_accumulator() const { return mean_accumulator_; }
const MatrixBase<double> &variance_accumulator() const { return variance_accumulator_; }
// used in testing.
void AssertEqual(const AccumDiagGmm &other);
private:
int32 dim_;
int32 num_comp_;

Просмотреть файл

@ -26,7 +26,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
gmm-global-info
OBJFILES =
@ -34,10 +35,8 @@ OBJFILES =
TESTFILES =
# Note: we intentionally list kaldi-gmm.a both before and after kaldi-transform.a, because
# each requires symbols from the other.
ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
../transform/kaldi-transform.a ../gmm/kaldi-gmm.a ../gmm/kaldi-gmm.a \
../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
../thread/kaldi-thread.a ../util/kaldi-util.a ../base/kaldi-base.a

Просмотреть файл

@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
const char *usage =
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
"of vectors of floats.\n"
"of vectors of floats. If --average=true, prints out the average per-frame\n"
"log-likelihood for each utterance, as a single float.\n"
"Usage: gmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
"<likes-out-wspecifier>\n"
"e.g.: gmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
ParseOptions po(usage);
bool average = false;
std::string gselect_rspecifier;
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
"to limit the #Gaussians accessed on each frame.");
po.Register("average", &average, "If true, print out the average per-frame "
"log-likelihood as a single float per utterance.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
BaseFloatVectorWriter likes_writer(likes_wspecifier);
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
int32 num_done = 0, num_err = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
tot_like += likes.Sum();
tot_frames += file_frames;
likes_writer.Write(key, likes);
if (average)
average_likes_writer.Write(key, likes.Sum() / file_frames);
else
likes_writer.Write(key, likes);
num_done++;
}
KALDI_LOG << "Done " << num_done << " files; " << num_err

Просмотреть файл

@ -61,13 +61,16 @@ void InitGmmFromRandomFrames(const Matrix<BaseFloat> &feats, DiagGmm *gmm) {
void TrainOneIter(const Matrix<BaseFloat> &feats,
const MleDiagGmmOptions &gmm_opts,
int32 iter,
int32 num_threads,
DiagGmm *gmm) {
AccumDiagGmm gmm_acc(*gmm, kGmmAll);
double tot_like = 0.0;
for (int32 t = 0; t < feats.NumRows(); t++)
tot_like += gmm_acc.AccumulateFromDiag(*gmm, feats.Row(t), 1.0);
Vector<BaseFloat> frame_weights(feats.NumRows(), kUndefined);
frame_weights.Set(1.0);
double tot_like;
tot_like = gmm_acc.AccumulateFromDiagMultiThreaded(*gmm, feats, frame_weights,
num_threads);
KALDI_LOG << "Likelihood per frame on iteration " << iter
<< " was " << (tot_like / feats.NumRows()) << " over "
@ -97,17 +100,24 @@ int main(int argc, char *argv[]) {
bool binary = true;
int32 num_gauss = 100;
int32 num_gauss_init = 0;
int32 num_iters = 50;
int32 num_frames = 200000;
int32 srand_seed = 0;
int32 num_threads = 4;
po.Register("binary", &binary, "Write output in binary mode");
po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
po.Register("num-gauss-init", &num_gauss_init, "Number of Gaussians in "
"the model initially (if nonzero and less than num_gauss, "
"we'll do mixture splitting)");
po.Register("num-iters", &num_iters, "Number of iterations of training");
po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
"memory and train on (randomly chosen from the input features)");
po.Register("srand", &srand_seed, "Seed for random number generator ");
po.Register("num-threads", &num_threads, "Number of threads used for "
"statistics accumulation");
gmm_opts.Register(&po);
po.Read(argc, argv);
@ -132,7 +142,7 @@ int main(int argc, char *argv[]) {
int64 num_read = 0, dim = 0;
KALDI_LOG << "Reading features (will keep " << num_frames << " frames.)";
for (; !feature_reader.Done(); feature_reader.Next()) {
const Matrix<BaseFloat> &this_feats = feature_reader.Value();
for (int32 t = 0; t < this_feats.NumRows(); t++) {
@ -160,15 +170,36 @@ int main(int argc, char *argv[]) {
KALDI_WARN << "Number of frames read " << num_read << " was less than "
<< "target number " << num_frames << ", using all we read.";
feats.Resize(num_read, dim, kCopyData);
} else {
BaseFloat percent = num_frames * 100.0 / num_read;
KALDI_LOG << "Kept " << num_frames << " out of " << num_read
<< " input frames = " << percent << "%.";
}
DiagGmm gmm(num_gauss, dim);
KALDI_LOG << "Initializing GMM means from random frames";
InitGmmFromRandomFrames(feats, &gmm);
if (num_gauss_init <= 0 || num_gauss_init > num_gauss)
num_gauss_init = num_gauss;
for (int32 iter = 0; iter < num_iters; iter++)
TrainOneIter(feats, gmm_opts, iter, &gmm);
DiagGmm gmm(num_gauss_init, dim);
KALDI_LOG << "Initializing GMM means from random frames to "
<< num_gauss_init << " Gaussians.";
InitGmmFromRandomFrames(feats, &gmm);
// we'll increase the #Gaussians by splitting,
// till halfway through training.
int32 cur_num_gauss = num_gauss_init,
gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2);
for (int32 iter = 0; iter < num_iters; iter++) {
TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
if (next_num_gauss > gmm.NumGauss()) {
KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
gmm.Split(next_num_gauss, 0.1);
cur_num_gauss = next_num_gauss;
}
}
WriteKaldiObject(gmm, model_wxfilename, binary);
KALDI_LOG << "Wrote model to " << model_wxfilename;

Просмотреть файл

@ -105,9 +105,8 @@ int main(int argc, char *argv[]) {
gmm.GaussianSelectionPreselect(mat.Row(i), preselect[i],
num_gselect, &(gselect[i]));
} else { // No "preselect" [i.e. no existing gselect]: simple case.
for (int32 i = 0; i < mat.NumRows(); i++)
tot_like_this_file +=
gmm.GaussianSelection(mat.Row(i), num_gselect, &(gselect[i]));
tot_like_this_file =
gmm.GaussianSelection(mat, num_gselect, &gselect);
}
gselect_writer.Write(utt, gselect);

Просмотреть файл

@ -61,6 +61,7 @@ int main(int argc, char *argv[]) {
<< trans_model.NumTransitionStates() << '\n';
std::cout << "feature dimension " << am_gmm.Dim() << '\n';
std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n';
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;

Просмотреть файл

@ -21,9 +21,11 @@
#include "nnet/nnet-nnet.h"
#include "nnet/nnet-activation.h"
#include "nnet/nnet-kl-hmm.h"
#include "nnet/nnet-affine-transform.h"
#include "nnet/nnet-rbm.h"
#include "nnet/nnet-various.h"
#include "nnet/nnet-kl-hmm.h"
namespace kaldi {
namespace nnet1 {
@ -40,6 +42,7 @@ const struct Component::key_value Component::kMarkerMap[] = {
{ Component::kCopy,"<copy>" },
{ Component::kAddShift,"<addshift>" },
{ Component::kRescale,"<rescale>" },
{ Component::kKlHmm,"<klhmm>" }
};
@ -119,6 +122,9 @@ Component* Component::Read(std::istream &is, bool binary) {
case Component::kRescale :
p_comp = new Rescale(dim_in, dim_out);
break;
case Component::kKlHmm :
p_comp = new KlHmm(dim_in, dim_out);
break;
case Component::kUnknown :
default :
KALDI_ERR << "Missing type: " << token;

Просмотреть файл

@ -64,7 +64,8 @@ class Component {
kTranspose,
kBlockLinearity,
kAddShift,
kRescale
kRescale,
kKlHmm
} ComponentType;
/// A pair of type and marker
struct key_value {

Просмотреть файл

@ -9,12 +9,13 @@ LDLIBS += $(CUDA_LDLIBS)
BINFILES = nnet-train-frmshuff \
nnet-train-xent-hardlab-perutt \
nnet-train-xent-hardlab-frmshuff \
nnet-train-mse-tgtmat-frmshuff \
nnet-train-mmi-sequential \
nnet-train-mpe-sequential \
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet
nnet-train-mse-tgtmat-frmshuff \
nnet-train-mmi-sequential \
nnet-train-mpe-sequential \
rbm-train-cd1-frmshuff rbm-convert-to-nnet \
nnet-forward nnet-copy nnet-info nnet-concat \
transf-to-nnet cmvn-to-nnet \
nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component
OBJFILES =
@ -24,6 +25,6 @@ TESTFILES =
ADDLIBS = ../nnet/kaldi-nnet.a ../cudamatrix/kaldi-cudamatrix.a ../lat/kaldi-lat.a \
../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
../util/kaldi-util.a ../base/kaldi-base.a
../util/kaldi-util.a ../base/kaldi-base.a
include ../makefiles/default_rules.mk

Просмотреть файл

@ -60,14 +60,12 @@ int main(int argc, char** argv) {
"e.g.: ./online-audio-client 192.168.50.12 9012 'scp:wav_files.scp'\n\n";
ParseOptions po(usage);
bool htk = false, vtt = false, silent = false;
bool htk = false, vtt = false;
int32 channel = -1;
int32 packet_size = 1024;
po.Register("htk", &htk, "Save the result to an HTK label file");
po.Register("vtt", &vtt, "Save the result to a WebVTT subtitle file");
po.Register("silent", &silent,
"Don't print any output (except for errors)");
po.Register(
"channel", &channel,
"Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
@ -116,10 +114,8 @@ int main(int argc, char** argv) {
return -1;
}
if (!silent) {
std::cout << "Connected to KALDI server at host " << server_addr_str
<< " port " << server_port << std::endl;
}
KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str
<< " port " << server_port << std::endl;
char* pack_buffer = new char[packet_size];
@ -127,8 +123,7 @@ int main(int argc, char** argv) {
for (; !reader.Done(); reader.Next()) {
std::string wav_key = reader.Key();
if (!silent)
std::cout << "File: " << wav_key << std::endl;
KALDI_VLOG(2) << "File: " << wav_key << std::endl;
const WaveData &wav_data = reader.Value();
@ -260,10 +255,10 @@ int main(int argc, char** argv) {
}
}
if (!silent) {
{
float speed = total_input_dur / total_reco_dur;
std::cout << "Recognized (" << speed << "xRT): " << reco_output
<< std::endl;
KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output
<< std::endl;
}
if (htk) {

Просмотреть файл

@ -116,9 +116,6 @@ int32 main(int argc, char *argv[]) {
return 1;
}
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR<< "Invalid left/right context parameters!";
std::string model_rspecifier = po.GetArg(1), fst_rspecifier = po.GetArg(2),
word_syms_filename = po.GetArg(3), silence_phones_str = po.GetArg(4),
word_boundary_filename = po.GetOptArg(6), lda_mat_rspecifier = po
@ -210,10 +207,6 @@ int32 main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -85,10 +85,7 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 4)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string model_rxfilename = po.GetArg(1),
fst_rxfilename = po.GetArg(2),
word_syms_filename = po.GetArg(3),
@ -151,10 +148,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -102,9 +102,6 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 5)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string model_rxfilename = po.GetArg(1),
fst_rxfilename = po.GetArg(2),
@ -163,10 +160,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -83,10 +83,7 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 7)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string wav_rspecifier = po.GetArg(1),
model_rspecifier = po.GetArg(2),
fst_rspecifier = po.GetArg(3),
@ -181,10 +178,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -43,15 +43,15 @@ namespace kaldi {
Here, we will still accept objects of some class C with an operator () that
takes no arguments. C may also have a constructor and a destructor that do
something (typically the constructor just sets variables, and the destructor
does some kind of output). We
have a templated class TaskSequencer<C> which is responsible for running
the jobs in parallel. It has a function Run() that will accept a new object
of class C; this will block until a thread is free, at which time it will
start running the operator () of the class. When classes are finished running,
the object will be deleted. Class TaskSequencer guarantees that the
destructors will be called sequentially (not in parallel) and in the same
order the objects were given to the Run() function, so that it is safe for
the destructor to have side effects such as outputting data.
does some kind of output). We have a templated class TaskSequencer<C> which
is responsible for running the jobs in parallel. It has a function Run()
that will accept a new object of class C; this will block until a thread is
free, at which time it will spawn a thread that starts running the operator
() of the class. When classes are finished running, the objects will be
deleted. Class TaskSequencer guarantees that the destructors will be called
sequentially (not in parallel) and in the same order the objects were given
to the Run() function, so that it is safe for the destructor to have side
effects such as outputting data.
Note: the destructor of TaskSequencer will wait for any remaining jobs that
are still running and will call the destructors.

Просмотреть файл

@ -34,7 +34,7 @@ TESTFILES =
ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
../transform/kaldi-transform.a ../tied/kaldi-tied.a \
../gmm/kaldi-gmm.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
../gmm/kaldi-gmm.a ../thread/kaldi-thread.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
../matrix/kaldi-matrix.a ../util/kaldi-util.a ../base/kaldi-base.a
include ../makefiles/default_rules.mk

Просмотреть файл

@ -14,6 +14,7 @@ OBJFILES = regression-tree.o regtree-mllr-diag-gmm.o lda-estimate.o \
LIBNAME = kaldi-transform
ADDLIBS = ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a
ADDLIBS = ../gmm/kaldi-gmm.a ../thread/kaldi-thread.a ../tree/kaldi-tree.a \
../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a
include ../makefiles/default_rules.mk

Просмотреть файл

@ -406,8 +406,8 @@ class PipeInputImpl: public InputImplBase {
return false;
} else {
#ifndef _MSC_VER
fb_ = new PipebufType(f_, // Using his constructor won't lead the
// destructor close the stream.
fb_ = new PipebufType(f_, // Using this constructor won't lead the
// destructor to close the stream.
(binary ? std::ios_base::in|std::ios_base::binary
:std::ios_base::in));
KALDI_ASSERT(fb_ != NULL); // or would be alloc error.

Просмотреть файл

@ -128,7 +128,8 @@ sph2pipe_v2.5: sph2pipe_v2.5.tar.gz
tar xzf sph2pipe_v2.5.tar.gz
sph2pipe_v2.5.tar.gz:
wget -T 10 -t 3 http://merlin.fit.vutbr.cz/kaldi/sph2pipe_v2.5.tar.gz
wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz || \
wget -T 10 -t 3 http://www.danielpovey.com/files/kaldi/sph2pipe_v2.5.tar.gz
openblas: openblas_compiled