Mirror of https://github.com/mozilla/kaldi.git
(sandbox/pitch) Merging of trunk to sandbox/pitch to perform reintegration merge afterwards
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/pitch@3251 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Commit 7afa80a24d
@@ -47,7 +47,7 @@ export PATH=$PATH:/export/babel/sanjeev/kaldi-trunk/tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -29,7 +29,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -53,7 +53,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -87,7 +87,7 @@ cat $trans_file | awk -v wmap=$dir/word_map 'BEGIN{while((getline<wmap)>0)map[$1
 ! merge_ngrams </dev/null >&/dev/null && \
   echo merge_ngrams not found in kaldi_lm. You need to have kaldi_lm on your path OR && \
   echo You can do the following: && \
-  echo 1. Install the latest version from http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz && \
+  echo 1. Install the latest version from http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz && \
   echo 2. you delete kaldi_lm, and kaldi_lm.tar.gz in the tools folder. This script will automatically install it. && \
   exit 1;

@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm

@@ -9,8 +9,9 @@
 export train_cmd="queue.pl -l arch=*64"
 export decode_cmd="queue.pl -l arch=*64,mem_free=2G,ram_free=2G"
 export mkgraph_cmd="queue.pl -l arch=*64,ram_free=4G,mem_free=4G"
+export big_memory_cmd="queue.pl -l arch=*64,ram_free=8G,mem_free=8G"
 export cuda_cmd="queue.pl -l gpu=1"
 
 #export cuda_cmd="..."
 
 #b) BUT cluster options

@@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH
 else
   echo Downloading and installing the kaldi_lm tools
   if [ ! -f kaldi_lm.tar.gz ]; then
-    wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1;
+    wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1;
   fi
   tar -xvzf kaldi_lm.tar.gz || exit 1;
   cd kaldi_lm
@@ -324,6 +324,8 @@ local/run_sgmm2.sh
 # You probably want to run the hybrid recipe as it is complementary:
 local/run_dnn.sh
 
+# You probably want to try KL-HMM
+#local/run_kl_hmm.sh
 
 # Getting results [see RESULTS file]
 # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+# Copyright 2012-2013  Johns Hopkins University (Author: Daniel Povey),
+#                      Idiap Research Institute (Author: David Imseng)
+# Apache 2.0
+
+# Begin configuration.
+stage=-4 # This allows restarting after partway, when something went wrong.
+config=
+cmd=run.pl
+scale_opts="--transition-scale=1.0 --acoustic-scale=0.1 --self-loop-scale=0.1"
+num_iters=35    # Number of iterations of training
+max_iter_inc=25 # Last iter to increase #Gauss on.
+beam=10
+retry_beam=40
+boost_silence=1.0 # Factor by which to boost silence likelihoods in alignment
+power=0.25 # Exponent for number of gaussians according to occurrence counts
+cluster_thresh=-1 # for build-tree control final bottom-up clustering of leaves
+thresh=20
+use_gpu="no"
+nnet_dir=
+context_opts= # e.g. set this to "--context-width 5 --central-position 2" for quinphone.
+tmpdir=
+no_softmax=true
+# End configuration.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f path.sh ] && . ./path.sh;
+. parse_options.sh || exit 1;
+
+if [ $# != 5 ]; then
+  echo "Usage: steps/train_deltas.sh <num-leaves> <data-dir> <lang-dir> <alignment-dir> <exp-dir>"
+  echo "e.g.: steps/train_deltas.sh 2000 data/train_si84_half data/lang exp/mono_ali exp/tri1"
+  echo "main options (for others, see top of script file)"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --stage <stage>                                  # stage to do partial re-run from."
+  echo "  --thresh "
+  echo "  --cluster_thresh "
+  echo "  --nnet_dir "
+  echo "  --context_opts "
+  echo "  --tmpdir "
+  echo "  --no-softmax "
+  exit 1;
+fi
+
+numleaves=$1
+data=$2
+lang=$3
+alidir=$4
+dir=$5
+
+
+for f in $alidir/final.mdl $alidir/ali.1.gz $data/feats.scp $lang/phones.txt; do
+  [ ! -f $f ] && echo "train_deltas.sh: no such file $f" && exit 1;
+done
+
+numgauss=$numleaves
+incgauss=$[($totgauss-$numgauss)/$max_iter_inc] # per-iter increment for #Gauss
+ciphonelist=`cat $lang/phones/context_indep.csl` || exit 1;
+nj=`cat $alidir/num_jobs` || exit 1;
+mkdir -p $dir/log
+echo $nj > $dir/num_jobs
+
+sdata=$data/split$nj;
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+
+nnet=${nnet_dir}/final.nnet
+feature_transform=${nnet_dir}/final.feature_transform
+
+featsdim="ark:copy-feats scp:$data/feats.scp ark:- |"
+nnetfeats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
+# Optionally add cmvn
+if [ -f ${nnet_dir}/norm_vars ]; then
+  norm_vars=$(cat ${nnet_dir}/norm_vars 2>/dev/null)
+  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
+  nnetfeats="$nnetfeats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
+  featsdim="$featsdim apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$data/utt2spk scp:$data/cmvn.scp ark:- ark:- |"
+fi
+# Optionally add deltas
+if [ -f ${nnet_dir}/delta_order ]; then
+  delta_order=$(cat ${nnet_dir}/delta_order)
+  nnetfeats="$nnetfeats add-deltas --delta-order=$delta_order ark:- ark:- |"
+  featsdim="$featsdim add-deltas --delta-order=$delta_order ark:- ark:- |"
+fi
+
+feats="ark,s,cs:nnet-forward "
+if [[ ! -z $feature_transform ]]; then
+  feats=${feats}" --feature-transform=$feature_transform "
+fi
+feats=${feats}"--no-softmax=$no_softmax --use-gpu=$use_gpu $nnet \"$nnetfeats\" ark:- |"
+
+feat_dim=$(feat-to-dim --print-args=false "$featsdim" -)
+rm $dir/.error 2>/dev/null
+
+if [[ ! -z $tmpdir ]]; then
+  mkdir -p $tmpdir
+else
+  tmpdir=$dir
+fi
+
+if [ $stage -le -3 ]; then
+  echo "$0: accumulating tree stats"
+  $cmd JOB=1:$nj $dir/log/acc_tree.JOB.log \
+    acc-tree-stats $context_opts --var-floor=1.0 --ci-phones=$ciphonelist $alidir/final.mdl "$feats" \
+      "ark:gunzip -c $alidir/ali.JOB.gz|" $tmpdir/JOB.treeacc || exit 1;
+  sum-tree-stats $dir/treeacc $tmpdir/*.treeacc 2>$dir/log/sum_tree_acc.log || exit 1;
+  rm $tmpdir/*.treeacc
+fi
+
+if [ $stage -le -2 ]; then
+  echo "$0: getting questions for tree-building, via clustering"
+  # preparing questions, roots file...
+  cluster-phones $context_opts $dir/treeacc $lang/phones/sets.int $dir/questions.int 2> $dir/log/questions.log || exit 1;
+  cat $lang/phones/extra_questions.int >> $dir/questions.int
+  compile-questions $context_opts $lang/topo $dir/questions.int $dir/questions.qst 2>$dir/log/compile_questions.log || exit 1;
+
+  echo "$0: building the tree"
+#  $cmd $dir/log/build_tree.log \
+  build-tree $context_opts --verbose=1 --max-leaves=$numleaves --cluster-thresh=$cluster_thresh --thresh=$thresh $dir/treeacc $lang/phones/roots.int \
+    $dir/questions.qst $lang/topo $dir/tree &> $dir/log/build_tree.log || exit 1;
+
+  gmm-init-model-flat --dim=$feat_dim $dir/tree $lang/topo $dir/1.mdl
+
+  rm $dir/treeacc
+fi
+
+if [ $stage -le -1 ]; then
+  # Convert the alignments.
+  echo "$0: converting alignments from $alidir to use current tree"
+  $cmd JOB=1:$nj $dir/log/convert.JOB.log \
+    convert-ali $alidir/final.mdl $dir/1.mdl $dir/tree \
+      "ark:gunzip -c $alidir/ali.JOB.gz|" "ark:|gzip -c >$dir/ali.JOB.gz" || exit 1;
+fi
+
+if [ $stage -le 0 ]; then
+  echo "$0: compiling graphs of transcripts"
+  $cmd JOB=1:$nj $dir/log/compile_graphs.JOB.log \
+    compile-train-graphs $dir/tree $dir/1.mdl $lang/L.fst \
+      "ark:utils/sym2int.pl -f 2- $lang/words.txt < $data/split$nj/JOB/text |" \
+      "ark:|gzip -c >$dir/fsts.JOB.gz" || exit 1;
+fi
+
+rm $dir/final.mdl 2>/dev/null
+ln -s 1.mdl $dir/final.mdl
+
+# Summarize warning messages...
+utils/summarize_warnings.pl $dir/log
+
+echo "$0: Done building the tree in $dir"
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Copyright 2012-2013  Karel Vesely,
+#                      Daniel Povey,
+#                      Idiap Research Institute (Author: David Imseng)
+# Apache 2.0
+
+# Begin configuration section.
+nnet=               # Optionally pre-select network to use for getting state-likelihoods
+feature_transform=  # Optionally pre-select feature transform (in front of nnet)
+model=              # Optionally pre-select transition model
+
+stage=0 # stage=1 skips lattice generation
+nj=4
+cmd=run.pl
+max_active=7000 # maximum of active tokens
+max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
+beam=13.0 # GMM:13.0
+latbeam=8.0 # GMM:6.0
+acwt=0.1 # GMM:0.0833, note: only really affects pruning (scoring is on lattices).
+scoring_opts="--min-lmwt 1 --max-lmwt 12"
+skip_scoring=false
+use_gpu="no" # disable gpu
+parallel_opts=""
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <graph-dir> <data-dir> <decode-dir>"
+  echo "... where <decode-dir> is assumed to be a sub-directory of the directory"
+  echo " where the DNN + transition model is."
+  echo "e.g.: $0 exp/dnn1/graph_tgpr data/test exp/dnn1/decode_tgpr"
+  echo ""
+  echo "This script works on plain or modified features (CMN,delta+delta-delta),"
+  echo "which are then sent through feature-transform. It works out what type"
+  echo "of features you used from content of srcdir."
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo ""
+  echo "  --nnet <nnet>                                    # which nnet to use (opt.)"
+  echo "  --feature-transform <nnet>                       # select transform in front of nnet (opt.)"
+  echo "  --model <model>                                  # which transition model to use (opt.)"
+  echo ""
+  echo "  --acwt <float>                                   # select acoustic scale for decoding"
+  echo "  --scoring-opts <opts>                            # options forwarded to local/score.sh"
+  exit 1;
+fi
+
+
+graphdir=$1
+data=$2
+dir=$3
+srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
+sdata=$data/split$nj;
+
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
+  nnet=$srcdir/final.nnet;
+fi
+[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
+
+if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
+  model=$srcdir/final.mdl;
+fi
+
+# find the feature_transform to use
+if [ -z "$feature_transform" ]; then
+  feature_transform=$srcdir/final.feature_transform
+fi
+if [ ! -f $feature_transform ]; then
+  echo "Missing feature_transform '$feature_transform'"
+  exit 1
+fi
+
+# check that files exist
+for f in $sdata/1/feats.scp $nnet_i $nnet $model $graphdir/HCLG.fst; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+# Create the feature stream:
+feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
+# Optionally add cmvn
+if [ -f $srcdir/norm_vars ]; then
+  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
+  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
+  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
+fi
+# Optionally add deltas
+if [ -f $srcdir/delta_order ]; then
+  delta_order=$(cat $srcdir/delta_order)
+  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
+fi
+
+
+# Run the decoding in the queue
+if [ $stage -le 0 ]; then
+  $cmd $parallel_opts JOB=1:$nj $dir/log/decode.JOB.log \
+    nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet "$feats" ark:- \| \
+    latgen-faster-mapped --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$latbeam \
+    --acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
+    $model $graphdir/HCLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
+fi
+
+# Run the scoring
+if ! $skip_scoring ; then
+  [ ! -x local/score.sh ] && \
+    echo "Not scoring because local/score.sh does not exist or not executable." && exit 1;
+  local/score.sh $scoring_opts --cmd "$cmd" $data $graphdir $dir || exit 1;
+fi
+
+exit 0;
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+# Copyright 2012-2013  Karel Vesely,
+#                      Daniel Povey,
+#                      Idiap Research Institute (Author: David Imseng)
+# Apache 2.0
+
+# Begin configuration section.
+nnet=               # Optionally pre-select network to use for getting state-likelihoods
+feature_transform=  # Optionally pre-select feature transform (in front of nnet)
+model=              # Optionally pre-select transition model
+class_frame_counts= # Optionally pre-select class-counts used to compute PDF priors
+
+stage=0 # stage=1 skips lattice generation
+nj=32
+cmd=$decode_cmd
+max_active=7000 # maximum of active tokens
+max_mem=50000000 # limit the fst-size to 50MB (larger fsts are minimized)
+use_gpu="no" # disable gpu
+parallel_opts=""
+tmpdir=
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+[ -f ./path.sh ] && . ./path.sh; # source the path.
+. parse_options.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 [options] <data-dir> <align-dir> <kl-hmm-dir>"
+  echo "... where <kl-hmm-dir> is assumed to be a sub-directory of the directory"
+  echo " where the DNN + transition model is."
+  echo "e.g.: $0 data/train exp/dnn1/kl-hmm-train"
+  echo ""
+  echo "This script works on plain or modified features (CMN,delta+delta-delta),"
+  echo "which are then sent through feature-transform. It works out what type"
+  echo "of features you used from content of srcdir."
+  echo ""
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --nj <nj>                                        # number of parallel jobs"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo ""
+  echo "  --nnet <nnet>                                    # which nnet to use (opt.)"
+  echo "  --feature-transform <nnet>                       # select transform in front of nnet (opt.)"
+  echo "  --model <model>                                  # which transition model to use (opt.)"
+  echo "  --tmpdir <dir>                                   # Temp directory to store the statistics, because they can get big (opt.)"
+  exit 1;
+fi
+
+
+data=$1
+alidir=$2
+dir=$3
+srcdir=`dirname $dir`; # The model directory is one level up from decoding directory.
+sdata=$data/split$nj;
+
+mkdir -p $dir/log
+[[ -d $sdata && $data/feats.scp -ot $sdata ]] || split_data.sh $data $nj || exit 1;
+echo $nj > $dir/num_jobs
+
+if [ -z "$nnet" ]; then # if --nnet <nnet> was not specified on the command line...
+  nnet=$srcdir/final.nnet;
+fi
+[ -z "$nnet" ] && echo "Error nnet '$nnet' does not exist!" && exit 1;
+
+if [ -z "$model" ]; then # if --model <mdl> was not specified on the command line...
+  model=$srcdir/final.mdl;
+fi
+
+# find the feature_transform to use
+if [ -z "$feature_transform" ]; then
+  feature_transform=$srcdir/final.feature_transform
+fi
+if [ ! -f $feature_transform ]; then
+  echo "Missing feature_transform '$feature_transform'"
+  exit 1
+fi
+
+# check that files exist
+for f in $sdata/1/feats.scp $nnet_i $nnet $model; do
+  [ ! -f $f ] && echo "$0: no such file $f" && exit 1;
+done
+
+# Create the feature stream:
+feats="ark,s,cs:copy-feats scp:$sdata/JOB/feats.scp ark:- |"
+# Optionally add cmvn
+if [ -f $srcdir/norm_vars ]; then
+  norm_vars=$(cat $srcdir/norm_vars 2>/dev/null)
+  [ ! -f $sdata/1/cmvn.scp ] && echo "$0: cannot find cmvn stats $sdata/1/cmvn.scp" && exit 1
+  feats="$feats apply-cmvn --norm-vars=$norm_vars --utt2spk=ark:$sdata/JOB/utt2spk scp:$sdata/JOB/cmvn.scp ark:- ark:- |"
+fi
+# Optionally add deltas
+if [ -f $srcdir/delta_order ]; then
+  delta_order=$(cat $srcdir/delta_order)
+  feats="$feats add-deltas --delta-order=$delta_order ark:- ark:- |"
+fi
+
+ali="ark:gunzip -c $alidir/ali.*.gz |"
+
+if [[ ! -z $tmpdir ]]; then
+  mkdir -p $tmpdir
+else
+  tmpdir=$dir
+fi
+
+nkl_states=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
+if [ $stage -le 0 ]; then
+  $cmd $parallel_opts JOB=1:$nj $dir/log/acc-stats.JOB.log \
+    nnet-kl-hmm-acc --nkl-states=${nkl_states} "ark:nnet-forward --feature-transform=$feature_transform --use-gpu=$use_gpu $nnet \"$feats\" ark:- |" "ark:ali-to-pdf --print-args=false $alidir/final.mdl \"$ali\" ark:- |" $tmpdir/kl-hmm-stats.JOB
+fi
+
+sum-matrices $dir/accumulated-kl-hmm-stats $tmpdir/kl-hmm-stats.*
+
+rm $tmpdir/kl-hmm-stats.*
+
+nnet-kl-hmm-mat-to-component $dir/kl-hmm.nnet $dir/accumulated-kl-hmm-stats
+
+nnet-concat $dir/../final.nnet $dir/kl-hmm.nnet $dir/final.nnet
+
+exit 0;
@@ -43,7 +43,7 @@ texts=""
 
 nu=`cat $data/utt2spk | wc -l`
 nf=`cat $data/feats.scp | wc -l`
-nt=`cat $data/text | wc -l`
+nt=`cat $data/text 2>/dev/null | wc -l` # take it as zero if no such file
 if [ $nu -ne $nf ]; then
   echo "split_data.sh: warning, #lines is (utt2spk,feats.scp) is ($nu,$nf); this script "
   echo " may produce incorrectly split data."

@@ -61,7 +61,7 @@ if [ ! -d $s1 ]; then
 else
   need_to_split=false
   for f in utt2spk spk2utt feats.scp text wav.scp cmvn.scp spk2gender \
-    segments reco2file_and_channel; do
+    vad.scp segments reco2file_and_channel; do
   if [[ -f $data/$f && ( ! -f $s1/$f || $s1/$f -ot $data/$f ) ]]; then
     need_to_split=true
   fi

@@ -75,6 +75,7 @@ fi
 for n in `seq $numsplit`; do
   mkdir -p $data/split$numsplit/$n
   feats="$feats $data/split$numsplit/$n/feats.scp"
+  vads="$vads $data/split$numsplit/$n/vad.scp"
   texts="$texts $data/split$numsplit/$n/text"
   utt2spks="$utt2spks $data/split$numsplit/$n/utt2spk"
 done

@@ -88,8 +89,10 @@ fi
 utils/split_scp.pl $utt2spk_opt $data/utt2spk $utt2spks || exit 1
 
 utils/split_scp.pl $utt2spk_opt $data/feats.scp $feats || exit 1
-[ -f $data/text ] && \
-  utils/split_scp.pl $utt2spk_opt $data/text $texts
+
+[ -f $data/text ] && utils/split_scp.pl $utt2spk_opt $data/text $texts
+
+[ -f $data/vad.scp ] && utils/split_scp.pl $utt2spk_opt $data/vad.scp $vads
 
 # If lockfile is not installed, just don't lock it. It's not a big deal.
 which lockfile >&/dev/null && lockfile -l 60 $data/.split_lock
@@ -1,5 +1,6 @@
 #!/bin/bash
-# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2010-2011 Microsoft Corporation
+#           2012-2013 Johns Hopkins University (Author: Daniel Povey)
 # Apache 2.0
 
 

@@ -89,6 +90,7 @@ fi
 function do_filtering {
   # assumes the utt2spk and spk2utt files already exist.
   [ -f $srcdir/feats.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/feats.scp >$destdir/feats.scp
+  [ -f $srcdir/vad.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/vad.scp >$destdir/vad.scp
   [ -f $srcdir/wav.scp ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/wav.scp >$destdir/wav.scp
   [ -f $srcdir/text ] && utils/filter_scp.pl $destdir/utt2spk <$srcdir/text >$destdir/text
   [ -f $srcdir/spk2gender ] && utils/filter_scp.pl $destdir/spk2utt <$srcdir/spk2gender >$destdir/spk2gender
@@ -58,7 +58,9 @@ int main(int argc, char *argv[]) {
     po.Register("thresh", &thresh, "Log-likelihood change threshold for "
                 "tree-building");
     po.Register("cluster-thresh", &cluster_thresh, "Log-likelihood change "
-                "threshold for clustering after tree-building");
+                "threshold for clustering after tree-building. 0 means "
+                "no clustering; -1 means use as a clustering threshold the "
+                "likelihood change of the final split.");
 
     po.Read(argc, argv);
 
@@ -103,7 +103,7 @@ void CuDevice::SelectGpuId(std::string use_gpu) {
       KALDI_WARN << "Will try again to get a GPU after " << sec_sleep
                  << " seconds.";
       sleep(sec_sleep);
-      //
+      cudaGetLastError(); // reset the error state
       e = cudaThreadSynchronize(); //<< 2nd trial to get CUDA context.
       if (e != cudaSuccess) {
         if (use_gpu == "yes") {
@@ -835,15 +835,19 @@ static void UnitTestCuMatrixSymInvertPosDef() {
   Real alpha = 0.3, beta = 1.75432;
   M.SymAddMat2(alpha, N, trans, beta);
   // M.AddMatMat(alpha, N, trans, N, other_trans, beta);
-  SpMatrix<Real> S(CuSpMatrix<Real>(M, kTakeLower));
+  CuSpMatrix<Real> spTemp(M, kTakeLower);
+  SpMatrix<Real> S(spTemp);
   S.Invert();
-  CuMatrix<Real> M_orig(CuSpMatrix<Real>(M, kTakeLower));
+  CuSpMatrix<Real> spTemp2(M, kTakeLower);
+  CuMatrix<Real> M_orig(spTemp2);
   M.SymInvertPosDef();
-  CuMatrix<Real> M_inverted(CuSpMatrix<Real>(M, kTakeLower));
+  CuSpMatrix<Real> spTemp3(M, kTakeLower);
+  CuMatrix<Real> M_inverted(spTemp3);
   CuMatrix<Real> M_prod(dimM, dimM);
   M_prod.AddMatMat(Real(1.0), M_orig, kNoTrans, M_inverted, kNoTrans, Real(0.0));
   KALDI_ASSERT(M_prod.IsUnit());
-  SpMatrix<Real> S2(CuSpMatrix<Real>(M, kTakeLower));
+  CuSpMatrix<Real> spTemp4(M, kTakeLower);
+  SpMatrix<Real> S2(spTemp4);
   KALDI_ASSERT(ApproxEqual(S, S2, (Real)0.1));
   KALDI_ASSERT(dimM == 0 || S.Trace() != 0);
 }
@@ -144,6 +144,7 @@ preprocessor variables, setting compile options, linking with libraries, and so
 \section build_setup_platforms Which platforms has Kaldi been compiled on?
 
 We have compiled Kaldi on Windows, Cygwin, various flavors of Linux (including
-Ubuntu, CentOS, Debian and SUSE), and Darwin.
+Ubuntu, CentOS, Debian and SUSE), and Darwin. We recommend you use g++ version
+4.4 or above for the source to compile.
 
 */
@@ -21,8 +21,6 @@
 #ifndef KALDI_FEAT_FEATURE_FUNCTIONS_H_
 #define KALDI_FEAT_FEATURE_FUNCTIONS_H_
 
-#include <cassert>
-#include <cstdlib>
 #include <string>
 #include <vector>
 

@@ -21,8 +21,8 @@ OBJFILES =
 TESTFILES =
 
 ADDLIBS = ../feat/kaldi-feat.a ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
-          ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a ../util/kaldi-util.a \
-          ../base/kaldi-base.a
+          ../thread/kaldi-thread.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
+          ../util/kaldi-util.a ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
 
@@ -60,6 +60,7 @@ int main(int argc, char *argv[]) {
     BaseFloatMatrixWriter feat_writer(wspecifier);
 
     int32 num_done = 0, num_err = 0;
+    int64 frames_in = 0, frames_out = 0;
 
     // process all keys
     for (; !feat_reader.Done(); feat_reader.Next()) {

@@ -71,6 +72,9 @@ int main(int argc, char *argv[]) {
       int32 num_indexes = 0;
       for (int32 k = offset; k < feats.NumRows(); k += n)
         num_indexes++; // k is the index.
+
+      frames_in += feats.NumRows();
+      frames_out += num_indexes;
 
       if (num_indexes == 0) {
         KALDI_WARN << "For utterance " << utt << ", output would have no rows, "

@@ -88,8 +92,9 @@ int main(int argc, char *argv[]) {
       feat_writer.Write(utt, output);
       num_done++;
     }
-    KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
+    KALDI_LOG << "Sub-sampled " << num_done << " feature matrices; " << num_err
              << " with errors.";
+    KALDI_LOG << "Reduced " << frames_in << " frames to " << frames_out;
     return (num_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
    std::cerr << e.what();
@@ -6,7 +6,7 @@ include ../kaldi.mk
 BINFILES = fgmm-global-acc-stats fgmm-global-sum-accs fgmm-global-est \
            fgmm-global-merge fgmm-global-to-gmm fgmm-gselect fgmm-global-get-frame-likes \
            fgmm-global-acc-stats-twofeats fgmm-global-copy fgmm-global-mixdown \
-           fgmm-global-gselect-to-post
+           fgmm-global-gselect-to-post fgmm-global-info
 
 
 OBJFILES =

@@ -16,7 +16,7 @@ OBJFILES =
 TESTFILES =
 
 ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
-          ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
+          ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a ../thread/kaldi-thread.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
           ../util/kaldi-util.a ../base/kaldi-base.a
 
@@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
 
     const char *usage =
         "Print out per-frame log-likelihoods for each utterance, as an archive\n"
-        "of vectors of floats.\n"
+        "of vectors of floats. If --average=true, prints out the average per-frame\n"
+        "log-likelihood for each utterance, as a single float.\n"
        "Usage: fgmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
        "<likes-out-wspecifier>\n"
        "e.g.: fgmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
 
     ParseOptions po(usage);
+    bool average = false;
     std::string gselect_rspecifier;
     po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
                 "to limit the #Gaussians accessed on each frame.");
+    po.Register("average", &average, "If true, print out the average per-frame "
+                "log-likelihood as a single float per utterance.");
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {

@@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
 
     SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
     RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
-    BaseFloatVectorWriter likes_writer(likes_wspecifier);
+    BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
+    BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
     int32 num_done = 0, num_err = 0;
 
     for (; !feature_reader.Done(); feature_reader.Next()) {

@@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
 
       tot_like += likes.Sum();
       tot_frames += file_frames;
-      likes_writer.Write(key, likes);
+      if (average)
+        average_likes_writer.Write(key, likes.Sum() / file_frames);
+      else
+        likes_writer.Write(key, likes);
       num_done++;
     }
     KALDI_LOG << "Done " << num_done << " files; " << num_err
@@ -14,8 +14,8 @@ OBJFILES = diag-gmm.o diag-gmm-normal.o mle-diag-gmm.o am-diag-gmm.o \
 
 LIBNAME = kaldi-gmm
 
-ADDLIBS = ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a \
-          ../base/kaldi-base.a
+ADDLIBS = ../tree/kaldi-tree.a ../thread/kaldi-thread.a ../util/kaldi-util.a \
+          ../matrix/kaldi-matrix.a ../base/kaldi-base.a
 
 
 
@@ -528,9 +528,9 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
                              Vector<BaseFloat> *loglikes) const {
   loglikes->Resize(gconsts_.Dim(), kUndefined);
   loglikes->CopyFromVec(gconsts_);
-  if (static_cast<int32>(data.Dim()) != Dim()) {
+  if (data.Dim() != Dim()) {
     KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
-              << "mismatch " << (data.Dim()) << " vs. "<< (Dim());
+              << "mismatch " << data.Dim() << " vs. "<< Dim();
   }
   Vector<BaseFloat> data_sq(data);
   data_sq.ApplyPow(2.0);
@@ -542,6 +542,26 @@ void DiagGmm::LogLikelihoods(const VectorBase<BaseFloat> &data,
 }
 
 
+void DiagGmm::LogLikelihoods(const MatrixBase<BaseFloat> &data,
+                             Matrix<BaseFloat> *loglikes) const {
+  KALDI_ASSERT(data.NumRows() != 0);
+  loglikes->Resize(data.NumRows(), gconsts_.Dim(), kUndefined);
+  loglikes->CopyRowsFromVec(gconsts_);
+  if (data.NumCols() != Dim()) {
+    KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension "
+              << "mismatch " << data.NumCols() << " vs. "<< Dim();
+  }
+  Matrix<BaseFloat> data_sq(data);
+  data_sq.ApplyPow(2.0);
+
+  // loglikes += means * inv(vars) * data.
+  loglikes->AddMatMat(1.0, data, kNoTrans, means_invvars_, kTrans, 1.0);
+  // loglikes += -0.5 * inv(vars) * data_sq.
+  loglikes->AddMatMat(-0.5, data_sq, kNoTrans, inv_vars_, kTrans, 1.0);
+}
+
+
+
 void DiagGmm::LogLikelihoodsPreselect(const VectorBase<BaseFloat> &data,
                                       const std::vector<int32> &indices,
                                       Vector<BaseFloat> *loglikes) const {
@@ -777,6 +797,79 @@ BaseFloat DiagGmm::GaussianSelection(const VectorBase<BaseFloat> &data,
   return tot_loglike;
 }
 
+BaseFloat DiagGmm::GaussianSelection(const MatrixBase<BaseFloat> &data,
+                                    int32 num_gselect,
+                                    std::vector<std::vector<int32> > *output) const {
+  double ans = 0.0;
+  int32 num_frames = data.NumRows(), num_gauss = NumGauss();
+
+  int32 max_mem = 10000000; // Don't devote more than 10Mb to loglikes_mat;
+                            // break up the utterance if needed.
+  int32 mem_needed = num_frames * num_gauss * sizeof(BaseFloat);
+  if (mem_needed > max_mem) {
+    // Break into parts and recurse, we don't want to consume too
+    // much memory.
+    int32 num_parts = (mem_needed + max_mem - 1) / max_mem;
+    int32 part_frames = (data.NumRows() + num_parts - 1) / num_parts;
+    double tot_ans = 0.0;
+    std::vector<std::vector<int32> > part_output;
+    output->clear();
+    output->resize(num_frames);
+    for (int32 p = 0; p < num_parts; p++) {
+      int32 start_frame = p * part_frames,
+          this_num_frames = std::min(num_frames - start_frame, part_frames);
+      SubMatrix<BaseFloat> data_part(data, start_frame, this_num_frames,
+                                     0, data.NumCols());
+      tot_ans += GaussianSelection(data_part, num_gselect, &part_output);
+      for (int32 t = 0; t < this_num_frames; t++)
+        (*output)[start_frame + t].swap(part_output[t]);
+    }
+    KALDI_ASSERT(!output->back().empty());
+    return tot_ans;
+  }
+
+  KALDI_ASSERT(num_frames != 0);
+  Matrix<BaseFloat> loglikes_mat(num_frames, num_gauss, kUndefined);
+  this->LogLikelihoods(data, &loglikes_mat);
+
+  output->clear();
+  output->resize(num_frames);
+
+  for (int32 i = 0; i < num_frames; i++) {
+    SubVector<BaseFloat> loglikes(loglikes_mat, i);
+
+    BaseFloat thresh;
+    if (num_gselect < num_gauss) {
+      Vector<BaseFloat> loglikes_copy(loglikes);
+      BaseFloat *ptr = loglikes_copy.Data();
+      std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss);
+      thresh = ptr[num_gauss-num_gselect];
+    } else {
+      thresh = -std::numeric_limits<BaseFloat>::infinity();
+    }
+    BaseFloat tot_loglike = -std::numeric_limits<BaseFloat>::infinity();
+    std::vector<std::pair<BaseFloat, int32> > pairs;
+    for (int32 p = 0; p < num_gauss; p++) {
+      if (loglikes(p) >= thresh) {
+        pairs.push_back(std::make_pair(loglikes(p), p));
+      }
+    }
+    std::sort(pairs.begin(), pairs.end(),
+              std::greater<std::pair<BaseFloat, int32> >());
+    std::vector<int32> &this_output = (*output)[i];
+    for (int32 j = 0;
+         j < num_gselect && j < static_cast<int32>(pairs.size());
+         j++) {
+      this_output.push_back(pairs[j].second);
+      tot_loglike = LogAdd(tot_loglike, pairs[j].first);
+    }
+    KALDI_ASSERT(!this_output.empty());
+    ans += tot_loglike;
+  }
+  return ans;
+}
+
+
 BaseFloat DiagGmm::GaussianSelectionPreselect(
     const VectorBase<BaseFloat> &data,
@@ -81,6 +81,13 @@ class DiagGmm {
   void LogLikelihoods(const VectorBase<BaseFloat> &data,
                       Vector<BaseFloat> *loglikes) const;
 
+  /// This version of the LogLikelihoods function operates on
+  /// a sequence of frames simultaneously; the row index of both "data" and
+  /// "loglikes" is the frame index.
+  void LogLikelihoods(const MatrixBase<BaseFloat> &data,
+                      Matrix<BaseFloat> *loglikes) const;
+
+
   /// Outputs the per-component log-likelihoods of a subset of mixture
   /// components. Note: at output, loglikes->Dim() will equal indices.size().
   /// loglikes[i] will correspond to the log-likelihood of the Gaussian
@@ -89,13 +96,20 @@ class DiagGmm {
                                 const std::vector<int32> &indices,
                                 Vector<BaseFloat> *loglikes) const;
 
-  /// Get gaussian selection information for one frame. Returns log-like for
+  /// Get gaussian selection information for one frame. Returns log-like
   /// this frame. Output is the best "num_gselect" indices, sorted from best to
   /// worst likelihood. If "num_gselect" > NumGauss(), sets it to NumGauss().
   BaseFloat GaussianSelection(const VectorBase<BaseFloat> &data,
                               int32 num_gselect,
                               std::vector<int32> *output) const;
 
+  /// This version of the Gaussian selection function works for a sequence
+  /// of frames rather than just a single frame. Returns sum of the log-likes
+  /// over all frames.
+  BaseFloat GaussianSelection(const MatrixBase<BaseFloat> &data,
+                              int32 num_gselect,
+                              std::vector<std::vector<int32> > *output) const;
+
   /// Get gaussian selection information for one frame. Returns log-like for
   /// this frame. Output is the best "num_gselect" indices that were
   /// preselected, sorted from best to worst likelihood. If "num_gselect" >
@@ -372,6 +372,31 @@ UnitTestEstimateDiagGmm() {
     test_io(*gmm, est_gmm, true, feats); // Binary mode
   }
 
+  { // Test multi-threaded update.
+    GmmFlagsType flags_all = kGmmAll;
+    est_gmm.Resize(gmm->NumGauss(),
+                   gmm->Dim(), flags_all);
+    est_gmm.SetZero(flags_all);
+
+    Vector<BaseFloat> weights(counter);
+    for (size_t i = 0; i < counter; i++)
+      weights(i) = 0.5 + 0.1 * (rand() % 10);
+
+
+    float loglike = 0.0;
+    for (size_t i = 0; i < counter; i++) {
+      loglike += weights(i) *
+          est_gmm.AccumulateFromDiag(*gmm, feats.Row(i), weights(i));
+    }
+    AccumDiagGmm est_gmm2(*gmm, flags_all);
+    int32 num_threads = 2;
+    float loglike2 =
+        est_gmm2.AccumulateFromDiagMultiThreaded(*gmm, feats, weights, num_threads);
+    AssertEqual(loglike, loglike2);
+    est_gmm.AssertEqual(est_gmm2);
+  }
+
+
 
 delete gmm;
 }
@@ -1,6 +1,6 @@
 // gmm/mle-diag-gmm.cc
 
-// Copyright 2009-2012 Saarland University; Georg Stemmer; Jan Silovsky;
+// Copyright 2009-2013 Saarland University; Georg Stemmer; Jan Silovsky;
 //                     Microsoft Corporation; Yanmin Qian;
 //                     Johns Hopkins University (author: Daniel Povey);
 //                     Cisco Systems (author: Neha Agrawal)

@@ -26,6 +26,7 @@
 
 #include "gmm/diag-gmm.h"
 #include "gmm/mle-diag-gmm.h"
+#include "thread/kaldi-thread.h"
 
 namespace kaldi {
 

@@ -202,7 +203,6 @@ BaseFloat AccumDiagGmm::AccumulateFromDiag(const DiagGmm &gmm,
   return log_like;
 }
 
-
 // Careful: this wouldn't be valid if it were used to update the
 // Gaussian weights.
 void AccumDiagGmm::SmoothStats(BaseFloat tau) {
@@ -478,5 +478,84 @@ void MapDiagGmmUpdate(const MapDiagGmmOptions &config,
 }
 
 
+class AccumulateMultiThreadedClass: public MultiThreadable {
+ public:
+  AccumulateMultiThreadedClass(const DiagGmm &diag_gmm,
+                               const MatrixBase<BaseFloat> &data,
+                               const VectorBase<BaseFloat> &frame_weights,
+                               AccumDiagGmm *accum,
+                               double *tot_like):
+      diag_gmm_(diag_gmm), data_(data),
+      frame_weights_(frame_weights), dest_accum_(accum),
+      tot_like_ptr_(tot_like), tot_like_(0.0) { }
+  AccumulateMultiThreadedClass(const AccumulateMultiThreadedClass &other):
+      diag_gmm_(other.diag_gmm_), data_(other.data_),
+      frame_weights_(other.frame_weights_), dest_accum_(other.dest_accum_),
+      accum_(diag_gmm_, dest_accum_->Flags()), tot_like_ptr_(other.tot_like_ptr_),
+      tot_like_(0.0) {
+    KALDI_ASSERT(data_.NumRows() == frame_weights_.Dim());
+  }
+  void operator () () {
+    int32 num_frames = data_.NumRows(), num_threads = num_threads_,
+        block_size = (num_frames + num_threads - 1) / num_threads,
+        block_start = block_size * thread_id_,
+        block_end = std::min(num_frames, block_start + block_size);
+    tot_like_ = 0.0;
+    double tot_weight = 0.0;
+    for (int32 t = block_start; t < block_end; t++) {
+      tot_like_ += frame_weights_(t) *
+          accum_.AccumulateFromDiag(diag_gmm_, data_.Row(t), frame_weights_(t));
+      tot_weight += frame_weights_(t);
+    }
+    KALDI_VLOG(3) << "Thread " << thread_id_ << " saw average likelihood/frame "
+                  << (tot_like_ / tot_weight) << " over " << tot_weight
+                  << " (weighted) frames.";
+  }
+  ~AccumulateMultiThreadedClass() {
+    if (accum_.Dim() != 0) { // if our accumulator is set up (this is not true
+                             // for the single object we use to initialize the others)
+      dest_accum_->Add(1.0, accum_);
+      *tot_like_ptr_ += tot_like_;
+    }
+  }
+ private:
+  const DiagGmm &diag_gmm_;
+  const MatrixBase<BaseFloat> &data_;
+  const VectorBase<BaseFloat> &frame_weights_;
+  AccumDiagGmm *dest_accum_;
+  AccumDiagGmm accum_;
+  double *tot_like_ptr_;
+  double tot_like_;
+};
+
+
+BaseFloat AccumDiagGmm::AccumulateFromDiagMultiThreaded(
+    const DiagGmm &gmm,
+    const MatrixBase<BaseFloat> &data,
+    const VectorBase<BaseFloat> &frame_weights,
+    int32 num_threads) {
+
+  double tot_like = 0.0;
+  AccumulateMultiThreadedClass accumulator(gmm, data, frame_weights,
+                                           this, &tot_like);
+  {
+    // Note: everything happens in the constructor and destructor of
+    // the object created below.
+    MultiThreader<AccumulateMultiThreadedClass> threader(num_threads,
+                                                         accumulator);
+    // we need to make sure it's destroyed before we access the
+    // value of tot_like.
+  }
+  return tot_like;
+}
+
+void AccumDiagGmm::AssertEqual(const AccumDiagGmm &other) {
+  KALDI_ASSERT(dim_ == other.dim_ && num_comp_ == other.num_comp_ &&
+               flags_ == other.flags_);
+  KALDI_ASSERT(occupancy_.ApproxEqual(other.occupancy_));
+  KALDI_ASSERT(mean_accumulator_.ApproxEqual(other.mean_accumulator_));
+  KALDI_ASSERT(variance_accumulator_.ApproxEqual(other.variance_accumulator_));
+}
+
+
 }  // End of namespace kaldi
@@ -142,6 +142,16 @@ class AccumDiagGmm {
                                 const VectorBase<BaseFloat> &data,
                                 BaseFloat frame_posterior);
 
+  /// This does the same job as AccumulateFromDiag, but using
+  /// multiple threads. Returns sum of (log-likelihood times
+  /// frame weight) over all frames.
+  BaseFloat AccumulateFromDiagMultiThreaded(
+      const DiagGmm &gmm,
+      const MatrixBase<BaseFloat> &data,
+      const VectorBase<BaseFloat> &frame_weights,
+      int32 num_threads);
+
+
   /// Increment the stats for this component by the specified amount
   /// (not all parts may be taken, depending on flags).
  /// Note: x_stats and x2_stats are assumed to already be multiplied by "occ"

@@ -173,7 +183,9 @@ class AccumDiagGmm {
   const VectorBase<double> &occupancy() const { return occupancy_; }
   const MatrixBase<double> &mean_accumulator() const { return mean_accumulator_; }
   const MatrixBase<double> &variance_accumulator() const { return variance_accumulator_; }
 
+  // used in testing.
+  void AssertEqual(const AccumDiagGmm &other);
+
  private:
   int32 dim_;
   int32 num_comp_;
@@ -26,7 +26,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
            gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
            gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
            gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
-           gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats
+           gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
+           gmm-global-info
 
 OBJFILES =

@@ -34,10 +35,8 @@ OBJFILES =
 
 TESTFILES =
 
-# Note: we intentionally list kaldi-gmm.a both before and after kaldi-transform.a, because
-# each requires symbols from the other.
 ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
-          ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a ../gmm/kaldi-gmm.a \
+          ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
           ../thread/kaldi-thread.a ../util/kaldi-util.a ../base/kaldi-base.a
 
@@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
 
     const char *usage =
         "Print out per-frame log-likelihoods for each utterance, as an archive\n"
-        "of vectors of floats.\n"
+        "of vectors of floats. If --average=true, prints out the average per-frame\n"
+        "log-likelihood for each utterance, as a single float.\n"
        "Usage: gmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
        "<likes-out-wspecifier>\n"
        "e.g.: gmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
 
     ParseOptions po(usage);
+    bool average = false;
     std::string gselect_rspecifier;
     po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
                 "to limit the #Gaussians accessed on each frame.");
+    po.Register("average", &average, "If true, print out the average per-frame "
+                "log-likelihood as a single float per utterance.");
     po.Read(argc, argv);
 
     if (po.NumArgs() != 3) {

@@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
 
     SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
     RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
-    BaseFloatVectorWriter likes_writer(likes_wspecifier);
+    BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
+    BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
     int32 num_done = 0, num_err = 0;
 
     for (; !feature_reader.Done(); feature_reader.Next()) {

@@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
 
       tot_like += likes.Sum();
      tot_frames += file_frames;
-      likes_writer.Write(key, likes);
+      if (average)
+        average_likes_writer.Write(key, likes.Sum() / file_frames);
+      else
+        likes_writer.Write(key, likes);
       num_done++;
     }
     KALDI_LOG << "Done " << num_done << " files; " << num_err
@@ -61,13 +61,16 @@ void InitGmmFromRandomFrames(const Matrix<BaseFloat> &feats, DiagGmm *gmm) {
 void TrainOneIter(const Matrix<BaseFloat> &feats,
                   const MleDiagGmmOptions &gmm_opts,
                   int32 iter,
+                  int32 num_threads,
                   DiagGmm *gmm) {
   AccumDiagGmm gmm_acc(*gmm, kGmmAll);
 
-  double tot_like = 0.0;
-
-  for (int32 t = 0; t < feats.NumRows(); t++)
-    tot_like += gmm_acc.AccumulateFromDiag(*gmm, feats.Row(t), 1.0);
+  Vector<BaseFloat> frame_weights(feats.NumRows(), kUndefined);
+  frame_weights.Set(1.0);
+
+  double tot_like;
+  tot_like = gmm_acc.AccumulateFromDiagMultiThreaded(*gmm, feats, frame_weights,
+                                                     num_threads);
 
   KALDI_LOG << "Likelihood per frame on iteration " << iter
             << " was " << (tot_like / feats.NumRows()) << " over "

@@ -97,17 +100,24 @@ int main(int argc, char *argv[]) {
 
     bool binary = true;
     int32 num_gauss = 100;
+    int32 num_gauss_init = 0;
     int32 num_iters = 50;
     int32 num_frames = 200000;
     int32 srand_seed = 0;
+    int32 num_threads = 4;
 
     po.Register("binary", &binary, "Write output in binary mode");
     po.Register("num-gauss", &num_gauss, "Number of Gaussians in the model");
+    po.Register("num-gauss-init", &num_gauss_init, "Number of Gaussians in "
+                "the model initially (if nonzero and less than num_gauss, "
+                "we'll do mixture splitting)");
     po.Register("num-iters", &num_iters, "Number of iterations of training");
     po.Register("num-frames", &num_frames, "Number of feature vectors to store in "
                 "memory and train on (randomly chosen from the input features)");
     po.Register("srand", &srand_seed, "Seed for random number generator ");
+    po.Register("num-threads", &num_threads, "Number of threads used for "
+                "statistics accumulation");
 
     gmm_opts.Register(&po);
 
     po.Read(argc, argv);

@@ -132,7 +142,7 @@ int main(int argc, char *argv[]) {
     int64 num_read = 0, dim = 0;
 
     KALDI_LOG << "Reading features (will keep " << num_frames << " frames.)";
 
     for (; !feature_reader.Done(); feature_reader.Next()) {
       const Matrix<BaseFloat> &this_feats = feature_reader.Value();
       for (int32 t = 0; t < this_feats.NumRows(); t++) {

@@ -160,15 +170,36 @@ int main(int argc, char *argv[]) {
       KALDI_WARN << "Number of frames read " << num_read << " was less than "
                  << "target number " << num_frames << ", using all we read.";
       feats.Resize(num_read, dim, kCopyData);
     } else {
       BaseFloat percent = num_frames * 100.0 / num_read;
       KALDI_LOG << "Kept " << num_frames << " out of " << num_read
                 << " input frames = " << percent << "%.";
     }
 
-    DiagGmm gmm(num_gauss, dim);
-
-    KALDI_LOG << "Initializing GMM means from random frames";
-    InitGmmFromRandomFrames(feats, &gmm);
+    if (num_gauss_init <= 0 || num_gauss_init > num_gauss)
+      num_gauss_init = num_gauss;
 
-    for (int32 iter = 0; iter < num_iters; iter++)
-      TrainOneIter(feats, gmm_opts, iter, &gmm);
+    DiagGmm gmm(num_gauss_init, dim);
+
+    KALDI_LOG << "Initializing GMM means from random frames to "
+              << num_gauss_init << " Gaussians.";
+    InitGmmFromRandomFrames(feats, &gmm);
+
+    // we'll increase the #Gaussians by splitting,
+    // till halfway through training.
+    int32 cur_num_gauss = num_gauss_init,
+        gauss_inc = (num_gauss - num_gauss_init) / (num_iters / 2);
+
+    for (int32 iter = 0; iter < num_iters; iter++) {
+      TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
+
+      int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
+      if (next_num_gauss > gmm.NumGauss()) {
+        KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
+        gmm.Split(next_num_gauss, 0.1);
+        cur_num_gauss = next_num_gauss;
+      }
+    }
+
     WriteKaldiObject(gmm, model_wxfilename, binary);
     KALDI_LOG << "Wrote model to " << model_wxfilename;
@@ -105,9 +105,8 @@ int main(int argc, char *argv[]) {
           gmm.GaussianSelectionPreselect(mat.Row(i), preselect[i],
                                          num_gselect, &(gselect[i]));
       } else { // No "preselect" [i.e. no existing gselect]: simple case.
-        for (int32 i = 0; i < mat.NumRows(); i++)
-          tot_like_this_file +=
-              gmm.GaussianSelection(mat.Row(i), num_gselect, &(gselect[i]));
+        tot_like_this_file =
+            gmm.GaussianSelection(mat, num_gselect, &gselect);
       }
 
       gselect_writer.Write(utt, gselect);
|
@ -61,6 +61,7 @@ int main(int argc, char *argv[]) {
|
|||
<< trans_model.NumTransitionStates() << '\n';
|
||||
std::cout << "feature dimension " << am_gmm.Dim() << '\n';
|
||||
std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n';
|
||||
return 0;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
|
|
|
@@ -21,9 +21,11 @@
 
 #include "nnet/nnet-nnet.h"
 #include "nnet/nnet-activation.h"
+#include "nnet/nnet-kl-hmm.h"
 #include "nnet/nnet-affine-transform.h"
 #include "nnet/nnet-rbm.h"
 #include "nnet/nnet-various.h"
+#include "nnet/nnet-kl-hmm.h"
 
 namespace kaldi {
 namespace nnet1 {

@@ -40,6 +42,7 @@ const struct Component::key_value Component::kMarkerMap[] = {
   { Component::kCopy,"<copy>" },
   { Component::kAddShift,"<addshift>" },
   { Component::kRescale,"<rescale>" },
+  { Component::kKlHmm,"<klhmm>" }
 };
 
 

@@ -119,6 +122,9 @@ Component* Component::Read(std::istream &is, bool binary) {
     case Component::kRescale :
       p_comp = new Rescale(dim_in, dim_out);
       break;
+    case Component::kKlHmm :
+      p_comp = new KlHmm(dim_in, dim_out);
+      break;
     case Component::kUnknown :
     default :
       KALDI_ERR << "Missing type: " << token;

@@ -64,7 +64,8 @@ class Component {
     kTranspose,
     kBlockLinearity,
     kAddShift,
-    kRescale
+    kRescale,
+    kKlHmm
   } ComponentType;
   /// A pair of type and marker
   struct key_value {
@@ -9,12 +9,13 @@ LDLIBS += $(CUDA_LDLIBS)
 BINFILES = nnet-train-frmshuff \
            nnet-train-xent-hardlab-perutt \
            nnet-train-xent-hardlab-frmshuff \
-           nnet-train-mse-tgtmat-frmshuff \
-           nnet-train-mmi-sequential \
-           nnet-train-mpe-sequential \
-           rbm-train-cd1-frmshuff rbm-convert-to-nnet \
-           nnet-forward nnet-copy nnet-info nnet-concat \
-           transf-to-nnet cmvn-to-nnet
+           nnet-train-mse-tgtmat-frmshuff \
+           nnet-train-mmi-sequential \
+           nnet-train-mpe-sequential \
+           rbm-train-cd1-frmshuff rbm-convert-to-nnet \
+           nnet-forward nnet-copy nnet-info nnet-concat \
+           transf-to-nnet cmvn-to-nnet \
+           nnet-kl-hmm-acc nnet-kl-hmm-mat-to-component
 
 OBJFILES =

@@ -24,6 +25,6 @@ TESTFILES =
 
 ADDLIBS = ../nnet/kaldi-nnet.a ../cudamatrix/kaldi-cudamatrix.a ../lat/kaldi-lat.a \
           ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a ../matrix/kaldi-matrix.a \
-          ../util/kaldi-util.a ../base/kaldi-base.a
+          ../util/kaldi-util.a ../base/kaldi-base.a
 
 include ../makefiles/default_rules.mk
@@ -60,14 +60,12 @@ int main(int argc, char** argv) {
       "e.g.: ./online-audio-client 192.168.50.12 9012 'scp:wav_files.scp'\n\n";
   ParseOptions po(usage);

-  bool htk = false, vtt = false, silent = false;
+  bool htk = false, vtt = false;
   int32 channel = -1;
   int32 packet_size = 1024;

   po.Register("htk", &htk, "Save the result to an HTK label file");
   po.Register("vtt", &vtt, "Save the result to a WebVTT subtitle file");
-  po.Register("silent", &silent,
-              "Don't print any output (except for errors)");
   po.Register(
       "channel", &channel,
       "Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
@@ -116,10 +114,8 @@ int main(int argc, char** argv) {
     return -1;
   }

-  if (!silent) {
-    std::cout << "Connected to KALDI server at host " << server_addr_str
-              << " port " << server_port << std::endl;
-  }
+  KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str
+                << " port " << server_port << std::endl;

   char* pack_buffer = new char[packet_size];
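This hunk and the two that follow drop the hand-rolled --silent flag in favor of Kaldi's verbosity machinery: KALDI_VLOG(n) prints only when the global verbosity level, set by the standard --verbose option that ParseOptions registers, is at least n, so the client is quiet by default. For example:

    // Printed only when the binary is invoked with --verbose=2 or higher.
    KALDI_VLOG(2) << "File: " << wav_key;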
@@ -127,8 +123,7 @@ int main(int argc, char** argv) {
   for (; !reader.Done(); reader.Next()) {
     std::string wav_key = reader.Key();

-    if (!silent)
-      std::cout << "File: " << wav_key << std::endl;
+    KALDI_VLOG(2) << "File: " << wav_key << std::endl;

     const WaveData &wav_data = reader.Value();
@@ -260,10 +255,10 @@ int main(int argc, char** argv) {
     }
   }

-  if (!silent) {
+  {
     float speed = total_input_dur / total_reco_dur;
-    std::cout << "Recognized (" << speed << "xRT): " << reco_output
-              << std::endl;
+    KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output
+                  << std::endl;
   }

   if (htk) {
@@ -116,9 +116,6 @@ int32 main(int argc, char *argv[]) {
     return 1;
   }

-  if (left_context % kDeltaOrder != 0 || left_context != right_context)
-    KALDI_ERR << "Invalid left/right context parameters!";
-
   std::string model_rspecifier = po.GetArg(1), fst_rspecifier = po.GetArg(2),
       word_syms_filename = po.GetArg(3), silence_phones_str = po.GetArg(4),
       word_boundary_filename = po.GetOptArg(6), lda_mat_rspecifier = po
@@ -210,10 +207,6 @@ int32 main(int argc, char *argv[]) {
   } else {
     DeltaFeaturesOptions opts;
     opts.order = kDeltaOrder;
-    // Note from Dan: keeping the next statement for back-compatibility,
-    // but I don't think this is really the right way to set the window-size
-    // in the delta computation: it should be a separate config.
-
     opts.window = left_context / 2;
     feat_transform = new OnlineDeltaInput(opts, &cmn_input);
   }
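The heuristic kept above ties the delta window to the decoder's context: the check removed in these files enforced a symmetric context divisible by the delta order, so with kDeltaOrder = 2, a context of left_context = right_context = 4 gives opts.window = 4 / 2 = 2. A sketch, with the concrete numbers assumed only for illustration:

    DeltaFeaturesOptions opts;       // the same options struct as above
    opts.order = 2;                  // kDeltaOrder: deltas + delta-deltas
    opts.window = 4 / 2;             // = left_context / 2, per the hunk
    OnlineDeltaInput delta_input(opts, &cmn_input);  // cmn_input as above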
@@ -85,10 +85,7 @@ int main(int argc, char *argv[]) {
     po.PrintUsage();
     return 1;
   }
-  if (po.NumArgs() == 4)
-    if (left_context % kDeltaOrder != 0 || left_context != right_context)
-      KALDI_ERR << "Invalid left/right context parameters!";

   std::string model_rxfilename = po.GetArg(1),
       fst_rxfilename = po.GetArg(2),
       word_syms_filename = po.GetArg(3),
@@ -151,10 +148,6 @@ int main(int argc, char *argv[]) {
   } else {
     DeltaFeaturesOptions opts;
     opts.order = kDeltaOrder;
-    // Note from Dan: keeping the next statement for back-compatibility,
-    // but I don't think this is really the right way to set the window-size
-    // in the delta computation: it should be a separate config.
-
     opts.window = left_context / 2;
     feat_transform = new OnlineDeltaInput(opts, &cmn_input);
   }
@@ -102,9 +102,6 @@ int main(int argc, char *argv[]) {
     po.PrintUsage();
     return 1;
   }
-  if (po.NumArgs() == 5)
-    if (left_context % kDeltaOrder != 0 || left_context != right_context)
-      KALDI_ERR << "Invalid left/right context parameters!";

   std::string model_rxfilename = po.GetArg(1),
       fst_rxfilename = po.GetArg(2),
@@ -163,10 +160,6 @@ int main(int argc, char *argv[]) {
   } else {
     DeltaFeaturesOptions opts;
     opts.order = kDeltaOrder;
-    // Note from Dan: keeping the next statement for back-compatibility,
-    // but I don't think this is really the right way to set the window-size
-    // in the delta computation: it should be a separate config.
-
     opts.window = left_context / 2;
     feat_transform = new OnlineDeltaInput(opts, &cmn_input);
   }
@@ -83,10 +83,7 @@ int main(int argc, char *argv[]) {
     po.PrintUsage();
     return 1;
   }
-  if (po.NumArgs() == 7)
-    if (left_context % kDeltaOrder != 0 || left_context != right_context)
-      KALDI_ERR << "Invalid left/right context parameters!";

   std::string wav_rspecifier = po.GetArg(1),
       model_rspecifier = po.GetArg(2),
       fst_rspecifier = po.GetArg(3),
@@ -181,10 +178,6 @@ int main(int argc, char *argv[]) {
   } else {
     DeltaFeaturesOptions opts;
     opts.order = kDeltaOrder;
-    // Note from Dan: keeping the next statement for back-compatibility,
-    // but I don't think this is really the right way to set the window-size
-    // in the delta computation: it should be a separate config.
-
     opts.window = left_context / 2;
     feat_transform = new OnlineDeltaInput(opts, &cmn_input);
   }
@@ -43,15 +43,15 @@ namespace kaldi {
    Here, we will still accept objects of some class C with an operator () that
    takes no arguments. C may also have a constructor and a destructor that do
    something (typically the constructor just sets variables, and the destructor
-   does some kind of output). We
-   have a templated class TaskSequencer<C> which is responsible for running
-   the jobs in parallel. It has a function Run() that will accept a new object
-   of class C; this will block until a thread is free, at which time it will
-   start running the operator () of the class. When classes are finished running,
-   the object will be deleted. Class TaskSequencer guarantees that the
-   destructors will be called sequentially (not in parallel) and in the same
-   order the objects were given to the Run() function, so that it is safe for
-   the destructor to have side effects such as outputting data.
+   does some kind of output). We have a templated class TaskSequencer<C> which
+   is responsible for running the jobs in parallel. It has a function Run()
+   that will accept a new object of class C; this will block until a thread is
+   free, at which time it will spawn a thread that starts running the operator
+   () of the class. When classes are finished running, the objects will be
+   deleted. Class TaskSequencer guarantees that the destructors will be called
+   sequentially (not in parallel) and in the same order the objects were given
+   to the Run() function, so that it is safe for the destructor to have side
+   effects such as outputting data.

    Note: the destructor of TaskSequencer will wait for any remaining jobs that
    are still running and will call the destructors.
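A minimal self-contained sketch of the pattern the rewritten comment describes, assuming the TaskSequencer API from thread/kaldi-thread.h in this tree (the task class is invented for illustration):

    #include <iostream>
    #include "thread/kaldi-thread.h"

    using namespace kaldi;

    // operator() does the work (run in parallel); the destructor does the
    // output, which TaskSequencer runs sequentially, in Run() order.
    class SquareTask {
     public:
      explicit SquareTask(int32 n): n_(n), result_(0) {}
      void operator() () { result_ = n_ * n_; }
      ~SquareTask() { std::cout << n_ << "^2 = " << result_ << '\n'; }
     private:
      int32 n_;
      int32 result_;
    };

    int main() {
      TaskSequencerConfig config;
      config.num_threads = 4;              // run up to four tasks at once
      TaskSequencer<SquareTask> sequencer(config);
      for (int32 i = 0; i < 10; i++)
        sequencer.Run(new SquareTask(i));  // blocks while all threads are busy
      return 0;  // sequencer's destructor waits for the remaining jobs
    }

Even though the operator() calls may finish in arbitrary order, the output appears strictly in submission order, which is what makes the destructor-side-effect idiom safe.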
@@ -34,7 +34,7 @@ TESTFILES =

 ADDLIBS = ../decoder/kaldi-decoder.a ../lat/kaldi-lat.a ../feat/kaldi-feat.a \
 	../transform/kaldi-transform.a ../tied/kaldi-tied.a \
-	../gmm/kaldi-gmm.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
+	../gmm/kaldi-gmm.a ../thread/kaldi-thread.a ../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
 	../matrix/kaldi-matrix.a ../util/kaldi-util.a ../base/kaldi-base.a

 include ../makefiles/default_rules.mk
@@ -14,6 +14,7 @@ OBJFILES = regression-tree.o regtree-mllr-diag-gmm.o lda-estimate.o \

 LIBNAME = kaldi-transform

-ADDLIBS = ../gmm/kaldi-gmm.a ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a
+ADDLIBS = ../gmm/kaldi-gmm.a ../thread/kaldi-thread.a ../tree/kaldi-tree.a \
+	../util/kaldi-util.a ../matrix/kaldi-matrix.a ../base/kaldi-base.a

 include ../makefiles/default_rules.mk
@@ -406,8 +406,8 @@ class PipeInputImpl: public InputImplBase {
       return false;
     } else {
 #ifndef _MSC_VER
-      fb_ = new PipebufType(f_,  // Using his constructor won't lead the
-                                 // destructor close the stream.
+      fb_ = new PipebufType(f_,  // Using this constructor won't lead the
+                                 // destructor to close the stream.
                             (binary ? std::ios_base::in|std::ios_base::binary
                              : std::ios_base::in));
       KALDI_ASSERT(fb_ != NULL);  // or would be alloc error.
@@ -128,7 +128,8 @@ sph2pipe_v2.5: sph2pipe_v2.5.tar.gz
 	tar xzf sph2pipe_v2.5.tar.gz

 sph2pipe_v2.5.tar.gz:
-	wget -T 10 -t 3 http://merlin.fit.vutbr.cz/kaldi/sph2pipe_v2.5.tar.gz
+	wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz || \
+	wget -T 10 -t 3 http://www.danielpovey.com/files/kaldi/sph2pipe_v2.5.tar.gz

 openblas: openblas_compiled