зеркало из https://github.com/mozilla/kaldi.git
Adding fMPE scripts; changes to fMPE code.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@772 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
138a71faf4
Коммит
bad94ae0bc
21
COPYING
21
COPYING
|
@ -1,4 +1,24 @@
|
|||
|
||||
Update to legal notice, made Feb. 2012. We would like to clarify that we
|
||||
are using a convention where multiple names in the Apache copyright headers,
|
||||
for example
|
||||
|
||||
// Copyright 2009-2012 Yanmin Qian Arnab Ghoshal
|
||||
|
||||
does not necessarily signify joint ownership of copyright of that file, except
|
||||
in cases where all those names were present in the original release made in
|
||||
March 2011-- you can use the version history to work this out, if this matters
|
||||
to you. Instead, we intend that those contributors who later modified the file,
|
||||
agree to release their changes under the Apache license, but do not claim to
|
||||
jointly own the copyright of the original material (which would require an agreement
|
||||
with the original contributors). The conventional way of signifying
|
||||
this is to duplicate the Apache headers at the top of each file each time
|
||||
a change is made by a different author, but this would quickly become impractical.
|
||||
|
||||
The original legal notice is below. Note: we are continuing to modify it by
|
||||
adding the names of new contributors.
|
||||
|
||||
---
|
||||
Legal Notices
|
||||
|
||||
Each of the files comprising Kaldi v1.0 have been separately licensed by
|
||||
|
@ -18,6 +38,7 @@ Individual Contributors (in alphabetical order)
|
|||
Arnab Ghoshal
|
||||
Go Vivace Inc.
|
||||
Mirko Hannemann
|
||||
Navdeep Jaitly
|
||||
Microsoft Corporation
|
||||
Petr Motlicek
|
||||
Ariya Rastrow
|
||||
|
|
|
@ -26,4 +26,7 @@ Recipes in progress:
|
|||
sampling rate).
|
||||
This directory is a work in progress.
|
||||
|
||||
|
||||
gp: GlobalPhone. This is a multilingual speech corpus.
|
||||
|
||||
timit: TIMIT, which is an old corpus of carefully read speech.
|
||||
|
|
|
@ -28,7 +28,7 @@ exit 1;
|
|||
# shorten to WAV to take out the empty files and those with compression errors.
|
||||
# So set WORKDIR to someplace with enough disk space. That is where MFCCs will
|
||||
# get created, as well as the FST versions of LMs.
|
||||
WORKDIR=/path/with/disk/space
|
||||
WORKDIR=/mnt/matylda6/jhu09/qpovey/temp_gp
|
||||
cp -r conf local utils steps install.sh path.sh $WORKDIR
|
||||
cd $WORKDIR
|
||||
# INSTALLING REQUIRED TOOLS:
|
||||
|
@ -39,7 +39,7 @@ cd $WORKDIR
|
|||
{ echo "shorten and/or sox not found on PATH. Installing...";
|
||||
install.sh }
|
||||
|
||||
local/gp_data_prep.sh --config-dir=$PWD/conf --corpus-dir=/path/to/GlobalPhone --lm-dir=/path/to/lms --work-dir=$WORKDIR
|
||||
local/gp_data_prep.sh --config-dir=$PWD/conf --corpus-dir=/mnt/matylda2/data/GLOBALPHONE --lm-dir=/path/to/lms --work-dir=$WORKDIR
|
||||
# On Eddie: local/gp_data_prep.sh --config-dir=$PWD/conf --corpus-dir=$PWD/corpus --lm-dir=$PWD/corpus/language_models --work-dir=$PWD
|
||||
|
||||
local/gp_format_data.sh --hmm-proto=conf/topo.proto --work-dir=$PWD
|
||||
|
|
|
@ -5,38 +5,38 @@ for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | scripts/best_wer.sh;
|
|||
# monophone; delta+accel
|
||||
exp/mono/decode/wer_4:%WER 9.830049 [ 1232 / 12533, 143 ins, 289 del, 800 sub ]
|
||||
# First triphone pass; delta+accel
|
||||
exp/tri1/decode/wer_6:%WER 3.694247 [ 463 / 12533, 69 ins, 100 del, 294 sub ]
|
||||
exp/tri1/decode/wer_6:%WER 3.893721 [ 488 / 12533, 69 ins, 96 del, 323 sub ]
|
||||
# Second triphone pass; delta+accel
|
||||
exp/tri2a/decode/wer_7:%WER 3.638395 [ 456 / 12533, 61 ins, 107 del, 288 sub ]
|
||||
exp/tri2a/decode/wer_7:%WER 3.486795 [ 437 / 12533, 65 ins, 91 del, 281 sub ]
|
||||
# [as tri2a, but] LDA+MLLT
|
||||
exp/tri2b/decode/wer_7:%WER 3.534668 [ 443 / 12533, 74 ins, 88 del, 281 sub ]
|
||||
exp/tri2b/decode/wer_6:%WER 3.359132 [ 421 / 12533, 73 ins, 71 del, 277 sub ]
|
||||
# LDA + exponential transform (note: this is with speaker adaptation)
|
||||
exp/tri2c/decode/wer_5:%WER 2.848480 [ 357 / 12533, 62 ins, 61 del, 234 sub ]
|
||||
exp/tri2c/decode/wer_5:%WER 2.905492 [ 364 / 12528, 68 ins, 59 del, 237 sub ]
|
||||
# LDA+MLLT+MMI.
|
||||
exp/tri3a/decode/wer_7:%WER 3.502753 [ 439 / 12533, 75 ins, 83 del, 281 sub ]
|
||||
exp/tri3a/decode/wer_7:%WER 3.084052 [ 386 / 12516, 54 ins, 67 del, 265 sub ]
|
||||
# LDA+MLLT+boosted MMI [note: errors are not identical, although WER is same]
|
||||
exp/tri3b/decode/wer_7:%WER 3.454879 [ 433 / 12533, 75 ins, 80 del, 278 sub ]
|
||||
exp/tri3b/decode/wer_5:%WER 3.155960 [ 395 / 12516, 74 ins, 50 del, 271 sub ]
|
||||
# LDA+MLLT+MCE
|
||||
exp/tri3c/decode/wer_7:%WER 3.183595 [ 399 / 12533, 62 ins, 79 del, 258 sub ]
|
||||
exp/tri3c/decode/wer_6:%WER 3.047953 [ 382 / 12533, 56 ins, 69 del, 257 sub ]
|
||||
# LDA+MLLT+SAT
|
||||
exp/tri3d/decode/wer_6:%WER 2.553259 [ 320 / 12533, 43 ins, 63 del, 214 sub ]
|
||||
exp/tri3d/decode/wer_7:%WER 2.234102 [ 280 / 12533, 35 ins, 62 del, 183 sub ]
|
||||
# LDA+MLLT+SAT+MMI
|
||||
exp/tri4a/decode/wer_6:%WER 2.473470 [ 310 / 12533, 43 ins, 62 del, 205 sub ]
|
||||
exp/tri4a/decode/wer_6:%WER 2.146334 [ 269 / 12533, 37 ins, 43 del, 189 sub ]
|
||||
# LDA+MLLT+SAT, extra phase of building on top of 3d (no help)
|
||||
exp/tri4d/decode/wer_5:%WER 2.800606 [ 351 / 12533, 47 ins, 68 del, 236 sub ]
|
||||
exp/tri4d/decode/wer_5:%WER 2.457512 [ 308 / 12533, 50 ins, 54 del, 204 sub ]
|
||||
# LDA+MLLT + SGMM with speaker vectors
|
||||
exp/sgmm3d/decode/wer_4:%WER 2.186228 [ 274 / 12533, 41 ins, 42 del, 191 sub ]
|
||||
exp/sgmm3d/decode/wer_6:%WER 2.305912 [ 289 / 12533, 53 ins, 52 del, 184 sub ]
|
||||
# LDA+ET + SGMM with speaker vectors.
|
||||
exp/sgmm3e/decode/wer_5:%WER 2.242081 [ 281 / 12533, 44 ins, 47 del, 190 sub ]
|
||||
exp/sgmm3e/decode/wer_4:%WER 2.042608 [ 256 / 12533, 39 ins, 38 del, 179 sub ]
|
||||
# LDA+MLLT+SAT + SGMM with speaker vectors.
|
||||
exp/sgmm4f/decode/wer_5:%WER 2.226123 [ 279 / 12533, 56 ins, 49 del, 174 sub ]
|
||||
exp/sgmm4f/decode/wer_7:%WER 1.970797 [ 247 / 12533, 36 ins, 56 del, 155 sub ]
|
||||
# + FMLLR on top of it all.
|
||||
exp/sgmm4f/decode_fmllr/wer_6:%WER 2.202186 [ 276 / 12533, 39 ins, 59 del, 178 sub ]
|
||||
exp/sgmm4f/decode_fmllr/wer_5:%WER 1.954839 [ 245 / 12533, 40 ins, 47 del, 158 sub ]
|
||||
|
||||
# System combination via lattices: combine tri1 and tri2a
|
||||
exp/combine_1_2a/decode/wer_6:%WER 3.518711 [ 441 / 12533, 62 ins, 97 del, 282 sub ]
|
||||
# System combination via lattices: combine sgmm4f and tri3d.
|
||||
exp/combine_sgmm4f_tri3d/decode/wer_5:%WER 2.082502 [ 261 / 12533, 36 ins, 48 del, 177 sub ]
|
||||
exp/combine_sgmm4f_tri3d/decode/wer_5:%WER 1.763345 [ 221 / 12533, 32 ins, 42 del, 147 sub ]
|
||||
# System combination via lattices: combine sgmm4f and tri4a.
|
||||
exp/combine_sgmm4f_tri4a/decode/wer_5:%WER 2.082502 [ 261 / 12533, 37 ins, 49 del, 175 sub ]
|
||||
exp/combine_sgmm4f_tri4a/decode/wer_6:%WER 1.715471 [ 215 / 12533, 31 ins, 39 del, 145 sub ]
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2010-2011 Microsoft Corporation
|
||||
# Copyright 2010-2012 Microsoft Corporation Daniel Povey
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -27,13 +27,24 @@
|
|||
# ali, final.mdl, final.mat
|
||||
|
||||
boost=0 # boosting constant, for boosted MMI.
|
||||
tau=100 # Tau value.
|
||||
tau=200 # Tau value.
|
||||
merge=true # if true, cancel num and den counts as described in
|
||||
# the boosted MMI paper.
|
||||
|
||||
if [ $1 == "--boost" ]; then # e.g. "--boost 0.05"
|
||||
shift;
|
||||
boost=$1;
|
||||
shift;
|
||||
fi
|
||||
for x in `seq 4`; do
|
||||
if [ $1 == "--boost" ]; then # e.g. "--boost 0.05"
|
||||
boost=$2;
|
||||
shift 2;
|
||||
fi
|
||||
if [ $1 == "--smooth-to-model" ]; then
|
||||
shift;
|
||||
smooth_to_model=true
|
||||
fi
|
||||
if [ $1 == "--tau" ]; then # e.g. "--tau 200
|
||||
tau=$2
|
||||
shift 2;
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
echo "Usage: steps/train_lda_etc_mmi.sh <data-dir> <lang-dir> <ali-dir> <exp-dir>"
|
||||
|
@ -99,7 +110,7 @@ scripts/mkgraph.sh $dir/lang $alidir $dir/dengraph || exit 1;
|
|||
|
||||
echo "Making denominator lattices"
|
||||
|
||||
|
||||
if false; then ##temp
|
||||
rm $dir/.error 2>/dev/null
|
||||
for n in 0 1 2 3; do
|
||||
gmm-latgen-simple --beam=$beam --lattice-beam=$latticebeam --acoustic-scale=$acwt \
|
||||
|
@ -113,45 +124,33 @@ if [ -f $dir/.error ]; then
|
|||
echo "Error creating denominator lattices"
|
||||
exit 1;
|
||||
fi
|
||||
fi ##temp
|
||||
|
||||
# No need to create "numerator" alignments/lattices: we just use the
|
||||
# alignments in $alidir.
|
||||
|
||||
echo "Note: ignore absolute offsets in the objective function values"
|
||||
echo "This is caused by not having LM, lexicon or transition-probs in numerator"
|
||||
|
||||
x=0;
|
||||
while [ $x -lt $num_iters ]; do
|
||||
echo "Iteration $x: getting denominator stats."
|
||||
# Get denominator stats...
|
||||
if [ $x -eq 0 ]; then
|
||||
( lattice-to-post --acoustic-scale=$acwt "ark:gunzip -c $dir/lat?.gz|" ark:- | \
|
||||
gmm-acc-stats $dir/$x.mdl "$feats" ark:- $dir/den_acc.$x.acc ) \
|
||||
2>$dir/acc_den.$x.log || exit 1;
|
||||
else # Need to recompute acoustic likelihoods...
|
||||
( gmm-rescore-lattice $dir/$x.mdl "ark:gunzip -c $dir/lat?.gz|" "$feats" ark:- | \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- | \
|
||||
gmm-acc-stats $dir/$x.mdl "$feats" ark:- $dir/den_acc.$x.acc ) \
|
||||
2>$dir/acc_den.$x.log || exit 1;
|
||||
fi
|
||||
echo "Iteration $x: getting numerator stats."
|
||||
# Get numerator stats...
|
||||
gmm-acc-stats-ali $dir/$x.mdl "$feats" ark:$alidir/ali $dir/num_acc.$x.acc \
|
||||
2>$dir/acc_num.$x.log || exit 1;
|
||||
echo "Iteration $x: getting stats."
|
||||
( gmm-rescore-lattice $dir/$x.mdl "ark:gunzip -c $dir/lat?.gz|" "$feats" ark:- | \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- | \
|
||||
sum-post --merge=$merge --scale1=-1 \
|
||||
ark:- "ark,s,cs:ali-to-post ark:$alidir/ali ark:- |" ark:- | \
|
||||
gmm-acc-stats2 $dir/$x.mdl "$feats" ark:- $dir/num_acc.$x.acc $dir/den_acc.$x.acc ) \
|
||||
2>$dir/acc.$x.log || exit 1;
|
||||
|
||||
( gmm-est-gaussians-ebw $dir/$x.mdl "gmm-ismooth-stats --tau=$tau $dir/num_acc.$x.acc $dir/num_acc.$x.acc -|" \
|
||||
$dir/den_acc.$x.acc - | \
|
||||
# This tau is only used for smoothing "to the model".
|
||||
( gmm-est-gaussians-ebw --tau=$tau $dir/$x.mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - | \
|
||||
gmm-est-weights-ebw - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl ) \
|
||||
2>$dir/update.$x.log || exit 1;
|
||||
|
||||
den=`grep Overall $dir/acc_den.$x.log | grep lattice-to-post | awk '{print $7}'`
|
||||
num=`grep Overall $dir/acc_num.$x.log | grep gmm-acc-stats-ali | awk '{print $11}'`
|
||||
diff=`perl -e "print ($num * $acwt - $den);"`
|
||||
impr=`grep Overall $dir/update.$x.log | head -1 | awk '{print $10;}'`
|
||||
impr=`perl -e "print ($impr * $acwt);"` # auxf impr normalized by multiplying by
|
||||
# kappa, so it's comparable to an objective-function change.
|
||||
echo On iter $x, objf was $diff, auxf improvement was $impr | tee $dir/objf.$x.log
|
||||
|
||||
objf=`grep Overall $dir/acc.$x.log | grep gmm-acc-stats2 | awk '{print $10}'`
|
||||
nf=`grep Overall $dir/acc.$x.log | grep gmm-acc-stats2 | awk '{print $12}'`
|
||||
impr=`grep Overall $dir/log/update.$x.log | head -1 | awk '{print $10*$12;}'`
|
||||
impr=`perl -e "print ($impr/$nf);"` # renormalize by "real" #frames, to correct
|
||||
# for the canceling of stats.
|
||||
echo On iter $x, objf was $objf, auxf improvement from MMI was $impr | tee $dir/objf.$x.log
|
||||
rm $dir/*.acc
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
--use-energy=false # only non-default option.
|
|
@ -0,0 +1,22 @@
|
|||
<Topology>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
NONSILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.75 <Transition> 1 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.75 <Transition> 2 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 2 0.75 <Transition> 3 0.25 </State>
|
||||
<State> 3 </State>
|
||||
</TopologyEntry>
|
||||
<TopologyEntry>
|
||||
<ForPhones>
|
||||
SILENCEPHONES
|
||||
</ForPhones>
|
||||
<State> 0 <PdfClass> 0 <Transition> 0 0.25 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 </State>
|
||||
<State> 1 <PdfClass> 1 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 2 <PdfClass> 2 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 3 <PdfClass> 3 <Transition> 1 0.25 <Transition> 2 0.25 <Transition> 3 0.25 <Transition> 4 0.25 </State>
|
||||
<State> 4 <PdfClass> 4 <Transition> 4 0.25 <Transition> 5 0.75 </State>
|
||||
<State> 5 </State>
|
||||
</TopologyEntry>
|
||||
</Topology>
|
|
@ -103,4 +103,3 @@ done
|
|||
|
||||
# example of showing the alignments:
|
||||
# show-alignments data/lang/phones.txt $dir/30.mdl ark:$dir/cur.ali | head -4
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
. path.sh
|
||||
local/timit_data_prep.sh /ais/gobi2/speech/TIMIT
|
||||
local/timit_train_lms.sh data/local
|
||||
local/timit_format_data.sh
|
||||
#local/timit_data_prep.sh /ais/gobi2/speech/TIMIT
|
||||
local/timit_data_prep.sh /mnt/matylda2/data/TIMIT || exit 1;
|
||||
local/timit_train_lms.sh data/local || exit 1;
|
||||
local/timit_format_data.sh || exit 1;
|
||||
|
||||
# mfccdir should be some place with a largish disk where you
|
||||
# want to store MFCC features.
|
||||
|
@ -9,13 +10,13 @@ mfccdir=mfccs
|
|||
|
||||
steps/make_mfcc.sh data/train exp/make_mfcc/train $mfccdir 4
|
||||
for test in train test dev ; do
|
||||
steps/make_mfcc.sh data/$test exp/make_mfcc/$test $mfccdir 4
|
||||
steps/make_mfcc.sh data/$test exp/make_mfcc/$test $mfccdir 4 || exit 1;
|
||||
done
|
||||
|
||||
# train monophone system.
|
||||
steps/train_mono.sh data/train data/lang exp/mono
|
||||
steps/train_mono.sh data/train data/lang exp/mono || exit 1;
|
||||
|
||||
scripts/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph
|
||||
scripts/mkgraph.sh --mono data/lang_test exp/mono exp/mono/graph || exit 1;
|
||||
echo "Decoding test datasets."
|
||||
for test in dev test ; do
|
||||
steps/decode_deltas.sh exp/mono data/$test data/lang exp/mono/decode_$test &
|
||||
|
@ -25,8 +26,7 @@ scripts/average_wer.sh exp/mono/decode_*/wer > exp/mono/wer
|
|||
|
||||
# Get alignments from monophone system.
|
||||
echo "Creating training alignments to use to train other systems such as ANN-HMM."
|
||||
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali
|
||||
steps/align_deltas.sh data/train data/lang exp/mono exp/mono_ali || exit 1;
|
||||
echo "Creating dev alignments to use to train other systems such as ANN-HMM."
|
||||
steps/align_deltas.sh data/dev data/lang exp/mono exp/mono_ali_dev
|
||||
|
||||
steps/align_deltas.sh data/dev data/lang exp/mono exp/mono_ali_dev || exit 1;
|
||||
|
||||
|
|
|
@ -22,12 +22,15 @@ exp/tri2b/decode_tgpr_dev93_fromlats/wer_15:%WER 16.71 [ 1376 / 8234, 267 ins, 1
|
|||
exp/tri2b/decode_tgpr_dev93_tg/wer_16:%WER 16.26 [ 1339 / 8234, 267 ins, 141 del, 931 sub ]
|
||||
exp/tri2b/decode_tgpr_dev93_tg_biglm/wer_16:%WER 16.42 [ 1352 / 8234, 269 ins, 142 del, 941 sub ]
|
||||
|
||||
exp/tri2b/decode_tgpr_eval92/wer_16:%WER 11.54 [ 651 / 5643, 146 ins, 42 del, 463 sub ]
|
||||
exp/tri2b/decode_tgpr_eval92/wer_17:%WER 11.45 [ 646 / 5643, 140 ins, 46 del, 460 sub ]
|
||||
|
||||
# +MMI
|
||||
exp/tri2b_mmi/decode_tgpr_eval92/wer_16:%WER 11.08 [ 625 / 5643, 125 ins, 44 del, 456 sub ]
|
||||
exp/tri2b_mmi/decode_tgpr_eval92/wer_14:%WER 10.63 [ 600 / 5643, 124 ins, 45 del, 431 sub ]
|
||||
# +boosting
|
||||
exp/tri2b_mmi_b0.1/decode_tgpr_eval92/wer_16:%WER 10.83 [ 611 / 5643, 122 ins, 43 del, 446 sub ]
|
||||
exp/tri2b_mmi_b0.1/decode_tgpr_eval92/wer_16:%WER 10.69 [ 603 / 5643, 119 ins, 48 del, 436 sub ]
|
||||
# +fMMI
|
||||
exp/tri2b_fmmi_b0.1/decode_tgpr_eval92/wer_15:%WER 10.26 [ 579 / 5643, 111 ins, 39 del, 429 sub ]
|
||||
|
||||
# +MCE
|
||||
exp/tri2b_mce/decode_tgpr_eval92/wer_16:%WER 11.15 [ 629 / 5643, 132 ins, 45 del, 452 sub ]
|
||||
|
||||
|
@ -69,8 +72,17 @@ exp/tri4b/decode_tgpr_dev93/wer_13:%WER 12.53 [ 1032 / 8234, 242 ins, 79 del, 71
|
|||
exp/tri4b/decode_tgpr_eval92/wer_16:%WER 8.05 [ 454 / 5643, 119 ins, 23 del, 312 sub ]
|
||||
|
||||
# +MMI
|
||||
exp/tri4b_mmi/decode_tgpr_dev93/wer_14:%WER 11.53 [ 949 / 8234, 203 ins, 82 del, 664 sub ]
|
||||
exp/tri4b_mmi_b0.1/decode_tgpr_dev93/wer_16:%WER 11.45 [ 943 / 8234, 191 ins, 87 del, 665 sub ]
|
||||
exp/tri4b_mmi/decode_tgpr_dev93/wer_12:%WER 11.28 [ 929 / 8234, 206 ins, 76 del, 647 sub ]
|
||||
#+boosting
|
||||
exp/tri4b_mmi_b0.1/decode_tgpr_dev93/wer_16:%WER 11.25 [ 926 / 8234, 176 ins, 94 del, 656 sub ]
|
||||
# increasing beam from 13 to 15 to see effect.
|
||||
exp/tri4b_mmi_b0.1/decode_tgpr_dev93_b15/wer_14:%WER 10.72 [ 883 / 8234, 172 ins, 84 del, 627 sub ]
|
||||
exp/tri4b_mmi_b0.1/decode_tgpr_eval92/wer_14:%WER 7.34 [ 414 / 5643, 105 ins, 20 del, 289 sub ]
|
||||
|
||||
#+fMMI
|
||||
exp/tri4b_fmmi_b0.1/decode_tgpr_dev93/wer_13:%WER 10.86 [ 894 / 8234, 167 ins, 89 del, 638 sub ]
|
||||
exp/tri4b_fmmi_b0.1/decode_tgpr_eval92/wer_12:%WER 7.25 [ 409 / 5643, 111 ins, 14 del, 284 sub ]
|
||||
|
||||
|
||||
# LDA+MLLT+SAT, SI-284, full retraining starting from 3b [c.f. 4b]
|
||||
exp/tri4c/decode_tgpr_dev93/wer_16:%WER 12.10 [ 996 / 8234, 220 ins, 83 del, 693 sub ]
|
||||
|
|
|
@ -164,6 +164,18 @@ steps/train_lda_etc_mmi.sh --num-jobs 10 --boost 0.1 --cmd "$train_cmd" \
|
|||
data/train_si84 data/lang exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b exp/tri2b_mmi_b0.1
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt.sh exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_mmi_b0.1/decode_tgpr_eval92
|
||||
|
||||
# The next 3 commands train and test fMMI+MMI (on top of LDA+MLLT).
|
||||
steps/train_dubm_lda_etc.sh --silence-weight 0.5 \
|
||||
--num-jobs 10 --cmd "$train_cmd" 400 data/train_si84 \
|
||||
data/lang exp/tri2b_ali_si84 exp/dubm2b
|
||||
steps/train_lda_etc_mmi_fmmi.sh \
|
||||
--num-jobs 10 --boost 0.1 --cmd "$train_cmd" \
|
||||
data/train_si84 data/lang exp/tri2b_ali_si84 exp/dubm2b exp/tri2b_denlats_si84 \
|
||||
exp/tri2b exp/tri2b_fmmi_b0.1
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_fmpe.sh \
|
||||
exp/tri2b/graph_tgpr data/test_eval92 exp/tri2b_fmmi_b0.1/decode_tgpr_eval92
|
||||
|
||||
|
||||
steps/train_lda_etc_mce.sh --cmd "$train_cmd" --num-jobs 10 data/train_si84 data/lang \
|
||||
exp/tri2b_ali_si84 exp/tri2b_denlats_si84 exp/tri2b exp/tri2b_mce
|
||||
scripts/decode.sh --num-jobs 10 --cmd "$decode_cmd" steps/decode_lda_mllt.sh \
|
||||
|
@ -222,7 +234,8 @@ scripts/mkgraph.sh data/lang_test_tgpr exp/tri4b exp/tri4b/graph_tgpr
|
|||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b/decode_tgpr_dev93
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_mllt_sat.sh exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b/decode_tgpr_eval92
|
||||
|
||||
# Train and test MMI, and boosted MMI, on tri4b.
|
||||
# Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
|
||||
# all the data).
|
||||
# Making num-jobs 40 as we want to keep them under 4 hours long (or will fail
|
||||
# on regular queue at BUT).
|
||||
steps/align_lda_mllt_sat.sh --num-jobs 40 --cmd "$train_cmd" \
|
||||
|
@ -235,6 +248,25 @@ scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tg
|
|||
steps/train_lda_etc_mmi.sh --boost 0.1 --num-jobs 40 --cmd "$train_cmd" \
|
||||
data/train_si284 data/lang exp/tri4b_ali_si284 exp/tri4b_denlats_si284 exp/tri4b exp/tri4b_mmi_b0.1
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi_b0.1/decode_tgpr_dev93 exp/tri4b/decode_tgpr_dev93
|
||||
scripts/decode.sh --opts "--beam 15" --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi_b0.1/decode_tgpr_dev93_b15 exp/tri4b/decode_tgpr_dev93
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc.sh exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b_mmi_b0.1/decode_tgpr_eval92 exp/tri4b/decode_tgpr_eval92
|
||||
|
||||
# Train fMMI+MMI system on top of 4b.
|
||||
steps/train_dubm_lda_etc.sh --silence-weight 0.5 \
|
||||
--num-jobs 40 --cmd "$train_cmd" 600 data/train_si284 \
|
||||
data/lang exp/tri4b_ali_si284 exp/dubm4b
|
||||
steps/train_lda_etc_mmi_fmmi.sh \
|
||||
--num-jobs 40 --boost 0.1 --cmd "$train_cmd" \
|
||||
data/train_si284 data/lang exp/tri4b_ali_si284 exp/dubm4b exp/tri4b_denlats_si284 \
|
||||
exp/tri4b exp/tri4b_fmmi_b0.1
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc_fmpe.sh \
|
||||
exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b_fmmi_b0.1/decode_tgpr_eval92 \
|
||||
exp/tri4b/decode_tgpr_eval92
|
||||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_lda_etc_fmpe.sh \
|
||||
exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_fmmi_b0.1/decode_tgpr_dev93 \
|
||||
exp/tri4b/decode_tgpr_dev93
|
||||
|
||||
|
||||
|
||||
# Train UBM, for SGMM system on top of LDA+MLLT.
|
||||
steps/train_ubm_lda_etc.sh --num-jobs 10 --cmd "$train_cmd" \
|
||||
|
@ -245,6 +277,7 @@ scripts/mkgraph.sh data/lang_test_tgpr exp/sgmm3c exp/sgmm3c/graph_tgpr
|
|||
scripts/decode.sh --cmd "$decode_cmd" steps/decode_sgmm_lda_etc.sh exp/sgmm3c/graph_tgpr \
|
||||
data/test_dev93 exp/sgmm3c/decode_tgpr_dev93
|
||||
|
||||
|
||||
# Decode using 3 Gaussians (not 15) for gselect in 1st pass, for fast decoding.
|
||||
scripts/decode.sh --opts "--first-pass-gselect 3" --cmd "$decode_cmd" \
|
||||
steps/decode_sgmm_lda_etc.sh exp/sgmm3c/graph_tgpr data/test_dev93 exp/sgmm3c/decode_tgpr_dev93_gs3
|
||||
|
|
|
@ -62,7 +62,7 @@ fi
|
|||
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.mat $graphdir/HCLG.fst $transdir/$jobid.trans"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_mllt.sh: no such file $f";
|
||||
echo "decode_lda_etc.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Decoding script for LDA + optionally MLLT + [some speaker-specific transforms]
|
||||
# + fMPE.
|
||||
# This decoding script takes as an argument a previous decoding directory where it
|
||||
# can find some transforms.
|
||||
|
||||
if [ -f ./path.sh ]; then . ./path.sh; fi
|
||||
|
||||
numjobs=1
|
||||
jobid=0
|
||||
beam=13.0
|
||||
rescore=false
|
||||
for x in `seq 3`; do
|
||||
if [ "$1" == "-j" ]; then
|
||||
shift;
|
||||
numjobs=$1;
|
||||
jobid=$2;
|
||||
shift 2;
|
||||
fi
|
||||
if [ "$1" == "--beam" ]; then
|
||||
beam=$2;
|
||||
shift 2;
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $# != 4 ]; then
|
||||
# Note: transform-dir has to be last because scripts/decode.sh expects decode-dir to be #3 arg.
|
||||
echo "Usage: steps/decode_lda_etc.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir> <transform-dir>"
|
||||
echo " e.g.: steps/decode_lda_etc.sh -j 8 0 exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b_mmi/decode_tgpr_dev93 exp/tri4b/decode_tgpr_dev93"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
||||
graphdir=$1
|
||||
data=$2
|
||||
dir=$3
|
||||
transdir=$4
|
||||
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
if [ $numjobs -gt 1 ]; then
|
||||
mydata=$data/split$numjobs/$jobid
|
||||
else
|
||||
mydata=$data
|
||||
fi
|
||||
|
||||
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.fmpe $srcdir/final.mat $graphdir/HCLG.fst $transdir/$jobid.trans"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_etc_fmpe.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
basefeats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- | transform-feats --utt2spk=ark:$mydata/utt2spk ark:$transdir/$jobid.trans ark:- ark:- |"
|
||||
|
||||
# Get the Gaussian-selection info for the fMPE.
|
||||
ngselect=2; # Just the 2 top Gaussians.
|
||||
gmm-gselect --n=$ngselect $srcdir/final.fmpe "$basefeats" \
|
||||
"ark:|gzip -c >$dir/gselect.$jobid.gz" 2>$dir/gselect.$jobid.log
|
||||
|
||||
|
||||
# Now set up the fMPE features.
|
||||
feats="$basefeats fmpe-apply-transform $srcdir/final.fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.$jobid.gz|' ark:- |"
|
||||
|
||||
gmm-latgen-faster --max-active=7000 --beam=$beam --lattice-beam=6.0 \
|
||||
--acoustic-scale=0.083333 \
|
||||
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
|
||||
2> $dir/decode$jobid.log || exit 1;
|
|
@ -0,0 +1,64 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Decoding script that works with a GMM model and the baseline
|
||||
# [e.g. MFCC] features plus cepstral mean subtraction plus
|
||||
# LDA+MLLT or similar transform, plus fMPE/FMMI.
|
||||
# This script just generates lattices for a single broken-up
|
||||
# piece of the data.
|
||||
|
||||
if [ -f ./path.sh ]; then . ./path.sh; fi
|
||||
|
||||
numjobs=1
|
||||
jobid=0
|
||||
rescore=false
|
||||
if [ "$1" == "-j" ]; then
|
||||
shift;
|
||||
numjobs=$1;
|
||||
jobid=$2;
|
||||
shift; shift;
|
||||
fi
|
||||
|
||||
if [ $# != 3 ]; then
|
||||
echo "Usage: steps/decode_lda_mllt_fmpe.sh [-j num-jobs job-number] <graph-dir> <data-dir> <decode-dir>"
|
||||
echo " e.g.: steps/decode_lda_mllt_fmpe.sh -j 8 0 exp/tri2b/graph_tgpr data/test_dev93 exp/tri2b_fmmi/decode_dev93_tgpr"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
|
||||
graphdir=$1
|
||||
data=$2
|
||||
dir=$3
|
||||
srcdir=`dirname $dir`; # Assume model directory one level up from decoding directory.
|
||||
|
||||
mkdir -p $dir
|
||||
|
||||
if [ $numjobs -gt 1 ]; then
|
||||
mydata=$data/split$numjobs/$jobid
|
||||
else
|
||||
mydata=$data
|
||||
fi
|
||||
|
||||
requirements="$mydata/feats.scp $srcdir/final.mdl $srcdir/final.fmpe $srcdir/final.mat $graphdir/HCLG.fst"
|
||||
for f in $requirements; do
|
||||
if [ ! -f $f ]; then
|
||||
echo "decode_lda_mllt_fmpe.sh: no such file $f";
|
||||
exit 1;
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
basefeats="ark:compute-cmvn-stats --spk2utt=ark:$mydata/spk2utt scp:$mydata/feats.scp ark:- | apply-cmvn --norm-vars=false --utt2spk=ark:$mydata/utt2spk ark:- scp:$mydata/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
|
||||
|
||||
# Get the Gaussian-selection info for the fMPE.
|
||||
ngselect=2; # Just the 2 top Gaussians.
|
||||
gmm-gselect --n=$ngselect $srcdir/final.fmpe "$basefeats" \
|
||||
"ark:|gzip -c >$dir/gselect.$jobid.gz" 2>$dir/gselect.$jobid.log
|
||||
|
||||
# Now set up the fMPE features.
|
||||
feats="$basefeats fmpe-apply-transform $srcdir/final.fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.$jobid.gz|' ark:- |"
|
||||
|
||||
gmm-latgen-faster --max-active=7000 --beam=13.0 --lattice-beam=6.0 --acoustic-scale=0.083333 \
|
||||
--allow-partial=true --word-symbol-table=$graphdir/words.txt \
|
||||
$srcdir/final.mdl $graphdir/HCLG.fst "$feats" "ark:|gzip -c > $dir/lat.$jobid.gz" \
|
||||
2> $dir/decode.$jobid.log || exit 1;
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This trains a diagonal-covariance UBM (i.e. just a global
|
||||
# mixture of Gaussians, or GMM).
|
||||
|
||||
# Train UBM from a trained HMM/GMM system [with splice+LDA+[MLLT/ET/MLLT+SAT] features]
|
||||
# Alignment directory is used for the CMN and transforms.
|
||||
# A UBM is just a single mixture of Gaussians (diagonal-covariance, in our case), that's trained
|
||||
# on all the data. This will later be used in Subspace Gaussian Mixture Model (SGMM)
|
||||
# training.
|
||||
|
||||
nj=4
|
||||
cmd=scripts/run.pl
|
||||
silweight=
|
||||
for x in 1 2; do
|
||||
if [ $1 == "--num-jobs" ]; then
|
||||
shift
|
||||
nj=$1
|
||||
shift
|
||||
fi
|
||||
if [ $1 == "--cmd" ]; then
|
||||
shift
|
||||
cmd=$1
|
||||
shift
|
||||
fi
|
||||
if [ $1 == "--silence-weight" ]; then
|
||||
shift
|
||||
silweight=$1 # e.g. to weight down silence in training.
|
||||
shift
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $# != 5 ]; then
|
||||
echo "Usage: steps/train_ubm_lda_etc.sh <num-comps> <data-dir> <lang-dir> <ali-dir> <exp-dir>"
|
||||
echo " e.g.: steps/train_ubm_lda_etc.sh 400 data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm3c"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
numcomps=$1
|
||||
data=$2
|
||||
lang=$3
|
||||
alidir=$4
|
||||
dir=$5
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
|
||||
mkdir -p $dir/log
|
||||
|
||||
if [ ! -d $data/split$nj -o $data/split$nj -ot $data/feats.scp ]; then
|
||||
scripts/split_data.sh $data $nj
|
||||
fi
|
||||
|
||||
n1=`get_splits.pl $nj | awk '{print $1}'`
|
||||
[ -f $alidir/$n1.trans ] && echo "Using speaker transforms from $alidir"
|
||||
|
||||
for n in `get_splits.pl $nj`; do
|
||||
featspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
|
||||
if [ -f $alidir/$n1.trans ]; then
|
||||
featspart[$n]="${featspart[$n]} transform-feats --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.trans ark:- ark:- |"
|
||||
fi
|
||||
if [ ! -z "$silweight" ]; then
|
||||
weightspart[$n]="--weights='ark,s,cs:gunzip -c $alidir/$n.ali.gz | ali-to-post ark:- ark:- | weight-silence-post $silweight $silphonelist $alidir/final.mdl ark:- ark:- | post-to-weights ark:- ark:- |'"
|
||||
fi
|
||||
done
|
||||
|
||||
ngselect=50
|
||||
|
||||
intermediate=2000
|
||||
if [ $[$numcomps*2] -gt $intermediate ]; then
|
||||
intermediate=$[$numcomps*2];
|
||||
fi
|
||||
|
||||
echo "Clustering model $alidir/final.mdl to get initial UBM"
|
||||
# typically: --intermediate-numcomps=2000 --ubm-numcomps=400
|
||||
|
||||
if [ ! -s $dir/0.dubm ]; then
|
||||
$cmd $dir/log/cluster.log \
|
||||
init-ubm --intermediate-numcomps=$intermediate --ubm-numcomps=$numcomps \
|
||||
--verbose=2 --fullcov-ubm=false $alidir/final.mdl $alidir/final.occs \
|
||||
$dir/0.dubm || exit 1;
|
||||
fi
|
||||
rm $dir/.error 2>/dev/null
|
||||
# First do Gaussian selection to 50 components, which will be used
|
||||
# as the initial screen for all further passes.
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/gselect.$n.log \
|
||||
gmm-gselect --n=$ngselect $dir/0.dubm "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/gselect.$n.gz" &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error doing GMM selection" && exit 1;
|
||||
|
||||
for x in 0 1 2 3; do
|
||||
echo "Pass $x"
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/acc.$x.$n.log \
|
||||
gmm-global-acc-stats ${weightspart[$n]} "--gselect=ark,s,cs:gunzip -c $dir/gselect.$n.gz|" \
|
||||
$dir/$x.dubm "${featspart[$n]}" $dir/$x.$n.acc || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error accumulating stats for UBM estimation on pass $x" && exit 1;
|
||||
lowcount_opt="--remove-low-count-gaussians=false"
|
||||
[ $x -eq 3 ] && lowcount_opt= # Only remove low-count Gaussians on last iter-- keeps gselect info valid.
|
||||
$cmd $dir/log/update.$x.log \
|
||||
gmm-global-est $lowcount_opt --verbose=2 $dir/$x.dubm "gmm-global-sum-accs - $dir/$x.*.acc |" \
|
||||
$dir/$[$x+1].dubm || exit 1;
|
||||
rm $dir/$x.*.acc $dir/$x.dubm
|
||||
done
|
||||
|
||||
rm $dir/gselect.*.gz
|
||||
rm $dir/final.dubm 2>/dev/null
|
||||
mv $dir/4.dubm $dir/final.dubm || exit 1;
|
||||
|
|
@ -20,10 +20,8 @@
|
|||
# [something] may be MLLT, or ET, or MLLT + SAT. Any speaker-specific
|
||||
# transforms are expected to be located in the alignment directory.
|
||||
# This script never re-estimates any transforms, it just does model
|
||||
# training. To make this faster, it initializes the model from the
|
||||
# old system's model, i.e. for each p.d.f., it takes the best-match pdf
|
||||
# from the old system (based on overlap of tree-stats counts), and
|
||||
# uses that GMM to initialize the current GMM.
|
||||
# training.
|
||||
|
||||
# Basically we are doing 4 iterations of Extended Baum-Welch (EBW)
|
||||
# estimation, as described in Dan Povey's thesis, with a few differences:
|
||||
# (i) we have the option of "boosting", as in "Boosted MMI", which increases
|
||||
|
@ -47,7 +45,9 @@
|
|||
niters=4
|
||||
nj=4
|
||||
boost=0.0
|
||||
tau=100
|
||||
tau=200
|
||||
merge=true # if true, cancel num and den counts as described in
|
||||
# the boosted MMI paper.
|
||||
cmd=scripts/run.pl
|
||||
acwt=0.1
|
||||
stage=0
|
||||
|
@ -69,6 +69,9 @@ for x in `seq 8`; do
|
|||
if [ $1 == "--acwt" ]; then
|
||||
shift; acwt=$1; shift
|
||||
fi
|
||||
if [ $1 == "--tau" ]; then
|
||||
shift; tau=$1; shift
|
||||
fi
|
||||
if [ $1 == "--stage" ]; then
|
||||
shift; stage=$1; shift
|
||||
fi
|
||||
|
@ -121,58 +124,60 @@ rm $dir/.error 2>/dev/null
|
|||
cur_mdl=$srcdir/final.mdl
|
||||
x=0
|
||||
while [ $x -lt $niters ]; do
|
||||
echo "Iteration $x: getting denominator stats."
|
||||
# Get denominator stats... For simplicity we rescore the lattice
|
||||
echo "Iteration $x: getting stats."
|
||||
# Get denominator and numerator stats together... This involves
|
||||
# merging the num and den posteriors, and (if $merge==true), canceling
|
||||
# the +ve and -ve occupancies on each frame.
|
||||
# For simplicity we rescore the lattice
|
||||
# on all iterations, even though it shouldn't be necessary on the zeroth
|
||||
# (but we want this script to work even if $srcdir doesn't contain the
|
||||
# model used to generate the lattice).
|
||||
# model used to generate the lattice).
|
||||
if [ $stage -le $x ]; then
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/acc_den.$x.$n.log \
|
||||
$cmd $dir/log/acc.$x.$n.log \
|
||||
gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
|
||||
gmm-acc-stats $cur_mdl "${featspart[$n]}" ark:- $dir/den_acc.$x.$n.acc \
|
||||
|| touch $dir/.error &
|
||||
sum-post --merge=$merge --scale1=-1 \
|
||||
ark:- "ark,s,cs:gunzip -c $alidir/$n.ali.gz | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
gmm-acc-stats2 $cur_mdl "${featspart[$n]}" ark,s,cs:- \
|
||||
$dir/num_acc.$x.$n.acc $dir/den_acc.$x.$n.acc || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo Error accumulating den stats on iter $x && exit 1;
|
||||
[ -f $dir/.error ] && echo Error accumulating stats on iter $x && exit 1;
|
||||
$cmd $dir/log/den_acc_sum.$x.log \
|
||||
gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
|
||||
rm $dir/den_acc.$x.*.acc
|
||||
|
||||
echo "Iteration $x: getting numerator stats."
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/acc_num.$x.$n.log \
|
||||
gmm-acc-stats-ali $cur_mdl "${featspart[$n]}" "ark:gunzip -c $alidir/$n.ali.gz|" \
|
||||
$dir/num_acc.$x.$n.acc || touch $dir/.error &
|
||||
done
|
||||
wait;
|
||||
[ -f $dir/.error ] && echo Error accumulating num stats on iter $x && exit 1;
|
||||
$cmd $dir/log/num_acc_sum.$x.log \
|
||||
gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
|
||||
rm $dir/num_acc.$x.*.acc
|
||||
|
||||
# note: this tau value is for smoothing to model parameters;
|
||||
# you need to use gmm-ismooth-stats to smooth to the ML stats,
|
||||
# but anyway this script does canceling of num and den stats on
|
||||
# each frame (as suggested in the Boosted MMI paper) which would
|
||||
# make smoothing to ML impossible without accumulating extra stats.
|
||||
|
||||
$cmd $dir/log/update.$x.log \
|
||||
gmm-est-gaussians-ebw $cur_mdl "gmm-ismooth-stats --tau=$tau $dir/num_acc.$x.acc $dir/num_acc.$x.acc -|" \
|
||||
$dir/den_acc.$x.acc - \| \
|
||||
gmm-est-gaussians-ebw --tau=$tau $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - \| \
|
||||
gmm-est-weights-ebw - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
|
||||
else
|
||||
echo "not doing this iteration because --stage=$stage"
|
||||
fi
|
||||
cur_mdl=$dir/$[$x+1].mdl
|
||||
|
||||
# Some diagnostics
|
||||
den=`grep Overall $dir/log/acc_den.$x.*.log | grep lattice-to-post | awk '{p+=$7*$9; nf+=$9;} END{print p/nf;}'`
|
||||
num=`grep Overall $dir/log/acc_num.$x.*.log | grep gmm-acc-stats-ali | awk '{p+=$11*$13; nf+=$13;} END{print p/nf}'`
|
||||
diff=`perl -e "print ($num * $acwt - $den);"`
|
||||
impr=`grep Overall $dir/log/update.$x.log | head -1 | awk '{print $10;}'`
|
||||
impr=`perl -e "print ($impr * $acwt);"` # auxf impr normalized by multiplying by
|
||||
# kappa, so it's comparable to an objective-function change.
|
||||
echo On iter $x, objf was $diff, auxf improvement was $impr | tee $dir/objf.$x.log
|
||||
# Some diagnostics.. note, this objf is somewhat comparable to the
|
||||
# MMI objective function divided by the acoustic weight, and differences in it
|
||||
# are comparable to the auxf improvement printed by the update program.
|
||||
objf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf;}'`
|
||||
nf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ nf+=$12; } END{print nf;}'`
|
||||
impr=`grep Overall $dir/log/update.$x.log | head -1 | awk '{print $10*$12;}'`
|
||||
impr=`perl -e "print ($impr/$nf);"` # renormalize by "real" #frames, to correct
|
||||
# for the canceling of stats.
|
||||
echo On iter $x, objf was $objf, auxf improvement from MMI was $impr | tee $dir/objf.$x.log
|
||||
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
echo "Succeeded with $niters iterations of MMI training (boosting factor = $boost)"
|
||||
|
||||
( cd $dir; ln -s $x.mdl final.mdl )
|
||||
( cd $dir; rm final.mdl; ln -s $x.mdl final.mdl )
|
||||
|
|
|
@ -0,0 +1,236 @@
|
|||
#!/bin/bash
|
||||
# by Dan Povey, 2012. Apache.
|
||||
|
||||
# This script does MMI discriminative training, including
|
||||
# feature-space (like fMPE) and model-space components.
|
||||
# If you give the --boost option it does "boosted MMI" (BMMI).
|
||||
# On the iterations of training it alternates feature-space
|
||||
# and model-space training. We do 8 iterations in total--
|
||||
# 4 of each type ((B)MMI, f(B)MMI)
|
||||
|
||||
# The features it uses are LDA + [something], where the something
|
||||
# may be just a global transform like MLLT, or may also include
|
||||
# speaker-specific transforms such as SAT. This script just uses
|
||||
# transforms computed in the alignment directory, so it doesn't
|
||||
# need to know what the transform type is (it isn't re-estimating
|
||||
# them itself)
|
||||
|
||||
|
||||
niters=8
|
||||
nj=4
|
||||
boost=0.0
|
||||
lrate=0.01
|
||||
tau=200 # Note: we're doing smoothing "to the previous iteration"
|
||||
# --smooth-from-model so 200 seems like a more sensible default
|
||||
# than 100. We smooth to the previous iteration because now
|
||||
# we are discriminatively training the features (and not using
|
||||
# the indirect differential), so it seems like it wouldn't make
|
||||
# sense to use any element of ML.
|
||||
ngauss=400
|
||||
merge=true # if true, cancel num and den counts as described in
|
||||
# the boosted MMI paper.
|
||||
|
||||
|
||||
cmd=scripts/run.pl
|
||||
acwt=0.1
|
||||
stage=-1
|
||||
|
||||
for x in `seq 8`; do
|
||||
if [ $1 == "--num-jobs" ]; then
|
||||
shift; nj=$1; shift
|
||||
fi
|
||||
if [ $1 == "--learning-rate" ]; then
|
||||
shift; lrate=$1; shift
|
||||
fi
|
||||
if [ $1 == "--num-gauss" ]; then
|
||||
shift; ngauss=$1; shift # #Gauss in GMM for fMPE.
|
||||
fi
|
||||
if [ $1 == "--num-iters" ]; then
|
||||
shift; niters=$1; shift
|
||||
fi
|
||||
if [ $1 == "--boost" ]; then
|
||||
shift; boost=$1; shift
|
||||
fi
|
||||
if [ $1 == "--cmd" ]; then
|
||||
shift; cmd=$1; shift
|
||||
[ -z "$cmd" ] && echo Empty argument to --cmd option && exit 1;
|
||||
fi
|
||||
if [ $1 == "--acwt" ]; then
|
||||
shift; acwt=$1; shift
|
||||
fi
|
||||
if [ $1 == "--tau" ]; then
|
||||
shift; tau=$1; shift
|
||||
fi
|
||||
if [ $1 == "--stage" ]; then # used for finishing partial runs.
|
||||
shift; stage=$1; shift
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $# != 7 ]; then
|
||||
echo "Usage: steps/train_lda_etc_mmi_fmmi.sh <data-dir> <lang-dir> <ali-dir> <dubm-dir> <denlat-dir> <model-dir> <exp-dir>"
|
||||
echo " e.g.: steps/train_lda_etc_mmi_fmmi.sh data/train_si84 data/lang exp/tri2b_ali_si84 exp/ubm2d exp/tri2b_denlats_si84 exp/tri2b exp/tri2b_fmmi"
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
|
||||
data=$1
|
||||
lang=$2
|
||||
alidir=$3
|
||||
dubmdir=$4 # where diagonal UBM is.
|
||||
denlatdir=$5
|
||||
srcdir=$6 # may be same model as in alidir, but may not be, e.g.
|
||||
# if you want to test MMI with different #iters.
|
||||
dir=$7
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
ngselect=2; # Just the 2 top Gaussians. Beyond that wouldn't make much
|
||||
# difference since the posteriors would be very small.
|
||||
mkdir -p $dir/log
|
||||
|
||||
if [ ! -f $srcdir/final.mdl -o ! -f $srcdir/final.mat ]; then
|
||||
echo "Error: model dir $srcdir does not contain one of final.mdl or final.mat"
|
||||
exit 1;
|
||||
fi
|
||||
cp $srcdir/final.mat $srcdir/tree $dir
|
||||
|
||||
n=`get_splits.pl $nj | awk '{print $1}'`
|
||||
if [ -f $alidir/$n.trans ]; then
|
||||
use_trans=true
|
||||
echo Using transforms from directory $alidir
|
||||
else
|
||||
echo No transforms present in alignment directory: assuming speaker independent.
|
||||
use_trans=false
|
||||
fi
|
||||
|
||||
# Note: ${basefeatspart[$n]} is the features before fMPE.
|
||||
|
||||
for n in `get_splits.pl $nj`; do
|
||||
basefeatspart[$n]="ark,s,cs:apply-cmvn --norm-vars=false --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.cmvn scp:$data/split$nj/$n/feats.scp ark:- | splice-feats ark:- ark:- | transform-feats $alidir/final.mat ark:- ark:- |"
|
||||
$use_trans && basefeatspart[$n]="${basefeatspart[$n]} transform-feats --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.trans ark:- ark:- |"
|
||||
featspart[$n]="${basefeatspart[$n]}" # before 1st iter of fMPE..
|
||||
|
||||
[ ! -f $denlatdir/lat.$n.gz ] && echo No such file $denlatdir/lat.$n.gz && exit 1;
|
||||
latspart[$n]="ark:gunzip -c $denlatdir/lat.$n.gz|"
|
||||
# note: in next line, doesn't matter which model we use, it's only used to map to phones.
|
||||
[ $boost != "0.0" -a $boost != "0" ] && latspart[$n]="${latspart[$n]} lattice-boost-ali --b=$boost --silence-phones=$silphonelist $alidir/final.mdl ark:- 'ark,s,cs:gunzip -c $alidir/$n.ali.gz|' ark:- |"
|
||||
done
|
||||
|
||||
|
||||
# Initialize the fMPE object. Note: we call it .fmpe because
|
||||
# that's what it was called in the original paper, but since
|
||||
# we're using the MMI objective function, it's really fMMI.
|
||||
fmpe-init $dubmdir/final.dubm $dir/0.fmpe || exit 1;
|
||||
|
||||
rm $dir/.error 2>/dev/null
|
||||
|
||||
if [ $stage -le -1 ]; then
|
||||
# Get the gselect (Gaussian selection) info for fMPE.
|
||||
# Note: fMPE object starts with GMM object, so can be read
|
||||
# as one.
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/gselect.$n.log \
|
||||
gmm-gselect --n=$ngselect $dir/0.fmpe "${featspart[$n]}" \
|
||||
"ark:|gzip -c >$dir/gselect.$n.gz" || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo "Error in Gaussian selection phase" && exit 1;
|
||||
fi
|
||||
|
||||
|
||||
cur_mdl=$srcdir/final.mdl
|
||||
cur_fmpe=$dir/0.fmpe
|
||||
x=0
|
||||
while [ $x -lt $niters ]; do
|
||||
if [ $[$x%2] == 0 ]; then
|
||||
echo "Iteration $x: doing fMMI"
|
||||
if [ $stage -le $x ]; then
|
||||
for n in `get_splits.pl $nj`; do
|
||||
numpost="ark,s,cs:gunzip -c $alidir/$n.ali.gz| ali-to-post ark:- ark:-|"
|
||||
# Note: the command gmm-fmpe-acc-stats below requires the "base" features
|
||||
# (without fMPE), not the fMPE features.
|
||||
$cmd $dir/log/acc_fmmi.$x.$n.log \
|
||||
gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
|
||||
sum-post --scale1=-1 ark:- "$numpost" ark:- \| \
|
||||
gmm-fmpe-acc-stats $cur_mdl $cur_fmpe "${basefeatspart[$n]}" \
|
||||
"ark,s,cs:gunzip -c $dir/gselect.$n.gz|" ark,s,cs:- \
|
||||
$dir/$x.$n.fmpe_acc || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo Error doing fMPE accumulation && exit 1;
|
||||
( sum-matrices $dir/$x.fmpe_acc $dir/$x.*.fmpe_acc && \
|
||||
rm $dir/$x.*.fmpe_acc && \
|
||||
fmpe-est --learning-rate=$lrate $cur_fmpe $dir/$x.fmpe_acc $dir/$[$x+1].fmpe ) \
|
||||
2>$dir/log/est_fmpe.$x.log || exit 1;
|
||||
rm $dir/$[$x+1].mdl 2>/dev/null
|
||||
fi
|
||||
# We need to set the features to use the correct fMPE object.
|
||||
for n in `get_splits.pl $nj`; do
|
||||
featspart[$n]="${basefeatspart[$n]} fmpe-apply-transform $dir/$[$x+1].fmpe ark:- 'ark,s,cs:gunzip -c $dir/gselect.$n.gz|' ark:- |"
|
||||
done
|
||||
cur_fmpe=$dir/$[$x+1].fmpe
|
||||
# Now, diagnostics.
|
||||
objf=`grep Overall $dir/log/acc_fmmi.$x.*.log | grep gmm-fmpe-acc-stats | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf;}'`
|
||||
nf=`grep Overall $dir/log/acc_fmmi.$x.*.log | grep gmm-fmpe-acc-stats | awk '{ nf+=$12; } END{print nf;}'`
|
||||
impr=`grep Objf $dir/log/est_fmpe.$x.log | awk '{print $NF}'`
|
||||
impr=`perl -e "print ($impr/$nf);"` # normalize by #frames.
|
||||
echo On iter $x, objf was $objf, auxf improvement from fMMI was $impr | tee $dir/objf.$x.log
|
||||
else
|
||||
echo "Iteration $x: doing MMI (getting stats)..."
|
||||
# Get denominator and numerator stats together... For simplicity we rescore the lattice
|
||||
# on all iterations, even though it shouldn't be necessary on the zeroth
|
||||
# (but we want this script to work even if $srcdir doesn't contain the
|
||||
# model used to generate the lattice).
|
||||
if [ $stage -le $x ]; then
|
||||
for n in `get_splits.pl $nj`; do
|
||||
$cmd $dir/log/acc.$x.$n.log \
|
||||
gmm-rescore-lattice $cur_mdl "${latspart[$n]}" "${featspart[$n]}" ark:- \| \
|
||||
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
|
||||
sum-post --merge=$merge --scale1=-1 \
|
||||
ark:- "ark,s,cs:gunzip -c $alidir/$n.ali.gz | ali-to-post ark:- ark:- |" ark:- \| \
|
||||
gmm-acc-stats2 $cur_mdl "${featspart[$n]}" ark,s,cs:- \
|
||||
$dir/num_acc.$x.$n.acc $dir/den_acc.$x.$n.acc || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
[ -f $dir/.error ] && echo Error accumulating stats on iter $x && exit 1;
|
||||
$cmd $dir/log/den_acc_sum.$x.log \
|
||||
gmm-sum-accs $dir/den_acc.$x.acc $dir/den_acc.$x.*.acc || exit 1;
|
||||
rm $dir/den_acc.$x.*.acc
|
||||
$cmd $dir/log/num_acc_sum.$x.log \
|
||||
gmm-sum-accs $dir/num_acc.$x.acc $dir/num_acc.$x.*.acc || exit 1;
|
||||
rm $dir/num_acc.$x.*.acc
|
||||
|
||||
|
||||
# note: this tau value is for smoothing to model parameters;
|
||||
# you need to use gmm-ismooth-stats to smooth to the ML stats,
|
||||
# but anyway this script does canceling of num and den stats on
|
||||
# each frame (as suggested in the Boosted MMI paper) which would
|
||||
# make smoothing to ML impossible without accumulating extra stats.
|
||||
$cmd $dir/log/update.$x.log \
|
||||
gmm-est-gaussians-ebw --tau=$tau $cur_mdl $dir/num_acc.$x.acc $dir/den_acc.$x.acc - \| \
|
||||
gmm-est-weights-ebw - $dir/num_acc.$x.acc $dir/den_acc.$x.acc $dir/$[$x+1].mdl || exit 1;
|
||||
else
|
||||
echo "not doing this iteration because --stage=$stage"
|
||||
fi
|
||||
|
||||
# Some diagnostics.. note, this objf is somewhat comparable to the
|
||||
# MMI objective function divided by the acoustic weight, and differences in it
|
||||
# are comparable to the auxf improvement printed by the update program.
|
||||
objf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ p+=$10*$12; nf+=$12; } END{print p/nf;}'`
|
||||
nf=`grep Overall $dir/log/acc.$x.*.log | grep gmm-acc-stats2 | awk '{ nf+=$12; } END{print nf;}'`
|
||||
impr=`grep Overall $dir/log/update.$x.log | head -1 | awk '{print $10*$12;}'`
|
||||
impr=`perl -e "print ($impr/$nf);"` # renormalize by "real" #frames, to correct
|
||||
# for the canceling of stats.
|
||||
echo On iter $x, objf was $objf, auxf improvement was $impr | tee $dir/objf.$x.log
|
||||
cur_mdl=$dir/$[$x+1].mdl
|
||||
fi
|
||||
x=$[$x+1]
|
||||
done
|
||||
|
||||
echo "Succeeded with $niters iterations of MMI+fMMI training (boosting factor = $boost)"
|
||||
|
||||
( cd $dir; rm final.mdl 2>/dev/null; ln -s `basename $cur_mdl` final.mdl;
|
||||
rm final.fmpe 2>/dev/null; ln -s `basename $cur_fmpe` final.fmpe )
|
||||
|
||||
# Now do some cleanup.
|
||||
rm $dir/gselect.*.gz $dir/*.acc $dir/*.fmpe_acc
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
nj=4
|
||||
cmd=scripts/run.pl
|
||||
silweight=
|
||||
for x in 1 2; do
|
||||
if [ $1 == "--num-jobs" ]; then
|
||||
shift
|
||||
|
@ -33,6 +34,11 @@ for x in 1 2; do
|
|||
cmd=$1
|
||||
shift
|
||||
fi
|
||||
if [ $1 == "--silence-weight" ]; then
|
||||
shift
|
||||
silweight=$1 # e.g. to weight down silence in training.
|
||||
shift
|
||||
fi
|
||||
done
|
||||
|
||||
if [ $# != 5 ]; then
|
||||
|
@ -48,6 +54,7 @@ data=$2
|
|||
lang=$3
|
||||
alidir=$4
|
||||
dir=$5
|
||||
silphonelist=`cat $lang/silphones.csl`
|
||||
|
||||
mkdir -p $dir/log
|
||||
|
||||
|
@ -63,6 +70,9 @@ for n in `get_splits.pl $nj`; do
|
|||
if [ -f $alidir/$n1.trans ]; then
|
||||
featspart[$n]="${featspart[$n]} transform-feats --utt2spk=ark:$data/split$nj/$n/utt2spk ark:$alidir/$n.trans ark:- ark:- |"
|
||||
fi
|
||||
if [ ! -z "$silweight" ]; then
|
||||
weightspart[$n]="--weights='gunzip -c $alidir/$n.ali.gz | ali-to-post ark:- ark:- | weight-silence-post $silweight $silphonelist $alidir/final.mdl ark:- ark:- | post-to-weights ark:- ark:- |'"
|
||||
fi
|
||||
done
|
||||
|
||||
ngselect1=50
|
||||
|
@ -98,7 +108,7 @@ for x in 0 1 2 3; do
|
|||
$cmd $dir/log/acc.$x.$n.log \
|
||||
gmm-gselect --n=$ngselect2 "--gselect=ark,s,cs:gunzip -c $dir/gselect_diag.$n.gz|" \
|
||||
"fgmm-global-to-gmm $dir/$x.ubm - |" "${featspart[$n]}" ark:- \| \
|
||||
fgmm-global-acc-stats --gselect=ark,s,cs:- $dir/$x.ubm "${featspart[$n]}" \
|
||||
fgmm-global-acc-stats ${weightspart[$n]} --gselect=ark,s,cs:- $dir/$x.ubm "${featspart[$n]}" \
|
||||
$dir/$x.$n.acc || touch $dir/.error &
|
||||
done
|
||||
wait
|
||||
|
|
|
@ -63,7 +63,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" build-tree-two-level treeacc roots.txt 1.qst topo tree tree.map\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
int32 P = 1, N = 3;
|
||||
|
||||
bool cluster_leaves = true;
|
||||
|
|
|
@ -39,6 +39,7 @@ void ScalePosteriors(BaseFloat scale, Posterior *post) {
|
|||
// note: Posterior is vector<vector<pair<int,BaseFloat> > >
|
||||
void MergePosteriors(const Posterior &post1,
|
||||
const Posterior &post2,
|
||||
bool merge,
|
||||
Posterior *post) {
|
||||
KALDI_ASSERT(post1.size() == post2.size()); // precondition.
|
||||
post->resize(post1.size());
|
||||
|
@ -49,10 +50,14 @@ void MergePosteriors(const Posterior &post1,
|
|||
post1[i].begin(), post1[i].end());
|
||||
(*post)[i].insert((*post)[i].end(),
|
||||
post2[i].begin(), post2[i].end());
|
||||
MergePairVectorSumming(&((*post)[i])); // This sorts on
|
||||
// the transition-id merges the entries with the same
|
||||
// key (i.e. same .first element; same transition-id), and
|
||||
// gets rid of entries with zero .second element.
|
||||
if (merge) { // combine and sum up entries with same transition-id.
|
||||
MergePairVectorSumming(&((*post)[i])); // This sorts on
|
||||
// the transition-id, merges the entries with the same
|
||||
// key (i.e. same .first element; same transition-id), and
|
||||
// gets rid of entries with zero .second element.
|
||||
} else { // just to keep them pretty, sort them.
|
||||
std::sort( (*post)[i].begin(), (*post)[i].end() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,10 +75,12 @@ int main(int argc, char *argv[]) {
|
|||
"Usage: sum-post post-rspecifier1 post-rspecifier2 post-wspecifier\n";
|
||||
|
||||
BaseFloat scale1 = 1.0, scale2 = 1.0;
|
||||
|
||||
bool merge = true;
|
||||
ParseOptions po(usage);
|
||||
po.Register("scale1", &scale1, "Scale for first set of posteriors");
|
||||
po.Register("scale2", &scale2, "Scale for second set of posteriors");
|
||||
po.Register("merge", &merge, "If true, merge posterior entries for "
|
||||
"same transition-id (canceling positive and negative parts)");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
|
@ -111,7 +118,7 @@ int main(int argc, char *argv[]) {
|
|||
ScalePosteriors(scale1, &posterior1);
|
||||
ScalePosteriors(scale2, &posterior2);
|
||||
kaldi::Posterior posterior_out;
|
||||
MergePosteriors(posterior1, posterior2, &posterior_out);
|
||||
MergePosteriors(posterior1, posterior2, merge, &posterior_out);
|
||||
posterior_writer.Write(key, posterior_out);
|
||||
num_done++;
|
||||
}
|
||||
|
|
|
@ -3,10 +3,11 @@ all:
|
|||
EXTRA_CXXFLAGS = -Wno-sign-compare
|
||||
include ../kaldi.mk
|
||||
|
||||
BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats compute-cmvn-stats \
|
||||
add-deltas remove-mean apply-cmvn transform-feats copy-feats compose-transforms \
|
||||
splice-feats extract-segments subset-feats feat-to-len feat-to-dim \
|
||||
fmpe-apply-transform fmpe-acc-stats fmpe-init fmpe-update
|
||||
BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
|
||||
compute-cmvn-stats add-deltas remove-mean apply-cmvn transform-feats \
|
||||
copy-feats compose-transforms splice-feats extract-segments subset-feats \
|
||||
feat-to-len feat-to-dim fmpe-apply-transform fmpe-acc-stats fmpe-init \
|
||||
fmpe-est fmpe-copy
|
||||
|
||||
|
||||
OBJFILES =
|
||||
|
@ -17,8 +18,8 @@ all: $(BINFILES)
|
|||
TESTFILES =
|
||||
|
||||
$(BINFILES): ../feat/kaldi-feature.a ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
|
||||
../tree/kaldi-tree.a ../matrix/kaldi-matrix.a ../util/kaldi-util.a \
|
||||
../base/kaldi-base.a
|
||||
../tree/kaldi-tree.a ../matrix/kaldi-matrix.a ../util/kaldi-util.a \
|
||||
../base/kaldi-base.a
|
||||
|
||||
# Rule below would expand to, e.g.:
|
||||
# ../base/kaldi-base.a:
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
using kaldi::int32;
|
||||
try {
|
||||
const char *usage =
|
||||
"Apply fMPE transform to features\n"
|
||||
|
@ -55,13 +56,13 @@ int main(int argc, char *argv[]) {
|
|||
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
|
||||
|
||||
// fmpe stats...
|
||||
Matrix<BaseFloat> stats(fmpe.ProjectionNumRows() * 2,
|
||||
fmpe.ProjectionNumCols());
|
||||
SubMatrix<BaseFloat> stats_plus(stats, 0, fmpe.ProjectionNumRows(),
|
||||
0, fmpe.ProjectionNumCols());
|
||||
SubMatrix<BaseFloat> stats_minus(stats, fmpe.ProjectionNumRows(),
|
||||
fmpe.ProjectionNumRows(),
|
||||
0, fmpe.ProjectionNumCols());
|
||||
Matrix<BaseFloat> stats(fmpe.ProjectionTNumRows() * 2,
|
||||
fmpe.ProjectionTNumCols());
|
||||
SubMatrix<BaseFloat> stats_plus(stats, 0, fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
SubMatrix<BaseFloat> stats_minus(stats, fmpe.ProjectionTNumRows(),
|
||||
fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// featbin/fmpe-apply-transform.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
// Copyright 2012 Daniel Povey Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -21,6 +21,7 @@
|
|||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
using kaldi::int32;
|
||||
try {
|
||||
const char *usage =
|
||||
"Apply fMPE transform to features\n"
|
||||
|
@ -34,7 +35,7 @@ int main(int argc, char *argv[]) {
|
|||
// no non-default options.
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
if (po.NumArgs() != 4) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
// featbin/fmpe-copy.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "transform/fmpe.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
try {
|
||||
const char *usage =
|
||||
"Copy fMPE transform\n"
|
||||
"Usage: fmpe-copy [options...] <fmpe-in> <fmpe-out>\n"
|
||||
"E.g. fmpe-copy --binary=false 1.fmpe text.fmpe\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
FmpeOptions opts;
|
||||
bool binary = true;
|
||||
po.Register("binary", &binary, "If true, output fMPE object in binary mode.");
|
||||
opts.Register(&po);
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string fmpe_rxfilename = po.GetArg(1),
|
||||
fmpe_wxfilename = po.GetArg(2);
|
||||
|
||||
Fmpe fmpe;
|
||||
{
|
||||
bool binary_in;
|
||||
Input ki(fmpe_rxfilename, &binary_in);
|
||||
fmpe.Read(ki.Stream(), binary_in);
|
||||
}
|
||||
|
||||
|
||||
Output ko(fmpe_wxfilename, binary);
|
||||
fmpe.Write(ko.Stream(), binary);
|
||||
|
||||
KALDI_LOG << "Copied fMPE object to " << fmpe_wxfilename;
|
||||
return 0;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
// featbin/fmpe-update.cc
|
||||
// featbin/fmpe-est.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
// Copyright 2012 Daniel Povey Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -24,8 +24,8 @@ int main(int argc, char *argv[]) {
|
|||
try {
|
||||
const char *usage =
|
||||
"Update fMPE transform using accumulated statistics\n"
|
||||
"Usage: fmpe-update [options...] <fmpe-in> <stats-in> <fmpe-out>\n"
|
||||
"E.g. fmpe-update 1.fmpe 1.accs 2.fmpe\n";
|
||||
"Usage: fmpe-est [options...] <fmpe-in> <stats-in> <fmpe-out>\n"
|
||||
"E.g. fmpe-est 1.fmpe 1.accs 2.fmpe\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
FmpeUpdateOptions opts;
|
||||
|
@ -58,18 +58,18 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
// the matrix is in two parts, for the "plus" and "minus"
|
||||
// parts of the gradient that we stored separately.
|
||||
SubMatrix<BaseFloat> stats_plus(stats, 0, fmpe.ProjectionNumRows(),
|
||||
0, fmpe.ProjectionNumCols());
|
||||
SubMatrix<BaseFloat> stats_minus(stats, fmpe.ProjectionNumRows(),
|
||||
fmpe.ProjectionNumRows(),
|
||||
0, fmpe.ProjectionNumCols());
|
||||
SubMatrix<BaseFloat> stats_plus(stats, 0, fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
SubMatrix<BaseFloat> stats_minus(stats, fmpe.ProjectionTNumRows(),
|
||||
fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
|
||||
fmpe.Update(opts, stats_plus, stats_minus);
|
||||
|
||||
Output ko(fmpe_wxfilename, binary);
|
||||
fmpe.Write(ko.Stream(), binary);
|
||||
|
||||
KALDI_LOG << "Initialized fMPE object and wrote to"
|
||||
KALDI_LOG << "Updated fMPE object and wrote to "
|
||||
<< fmpe_wxfilename;
|
||||
return 0;
|
||||
} catch(const std::exception& e) {
|
|
@ -1,6 +1,6 @@
|
|||
// featbin/fmpe-init.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
// Copyright 2012 Daniel Povey Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
@ -23,7 +23,7 @@ int main(int argc, char *argv[]) {
|
|||
using namespace kaldi;
|
||||
try {
|
||||
const char *usage =
|
||||
"Initialize fMPE transform (to zeo)\n"
|
||||
"Initialize fMPE transform (to zero)\n"
|
||||
"Usage: fmpe-init [options...] <diag-gmm-in> <fmpe-out>\n"
|
||||
"E.g. fmpe-init 1.ubm 1.fmpe\n";
|
||||
|
||||
|
@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
|
|||
Output ko(fmpe_wxfilename, binary);
|
||||
fmpe.Write(ko.Stream(), binary);
|
||||
|
||||
KALDI_LOG << "Initialized fMPE object and wrote to"
|
||||
KALDI_LOG << "Initialized fMPE object and wrote to "
|
||||
<< fmpe_wxfilename;
|
||||
return 0;
|
||||
} catch(const std::exception& e) {
|
||||
|
|
|
@ -8,7 +8,7 @@ TESTFILES = diag-gmm-test mle-diag-gmm-test full-gmm-test mle-full-gmm-test \
|
|||
am-diag-gmm-test ebw-diag-gmm-test
|
||||
|
||||
OBJFILES = diag-gmm.o diag-gmm-normal.o mle-diag-gmm.o am-diag-gmm.o mle-am-diag-gmm.o \
|
||||
full-gmm.o full-gmm-normal.o mle-full-gmm.o fmpe-am-diag-gmm.o model-common.o \
|
||||
full-gmm.o full-gmm-normal.o mle-full-gmm.o model-common.o \
|
||||
model-test-common.o ebw-diag-gmm.o
|
||||
|
||||
LIBFILE = kaldi-gmm.a
|
||||
|
|
|
@ -148,11 +148,10 @@ void UpdateEbwDiagGmm(const AccumDiagGmm &num_stats, // with I-smoothing, if use
|
|||
if (den_has_stats)
|
||||
var_stats.AddVec(-1.0, den_stats.variance_accumulator().Row(g));
|
||||
}
|
||||
double D = opts.E * den_count / 2; // E*gamma_den/2 where E = 2;
|
||||
// We initialize to half the value of D that would be dictated by
|
||||
// E; this is part of the strategy used to ensure that the value of
|
||||
// D we use is at least twice the value that would ensure positive
|
||||
// variances.
|
||||
double D = (opts.tau + opts.E * den_count) / 2;
|
||||
// We initialize to half the value of D that would be dictated by E (and
|
||||
// tau); this is part of the strategy used to ensure that the value of D we
|
||||
// use is at least twice the value that would ensure positive variances.
|
||||
|
||||
int32 iter, max_iter = 100;
|
||||
for (iter = 0; iter < max_iter; iter++) { // will normally break from the loop
|
||||
|
@ -184,7 +183,7 @@ void UpdateEbwDiagGmm(const AccumDiagGmm &num_stats, // with I-smoothing, if use
|
|||
D *= 1.1;
|
||||
}
|
||||
}
|
||||
if (iter > 0 && num_floored_out != NULL) *num_floored_out++;
|
||||
if (iter > 0 && num_floored_out != NULL) (*num_floored_out)++;
|
||||
if (iter == max_iter) KALDI_WARN << "Dropped off end of loop, recomputing D. (unexpected.)";
|
||||
}
|
||||
// copy to natural representation according to flags.
|
||||
|
|
|
@ -31,10 +31,14 @@ namespace kaldi {
|
|||
// Options for Extended Baum-Welch Gaussian update.
|
||||
struct EbwOptions {
|
||||
BaseFloat E;
|
||||
EbwOptions(): E(2.0) { }
|
||||
BaseFloat tau; // This is only useful for smoothing "to the model":
|
||||
// if you want to smooth to ML stats, you need to use gmm-ismooth-stats
|
||||
EbwOptions(): E(2.0), tau(0.0) { }
|
||||
void Register(ParseOptions *po) {
|
||||
std::string module = "EbwOptions: ";
|
||||
po->Register("E", &E, module+"Constant E for Extended Baum-Welch (EBW) update");
|
||||
po->Register("tau", &tau, module+"Tau value for smoothing to the model "
|
||||
"parameters only (for smoothing to ML stats, use gmm-ismooth-stats");
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -1,892 +0,0 @@
|
|||
// gmm/fmpe-am-diag-gmm.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
|
||||
#include "gmm/diag-gmm.h"
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
#include "util/stl-utils.h"
|
||||
#include "tree/clusterable-classes.h"
|
||||
#include "tree/cluster-utils.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
void FmpeAccumModelDiff::Read(std::istream &in_stream, bool binary) {
|
||||
int32 dimension, num_components;
|
||||
std::string token;
|
||||
|
||||
ExpectToken(in_stream, binary, "<FMPEMODELDIFFS>");
|
||||
ExpectToken(in_stream, binary, "<VECSIZE>");
|
||||
ReadBasicType(in_stream, binary, &dimension);
|
||||
ExpectToken(in_stream, binary, "<NUMCOMPONENTS>");
|
||||
ReadBasicType(in_stream, binary, &num_components);
|
||||
|
||||
Resize(num_components, dimension);
|
||||
|
||||
ReadToken(in_stream, binary, &token);
|
||||
while (token != "</FMPEMODELDIFFS>") {
|
||||
if (token == "<MLE_OCCUPANCY>") {
|
||||
mle_occupancy_.Read(in_stream, binary);
|
||||
} else if (token == "<MEANDIFFS>") {
|
||||
mean_diff_accumulator_.Read(in_stream, binary);
|
||||
} else if (token == "<DIAGVARDIFFS>") {
|
||||
variance_diff_accumulator_.Read(in_stream, binary);
|
||||
} else {
|
||||
KALDI_ERR << "Unexpected token '" << token << "' in model file ";
|
||||
}
|
||||
ReadToken(in_stream, binary, &token);
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeAccumModelDiff::Write(std::ostream &out_stream, bool binary) const {
|
||||
WriteToken(out_stream, binary, "<FMPEMODELDIFFS>");
|
||||
WriteToken(out_stream, binary, "<VECSIZE>");
|
||||
WriteBasicType(out_stream, binary, dim_);
|
||||
WriteToken(out_stream, binary, "<NUMCOMPONENTS>");
|
||||
WriteBasicType(out_stream, binary, num_comp_);
|
||||
|
||||
// convert into BaseFloat before writing things
|
||||
Vector<BaseFloat> occupancy_bf(mle_occupancy_.Dim());
|
||||
Matrix<BaseFloat> mean_diff_accumulator_bf(mean_diff_accumulator_.NumRows(),
|
||||
mean_diff_accumulator_.NumCols());
|
||||
Matrix<BaseFloat> variance_diff_accumulator_bf(variance_diff_accumulator_.NumRows(),
|
||||
variance_diff_accumulator_.NumCols());
|
||||
occupancy_bf.CopyFromVec(mle_occupancy_);
|
||||
mean_diff_accumulator_bf.CopyFromMat(mean_diff_accumulator_);
|
||||
variance_diff_accumulator_bf.CopyFromMat(variance_diff_accumulator_);
|
||||
|
||||
WriteToken(out_stream, binary, "<MLE_OCCUPANCY>");
|
||||
occupancy_bf.Write(out_stream, binary);
|
||||
WriteToken(out_stream, binary, "<MEANDIFFS>");
|
||||
mean_diff_accumulator_bf.Write(out_stream, binary);
|
||||
WriteToken(out_stream, binary, "<DIAGVARDIFFS>");
|
||||
variance_diff_accumulator_bf.Write(out_stream, binary);
|
||||
WriteToken(out_stream, binary, "</FMPEMODELDIFFS>");
|
||||
}
|
||||
|
||||
// Sets the number of components and the feature dimension (both must be
// positive) and resizes the three accumulators accordingly.
void FmpeAccumModelDiff::Resize(int32 num_comp, int32 dim) {
  KALDI_ASSERT(num_comp > 0 && dim > 0);

  // Per-component storage.
  mle_occupancy_.Resize(num_comp);
  mean_diff_accumulator_.Resize(num_comp, dim);
  variance_diff_accumulator_.Resize(num_comp, dim);

  // Book-keeping of the new shape.
  dim_ = dim;
  num_comp_ = num_comp;
}
|
||||
|
||||
// Zeroes the occupancies and both differential accumulators; sizes are
// left untouched.
void FmpeAccumModelDiff::SetZero() {
  variance_diff_accumulator_.SetZero();
  mean_diff_accumulator_.SetZero();
  mle_occupancy_.SetZero();
}
|
||||
|
||||
void FmpeAccumModelDiff::ComputeModelParaDiff(const DiagGmm& diag_gmm,
|
||||
const AccumDiagGmm& num_acc,
|
||||
const AccumDiagGmm& den_acc,
|
||||
const AccumDiagGmm& mle_acc) {
|
||||
KALDI_ASSERT(num_acc.NumGauss() == num_comp_ && num_acc.Dim() == dim_);
|
||||
KALDI_ASSERT(den_acc.NumGauss() == num_comp_); // den_acc.Dim() may not be defined,
|
||||
// if we used the "compressed form" of accs where den only has counts.
|
||||
KALDI_ASSERT(mle_acc.NumGauss() == num_comp_ && mle_acc.Dim() == dim_);
|
||||
|
||||
Matrix<double> mean_diff_tmp(num_comp_, dim_);
|
||||
Matrix<double> var_diff_tmp(num_comp_, dim_);
|
||||
Matrix<double> mat_tmp(num_comp_, dim_);
|
||||
Vector<double> occ_diff(num_comp_);
|
||||
Matrix<double> means_invvars(num_comp_, dim_);
|
||||
Matrix<double> inv_vars(num_comp_, dim_);
|
||||
|
||||
occ_diff.CopyFromVec(num_acc.occupancy());
|
||||
occ_diff.AddVec(-1.0, den_acc.occupancy());
|
||||
|
||||
means_invvars.CopyFromMat(diag_gmm.means_invvars(), kNoTrans);
|
||||
inv_vars.CopyFromMat(diag_gmm.inv_vars(), kNoTrans);
|
||||
/// compute the means differentials first
|
||||
mean_diff_tmp.CopyFromMat(num_acc.mean_accumulator(), kNoTrans);
|
||||
if (den_acc.Flags() & kGmmMeans) // probably will be false.
|
||||
mean_diff_tmp.AddMat(-1.0, den_acc.mean_accumulator(), kNoTrans);
|
||||
mean_diff_tmp.MulElements(inv_vars);
|
||||
|
||||
mat_tmp.CopyFromMat(means_invvars, kNoTrans);
|
||||
mat_tmp.MulRowsVec(occ_diff);
|
||||
|
||||
mean_diff_tmp.AddMat(-1.0, mat_tmp, kNoTrans);
|
||||
|
||||
/// compute the means differetials
|
||||
mean_diff_accumulator_.CopyFromMat(mean_diff_tmp, kNoTrans);
|
||||
|
||||
/// compute the vars differentials second
|
||||
var_diff_tmp.CopyFromMat(num_acc.variance_accumulator(), kNoTrans);
|
||||
if (den_acc.Flags() & kGmmVariances) // probably will be false.
|
||||
var_diff_tmp.AddMat(-1.0, den_acc.variance_accumulator(), kNoTrans);
|
||||
|
||||
var_diff_tmp.MulElements(inv_vars);
|
||||
var_diff_tmp.MulElements(inv_vars);
|
||||
|
||||
mat_tmp.CopyFromMat(num_acc.mean_accumulator(), kNoTrans);
|
||||
if (den_acc.Flags() & kGmmMeans) // probably will be false.
|
||||
mat_tmp.AddMat(-1.0, den_acc.mean_accumulator(), kNoTrans);
|
||||
mat_tmp.MulElements(inv_vars);
|
||||
mat_tmp.MulElements(means_invvars);
|
||||
|
||||
var_diff_tmp.AddMat(-2.0, mat_tmp, kNoTrans);
|
||||
|
||||
mat_tmp.CopyFromMat(means_invvars, kNoTrans);
|
||||
mat_tmp.MulElements(means_invvars);
|
||||
mat_tmp.AddMat(-1.0, inv_vars, kNoTrans);
|
||||
mat_tmp.MulRowsVec(occ_diff);
|
||||
|
||||
var_diff_tmp.AddMat(1.0, mat_tmp, kNoTrans);
|
||||
var_diff_tmp.Scale(0.5);
|
||||
|
||||
/// compute the vars differentials
|
||||
variance_diff_accumulator_.CopyFromMat(var_diff_tmp, kNoTrans);
|
||||
|
||||
/// copy to obtain the mle occupation probapility
|
||||
mle_occupancy_.CopyFromVec(mle_acc.occupancy());
|
||||
}
|
||||
|
||||
void FmpeAccs::Write(std::ostream &out_stream, bool binary) const {
|
||||
uint32 tmp_uint32;
|
||||
|
||||
WriteToken(out_stream, binary, "<FMPEACCS>");
|
||||
|
||||
WriteToken(out_stream, binary, "<NumGaussians>");
|
||||
tmp_uint32 = static_cast<uint32>(config_.gmm_num_comps);
|
||||
WriteBasicType(out_stream, binary, tmp_uint32);
|
||||
WriteToken(out_stream, binary, "<LengthContextExp>");
|
||||
tmp_uint32 = static_cast<uint32>(config_.context_windows.NumRows());
|
||||
WriteBasicType(out_stream, binary, tmp_uint32);
|
||||
WriteToken(out_stream, binary, "<DIMENSION>");
|
||||
WriteBasicType(out_stream, binary, dim_);
|
||||
if (!binary) out_stream << "\n";
|
||||
|
||||
// convert into BaseFloat before writing things
|
||||
Matrix<BaseFloat> mat_bf(dim_, dim_ + 1);
|
||||
|
||||
if (p_.size() != 0) {
|
||||
WriteToken(out_stream, binary, "<P>");
|
||||
for (int32 i = 0; i < config_.gmm_num_comps; ++i) {
|
||||
for (int32 j = 0; j < config_.context_windows.NumRows(); ++j) {
|
||||
mat_bf.CopyFromMat(p_[i][j], kNoTrans);
|
||||
mat_bf.Write(out_stream, binary);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (n_.size() != 0) {
|
||||
WriteToken(out_stream, binary, "<N>");
|
||||
for (int32 i = 0; i < config_.gmm_num_comps; ++i) {
|
||||
for (int32 j = 0; j < config_.context_windows.NumRows(); ++j) {
|
||||
mat_bf.CopyFromMat(n_[i][j], kNoTrans);
|
||||
mat_bf.Write(out_stream, binary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert into BaseFloat before writing things
|
||||
Vector<BaseFloat> diff_bf(diff_.Dim());
|
||||
Vector<BaseFloat> direct_diff_bf(direct_diff_.Dim());
|
||||
Vector<BaseFloat> indirect_diff_bf(indirect_diff_.Dim());
|
||||
diff_bf.CopyFromVec(diff_);
|
||||
direct_diff_bf.CopyFromVec(direct_diff_);
|
||||
indirect_diff_bf.CopyFromVec(indirect_diff_);
|
||||
|
||||
WriteToken(out_stream, binary, "<DIFFERENTIAL>");
|
||||
diff_bf.Write(out_stream, binary);
|
||||
WriteToken(out_stream, binary, "<DIRECTDIFFERENTIAL>");
|
||||
direct_diff_bf.Write(out_stream, binary);
|
||||
WriteToken(out_stream, binary, "<INDIRECTDIFFERENTIAL>");
|
||||
indirect_diff_bf.Write(out_stream, binary);
|
||||
|
||||
WriteToken(out_stream, binary, "</FMPEACCS>");
|
||||
}
|
||||
|
||||
void FmpeAccs::Read(std::istream &in_stream, bool binary,
|
||||
bool add) {
|
||||
uint32 tmp_uint32;
|
||||
std::string token;
|
||||
|
||||
ExpectToken(in_stream, binary, "<FMPACCS>");
|
||||
|
||||
ExpectToken(in_stream, binary, "<NumGaussians>");
|
||||
ReadBasicType(in_stream, binary, &tmp_uint32);
|
||||
int32 num_gaussians = static_cast<int32>(tmp_uint32);
|
||||
ExpectToken(in_stream, binary, "<LengthContExp>");
|
||||
ReadBasicType(in_stream, binary, &tmp_uint32);
|
||||
int32 length_cont_exp = static_cast<int32>(tmp_uint32);
|
||||
ExpectToken(in_stream, binary, "<DIMENSION>");
|
||||
ReadBasicType(in_stream, binary, &dim_);
|
||||
|
||||
ReadToken(in_stream, binary, &token);
|
||||
|
||||
while (token != "</FMPEACCS>") {
|
||||
if (token == "<P>") {
|
||||
p_.resize(num_gaussians);
|
||||
for (size_t i = 0; i < p_.size(); ++i) {
|
||||
p_[i].resize(length_cont_exp);
|
||||
for (size_t j = 0; j < p_[i].size(); ++j) {
|
||||
p_[i][j].Read(in_stream, binary, add);
|
||||
}
|
||||
}
|
||||
} else if (token == "<N>") {
|
||||
n_.resize(num_gaussians);
|
||||
for (size_t i = 0; i < n_.size(); ++i) {
|
||||
n_[i].resize(length_cont_exp);
|
||||
for (size_t j = 0; j < n_[i].size(); ++j) {
|
||||
n_[i][j].Read(in_stream, binary, add);
|
||||
}
|
||||
}
|
||||
} else if (token == "<DIFFERENTIALS>") {
|
||||
diff_.Read(in_stream, binary, add);
|
||||
} else if (token == "<DIRECTDIFFERENTIALS>") {
|
||||
direct_diff_.Read(in_stream, binary, add);
|
||||
} else if (token == "<INDIRECTDIFFERENTIALS>") {
|
||||
indirect_diff_.Read(in_stream, binary, add);
|
||||
} else {
|
||||
KALDI_ERR << "Unexpected token '" << token << "' in model file ";
|
||||
}
|
||||
ReadToken(in_stream, binary, &token);
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeAccs::ReadModelDiffs(std::istream &in_stream, bool binary) {
|
||||
int32 num_pdfs;
|
||||
int32 dim;
|
||||
ExpectToken(in_stream, binary, "<DIMENSION>");
|
||||
ReadBasicType(in_stream, binary, &dim);
|
||||
ExpectToken(in_stream, binary, "<NUMPDFS>");
|
||||
ReadBasicType(in_stream, binary, &num_pdfs);
|
||||
KALDI_ASSERT((num_pdfs > 0) && (dim > 0));
|
||||
|
||||
if (model_diff_accumulators_.size() != static_cast<size_t> (num_pdfs))
|
||||
KALDI_ERR << "Reading model differentials but num-pdfs do not match: "
|
||||
<< (model_diff_accumulators_.size()) << " vs. "
|
||||
<< (num_pdfs);
|
||||
for (std::vector<FmpeAccumModelDiff*>::iterator it = model_diff_accumulators_.begin(),
|
||||
end = model_diff_accumulators_.end(); it != end; ++it) {
|
||||
(*it)->Read(in_stream, binary);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Allocates the positive (p_) and negative (n_) statistics as
// [num_gmm_gauss][con_exp] arrays of dim x (dim + 1) matrices, and sizes the
// three differential vectors to dim.
void FmpeAccs::InitPNandDiff(int32 num_gmm_gauss, int32 con_exp, int32 dim) {
  p_.resize(num_gmm_gauss);
  n_.resize(num_gmm_gauss);

  for (int32 g = 0; g < num_gmm_gauss; ++g) {
    p_[g].resize(con_exp);
    n_[g].resize(con_exp);
    for (int32 c = 0; c < con_exp; ++c) {
      p_[g][c].Resize(dim, dim + 1);
      n_[g][c].Resize(dim, dim + 1);
    }
  }

  diff_.Resize(dim);
  direct_diff_.Resize(dim);
  indirect_diff_.Resize(dim);
}
|
||||
|
||||
void FmpeAccs::InitModelDiff(const AmDiagGmm &model) {
|
||||
DeletePointers(&model_diff_accumulators_); // in case was non-empty when called.
|
||||
model_diff_accumulators_.resize(model.NumPdfs(), NULL);
|
||||
for (int32 i = 0; i < model.NumPdfs(); i++) {
|
||||
model_diff_accumulators_[i] = new FmpeAccumModelDiff();
|
||||
model_diff_accumulators_[i]->Resize(model.GetPdf(i));
|
||||
}
|
||||
}
|
||||
|
||||
/// Initialization, do InitModelDiff if true when accumulating,
|
||||
/// and otherwise don't do when sum accumulations
|
||||
void FmpeAccs::Init(const AmDiagGmm &am_model, bool update) {
|
||||
dim_ = am_model.Dim();
|
||||
|
||||
InitPNandDiff(config_.gmm_num_comps, config_.context_windows.NumRows(), dim_);
|
||||
|
||||
if (update) {
|
||||
InitModelDiff(am_model);
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeAccs::InitializeGMMs(const DiagGmm &gmm, const DiagGmm &gmm_cluster_centers,
|
||||
std::vector<int32> &gaussian_cluster_center_map) {
|
||||
gmm_.CopyFromDiagGmm(gmm);
|
||||
gmm_cluster_centers_.CopyFromDiagGmm(gmm_cluster_centers);
|
||||
gaussian_cluster_center_map_.resize(gaussian_cluster_center_map.size());
|
||||
gaussian_cluster_center_map_ = gaussian_cluster_center_map;
|
||||
}
|
||||
|
||||
void FmpeAccs::ComputeOneFrameOffsetFeature(const VectorBase<BaseFloat>& data,
|
||||
std::vector<std::pair<int32, Vector<double> > > *offset) const {
|
||||
KALDI_ASSERT((data.Dim() == gmm_.Dim()) && (data.Dim() == gmm_cluster_centers_.Dim()));
|
||||
KALDI_ASSERT((gmm_.NumGauss() != 0) && (gmm_cluster_centers_.NumGauss() != 0)
|
||||
&& (gmm_.NumGauss() > gmm_cluster_centers_.NumGauss())
|
||||
&& (config_.gmm_cluster_centers_nbest < gmm_cluster_centers_.NumGauss())
|
||||
&& (config_.gmm_gaussian_nbest < gmm_.NumGauss()))
|
||||
|
||||
int32 dim = data.Dim();
|
||||
int32 num_gauss = gmm_.NumGauss();
|
||||
int32 num_cluster_centers = gmm_cluster_centers_.NumGauss();
|
||||
int32 gmm_cluster_centers_nbest = config_.gmm_cluster_centers_nbest;
|
||||
|
||||
std::set<int32> pruned_centers;
|
||||
Vector<BaseFloat> loglikes(num_cluster_centers);
|
||||
gmm_cluster_centers_.LogLikelihoods(data, &loglikes);
|
||||
Vector<BaseFloat> loglikes_copy(loglikes);
|
||||
BaseFloat *ptr = loglikes_copy.Data();
|
||||
std::nth_element(ptr, ptr+num_cluster_centers-gmm_cluster_centers_nbest, ptr+num_cluster_centers);
|
||||
BaseFloat thresh = ptr[num_cluster_centers-gmm_cluster_centers_nbest];
|
||||
for (int32 g = 0; g < num_cluster_centers; g++) {
|
||||
if (loglikes(g) >= thresh)
|
||||
pruned_centers.insert(g);
|
||||
}
|
||||
|
||||
std::vector< std::pair<double, int32> > pruned_gauss;
|
||||
for (int32 gauss_index = 0; gauss_index < num_gauss; gauss_index++) {
|
||||
int32 current_cluster = gaussian_cluster_center_map_[gauss_index];
|
||||
if (pruned_centers.end() != pruned_centers.find(current_cluster)) {
|
||||
double loglike = gmm_.ComponentLogLikelihood(data, gauss_index);
|
||||
pruned_gauss.push_back(std::make_pair(loglike, gauss_index));
|
||||
}
|
||||
}
|
||||
KALDI_ASSERT(!pruned_gauss.empty());
|
||||
|
||||
int32 gmm_gaussian_nbest = config_.gmm_gaussian_nbest;
|
||||
std::nth_element(pruned_gauss.begin(),
|
||||
pruned_gauss.end() - gmm_gaussian_nbest,
|
||||
pruned_gauss.end());
|
||||
pruned_gauss.erase(pruned_gauss.begin(),
|
||||
pruned_gauss.end() - gmm_gaussian_nbest);
|
||||
|
||||
double weight = 0.0;
|
||||
for (int32 i = 0; i < pruned_gauss.size(); ++i) {
|
||||
weight += exp(pruned_gauss[i].first);
|
||||
}
|
||||
for (int32 i = 0; i < pruned_gauss.size(); ++i) {
|
||||
pruned_gauss[i].first = exp(pruned_gauss[i].first) / weight;
|
||||
}
|
||||
|
||||
Vector<BaseFloat> tmp_offset(dim + 1);
|
||||
SubVector<BaseFloat> sub_tmp_offset(tmp_offset, 1, dim);
|
||||
Vector<BaseFloat> tmp_mean(dim);
|
||||
Vector<BaseFloat> tmp_var(dim);
|
||||
for (int32 i = 0; i < pruned_gauss.size(); ++i) {
|
||||
tmp_offset(0) = pruned_gauss[i].first * 5.0;
|
||||
sub_tmp_offset.CopyFromVec(data);
|
||||
gmm_.GetComponentMean(pruned_gauss[i].second, &tmp_mean);
|
||||
sub_tmp_offset.AddVec(-1.0, tmp_mean);
|
||||
gmm_.GetComponentVariance(pruned_gauss[i].second, &tmp_var);
|
||||
tmp_var.ApplyPow(0.5);
|
||||
sub_tmp_offset.DivElemByElem(tmp_var);
|
||||
sub_tmp_offset.Scale(pruned_gauss[i].first);
|
||||
|
||||
offset->push_back(std::make_pair(pruned_gauss[i].second, tmp_offset));
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeAccs::ComputeWholeFileOffsetFeature(const MatrixBase<BaseFloat>& data,
|
||||
std::vector<std::vector<std::pair<int32, Vector<double> > > > *whole_file_offset) const {
|
||||
int32 nframe = data.NumRows();
|
||||
whole_file_offset->reserve(nframe);
|
||||
|
||||
for (int32 i = 0; i < nframe; i++) {
|
||||
std::vector<std::pair<int32, Vector<double> > > offset;
|
||||
ComputeOneFrameOffsetFeature(data.Row(i), &offset);
|
||||
whole_file_offset->push_back(offset);
|
||||
}
|
||||
}
|
||||
|
||||
// Ordering predicate on (gaussian index, offset) pairs: true iff the index
// of M is smaller than the index of N.  The offsets are not compared.
// Note: both arguments are taken by value, so each call copies them.
bool Gauss_index_lower(std::pair<int32, Vector<double> > M,
                       std::pair<int32, Vector<double> > N) {
  const int32 left_index = M.first;
  const int32 right_index = N.first;
  return left_index < right_index;
}
|
||||
|
||||
void FmpeAccs::ComputeContExpOffsetFeature(
|
||||
const std::vector<std::vector<std::pair<int32, Vector<double> > >* > &offset_win,
|
||||
std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > *ht) const {
|
||||
KALDI_ASSERT((config_.context_windows.NumCols() == offset_win.size()));
|
||||
|
||||
std::vector<std::pair<int32, Vector<double> > > offset_tmp;
|
||||
std::vector<std::pair<int32, Vector<double> > > offset_uniq_tmp;
|
||||
|
||||
for (int32 i = 0; i < config_.context_windows.NumRows(); i++) {
|
||||
// for every context
|
||||
for (int32 j = 0; j < config_.context_windows.NumCols(); j++) {
|
||||
if (config_.context_windows(i, j) > 0.0) {
|
||||
if (offset_win[j]->empty() == 0) {
|
||||
for (int32 k = 0; k < offset_win[j]->size(); k++) {
|
||||
offset_tmp.push_back((*offset_win[j])[k]);
|
||||
offset_tmp.back().second.Scale(config_.context_windows(i, j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (offset_tmp.empty() == 0) {
|
||||
std::sort(offset_tmp.begin(), offset_tmp.end(), Gauss_index_lower);
|
||||
offset_uniq_tmp.push_back(offset_tmp[0]);
|
||||
for (int32 igauss = 1; igauss < offset_tmp.size(); igauss++) {
|
||||
if (offset_tmp[igauss].first == offset_tmp[igauss - 1].first) {
|
||||
offset_uniq_tmp.back().second.AddVec(1.0, offset_tmp[igauss].second);
|
||||
} else {
|
||||
offset_uniq_tmp.push_back(offset_tmp[igauss]);
|
||||
}
|
||||
}
|
||||
|
||||
ht->push_back(std::make_pair(i, offset_uniq_tmp));
|
||||
offset_tmp.clear();
|
||||
offset_uniq_tmp.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeAccs::ComputeHighDimemsionFeature(
|
||||
const std::vector<std::vector<std::pair<int32, Vector<double> > > > &whole_file_offset_feat,
|
||||
int32 frame_index,
|
||||
std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > *ht) const {
|
||||
KALDI_ASSERT((frame_index >= 0) && (frame_index < whole_file_offset_feat.size()));
|
||||
|
||||
int32 lenght_context_windows = config_.context_windows.NumCols();
|
||||
int32 half_len_win = lenght_context_windows / 2;
|
||||
int32 num_frame = whole_file_offset_feat.size();
|
||||
std::vector<std::vector<std::pair<int32, Vector<double> > >* > offset_win;
|
||||
std::vector<std::pair<int32, Vector<double> > > empty_feat;
|
||||
|
||||
for (int32 i = (frame_index - half_len_win);
|
||||
i < (frame_index - half_len_win + lenght_context_windows); i++) {
|
||||
/// we append zero if the index is out of the whole file feature lenght
|
||||
if ((i < 0) || (i >= num_frame)) {
|
||||
offset_win.push_back(&empty_feat);
|
||||
} else {
|
||||
offset_win.push_back(
|
||||
const_cast<std::vector<std::pair<int32, Vector<double> > >* >
|
||||
(&(whole_file_offset_feat[i])));
|
||||
}
|
||||
}
|
||||
|
||||
ComputeContExpOffsetFeature(offset_win, ht);
|
||||
}
|
||||
|
||||
void FmpeAccs::ProjectHighDimensionFeature(
|
||||
const std::vector< std::vector< Matrix<double> > > &M,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht,
|
||||
Vector<double> *fea_out) const {
|
||||
KALDI_ASSERT((M.size() == gmm_.NumGauss())
|
||||
&& (M[0].size() == ht.size())
|
||||
&& (M[0][0].NumRows() == gmm_.Dim())
|
||||
&& (M[0][0].NumCols() == gmm_.Dim() + 1));
|
||||
|
||||
int32 dim = gmm_.Dim();
|
||||
Vector<double> tmp_fea(dim);
|
||||
tmp_fea.SetZero();
|
||||
|
||||
for(int32 i = 0; i < ht.size(); i++) {
|
||||
int32 cont_index = ht[i].first;
|
||||
for (int32 j = 0; j < ht[i].second.size(); j++) {
|
||||
int32 gauss_index = ht[i].second[j].first;
|
||||
tmp_fea.AddMatVec(1.0, M[gauss_index][cont_index], kNoTrans, ht[i].second[j].second, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
fea_out->CopyFromVec(tmp_fea);
|
||||
}
|
||||
|
||||
void FmpeAccs::ObtainNewFmpeFeature(
|
||||
const VectorBase<BaseFloat> &data,
|
||||
const std::vector< std::vector< Matrix<double> > > &M,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht,
|
||||
Vector<double> *fea_new) const {
|
||||
KALDI_ASSERT((data.Dim() == gmm_.Dim()));
|
||||
|
||||
Vector<double> tmp_fea(data.Dim());
|
||||
ProjectHighDimensionFeature(M, ht, &tmp_fea);
|
||||
|
||||
fea_new->CopyFromVec(data);
|
||||
fea_new->AddVec(1.0, tmp_fea);
|
||||
}
|
||||
|
||||
void FmpeAccs::AccumulateDirectDiffFromPosteriors(const DiagGmm &gmm,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
const VectorBase<BaseFloat> &posteriors,
|
||||
Vector<double> *direct_diff) {
|
||||
KALDI_ASSERT(gmm.Dim() == Dim());
|
||||
KALDI_ASSERT(gmm.NumGauss() == posteriors.Dim());
|
||||
KALDI_ASSERT(static_cast<int32>(data.Dim()) == Dim());
|
||||
KALDI_ASSERT(direct_diff->Dim() == Dim());
|
||||
|
||||
Matrix<double> means_invvars(gmm.NumGauss(), gmm.Dim());
|
||||
Matrix<double> inv_vars(gmm.NumGauss(), gmm.Dim());
|
||||
Matrix<double> data_tmp(gmm.NumGauss(), gmm.Dim());
|
||||
Matrix<double> mat_tmp(gmm.NumGauss(), gmm.Dim());
|
||||
Vector<double> post_scale(gmm.NumGauss());
|
||||
|
||||
means_invvars.CopyFromMat(gmm.means_invvars(), kNoTrans);
|
||||
inv_vars.CopyFromMat(gmm.inv_vars(), kNoTrans);
|
||||
|
||||
for (int32 i = 0; i < data_tmp.NumRows(); i++) {
|
||||
data_tmp.Row(i).AddVec(1.0, data);
|
||||
}
|
||||
data_tmp.MulElements(inv_vars);
|
||||
|
||||
mat_tmp.CopyFromMat(means_invvars, kNoTrans);
|
||||
mat_tmp.AddMat(-1.0, data_tmp, kNoTrans);
|
||||
|
||||
post_scale.CopyFromVec(posteriors);
|
||||
post_scale.Scale(config_.lat_prob_scale);
|
||||
|
||||
direct_diff->AddMatVec(1.0, mat_tmp, kTrans, post_scale, 1.0);
|
||||
}
|
||||
|
||||
void FmpeAccs::AccumulateInDirectDiffFromPosteriors(const DiagGmm &gmm,
|
||||
const FmpeAccumModelDiff &fmpe_diaggmm_diff_acc,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
const VectorBase<BaseFloat> &posteriors,
|
||||
Vector<double> *indirect_diff) {
|
||||
KALDI_ASSERT(gmm.NumGauss() == fmpe_diaggmm_diff_acc.NumGauss());
|
||||
KALDI_ASSERT(gmm.NumGauss() == posteriors.Dim());
|
||||
KALDI_ASSERT(gmm.Dim() == fmpe_diaggmm_diff_acc.Dim());
|
||||
KALDI_ASSERT(gmm.Dim() == Dim());
|
||||
KALDI_ASSERT(static_cast<int32>(data.Dim()) == Dim());
|
||||
KALDI_ASSERT(indirect_diff->Dim() == Dim());
|
||||
|
||||
Matrix<double> mat_tmp(gmm.NumGauss(), gmm.Dim());
|
||||
Vector<double> vec_tmp(gmm.NumGauss());
|
||||
|
||||
gmm.GetMeans(&mat_tmp);
|
||||
for (int32 i = 0; i < mat_tmp.NumRows(); i++) {
|
||||
mat_tmp.Row(i).AddVec(-1.0, data);
|
||||
}
|
||||
mat_tmp.MulElements(fmpe_diaggmm_diff_acc.variance_diff_accumulator());
|
||||
mat_tmp.Scale(-2.0);
|
||||
mat_tmp.AddMat(1.0, fmpe_diaggmm_diff_acc.mean_diff_accumulator(), kNoTrans);
|
||||
// should be scaled in compute model difficientials,
|
||||
// but used here just for convenient
|
||||
mat_tmp.Scale(config_.lat_prob_scale);
|
||||
|
||||
vec_tmp.CopyFromVec(posteriors);
|
||||
vec_tmp.DivElemByElem(fmpe_diaggmm_diff_acc.mle_occupancy());
|
||||
|
||||
indirect_diff->AddMatVec(1.0, mat_tmp, kTrans, vec_tmp, 1.0);
|
||||
}
|
||||
|
||||
void FmpeAccs::AccumulateInDirectDiffFromDiag(const DiagGmm &gmm,
|
||||
const FmpeAccumModelDiff &fmpe_diaggmm_diff_acc,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
BaseFloat frame_posterior,
|
||||
Vector<double> *indirect_diff) {
|
||||
KALDI_ASSERT(gmm.NumGauss() == fmpe_diaggmm_diff_acc.NumGauss());
|
||||
KALDI_ASSERT(gmm.Dim() == fmpe_diaggmm_diff_acc.Dim());
|
||||
KALDI_ASSERT(gmm.Dim() == Dim());
|
||||
KALDI_ASSERT(static_cast<int32>(data.Dim()) == Dim());
|
||||
KALDI_ASSERT(indirect_diff->Dim() == Dim());
|
||||
|
||||
Vector<BaseFloat> posteriors(gmm.NumGauss());
|
||||
gmm.ComponentPosteriors(data, &posteriors);
|
||||
posteriors.Scale(frame_posterior);
|
||||
|
||||
AccumulateInDirectDiffFromPosteriors(gmm, fmpe_diaggmm_diff_acc,
|
||||
data, posteriors, indirect_diff);
|
||||
}
|
||||
|
||||
void FmpeAccs::AccumulateFromDifferential(const VectorBase<double> &direct_diff,
|
||||
const VectorBase<double> &indirect_diff,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht) {
|
||||
KALDI_ASSERT((direct_diff.Dim() == indirect_diff.Dim()));
|
||||
KALDI_ASSERT(direct_diff.Dim() == Dim());
|
||||
|
||||
Vector<double> diff(direct_diff);
|
||||
diff.AddVec(1.0, indirect_diff);
|
||||
|
||||
int32 dim = gmm_.Dim();
|
||||
Matrix<double> tmp(dim, dim + 1);
|
||||
tmp.SetZero();
|
||||
|
||||
/// accumulate the p and n statistics
|
||||
for (int32 i = 0; i < ht.size(); i++) {
|
||||
int32 cont_index = ht[i].first;
|
||||
for (int32 j = 0; j < ht[i].second.size(); j++) {
|
||||
int32 gauss_index = ht[i].second[j].first;
|
||||
tmp.AddVecVec(1.0, diff, ht[i].second[j].second);
|
||||
|
||||
for (int32 r = 0; r < dim; r++) {
|
||||
for (int32 c = 0;c < (dim + 1); c++) {
|
||||
if (tmp(r, c) > 0.0) {
|
||||
p_[gauss_index][cont_index](r, c) += tmp(r, c);
|
||||
}
|
||||
else {
|
||||
n_[gauss_index][cont_index](r, c) -= tmp(r, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tmp.SetZero();
|
||||
}
|
||||
}
|
||||
|
||||
/// accumulate the direct/indirect and total differentials
|
||||
diff_.AddVec(1.0, diff);
|
||||
direct_diff_.AddVec(1.0, direct_diff);
|
||||
indirect_diff_.AddVec(1.0, indirect_diff);
|
||||
}
|
||||
|
||||
// Builds an updater with the same configuration and feature dimension as
// the accumulators, and allocates the projection matrices.
FmpeUpdater::FmpeUpdater(const FmpeAccs &accs)
    : config_(accs.config()), dim_(accs.Dim()) {
  const int32 num_context_exp = config_.context_windows.NumRows();
  Init(config_.gmm_num_comps, num_context_exp, dim_);
}
|
||||
|
||||
// Copy constructor: copies the configuration, the average standard
// deviations, the dimension, and every projection matrix of other.
FmpeUpdater::FmpeUpdater(const FmpeUpdater &other)
    : config_(other.config_), avg_std_var_(other.avg_std_var_),
      dim_(other.dim_) {
  if (other.M_.empty()) return;

  const size_t num_gauss = other.M_.size();
  M_.resize(num_gauss);
  for (size_t g = 0; g < num_gauss; ++g) {
    const size_t num_ctx = other.M_[g].size();
    M_[g].resize(num_ctx);
    for (size_t c = 0; c < num_ctx; ++c) {
      M_[g][c].Resize(other.M_[g][c].NumRows(), other.M_[g][c].NumCols());
      M_[g][c].CopyFromMat(other.M_[g][c], kNoTrans);
    }
  }
}
|
||||
|
||||
// Allocates the projection matrices M_ as a [num_gmm_gauss][con_exp] array
// of dim x (dim + 1) matrices and sizes avg_std_var_ to dim.
void FmpeUpdater::Init(int32 num_gmm_gauss, int32 con_exp, int32 dim) {
  avg_std_var_.Resize(dim);

  M_.resize(num_gmm_gauss);
  for (int32 g = 0; g < num_gmm_gauss; ++g) {
    M_[g].resize(con_exp);
    for (int32 c = 0; c < con_exp; ++c)
      M_[g][c].Resize(dim, dim + 1);
  }
}
|
||||
|
||||
void FmpeUpdater::Write(std::ostream &out_stream, bool binary) const {
|
||||
uint32 tmp_uint32;
|
||||
|
||||
WriteToken(out_stream, binary, "<FMPE>");
|
||||
|
||||
WriteToken(out_stream, binary, "<NumGaussians>");
|
||||
tmp_uint32 = static_cast<uint32>(config_.gmm_num_comps);
|
||||
WriteBasicType(out_stream, binary, tmp_uint32);
|
||||
WriteToken(out_stream, binary, "<LengthContExp>");
|
||||
tmp_uint32 = static_cast<uint32>(config_.context_windows.NumRows());
|
||||
WriteBasicType(out_stream, binary, tmp_uint32);
|
||||
WriteToken(out_stream, binary, "<DIMENSION>");
|
||||
WriteBasicType(out_stream, binary, dim_);
|
||||
if (!binary) out_stream << "\n";
|
||||
|
||||
// convert into BaseFloat before writing things
|
||||
Matrix<BaseFloat> mat_bf(dim_, dim_ + 1);
|
||||
|
||||
if (M_.size() != 0) {
|
||||
WriteToken(out_stream, binary, "<PROJ_MAT>");
|
||||
for (int32 i = 0; i < config_.gmm_num_comps; ++i) {
|
||||
for (int32 j = 0; j < config_.context_windows.NumRows(); ++j) {
|
||||
mat_bf.CopyFromMat(M_[i][j], kNoTrans);
|
||||
mat_bf.Write(out_stream, binary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WriteToken(out_stream, binary, "</FMPE>");
|
||||
}
|
||||
|
||||
void FmpeUpdater::Read(std::istream &in_stream, bool binary) {
|
||||
uint32 tmp_uint32;
|
||||
std::string token;
|
||||
|
||||
ExpectToken(in_stream, binary, "<FMPE>");
|
||||
|
||||
ExpectToken(in_stream, binary, "<NumGaussians>");
|
||||
ReadBasicType(in_stream, binary, &tmp_uint32);
|
||||
int32 num_gaussians = static_cast<int32>(tmp_uint32);
|
||||
ExpectToken(in_stream, binary, "<LengthContExp>");
|
||||
ReadBasicType(in_stream, binary, &tmp_uint32);
|
||||
int32 length_cont_exp = static_cast<int32>(tmp_uint32);
|
||||
ExpectToken(in_stream, binary, "<DIMENSION>");
|
||||
ReadBasicType(in_stream, binary, &dim_);
|
||||
|
||||
ReadToken(in_stream, binary, &token);
|
||||
|
||||
while (token != "</FMPE>") {
|
||||
if (token == "<PROJ_MAT>") {
|
||||
M_.resize(num_gaussians);
|
||||
for (size_t i = 0; i < M_.size(); ++i) {
|
||||
M_[i].resize(length_cont_exp);
|
||||
for (size_t j = 0; j < M_[i].size(); ++j) {
|
||||
M_[i][j].Read(in_stream, binary);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
KALDI_ERR << "Unexpected token '" << token << "' in model file ";
|
||||
}
|
||||
ReadToken(in_stream, binary, &token);
|
||||
}
|
||||
}
|
||||
|
||||
void FmpeUpdater::ComputeAvgStandardDeviation(const AmDiagGmm &am) {
|
||||
Matrix<double> vars_tmp;
|
||||
Vector<double> vec_tmp(am.Dim());
|
||||
|
||||
for (int32 i = 0; i < am.NumPdfs(); i++) {
|
||||
const DiagGmm &gmm = am.GetPdf(i);
|
||||
gmm.GetVars(&vars_tmp);
|
||||
vars_tmp.ApplyPow(0.5);
|
||||
vec_tmp.AddRowSumMat(vars_tmp);
|
||||
}
|
||||
|
||||
vec_tmp.Scale(1 / am.NumGauss());
|
||||
|
||||
avg_std_var_.CopyFromVec(vec_tmp);
|
||||
}
|
||||
|
||||
void FmpeUpdater::Update(const FmpeAccs &accs,
|
||||
BaseFloat *obj_change_out,
|
||||
BaseFloat *count_out) {
|
||||
KALDI_ASSERT((M_.size() == accs.pos().size()) && (M_.size() == accs.neg().size()));
|
||||
KALDI_ASSERT((M_[0].size() == accs.pos()[0].size()) && (M_[0].size() == accs.neg()[0].size())
|
||||
&& M_[0].size() == config_.context_windows.NumRows());
|
||||
KALDI_ASSERT((M_[0][0].NumRows() == accs.pos()[0][0].NumRows())
|
||||
&& (M_[0][0].NumRows() == accs.neg()[0][0].NumRows())
|
||||
&& (M_[0][0].NumRows() == avg_std_var_.Dim()));
|
||||
KALDI_ASSERT((M_[0][0].NumCols() == accs.pos()[0][0].NumCols())
|
||||
&& (M_[0][0].NumCols() == accs.neg()[0][0].NumCols())
|
||||
&& (M_[0][0].NumCols() == (M_[0][0].NumRows() + 1)));
|
||||
|
||||
int32 ngauss = M_.size();
|
||||
int32 n_cont_exp = M_[0].size();
|
||||
int32 dim = M_[0][0].NumRows();
|
||||
|
||||
Matrix<double> pandn_add_tmp(dim, dim + 1);
|
||||
Matrix<double> pandn_sub_tmp(dim, dim + 1);
|
||||
Vector<double> vec_tmp(avg_std_var_);
|
||||
vec_tmp.Scale(1 / config_.E);
|
||||
|
||||
KALDI_LOG << "Updating the projection matrix M, the dim is: [ "
|
||||
<< ngauss << " ][ " << n_cont_exp << " ][ " << dim << " ][ " << dim + 1
|
||||
<< " ] -> [nGauss][nContExp][fea_dim][fea_dim + 1]";
|
||||
|
||||
for (int32 gauss_index = 0; gauss_index < ngauss; gauss_index++) {
|
||||
for (int32 icon_exp = 0; icon_exp < n_cont_exp; icon_exp++) {
|
||||
pandn_add_tmp.CopyFromMat(accs.pos()[gauss_index][icon_exp], kNoTrans);
|
||||
pandn_add_tmp.AddMat(1.0, accs.neg()[gauss_index][icon_exp], kNoTrans);
|
||||
pandn_sub_tmp.CopyFromMat(accs.pos()[gauss_index][icon_exp], kNoTrans);
|
||||
pandn_sub_tmp.AddMat(-1.0, accs.neg()[gauss_index][icon_exp], kNoTrans);
|
||||
pandn_sub_tmp.DivElements(pandn_add_tmp);
|
||||
pandn_sub_tmp.MulRowsVec(vec_tmp);
|
||||
|
||||
M_[gauss_index][icon_exp].AddMat(1.0, pandn_sub_tmp, kNoTrans);
|
||||
}
|
||||
}
|
||||
|
||||
/// add some code to calculate the objective function change // TODO
|
||||
}
|
||||
|
||||
void ClusterGmmToClusterCenters(const DiagGmm &gmm,
|
||||
int32 num_cluster_centers,
|
||||
BaseFloat cluster_varfloor,
|
||||
DiagGmm *ubm_cluster_centers,
|
||||
std::vector<int32> *cluster_center_map) {
|
||||
// Bottom-up clustering of the Gaussians in the gmm model
|
||||
KALDI_ASSERT(num_cluster_centers < gmm.NumGauss());
|
||||
int32 dim = gmm.Dim();
|
||||
Vector<BaseFloat> tmp_mean(dim);
|
||||
Vector<BaseFloat> tmp_var(dim);
|
||||
int32 num_gaussians = gmm.NumGauss();
|
||||
std::vector<Clusterable*> gauss_clusters;
|
||||
gauss_clusters.reserve(num_cluster_centers);
|
||||
|
||||
for (int32 gauss_index = 0; gauss_index < num_gaussians; gauss_index++) {
|
||||
gmm.GetComponentMean(gauss_index, &tmp_mean);
|
||||
gmm.GetComponentVariance(gauss_index, &tmp_var);
|
||||
tmp_var.AddVec2(1.0, tmp_mean); // make it x^2 stats.
|
||||
BaseFloat this_weight = gmm.weights()(gauss_index);
|
||||
tmp_mean.Scale(this_weight);
|
||||
tmp_var.Scale(this_weight);
|
||||
gauss_clusters.push_back(new GaussClusterable(tmp_mean, tmp_var,
|
||||
cluster_varfloor, this_weight));
|
||||
}
|
||||
|
||||
std::vector<Clusterable*> gauss_clusters_out;
|
||||
KALDI_VLOG(1) << "Creating " << num_cluster_centers << " gaussian clusters centers.";
|
||||
ClusterBottomUp(gauss_clusters, kBaseFloatMax, num_cluster_centers,
|
||||
&gauss_clusters_out,
|
||||
cluster_center_map /*get the cluster assignments*/);
|
||||
DeletePointers(&gauss_clusters);
|
||||
|
||||
// Next, put the clustered Gaussians centers into a single GMM.
|
||||
KALDI_VLOG(1) << "Putting " << num_cluster_centers << " Gaussians cluster centers"
|
||||
<< "into a single GMM model.";
|
||||
Matrix<BaseFloat> tmp_means(num_cluster_centers, dim);
|
||||
Matrix<BaseFloat> tmp_vars(num_cluster_centers, dim);
|
||||
Vector<BaseFloat> tmp_weights(num_cluster_centers);
|
||||
Vector<BaseFloat> tmp_vec(dim);
|
||||
DiagGmm tmp_gmm;
|
||||
for (int32 gauss_index = 0; gauss_index < num_cluster_centers; gauss_index++) {
|
||||
GaussClusterable *this_cluster = static_cast<GaussClusterable*>(
|
||||
gauss_clusters_out[gauss_index]);
|
||||
BaseFloat weight = this_cluster->count();
|
||||
tmp_weights(gauss_index) = weight;
|
||||
tmp_vec.CopyFromVec(this_cluster->x_stats());
|
||||
tmp_vec.Scale(1/weight);
|
||||
tmp_means.CopyRowFromVec(tmp_vec, gauss_index);
|
||||
tmp_vec.CopyFromVec(this_cluster->x2_stats());
|
||||
tmp_vec.Scale(1/weight);
|
||||
tmp_vec.AddVec2(-1.0, tmp_means.Row(gauss_index)); // x^2 stats to var.
|
||||
tmp_vars.CopyRowFromVec(tmp_vec, gauss_index);
|
||||
}
|
||||
DeletePointers(&gauss_clusters_out);
|
||||
|
||||
tmp_gmm.Resize(num_cluster_centers, dim);
|
||||
tmp_weights.Scale(1.0/tmp_weights.Sum());
|
||||
tmp_gmm.SetWeights(tmp_weights);
|
||||
tmp_vars.InvertElements(); // need inverse vars...
|
||||
tmp_gmm.SetInvVarsAndMeans(tmp_vars, tmp_means);
|
||||
|
||||
KALDI_VLOG(1) << "Obtain " << tmp_gmm.NumGauss() << " Gaussians cluster centers.";
|
||||
ubm_cluster_centers->CopyFromDiagGmm(tmp_gmm);
|
||||
}
|
||||
|
||||
void ObtainUbmAndSomeClusterCenters(
|
||||
const AmDiagGmm &am,
|
||||
const Vector<BaseFloat> &state_occs,
|
||||
const FmpeConfig &config,
|
||||
DiagGmm *gmm_out,
|
||||
DiagGmm *gmm_cluster_centers_out,
|
||||
std::vector<int32> *gaussian_cluster_center_map_out) {
|
||||
/// First clusters the Gaussians in an acoustic model to a single GMM with specified
|
||||
/// number of components. Using the same algorithm in the SGMM's UBM
|
||||
/// initialization
|
||||
kaldi::UbmClusteringOptions ubm_opts;
|
||||
ubm_opts.ubm_numcomps = config.gmm_num_comps;
|
||||
ClusterGaussiansToUbm(am, state_occs, ubm_opts, gmm_out);
|
||||
|
||||
/// Clusters the Gaussians in the gmm model to some cluster centers, which is for
|
||||
/// more efficient evaluation of the gaussian posteriors just with
|
||||
/// the most likely cluster centers
|
||||
ClusterGmmToClusterCenters(*gmm_out, config.gmm_num_cluster_centers, config.cluster_varfloor,
|
||||
gmm_cluster_centers_out, gaussian_cluster_center_map_out);
|
||||
|
||||
}
|
||||
|
||||
} // End of namespace kaldi
|
|
@ -1,388 +0,0 @@
|
|||
// gmm/fmpe-am-diag-gmm.h
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#ifndef KALDI_GMM_FMPE_AM_DIAG_GMM_H_
|
||||
#define KALDI_GMM_FMPE_AM_DIAG_GMM_H_ 1
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "gmm/mle-diag-gmm.h"
|
||||
#include "gmm/ebw-diag-gmm.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
struct FmpeConfig {
|
||||
/// Number of the Gaussian components in the gmm model
|
||||
int32 gmm_num_comps;
|
||||
/// Number of the Gaussian cluster centers for fast evaluation
|
||||
int32 gmm_num_cluster_centers;
|
||||
/// the cluster var floor
|
||||
BaseFloat cluster_varfloor;
|
||||
/// Number of highest-scoring of the best cluster centers
|
||||
int32 gmm_cluster_centers_nbest;
|
||||
/// Number of highest-scoring of the best gaussians
|
||||
int32 gmm_gaussian_nbest;
|
||||
/// The lat prob scale
|
||||
double lat_prob_scale;
|
||||
/// The constant that contrals the overall learning rate
|
||||
double E;
|
||||
/// The Matrix indicates the length of context expansion
|
||||
/// and the weight of each corresponding context frame. e.g.[9][17]
|
||||
Matrix<BaseFloat> context_windows;
|
||||
|
||||
/*
|
||||
Matrix<BaseFloat> context_windows;
|
||||
// Normal dimension is [9][17]
|
||||
// Example would be
|
||||
// context_windows = [ 0 0 0 0 0 0 0 0 1.0 0 0 0 0 0 0 0 0
|
||||
// 0 0 0 0 0 0 0 0 0 1.0 0 0 0 0 0 0 0
|
||||
// .... etc.
|
||||
// Then your nlength_context_expansion variable equals
|
||||
// the NumRows() of this.
|
||||
// Then you don't have to hard-code the computation in ComputeContExpOffsetFeature.
|
||||
// Note: the code in ComputeContExpOffsetFeature that iterates over
|
||||
// context_windows will check for zeros, so it will not have to do any work if
|
||||
// it finds a zero feature.
|
||||
// Also be careful when the same Gaussian index is present on more than one frame,
|
||||
// that you are adding the values together, not replacing one with the other or
|
||||
// creating duplicates with the same index. [maybe use function DeDuplicateVector(
|
||||
// std::vector<std::pair<int32, Vector<BaseFloat> >*), that would first sort on the
|
||||
// int32 and then add together and combine any sets of elements with the same
|
||||
// integer value.
|
||||
*/
|
||||
FmpeConfig() {
|
||||
gmm_num_comps = 2048;
|
||||
gmm_num_cluster_centers = 128;
|
||||
cluster_varfloor = 0.01;
|
||||
gmm_cluster_centers_nbest = 25;
|
||||
gmm_gaussian_nbest = 2;
|
||||
lat_prob_scale = 0.083;
|
||||
E = 10.0;
|
||||
}
|
||||
|
||||
void Register(ParseOptions *po) {
|
||||
po->Register("gmm-num-comps", &gmm_num_comps, "Number of the Gaussian"
|
||||
" components in the gmm model to calculate the gaussian posteriors.");
|
||||
po->Register("gmm-num-cluster-centers", &gmm_num_cluster_centers, "Number"
|
||||
" of the Gaussian cluster centers for fast posteriors evaluation.");
|
||||
po->Register("cluster-varfloor", &cluster_varfloor,
|
||||
"Variance floor used in bottom-up state clustering.");
|
||||
po->Register("gmm-cluster-centers-nbest", &gmm_cluster_centers_nbest,
|
||||
"Number of highest-scoring of the best cluster centers.");
|
||||
po->Register("gmm-gaussian-nbest", &gmm_gaussian_nbest, "Number of"
|
||||
" of highest-scoring of the best gaussians.");
|
||||
po->Register("lat-prob-scale", &lat_prob_scale,
|
||||
"The lattice probability scale, very important.");
|
||||
po->Register("E", &E, "The constant that contrals the overall learning rate.");
|
||||
}
|
||||
};
|
||||
|
||||
/** \class FmpeAccumModelDiff
|
||||
* Class for computing the basic model parameter differentials from
|
||||
* the mpe statistics produced in the first pass of fmpe training
|
||||
*/
|
||||
class FmpeAccumModelDiff {
|
||||
public:
|
||||
FmpeAccumModelDiff(): dim_(0), num_comp_(0) {}
|
||||
explicit FmpeAccumModelDiff(const DiagGmm &gmm) {
|
||||
Resize(gmm);
|
||||
}
|
||||
|
||||
void Read(std::istream &in_stream, bool binary);
|
||||
void Write(std::ostream &out_stream, bool binary) const;
|
||||
|
||||
/// Allocates memory for accumulators
|
||||
void Resize(int32 num_comp, int32 dim);
|
||||
/// Calls ResizeAccumulators based on gmm
|
||||
void Resize(const DiagGmm &gmm);
|
||||
|
||||
/// Returns the number of mixture components
|
||||
int32 NumGauss() const { return num_comp_; }
|
||||
/// Returns the dimensionality of the feature vectors
|
||||
int32 Dim() const { return dim_; }
|
||||
|
||||
void SetZero();
|
||||
|
||||
// Accessors
|
||||
const Vector<double>& mle_occupancy() const { return mle_occupancy_; }
|
||||
const Matrix<double>& mean_diff_accumulator() const { return mean_diff_accumulator_; }
|
||||
const Matrix<double>& variance_diff_accumulator() const { return variance_diff_accumulator_; }
|
||||
|
||||
/// Computes the Model parameter differentials using the statistics from
|
||||
/// the MPE training, including the numerator and denominator accumulators
|
||||
/// and applies I-smoothing to the numerator accs, if needed,
|
||||
/// which using mle_acc.
|
||||
void ComputeModelParaDiff(const DiagGmm &diag_gmm,
|
||||
const AccumDiagGmm &num_acc,
|
||||
const AccumDiagGmm &den_acc,
|
||||
const AccumDiagGmm &mle_acc);
|
||||
|
||||
|
||||
private:
|
||||
int32 dim_;
|
||||
int32 num_comp_;
|
||||
|
||||
/// Accumulators
|
||||
Vector<double> mle_occupancy_;
|
||||
Matrix<double> mean_diff_accumulator_;
|
||||
Matrix<double> variance_diff_accumulator_;
|
||||
|
||||
// Cannot have copy constructor and assigment operator
|
||||
KALDI_DISALLOW_COPY_AND_ASSIGN(FmpeAccumModelDiff);
|
||||
};
|
||||
|
||||
inline void FmpeAccumModelDiff::Resize(const DiagGmm &gmm) {
|
||||
Resize(gmm.NumGauss(), gmm.Dim());
|
||||
}
|
||||
|
||||
/** \class FmpeAccs
|
||||
* Class for accumulate the positive and negative statistics
|
||||
* for computing the feature-level minimum phone error estimate of the
|
||||
* parameters of projection M matrix.
|
||||
* The acoustic model is diagonal Gaussian mixture models
|
||||
*/
|
||||
class FmpeAccs {
|
||||
public:
|
||||
explicit FmpeAccs(const FmpeConfig &config)
|
||||
: config_(config) {};
|
||||
|
||||
~FmpeAccs() {}
|
||||
|
||||
void Read(std::istream &in_stream, bool binary, bool add);
|
||||
void Write(std::ostream &out_stream, bool binary) const;
|
||||
|
||||
/// Read the am model's parameters differentials
|
||||
void ReadModelDiffs(std::istream &in_stream, bool binary);
|
||||
|
||||
/// Initializes the P and N statistics, and model parameter differentials if needed
|
||||
void Init(const AmDiagGmm &am_model, bool update);
|
||||
|
||||
/// Initializes the P and N statistics, and diff statistics
|
||||
void InitPNandDiff(int32 num_gmm_gauss, int32 con_exp, int32 dim);
|
||||
|
||||
/// Initializes the model parameter differentials
|
||||
void InitModelDiff(const AmDiagGmm &model);
|
||||
|
||||
/// Initializes the GMMs for computing the high dimensional features
|
||||
void InitializeGMMs(const DiagGmm &gmm, const DiagGmm &gmm_cluster_centers,
|
||||
std::vector<int32> &gaussian_cluster_center_map);
|
||||
|
||||
/// Compute the offset feature given one frame data
|
||||
void ComputeOneFrameOffsetFeature(const VectorBase<BaseFloat>& data,
|
||||
std::vector<std::pair<int32, Vector<double> > > *offset) const;
|
||||
|
||||
/// Compute all the offset features given the whole file data
|
||||
void ComputeWholeFileOffsetFeature(const MatrixBase<BaseFloat>& data,
|
||||
std::vector<std::vector<std::pair<int32, Vector<double> > > > *whole_file_offset) const;
|
||||
|
||||
/// Compute the context expansion high dimension feature
|
||||
/// The high dimension offset feature with the context expansion: "ht";
|
||||
/// the vector "ht" store the expanded offset feature corresponding
|
||||
/// each context. And each element of "ht" is the relative context's
|
||||
/// offset feature, which stored as the pair, including the used
|
||||
/// gaussian index and the corresponding offset feature
|
||||
/// vector. This structure is designed for the sparse vector ht.
|
||||
/// dim is [nContExp * nGaussian * (fea_dim + 1)]
|
||||
/// "offset_win" stores the current corresponding offset features
|
||||
/// which are used to compute "ht"
|
||||
void ComputeContExpOffsetFeature(
|
||||
const std::vector<std::vector<std::pair<int32, Vector<double> > >* > &offset_win,
|
||||
std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > *ht) const;
|
||||
|
||||
/// obtain the current needed context expension high dimension feature using
|
||||
/// the whole file offset features as the inputs which is indexed
|
||||
/// by the current frame's number frame_index
|
||||
void ComputeHighDimemsionFeature(
|
||||
const std::vector<std::vector<std::pair<int32, Vector<double> > > > &whole_file_offset_feat,
|
||||
int32 frame_index,
|
||||
std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > *ht) const;
|
||||
|
||||
/// Prject the high dimension features down to the dimension of the original
|
||||
/// features and add them to the origianl features.
|
||||
/// This is the sparse multiply using the non-sparse matrix M and
|
||||
/// the sparse high dimension vector ht
|
||||
void ProjectHighDimensionFeature(
|
||||
const std::vector< std::vector< Matrix<double> > > &M,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht,
|
||||
Vector<double> *fea_out) const;
|
||||
|
||||
/// Add the projected feature to the old feature and obtain the new fmpe feature
|
||||
void ObtainNewFmpeFeature(const VectorBase<BaseFloat> &data,
|
||||
const std::vector< std::vector< Matrix<double> > > &M,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht,
|
||||
Vector<double> *fea_new) const;
|
||||
|
||||
/// Accumulate the direct differentials
|
||||
void AccumulateDirectDiffFromPosteriors(const DiagGmm &gmm,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
const VectorBase<BaseFloat> &posteriors,
|
||||
Vector<double> *direct_diff);
|
||||
|
||||
/// Accumulate the indirect differentials from posteriors
|
||||
void AccumulateInDirectDiffFromPosteriors(const DiagGmm &gmm,
|
||||
const FmpeAccumModelDiff &fmpe_diaggmm_diff_acc,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
const VectorBase<BaseFloat> &posteriors,
|
||||
Vector<double> *indirect_diff);
|
||||
|
||||
/// Accumulate the indirect differentials from a DiagGmm model
|
||||
void AccumulateInDirectDiffFromDiag(const DiagGmm &gmm,
|
||||
const FmpeAccumModelDiff &fmpe_diaggmm_diff_acc,
|
||||
const VectorBase<BaseFloat> &data,
|
||||
BaseFloat frame_posterior,
|
||||
Vector<double> *indirect_diff);
|
||||
|
||||
/// Accumulate the statistics about the positive and negative differential
|
||||
void AccumulateFromDifferential(const VectorBase<double> &direct_diff,
|
||||
const VectorBase<double> &indirect_diff,
|
||||
const std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > &ht);
|
||||
|
||||
// Accessors
|
||||
FmpeAccumModelDiff& GetAccsModelDiff(int32 pdf_index);
|
||||
const FmpeAccumModelDiff& GetAccsModelDiff(int32 pdf_index) const;
|
||||
|
||||
const std::vector< std::vector< Matrix<double> > >& pos() const { return p_; }
|
||||
const std::vector< std::vector< Matrix<double> > >& neg() const { return n_; }
|
||||
const FmpeConfig& config() const { return config_; }
|
||||
|
||||
/// Returns the number of mixture components in the GMM model
|
||||
int32 NumGaussInGmm() const { return gmm_.NumGauss(); }
|
||||
/// Returns the number of cluster centers in the cluster center GMM
|
||||
int32 NumClusterCenter() const { return gmm_cluster_centers_.NumGauss(); }
|
||||
/// Returns the dimensionality of the feature vectors
|
||||
int32 Dim() const { return dim_; }
|
||||
|
||||
private:
|
||||
FmpeConfig config_;
|
||||
/// These contain the gmm models used to calculate the high deminsion
|
||||
/// offet feature : one compute the high dimension vector gaussian posteriors,
|
||||
/// and the other one is just for more efficient computing using
|
||||
/// the most likely cluster centers
|
||||
DiagGmm gmm_;
|
||||
DiagGmm gmm_cluster_centers_;
|
||||
|
||||
/// The mapping between the gmm_ model and the cluster centers of gmm_cluster_centers_
|
||||
std::vector<int32> gaussian_cluster_center_map_;
|
||||
|
||||
/// The basic model parameter differentials for the AmDiagGmm
|
||||
std::vector<FmpeAccumModelDiff*> model_diff_accumulators_;
|
||||
|
||||
/// The positive accumulated matrix p_ij; dim is [nGauss][nContExp][fea_dim][fea_dim + 1].
|
||||
std::vector< std::vector< Matrix<double> > > p_;
|
||||
/// The negative accumulated matrix n_ij; dim is [nGauss][nContExp][fea_dim][fea_dim + 1].
|
||||
std::vector< std::vector< Matrix<double> > > n_;
|
||||
/// The summation of the differential
|
||||
Vector<double> diff_;
|
||||
/// The summation of the direct differential
|
||||
Vector<double> direct_diff_;
|
||||
/// The summation of the indirect differential
|
||||
Vector<double> indirect_diff_;
|
||||
|
||||
/// The feature dim
|
||||
int32 dim_;
|
||||
|
||||
// Cannot have copy constructor and assigment operator
|
||||
KALDI_DISALLOW_COPY_AND_ASSIGN(FmpeAccs);
|
||||
};
|
||||
|
||||
/// Mutable access to the model-differential accumulator of pdf `pdf_index`.
/// Asserts that the index is in range and that the slot is non-NULL.
inline FmpeAccumModelDiff& FmpeAccs::GetAccsModelDiff(int32 pdf_index) {
  // A negative index wraps to a huge size_t and fails the range check.
  const size_t slot = static_cast<size_t>(pdf_index);
  KALDI_ASSERT(slot < model_diff_accumulators_.size());
  FmpeAccumModelDiff *acc = model_diff_accumulators_[pdf_index];
  KALDI_ASSERT(acc != NULL);
  return *acc;
}
|
||||
|
||||
/// Read-only access to the model-differential accumulator of pdf `pdf_index`.
/// Asserts that the index is in range and that the slot is non-NULL.
inline const FmpeAccumModelDiff& FmpeAccs::GetAccsModelDiff(int32 pdf_index) const {
  // A negative index wraps to a huge size_t and fails the range check.
  const size_t slot = static_cast<size_t>(pdf_index);
  KALDI_ASSERT(slot < model_diff_accumulators_.size());
  const FmpeAccumModelDiff *acc = model_diff_accumulators_[pdf_index];
  KALDI_ASSERT(acc != NULL);
  return *acc;
}
|
||||
|
||||
/** \class FmpeUpdater
|
||||
* Class for containing the functions that updating the feature-level
|
||||
* minimum phone error estimate of the parameters of projection M matrix
|
||||
* that adds offsets to the original feature.
|
||||
* The acoustic model is diagonal Gaussian mixture models
|
||||
*/
|
||||
class FmpeUpdater {
|
||||
public:
|
||||
explicit FmpeUpdater(const FmpeAccs &accs);
|
||||
~FmpeUpdater() {}
|
||||
|
||||
// provide copy constructor.
|
||||
explicit FmpeUpdater(const FmpeUpdater &other);
|
||||
|
||||
void Read(std::istream &in_stream, bool binary);
|
||||
void Write(std::ostream &out_stream, bool binary) const;
|
||||
|
||||
/// Initializes feature projection Matrix M
|
||||
void Init(int32 num_gmm_gauss, int32 con_exp, int32 dim);
|
||||
|
||||
/// compute the average standard deviation of gaussians
|
||||
/// in the current AmDiagGmm set
|
||||
void ComputeAvgStandardDeviation(const AmDiagGmm &am);
|
||||
|
||||
/// Update the projection matrix M
|
||||
void Update(const FmpeAccs &accs,
|
||||
BaseFloat *obj_change_out,
|
||||
BaseFloat *count_out);
|
||||
|
||||
// Accessors
|
||||
const std::vector< std::vector< Matrix<double> > >& ProjMat() const { return M_; }
|
||||
const FmpeConfig& config() const { return config_; }
|
||||
|
||||
private:
|
||||
FmpeConfig config_;
|
||||
|
||||
/// The average standard deviation of gaussians in the current AmDiagGmm set
|
||||
Vector<double> avg_std_var_;
|
||||
|
||||
/// The feature projection matrix; dim is [nGauss][nContExp][fea_dim][fea_dim + 1].
|
||||
std::vector< std::vector< Matrix<double> > > M_;
|
||||
|
||||
/// The feature dim
|
||||
int32 dim_;
|
||||
};
|
||||
|
||||
/** Clusters the Gaussians in the gmm model to some cluster centers,
 *  using bottom-up clustering.
 *
 *  @param gmm                  GMM whose Gaussians are clustered; it must have
 *                              more components than num_cluster_centers.
 *  @param num_cluster_centers  Number of cluster centers to produce.
 *  @param cluster_varfloor     Variance floor given to each Gaussian cluster.
 *  @param ubm_cluster_centers  Output GMM made of the cluster centers.
 *  @param cluster_center_map   Output: the cluster assignments.
 */
|
||||
void ClusterGmmToClusterCenters(const DiagGmm &gmm,
|
||||
int32 num_cluster_centers,
|
||||
BaseFloat cluster_varfloor,
|
||||
DiagGmm *ubm_cluster_centers,
|
||||
std::vector<int32> *cluster_center_map);
|
||||
|
||||
/** First clusters the Gaussians in an acoustic model to a single GMM with the
 *  specified number of components, using the same algorithm as the SGMM's UBM
 *  initialization, and then clusters the Gaussians in that gmm model
 *  to some cluster centers, which is for more efficient evaluation of the
 *  gaussian posteriors just with the most likely cluster centers.
 *
 *  @param am          Acoustic model whose Gaussians are clustered.
 *  @param state_occs  Passed through to ClusterGaussiansToUbm.
 *  @param config      Supplies gmm_num_comps, gmm_num_cluster_centers and
 *                     cluster_varfloor.
 *  @param gmm_out     Output: the clustered single GMM.
 *  @param gmm_cluster_centers_out          Output: GMM of the cluster centers.
 *  @param gaussian_cluster_center_map_out  Output: the cluster assignments.
 */
|
||||
void ObtainUbmAndSomeClusterCenters(
|
||||
const AmDiagGmm &am,
|
||||
const Vector<BaseFloat> &state_occs,
|
||||
const FmpeConfig &config,
|
||||
DiagGmm *gmm_out,
|
||||
DiagGmm *gmm_cluster_centers_out,
|
||||
std::vector<int32> *gaussian_cluster_center_map_out);
|
||||
|
||||
|
||||
} // End namespace kaldi
|
||||
|
||||
|
||||
#endif // KALDI_GMM_FMPE_AM_DIAG_GMM_H_
|
|
@ -15,13 +15,13 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
|
|||
gmm-est-fmllr-gpost gmm-est-fmllr gmm-est-regtree-fmllr-ali \
|
||||
gmm-est-regtree-mllr gmm-decode-kaldi gmm-compute-likes \
|
||||
gmm-decode-faster-regtree-mllr gmm-et-apply-c gmm-latgen-simple \
|
||||
gmm-rescore-lattice gmm-decode-biglm-faster fmpe-gmm-model-diffs-est \
|
||||
fmpe-gmm-acc-stats-gpost fmpe-gmm-sum-accs fmpe-init-gmms fmpe-gmm-est \
|
||||
gmm-rescore-lattice gmm-decode-biglm-faster \
|
||||
gmm-est-gaussians-ebw gmm-est-weights-ebw gmm-latgen-faster gmm-copy \
|
||||
gmm-global-acc-stats gmm-global-est gmm-global-sum-accs gmm-gselect \
|
||||
gmm-latgen-biglm-faster gmm-ismooth-stats gmm-global-get-frame-likes \
|
||||
gmm-global-est-fmllr gmm-global-to-fgmm gmm-global-acc-stats-twofeats \
|
||||
gmm-global-copy gmm-align-compiled-plusphones gmm-get-feat-deriv
|
||||
gmm-global-copy gmm-align-compiled-plusphones gmm-get-feat-deriv \
|
||||
gmm-fmpe-acc-stats gmm-acc-stats2
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -1,186 +0,0 @@
|
|||
// gmmbin/fmpe-gmm-acc-stats-gpost.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/diag-gmm.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
try {
|
||||
const char *usage =
|
||||
"Accumulate positive and negative stats for Fmpe training (reading in gaussian-level posteriors).\n"
|
||||
"Note: not yet tested.\n"
|
||||
"Usage: fmpe-gmm-acc-stats-gpost [options] <model-in> <model-diffs-in> <gmms-model-in> <feature-rspecifier> <gposteriors-ebw-rspecifier> <gposteriors-mle-rspecifier> <stats-out>\n"
|
||||
"e.g.: \n"
|
||||
" fmpe-gmm-acc-stats-gpost 1.mdl 1.model.diffs 1.gmm scp:train.scp ark:1.ebw.gpost ark:1.mle.gpost 1.fmpe.acc\n";
|
||||
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
bool binary = false;
|
||||
FmpeConfig fmpe_opts;
|
||||
int32 gmm_cluster_centers_nbest = 25;
|
||||
int32 gmm_gaussian_nbest = 2;
|
||||
double lat_prob_scale = 0.083;
|
||||
double E = 10.0;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Register("gmm-cluster-centers-nbest", &gmm_cluster_centers_nbest,
|
||||
"Number of highest-scoring of the best cluster centers.");
|
||||
po.Register("gmm-gaussian-nbest", &gmm_gaussian_nbest, "Number of"
|
||||
" of highest-scoring of the best gaussians.");
|
||||
po.Register("lat-prob-scale", &lat_prob_scale,
|
||||
"The lattice probability scale, very important.");
|
||||
po.Register("E", &E, "The constant that contrals the overall learning rate.");
|
||||
|
||||
fmpe_opts.Register(&po);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
||||
if (po.NumArgs() != 7) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_filename = po.GetArg(1),
|
||||
model_diffs_filename = po.GetArg(2),
|
||||
gmms_model_filename = po.GetArg(3),
|
||||
feature_rspecifier = po.GetArg(4),
|
||||
gposteriors_ebw_rspecifier = po.GetArg(5),
|
||||
gposteriors_mle_rspecifier = po.GetArg(6),
|
||||
accs_wxfilename = po.GetArg(7);
|
||||
|
||||
using namespace kaldi;
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(model_filename, &binary);
|
||||
trans_model.Read(ki.Stream(), binary);
|
||||
am_gmm.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
FmpeAccs fmpe_accs(fmpe_opts);
|
||||
fmpe_accs.Init(am_gmm, true);
|
||||
{
|
||||
bool binary;
|
||||
Input ki(model_diffs_filename, &binary);
|
||||
fmpe_accs.ReadModelDiffs(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
kaldi::DiagGmm gmm;
|
||||
kaldi::DiagGmm gmm_clusters;
|
||||
std::vector<int32> gaussian_cluster_center_map;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(gmms_model_filename, &binary);
|
||||
gmm.Read(ki.Stream(), binary);
|
||||
gmm_clusters.Read(ki.Stream(), binary);
|
||||
ReadIntegerVector(ki.Stream(), binary, &gaussian_cluster_center_map);
|
||||
}
|
||||
|
||||
fmpe_accs.InitializeGMMs(gmm, gmm_clusters, gaussian_cluster_center_map);
|
||||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessGauPostReader gposteriors_ebw_reader(gposteriors_ebw_rspecifier);
|
||||
RandomAccessGauPostReader gposteriors_mle_reader(gposteriors_mle_rspecifier);
|
||||
|
||||
int32 num_done = 0, num_no_posterior = 0, num_other_error = 0;
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
std::string key = feature_reader.Key();
|
||||
if ((!gposteriors_ebw_reader.HasKey(key)) &&
|
||||
(!gposteriors_mle_reader.HasKey(key))) {
|
||||
num_no_posterior++;
|
||||
} else {
|
||||
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
||||
const GauPost &gpost_ebw = gposteriors_ebw_reader.Value(key);
|
||||
const GauPost &gpost_mle = gposteriors_ebw_reader.Value(key);
|
||||
|
||||
if ((static_cast<int32>(gpost_ebw.size()) != mat.NumRows()) &&
|
||||
(static_cast<int32>(gpost_mle.size()) != mat.NumRows())) {
|
||||
KALDI_WARN << "Gaussian Posterior vector has wrong size : gpost-ebw. " <<
|
||||
(gpost_ebw.size()) << "gpost-mle. " << (gpost_mle.size()) << " vs. "<< (mat.NumRows());
|
||||
num_other_error++;
|
||||
continue;
|
||||
}
|
||||
|
||||
num_done++;
|
||||
|
||||
std::vector<std::vector<std::pair<int32, Vector<double> > > > whole_file_offset;
|
||||
std::vector<std::pair<int32, std::vector<std::pair<int32, Vector<double> > > > > ht;
|
||||
|
||||
fmpe_accs.ComputeWholeFileOffsetFeature(mat, &whole_file_offset);
|
||||
|
||||
for (size_t i = 0; i < mat.NumRows(); i++) {
|
||||
fmpe_accs.ComputeHighDimemsionFeature(whole_file_offset, i, &ht);
|
||||
Vector<double> direct_diff(mat.NumCols()), indirect_diff(mat.NumCols());
|
||||
/// compute the direct differentials
|
||||
for (size_t j = 0; j < gpost_ebw[i].size(); j++) {
|
||||
int32 tid = gpost_ebw[i][j].first, // transition identifier.
|
||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
||||
fmpe_accs.AccumulateDirectDiffFromPosteriors(am_gmm.GetPdf(pdf_id),
|
||||
mat.Row(i),
|
||||
gpost_ebw[i][j].second,
|
||||
&direct_diff);
|
||||
}
|
||||
/// compute the indirect differentials
|
||||
for (size_t j = 0; j < gpost_mle[i].size(); j++) {
|
||||
int32 tid = gpost_mle[i][j].first, // transition identifier.
|
||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
||||
fmpe_accs.AccumulateInDirectDiffFromPosteriors(am_gmm.GetPdf(pdf_id),
|
||||
fmpe_accs.GetAccsModelDiff(pdf_id),
|
||||
mat.Row(i),
|
||||
gpost_mle[i][j].second,
|
||||
&indirect_diff);
|
||||
}
|
||||
fmpe_accs.AccumulateFromDifferential(direct_diff, indirect_diff, ht);
|
||||
ht.clear();
|
||||
}
|
||||
if (num_done % 50 == 0) {
|
||||
KALDI_LOG << "Processed " << num_done << " utterances.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior
|
||||
<< " with no posteriors, " << num_other_error
|
||||
<< " with other errors.";
|
||||
|
||||
{
|
||||
Output ko(accs_wxfilename, binary);
|
||||
fmpe_accs.Write(ko.Stream(), binary);
|
||||
}
|
||||
KALDI_LOG << "Written accs.";
|
||||
if (num_done != 0) return 0;
|
||||
else return 1;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
// gmmbin/fmpe-gmm-est.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "tree/context-dep.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"Estimate fMPE transforms.\n"
|
||||
"Note: not yet tested.\n"
|
||||
"Usage: fmpe-gmm-est [options] <am-model-in> <fmpe-proj-matrix-in> <stats-in> <fmpe-proj-matrix-out>\n"
|
||||
"e.g.: gmm-est 1.mdl 1.mat 1.acc 2.mat\n";
|
||||
|
||||
bool binary_write = false;
|
||||
FmpeConfig fmpe_opts;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
fmpe_opts.Register(&po);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 4) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
std::string model_in_filename = po.GetArg(1),
|
||||
fmpe_proj_mat_in_filename = po.GetArg(2),
|
||||
stats_filename = po.GetArg(3),
|
||||
fmpe_proj_mat_out_filename = po.GetArg(4);
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary_read;
|
||||
Input ki(model_in_filename, &binary_read);
|
||||
trans_model.Read(ki.Stream(), binary_read);
|
||||
am_gmm.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
FmpeAccs fmpe_accs(fmpe_opts);
|
||||
{
|
||||
bool binary;
|
||||
Input ki(stats_filename, &binary);
|
||||
fmpe_accs.Read(ki.Stream(), binary, true); // true == add; doesn't matter here.
|
||||
}
|
||||
|
||||
FmpeUpdater fmpe_updater(fmpe_accs);
|
||||
{
|
||||
bool binary;
|
||||
Input ki(fmpe_proj_mat_in_filename, &binary);
|
||||
fmpe_updater.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
{ // update the Fmpe projection matrix
|
||||
BaseFloat obj_change_out, count_out;
|
||||
fmpe_updater.ComputeAvgStandardDeviation(am_gmm);
|
||||
fmpe_updater.Update(fmpe_accs, &obj_change_out, &count_out);
|
||||
}
|
||||
|
||||
{
|
||||
Output ko(fmpe_proj_mat_out_filename, binary_write);
|
||||
fmpe_updater.Write(ko.Stream(), binary_write);
|
||||
}
|
||||
|
||||
KALDI_LOG << "Written Fmpe projection matrix to " << fmpe_proj_mat_out_filename;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
// gmmbin/fmpe-gmm-model-diffs-est.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "tree/context-dep.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "gmm/mle-am-diag-gmm.h"
|
||||
//#include "gmm/ebw-am-diag-gmm.h" // TODO wait Arnab to finish the AccumAmEbwDiagGmm Class, then make it active
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"Compute the model parameters differentials from the ebw accumulators (in mpe training) for fmpe training.\n"
|
||||
"Usage: fmpe-gmm-model-diffs-est [options] <model-in> <ebw-stats-in> <mle-stats-in> <model-diffs-out>\n"
|
||||
"e.g.: fmpe-gmm-model-diff-est 1.mdl 1.ebw.acc 1.mle.acc 1.model.diffs\n";
|
||||
|
||||
bool binary = false;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 4) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
std::string model_in_filename = po.GetArg(1),
|
||||
ebw_stats_in_filename = po.GetArg(2),
|
||||
mle_stats_in_filename = po.GetArg(3),
|
||||
model_diffs_out_filename = po.GetArg(4);
|
||||
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary_read;
|
||||
Input ki(model_in_filename, &binary_read);
|
||||
trans_model.Read(ki.Stream(), binary_read);
|
||||
am_gmm.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
Vector<double> transition_ebw_accs;
|
||||
// AccumAmEbwDiagGmm gmm_ebw_accs; // TODO wait Arnab to finish the AccumAmEbwDiagGmm Class, then make it active
|
||||
{
|
||||
bool binary;
|
||||
Input ki(ebw_stats_in_filename, &binary);
|
||||
transition_ebw_accs.Read(ki.Stream(), binary);
|
||||
// TODO wait Arnab to finish the AccumAmEbwDiagGmm Class, then make it active
|
||||
// gmm_ebw_accs.Read(ki.Stream(), binary, true); // true == add; doesn't matter here.
|
||||
}
|
||||
|
||||
Vector<double> transition_mle_accs;
|
||||
AccumAmDiagGmm gmm_mle_accs;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(mle_stats_in_filename, &binary);
|
||||
transition_mle_accs.Read(ki.Stream(), binary);
|
||||
gmm_mle_accs.Read(ki.Stream(), binary, true); // true == add; doesn't matter here.
|
||||
}
|
||||
|
||||
std::vector<FmpeAccumModelDiff*> model_diffs;
|
||||
model_diffs.reserve(am_gmm.NumPdfs());
|
||||
for (int32 i = 0; i < am_gmm.NumPdfs(); i++) {
|
||||
model_diffs.push_back(new FmpeAccumModelDiff(am_gmm.GetPdf(i)));
|
||||
// TODO wait Arnab to finish the AccumAmEbwDiagGmm Class, then make it active
|
||||
// model_diff.back()->ComputeModelParaDiff(am_gmm.GetPdf(i), gmm_ebw_acc.GetAcc(i), gmm_mle_accs.GetAcc(i));
|
||||
}
|
||||
|
||||
// Write out the model diffs
|
||||
{
|
||||
kaldi::Output ko(model_diffs_out_filename, binary);
|
||||
WriteToken(ko.Stream(), binary, "<DIMENSION>");
|
||||
WriteBasicType(ko.Stream(), binary, static_cast<int32>(am_gmm.Dim()));
|
||||
WriteToken(ko.Stream(), binary, "<NUMPDFS>");
|
||||
WriteBasicType(ko.Stream(), binary, static_cast<int32>(model_diffs.size()));
|
||||
for (std::vector<FmpeAccumModelDiff*>::const_iterator it = model_diffs.begin(),
|
||||
end = model_diffs.end(); it != end; ++it) {
|
||||
(*it)->Write(ko.Stream(), binary);
|
||||
}
|
||||
}
|
||||
|
||||
KALDI_LOG << "Written model diffs to " << model_diffs_out_filename;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
// gmmbin/fmpe-gmm-sum-accs.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"Sum multiple accumulated stats files for Fmpe training.\n"
|
||||
"Usage: fmpe-gmm-sum-accs [options] stats-out stats-in1 stats-in2 ...\n";
|
||||
|
||||
bool binary = false;
|
||||
kaldi::FmpeConfig fmpe_opts;
|
||||
|
||||
kaldi::ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() < 3) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string stats_out_filename = po.GetArg(1);
|
||||
kaldi::FmpeAccs fmpe_accs(fmpe_opts);
|
||||
|
||||
for (int i = 2, max = po.NumArgs(); i <= max; ++i) {
|
||||
std::string stats_in_filename = po.GetArg(i);
|
||||
bool binary_read;
|
||||
kaldi::Input ki(stats_in_filename, &binary_read);
|
||||
fmpe_accs.Read(ki.Stream(), binary_read, true /*add read values*/);
|
||||
}
|
||||
|
||||
// Write out the accs
|
||||
{
|
||||
kaldi::Output ko(stats_out_filename, binary);
|
||||
fmpe_accs.Write(ko.Stream(), binary);
|
||||
}
|
||||
|
||||
KALDI_LOG << "Written stats to " << stats_out_filename;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
// gmmbin/fmpe-init-gmms.cc
|
||||
|
||||
// Copyright 2009-2011 Yanmin Qian
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/kaldi-io.h"
|
||||
#include "gmm/diag-gmm.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "gmm/fmpe-am-diag-gmm.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
typedef kaldi::int32 int32;
|
||||
typedef kaldi::BaseFloat BaseFloat;
|
||||
|
||||
const char *usage =
|
||||
"Cluster the Gaussians in a diagonal-GMM acoustic model\n"
|
||||
"to two single diag-covariance GMMs used in fmpe: one is the gmm model\n"
|
||||
"for compute gaussian posteriors and one is the gaussian\n"
|
||||
"cluster centers which is used to speed up gaussian calculations"
|
||||
"Usage: fmpe-init-gmms [options] <model-file> <state-occs> <gmm-out> <gmm-cluster-centers-out> <gaussian-cluster-center-map-out>\n";
|
||||
|
||||
bool binary_write = false;
|
||||
int32 gmm_num_comps = 2048;
|
||||
int32 gmm_num_cluster_centers = 128;
|
||||
BaseFloat cluster_varfloor = 0.01;
|
||||
kaldi::FmpeConfig fmpe_opts;
|
||||
kaldi::ParseOptions po(usage);
|
||||
po.Register("binary", &binary_write, "Write output in binary mode");
|
||||
po.Register("gmm-num-comps", &gmm_num_comps, "Number of the Gaussian"
|
||||
" components in the gmm model to calculate the gaussian posteriors.");
|
||||
po.Register("gmm-num-cluster-centers", &gmm_num_cluster_centers, "Number"
|
||||
" of the Gaussian cluster centers for fast posteriors evaluation.");
|
||||
po.Register("cluster-varfloor", &cluster_varfloor,
|
||||
"Variance floor used in bottom-up state clustering.");
|
||||
|
||||
fmpe_opts.Register(&po);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 5) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_in_filename = po.GetArg(1),
|
||||
occs_in_filename = po.GetArg(2),
|
||||
gmm_out_filename = po.GetArg(3),
|
||||
gmm_cluster_centers_out_filename = po.GetArg(4),
|
||||
gauss_cluster_center_map_out_filename = po.GetArg(5);
|
||||
|
||||
kaldi::AmDiagGmm am_gmm;
|
||||
kaldi::TransitionModel trans_model;
|
||||
{
|
||||
bool binary_read;
|
||||
kaldi::Input ki(model_in_filename, &binary_read);
|
||||
trans_model.Read(ki.Stream(), binary_read);
|
||||
am_gmm.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
kaldi::Vector<BaseFloat> state_occs;
|
||||
state_occs.Resize(am_gmm.NumPdfs());
|
||||
{
|
||||
bool binary_read;
|
||||
kaldi::Input ki(occs_in_filename, &binary_read);
|
||||
state_occs.Read(ki.Stream(), binary_read);
|
||||
}
|
||||
|
||||
kaldi::DiagGmm gmm;
|
||||
kaldi::DiagGmm gmm_cluster_centers;
|
||||
std::vector<int32> gaussian_cluster_center_map;
|
||||
ObtainUbmAndSomeClusterCenters(
|
||||
am_gmm,
|
||||
state_occs,
|
||||
fmpe_opts,
|
||||
&gmm,
|
||||
&gmm_cluster_centers,
|
||||
&gaussian_cluster_center_map);
|
||||
|
||||
// Write out the gmms model
|
||||
{
|
||||
kaldi::Output ko(gmm_out_filename, binary_write);
|
||||
gmm.Write(ko.Stream(), binary_write);
|
||||
gmm_cluster_centers.Write(ko.Stream(), binary_write);
|
||||
kaldi::WriteIntegerVector(ko.Stream(), binary_write, gaussian_cluster_center_map);
|
||||
}
|
||||
|
||||
KALDI_LOG << "Written GMMs to " << gmm_out_filename;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
// gmmbin/gmm-acc-stats.cc
|
||||
|
||||
// Copyright 2009-2012 Daniel Povey
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "gmm/mle-am-diag-gmm.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
typedef kaldi::int64 int64;
|
||||
try {
|
||||
const char *usage =
|
||||
"Accumulate stats for GMM training (from posteriors)\n"
|
||||
"This version writes two accumulators (e.g. num and den),\n"
|
||||
"and puts the positive accumulators in num, negative in den\n"
|
||||
"Usage: gmm-acc-stats2 [options] <model> <feature-rspecifier>"
|
||||
"<posteriors-rspecifier> <num-stats-out> <den-stats-out>\n"
|
||||
"e.g.:\n"
|
||||
"gmm-acc-stats 1.mdl \"$feats\" ark:1.post 1.num_acc 1.den_acc\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = true;
|
||||
std::string update_flags_str = "mvwt"; // note: t is ignored, we acc
|
||||
// transition stats regardless.
|
||||
po.Register("binary", &binary, "Write stats in binary mode");
|
||||
po.Register("update-flags", &update_flags_str, "Which GMM parameters to "
|
||||
"update: subset of mvwt.");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 5) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1),
|
||||
feature_rspecifier = po.GetArg(2),
|
||||
posteriors_rspecifier = po.GetArg(3),
|
||||
num_accs_wxfilename = po.GetArg(4),
|
||||
den_accs_wxfilename = po.GetArg(5);
|
||||
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(model_rxfilename, &binary);
|
||||
trans_model.Read(ki.Stream(), binary);
|
||||
am_gmm.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
Vector<double> num_trans_accs, den_trans_accs;
|
||||
trans_model.InitStats(&num_trans_accs);
|
||||
trans_model.InitStats(&den_trans_accs);
|
||||
AccumAmDiagGmm num_gmm_accs, den_gmm_accs;
|
||||
num_gmm_accs.Init(am_gmm, StringToGmmFlags(update_flags_str));
|
||||
den_gmm_accs.Init(am_gmm, StringToGmmFlags(update_flags_str));
|
||||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
|
||||
|
||||
|
||||
BaseFloat tot_like = 0.0, tot_weight = 0.0;
|
||||
// tot_like is total weighted likelihood (note: weighted
|
||||
// by both +ve and -ve numbers)
|
||||
// tot_t is total weight in posteriors (will often be about zero).
|
||||
int64 tot_frames = 0.0;
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
std::string key = feature_reader.Key();
|
||||
if (!posteriors_reader.HasKey(key)) {
|
||||
num_err++;
|
||||
} else {
|
||||
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
||||
const Posterior &posterior = posteriors_reader.Value(key);
|
||||
|
||||
if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
|
||||
KALDI_WARN << "Posterior vector has wrong size "
|
||||
<< (posterior.size()) << " vs. "
|
||||
<< (mat.NumRows());
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
BaseFloat tot_like_this_file = 0.0, tot_weight_this_file = 0.0;
|
||||
|
||||
for (size_t i = 0; i < posterior.size(); i++) {
|
||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||
int32 tid = posterior[i][j].first,
|
||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
||||
BaseFloat weight = posterior[i][j].second;
|
||||
trans_model.Accumulate(fabs(weight), tid,
|
||||
(weight > 0.0 ?
|
||||
&num_trans_accs : &den_trans_accs));
|
||||
num_done++;
|
||||
tot_like_this_file +=
|
||||
(weight > 0.0 ? &num_gmm_accs : &den_gmm_accs) ->
|
||||
AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, fabs(weight)) * weight;
|
||||
tot_weight_this_file += weight;
|
||||
}
|
||||
}
|
||||
tot_like += tot_like_this_file;
|
||||
tot_weight += tot_weight_this_file;
|
||||
tot_frames += static_cast<int32>(posterior.size());
|
||||
}
|
||||
}
|
||||
|
||||
KALDI_LOG << "Done " << num_done << " files, " << num_err
|
||||
<< " had errors.";
|
||||
|
||||
KALDI_LOG << "Overall weighted acoustic likelihood per frame was "
|
||||
<< (tot_like/tot_frames) << " over " << tot_frames << " frames;"
|
||||
<< " average weight per frame was " << (tot_weight / tot_frames);
|
||||
|
||||
{
|
||||
Output ko(num_accs_wxfilename, binary);
|
||||
num_trans_accs.Write(ko.Stream(), binary);
|
||||
num_gmm_accs.Write(ko.Stream(), binary);
|
||||
}
|
||||
{
|
||||
Output ko(den_accs_wxfilename, binary);
|
||||
den_trans_accs.Write(ko.Stream(), binary);
|
||||
den_gmm_accs.Write(ko.Stream(), binary);
|
||||
}
|
||||
KALDI_LOG << "Written accs.";
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -125,7 +125,7 @@ int main(int argc, char *argv[]) {
|
|||
power, min_count);
|
||||
|
||||
if (!occs_out_filename.empty()) {
|
||||
bool binary = false; // write this in text mode-- useful to look at.
|
||||
bool binary = true; // write this in binary mode.
|
||||
kaldi::Output ko(occs_out_filename, binary);
|
||||
state_occs.Write(ko.Stream(), binary);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
// gmmbin/gmm-fmpe-acc-stats.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "transform/fmpe.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
using kaldi::int32;
|
||||
try {
|
||||
const char *usage =
|
||||
"Accumulate stats for fMPE training, using GMM model. Note: this could\n"
|
||||
"be done using gmm-get-feat-deriv and fmpe-acc-stats (but you'd be computing\n"
|
||||
"the features twice). Features input should be pre-fMPE features.\n"
|
||||
"\n"
|
||||
"Usage: gmm-fmpe-acc-stats [options] <model-in> <fmpe-in> <feature-rspecifier> "
|
||||
"<gselect-rspecifier> <posteriors-rspecifier> <fmpe-stats-out>\n"
|
||||
"e.g.: \n"
|
||||
" gmm-fmpe-acc-stats 1.mdl 1.fmpe \"$feats\" ark:1.gselect ark:1.post 1.fmpe_stats\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = true;
|
||||
po.Register("binary", &binary, "If true, write stats in binary mode.");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 6) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1),
|
||||
fmpe_rxfilename = po.GetArg(2),
|
||||
feature_rspecifier = po.GetArg(3),
|
||||
gselect_rspecifier = po.GetArg(4),
|
||||
posteriors_rspecifier = po.GetArg(5),
|
||||
stats_wxfilename = po.GetArg(6);
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(model_rxfilename, &binary);
|
||||
trans_model.Read(ki.Stream(), binary);
|
||||
am_gmm.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
Fmpe fmpe;
|
||||
{
|
||||
bool binary_in;
|
||||
Input ki(fmpe_rxfilename, &binary_in);
|
||||
fmpe.Read(ki.Stream(), binary_in);
|
||||
}
|
||||
|
||||
// fmpe stats...
|
||||
Matrix<BaseFloat> stats(fmpe.ProjectionTNumRows() * 2,
|
||||
fmpe.ProjectionTNumCols());
|
||||
SubMatrix<BaseFloat> stats_plus(stats, 0, fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
SubMatrix<BaseFloat> stats_minus(stats, fmpe.ProjectionTNumRows(),
|
||||
fmpe.ProjectionTNumRows(),
|
||||
0, fmpe.ProjectionTNumCols());
|
||||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
|
||||
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
|
||||
|
||||
BaseFloat tot_like = 0.0; // tot like weighted by posterior.
|
||||
int32 num_frames = 0;
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
std::string key = feature_reader.Key();
|
||||
if (!posteriors_reader.HasKey(key)) {
|
||||
num_err++;
|
||||
KALDI_WARN << "No posteriors for utterance " << key;
|
||||
continue;
|
||||
}
|
||||
const Matrix<BaseFloat> &feat_in = feature_reader.Value();
|
||||
const Posterior &posterior = posteriors_reader.Value(key);
|
||||
|
||||
if (static_cast<int32>(posterior.size()) != feat_in.NumRows()) {
|
||||
KALDI_WARN << "Posterior vector has wrong size " <<
|
||||
(posterior.size()) << " vs. "<< (feat_in.NumRows());
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!gselect_reader.HasKey(key)) {
|
||||
KALDI_WARN << "No gselect information for key " << key;
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
const std::vector<std::vector<int32> > &gselect =
|
||||
gselect_reader.Value(key);
|
||||
if (static_cast<int32>(gselect.size()) != feat_in.NumRows()) {
|
||||
KALDI_WARN << "gselect information has wrong size";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
num_done++;
|
||||
Matrix<BaseFloat> fmpe_feat(feat_in.NumRows(), feat_in.NumCols());
|
||||
fmpe.ComputeFeatures(feat_in, gselect, &fmpe_feat);
|
||||
fmpe_feat.AddMat(1.0, feat_in);
|
||||
|
||||
Matrix<BaseFloat> feat_deriv;
|
||||
|
||||
tot_like += ComputeAmGmmFeatureDeriv(am_gmm, trans_model, posterior,
|
||||
fmpe_feat, &feat_deriv);
|
||||
num_frames += feat_in.NumRows();
|
||||
|
||||
fmpe.AccStats(feat_in, gselect, feat_deriv, &stats_plus, &stats_minus);
|
||||
|
||||
if (num_done % 100 == 0)
|
||||
KALDI_LOG << "Processed " << num_done << " utterances.";
|
||||
}
|
||||
|
||||
KALDI_LOG << "Done " << num_done << " files, " << num_err
|
||||
<< " with errors.";
|
||||
KALDI_LOG << "Overall weighted acoustic likelihood per frame is "
|
||||
<< (tot_like/num_frames) << " over " << num_frames << " frames.";
|
||||
|
||||
Output ko(stats_wxfilename, binary);
|
||||
stats.Write(ko.Stream(), binary);
|
||||
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
// gmmbin/gmm-get-feat-deriv.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "transform/fmpe.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
using namespace kaldi;
|
||||
using kaldi::int32;
|
||||
try {
|
||||
const char *usage =
|
||||
"From GMM model and posteriors (which don't have to be positive),\n"
|
||||
"output for each utterance a matrix of likelihood derivatives w.r.t.\n"
|
||||
"the features.\n"
|
||||
"E.g. used in feature-space discriminative training.\n"
|
||||
"\n"
|
||||
"Usage: gmm-get-feat-deriv [options] <model-in> <feature-rspecifier> "
|
||||
"<posteriors-rspecifier> <feature-deriv-wspecifier>\n"
|
||||
"e.g.: \n"
|
||||
" gmm-get-feat-deriv 1.mdl \"$feats\" ark:1.post ark:1.deriv\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 4) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_filename = po.GetArg(1),
|
||||
feature_rspecifier = po.GetArg(2),
|
||||
posteriors_rspecifier = po.GetArg(3),
|
||||
deriv_wspecifier = po.GetArg(4);
|
||||
|
||||
AmDiagGmm am_gmm;
|
||||
TransitionModel trans_model;
|
||||
{
|
||||
bool binary;
|
||||
Input ki(model_filename, &binary);
|
||||
trans_model.Read(ki.Stream(), binary);
|
||||
am_gmm.Read(ki.Stream(), binary);
|
||||
}
|
||||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
|
||||
BaseFloatMatrixWriter deriv_writer(deriv_wspecifier);
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
std::string key = feature_reader.Key();
|
||||
if (!posteriors_reader.HasKey(key)) {
|
||||
KALDI_WARN << "No posteriors for utterance " << key;
|
||||
num_err++;
|
||||
} else {
|
||||
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
||||
const Posterior &posterior = posteriors_reader.Value(key);
|
||||
|
||||
if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
|
||||
KALDI_WARN << "Posterior vector has wrong size " <<
|
||||
(posterior.size()) << " vs. "<< (mat.NumRows());
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
num_done++;
|
||||
|
||||
// Derivative of likelihood (or whatever objective func.)
|
||||
// w.r.t. features.
|
||||
Matrix<BaseFloat> deriv;
|
||||
ComputeAmGmmFeatureDeriv(am_gmm, trans_model, posterior,
|
||||
mat, &deriv);
|
||||
|
||||
deriv_writer.Write(key, deriv);
|
||||
if (num_done % 100 == 0)
|
||||
KALDI_LOG << "Processed " << num_done << " utterances.";
|
||||
}
|
||||
}
|
||||
|
||||
KALDI_LOG << "Done " << num_done << " files, " << num_err
|
||||
<< " with errors.";
|
||||
if (num_done != 0) return 0;
|
||||
else return 1;
|
||||
} catch(const std::exception& e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -30,7 +30,7 @@ int main(int argc, char *argv[]) {
|
|||
"Convert a full covariance GMM into a diagonal one.\n"
|
||||
"Usage: full-to-tied <full-gmm-in> <diag-gmm-out>\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
|
|
@ -106,7 +106,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" init-tied-codebooks tree tree.acc ubm-full tree.map\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
int max_num_gaussians = 512;
|
||||
bool split_gaussians = false;
|
||||
BaseFloat perturb = 0.01;
|
||||
|
|
|
@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
|
|||
" tied-diag-gmm-acc-stats-ali 1.mdl 1.ali scp:train.scp ark:1.ali 1.acc\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ int main(int argc, char *argv[]) {
|
|||
" tied-diag-gmm-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
BaseFloat beam = 200.0;
|
||||
BaseFloat retry_beam = 0.0;
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
|
|
|
@ -72,7 +72,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" tied-diag-gmm-init-model tree topo tree.map diag0.ubm diag1.ubm 1.mdl\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
|
|
|
@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" tied-diag-gmm-init-mono topo cb.pdf mono.mdl mono.tree\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
|
|
@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
|
|||
" tied-full-gmm-acc-stats-ali 1.mdl 1.ali scp:train.scp ark:1.ali 1.acc\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@ int main(int argc, char *argv[]) {
|
|||
" tied-full-gmm-align-compiled 1.mdl ark:- scp:train.scp t, ark:1.ali\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
BaseFloat beam = 200.0;
|
||||
BaseFloat retry_beam = 0.0;
|
||||
BaseFloat acoustic_scale = 1.0;
|
||||
|
|
|
@ -71,7 +71,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" tied-full-gmm-init-model tree topo tree.map full0.ubm full1.ubm 1.mdl\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
|
|
|
@ -55,7 +55,7 @@ int main(int argc, char *argv[]) {
|
|||
"e.g.: \n"
|
||||
" tied-full-gmm-init-mono topo cb.pdf mono.mdl mono.tree\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
ParseOptions po(usage);
|
||||
po.Register("binary", &binary, "Write output in binary mode");
|
||||
po.Read(argc, argv);
|
||||
|
|
|
@ -167,7 +167,7 @@ try {
|
|||
" tied-lbg tree-old tree-tied topo scp:train.scp ark:ali ubm-full "
|
||||
"tree.map\n";
|
||||
|
||||
bool binary = false;
|
||||
bool binary = true;
|
||||
bool full = true;
|
||||
|
||||
BaseFloat perturb = 0.01;
|
||||
|
|
|
@ -4,7 +4,7 @@ include ../kaldi.mk
|
|||
|
||||
TESTFILES = regtree-fmllr-diag-gmm-test lda-estimate-test \
|
||||
regression-tree-test fmllr-diag-gmm-test exponential-transform-test \
|
||||
regtree-mllr-diag-gmm-test
|
||||
regtree-mllr-diag-gmm-test fmpe-test
|
||||
|
||||
OBJFILES = regression-tree.o regtree-mllr-diag-gmm.o lda-estimate.o \
|
||||
regtree-fmllr-diag-gmm.o cmvn.o transform-common.o fmllr-diag-gmm.o \
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
// transform/fmpe-test.cc
|
||||
|
||||
// Copyright 2012 Daniel Povey
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/diag-gmm.h"
|
||||
#include "gmm/diag-gmm-normal.h"
|
||||
#include "gmm/model-test-common.h"
|
||||
#include "transform/fmpe.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
||||
// Compute derivative of GMM log-likelihood w.r.t. features.
|
||||
// Note: this code copied from gmm-get-feat-deriv.cc; had
|
||||
// to simplify a bit.
|
||||
void GetFeatDeriv(const DiagGmm &gmm,
|
||||
const Matrix<BaseFloat> &feats,
|
||||
Matrix<BaseFloat> *deriv) {
|
||||
|
||||
deriv->Resize(feats.NumRows(), feats.NumCols());
|
||||
|
||||
Vector<BaseFloat> gauss_posteriors;
|
||||
Vector<BaseFloat> temp_vec(feats.NumCols());
|
||||
for (int32 i = 0; i < feats.NumRows(); i++) {
|
||||
SubVector<BaseFloat> this_feat(feats, i);
|
||||
SubVector<BaseFloat> this_deriv(*deriv, i);
|
||||
gmm.ComponentPosteriors(this_feat, &gauss_posteriors);
|
||||
BaseFloat weight = 1.0;
|
||||
gauss_posteriors.Scale(weight);
|
||||
// The next line does: to i'th row of deriv, add
|
||||
// means_invvars^T * gauss_posteriors,
|
||||
// where each row of means_invvars is the mean times
|
||||
// diagonal inverse covariance... after transposing,
|
||||
// this becomes a weighted of these rows, weighted by
|
||||
// the posteriors. This comes from the term
|
||||
// feat^T * inv_var * mean
|
||||
// in the objective function.
|
||||
this_deriv.AddMatVec(1.0, gmm.means_invvars(), kTrans,
|
||||
gauss_posteriors, 1.0);
|
||||
|
||||
// next line does temp_vec == inv_vars^T * gauss_posteriors,
|
||||
// which sets temp_vec to a weighted sum of the inv_vars,
|
||||
// weighed by Gaussian posterior.
|
||||
temp_vec.AddMatVec(1.0, gmm.inv_vars(), kTrans,
|
||||
gauss_posteriors, 0.0);
|
||||
// Add to the derivative, -(this_feat .* temp_vec),
|
||||
// which is the term that comes from the -0.5 * inv_var^T feat_sq,
|
||||
// in the objective function (where inv_var is a vector, and feat_sq
|
||||
// is a vector of squares of the feature values).
|
||||
this_deriv.AddVecVec(-1.0, this_feat, temp_vec, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
// Gets total log-likelihood, summed over all frames.
|
||||
BaseFloat GetGmmLike(const DiagGmm &gmm,
|
||||
const Matrix<BaseFloat> &feats) {
|
||||
BaseFloat ans = 0.0;
|
||||
for (int32 i = 0; i < feats.NumRows(); i++)
|
||||
ans += gmm.LogLikelihood(feats.Row(i));
|
||||
return ans;
|
||||
}
|
||||
|
||||
void TestFmpe() {
|
||||
int32 dim = 10 + (rand() % 10);
|
||||
int32 num_comp = 10 + (rand() % 10);
|
||||
DiagGmm gmm;
|
||||
unittest::InitRandDiagGmm(dim, num_comp, &gmm);
|
||||
|
||||
int32 num_frames = 20;
|
||||
Matrix<BaseFloat> feats(num_frames, dim);
|
||||
|
||||
for (int32 i = 0; i < num_frames; i++)
|
||||
for (int32 j = 0; j < dim; j++)
|
||||
feats(i,j) = RandGauss();
|
||||
|
||||
FmpeOptions opts; // Default.
|
||||
{
|
||||
Fmpe fmpe(gmm, opts);
|
||||
{
|
||||
bool binary = (rand() % 2 == 1);
|
||||
Output ko("tmpf", binary);
|
||||
fmpe.Write(ko.Stream(), binary);
|
||||
}
|
||||
}
|
||||
Fmpe fmpe(gmm, opts);
|
||||
{
|
||||
bool binary_in;
|
||||
Input ki("tmpf", &binary_in);
|
||||
fmpe.Read(ki.Stream(), binary_in);
|
||||
}
|
||||
|
||||
// We'll first be testing that the feature derivative is
|
||||
// accurate, by measuring a small random offset in feature space.
|
||||
{
|
||||
Matrix<BaseFloat> deriv;
|
||||
Matrix<BaseFloat> random_offset(feats.NumRows(), feats.NumCols());
|
||||
for (int32 i = 0; i < feats.NumRows(); i++)
|
||||
for (int32 j = 0; j < feats.NumCols(); j++)
|
||||
random_offset(i,j) = 1.0e-03 * RandGauss();
|
||||
BaseFloat like_before = GetGmmLike(gmm, feats);
|
||||
feats.AddMat(1.0, random_offset);
|
||||
BaseFloat like_after = GetGmmLike(gmm, feats);
|
||||
feats.AddMat(-1.0, random_offset); // undo the change.
|
||||
GetFeatDeriv(gmm, feats, &deriv);
|
||||
BaseFloat change1 = like_after - like_before,
|
||||
change2 = TraceMatMat(random_offset, deriv, kTrans);
|
||||
KALDI_LOG << "Random offset led to like change "
|
||||
<< change1 << " (manually), and " << change2
|
||||
<< " (derivative)";
|
||||
// note: not making this threshold smaller, as don't want
|
||||
// spurious failures. Seems to be OK though.
|
||||
KALDI_ASSERT( fabs(change1-change2) < 0.15*fabs(change1+change2));
|
||||
}
|
||||
|
||||
std::vector<std::vector<int32> > gselect(feats.NumRows()); // make it have all Gaussians...
|
||||
for (int32 i = 0; i < feats.NumRows(); i++)
|
||||
for (int32 j = 0; j < gmm.NumGauss(); j++)
|
||||
gselect[i].push_back(j);
|
||||
|
||||
Matrix<BaseFloat> fmpe_offset;
|
||||
// Check that the fMPE feature offset is zero.
|
||||
fmpe.ComputeFeatures(feats, gselect, &fmpe_offset);
|
||||
KALDI_ASSERT(fmpe_offset.IsZero());
|
||||
|
||||
// Note: we're just using the ML objective function here.
|
||||
// This is just to make sure the derivatives are all computed
|
||||
// correctly.
|
||||
BaseFloat like_before_update = GetGmmLike(gmm, feats);
|
||||
// Now get stats for update.
|
||||
int32 nr = fmpe.ProjectionTNumRows(), nc = fmpe.ProjectionTNumCols();
|
||||
Matrix<BaseFloat> plus_stats(nr, nc), minus_stats(nr, nc);
|
||||
Matrix<BaseFloat> deriv;
|
||||
GetFeatDeriv(gmm, feats, &deriv);
|
||||
fmpe.AccStats(feats, gselect, deriv, &plus_stats, &minus_stats);
|
||||
FmpeUpdateOptions update_opts;
|
||||
update_opts.learning_rate = 0.001; // so linear assumption is more valid.
|
||||
BaseFloat delta = fmpe.Update(update_opts, plus_stats, minus_stats);
|
||||
|
||||
fmpe.ComputeFeatures(feats, gselect, &fmpe_offset);
|
||||
feats.AddMat(1.0, fmpe_offset);
|
||||
|
||||
BaseFloat like_after_update = GetGmmLike(gmm, feats);
|
||||
|
||||
BaseFloat delta2 = like_after_update - like_before_update;
|
||||
KALDI_LOG << "Change predicted by fMPE Update function is "
|
||||
<< delta << ", change computed directly is "
|
||||
<< delta2;
|
||||
KALDI_ASSERT(fabs(delta-delta2) < 0.15 * fabs(delta+delta2));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Test driver: runs the randomized fMPE test eleven times with verbose
// logging, then reports success on stdout.
int main() {
  kaldi::g_kaldi_verbose_level = 5;
  int runs_left = 11;
  while (runs_left-- > 0) {
    kaldi::TestFmpe();
  }
  std::cout << "Test OK.\n";
  return 0;
}
|
||||
|
|
@ -19,6 +19,8 @@
|
|||
#include "transform/fmpe.h"
|
||||
#include "util/text-utils.h"
|
||||
#include "gmm/diag-gmm-normal.h"
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
@ -73,7 +75,7 @@ void Fmpe::ComputeC() {
|
|||
// to get centered covariance.
|
||||
C_.Resize(dim);
|
||||
try {
|
||||
TpMatrix<double> Ctmp; Ctmp.Cholesky(x2_stats);
|
||||
TpMatrix<double> Ctmp(dim); Ctmp.Cholesky(x2_stats);
|
||||
C_.CopyFromTp(Ctmp);
|
||||
} catch (...) {
|
||||
KALDI_ERR << "Error initializing fMPE object: cholesky of "
|
||||
|
@ -94,9 +96,9 @@ void Fmpe::ApplyContext(const MatrixBase<BaseFloat> &intermed_feat,
|
|||
// Applies the temporal-context part of the transformation.
|
||||
int32 dim = FeatDim(), ncontexts = NumContexts(),
|
||||
T = intermed_feat.NumRows();
|
||||
KALDI_ASSERT(intermed_feat.NumRows() == dim * ncontexts &&
|
||||
intermed_feat.NumCols() == feat_out->NumCols()
|
||||
&& feat_out->NumRows() == dim);
|
||||
KALDI_ASSERT(intermed_feat.NumCols() == dim * ncontexts &&
|
||||
intermed_feat.NumRows() == feat_out->NumRows()
|
||||
&& feat_out->NumCols() == dim);
|
||||
// note: ncontexts == contexts_.size().
|
||||
for (int32 i = 0; i < ncontexts; i++) {
|
||||
// this_intermed_feat is the chunk of the "intermediate features"
|
||||
|
@ -125,9 +127,9 @@ void Fmpe::ApplyContextReverse(const MatrixBase<BaseFloat> &feat_deriv,
|
|||
// in reverse, for getting derivatives for training.
|
||||
int32 dim = FeatDim(), ncontexts = NumContexts(),
|
||||
T = feat_deriv.NumRows();
|
||||
KALDI_ASSERT(intermed_feat_deriv->NumRows() == dim * ncontexts &&
|
||||
intermed_feat_deriv->NumCols() == feat_deriv.NumCols()
|
||||
&& feat_deriv.NumRows() == dim);
|
||||
KALDI_ASSERT(intermed_feat_deriv->NumCols() == dim * ncontexts &&
|
||||
intermed_feat_deriv->NumRows() == feat_deriv.NumRows()
|
||||
&& feat_deriv.NumCols() == dim);
|
||||
// note: ncontexts == contexts_.size().
|
||||
for (int32 i = 0; i < ncontexts; i++) {
|
||||
// this_intermed_feat is the chunk of the derivative of
|
||||
|
@ -142,7 +144,7 @@ void Fmpe::ApplyContextReverse(const MatrixBase<BaseFloat> &feat_deriv,
|
|||
// but this doesn't dominate the computation and I think this is
|
||||
// clearer.
|
||||
for (int32 t_out = 0; t_out < T; t_out++) { // t_out indexes the output
|
||||
int32 t_in = t_in + t_offset; // t_in indexes the input.
|
||||
int32 t_in = t_out + t_offset; // t_in indexes the input.
|
||||
if (t_in >= 0 && t_in < T) // Discard frames outside range.
|
||||
this_intermed_feat_deriv.Row(t_in).AddVec(weight,
|
||||
feat_deriv.Row(t_out));
|
||||
|
@ -164,7 +166,16 @@ void Fmpe::ApplyC(MatrixBase<BaseFloat> *feat_out, bool reverse) const {
|
|||
}
|
||||
}
|
||||
|
||||
// Constructs the high-dim features and applies the main projection matrix proj_.
|
||||
// Constructs the high-dim features and applies the main projection matrix
|
||||
// projT_. This projects from dimension ngauss*(dim+1) to dim*ncontexts. Note:
|
||||
// because the input vector of size ngauss*(dim+1) is sparse in a blocky way
|
||||
// (i.e. each frame only has a couple of nonzero posteriors), we deal with
|
||||
// sub-matrices of the projection matrix projT_. We actually further optimize
|
||||
// the code by taking all frames in a file that had nonzero posteriors for a
|
||||
// particular Gaussian, and forming a matrix out of the corresponding
|
||||
// high-dimensional features; we can then use a matrix-matrix multiply rather
|
||||
// than using vector-matrix operations.
|
||||
|
||||
void Fmpe::ApplyProjection(const MatrixBase<BaseFloat> &feat_in,
|
||||
const std::vector<std::vector<int32> > &gselect,
|
||||
MatrixBase<BaseFloat> *intermed_feat) const {
|
||||
|
@ -173,17 +184,44 @@ void Fmpe::ApplyProjection(const MatrixBase<BaseFloat> &feat_in,
|
|||
Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
|
||||
Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
|
||||
// the high-dimensional features.
|
||||
|
||||
// "all_posts" is a vector of ((gauss-index, time-index), gaussian
|
||||
// posterior).
|
||||
// We'll compute the posterior information, sort it, and then
|
||||
// go through it in sorted order, which maintains memory locality
|
||||
// when accessing the projection matrix.
|
||||
// Note: if we really cared we could make this use level-3 BLAS
|
||||
// (matrix-matrix multiply), but we'd need to have a temporary
|
||||
// matrix for the output and input.
|
||||
std::vector<std::pair<std::pair<int32,int32>, BaseFloat> > all_posts;
|
||||
|
||||
for (int32 t = 0; t < feat_in.NumRows(); t++) {
|
||||
SubVector<BaseFloat> this_feat(feat_in, t);
|
||||
SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
|
||||
gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
|
||||
// At this point, post will contain log-likes of the selected
|
||||
// Gaussians.
|
||||
post.ApplySoftMax(); // Now they are posteriors (which sum to one).
|
||||
for (int32 i = 0; i < post.Dim(); i++) {
|
||||
int32 gauss = gselect[t][i];
|
||||
all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
|
||||
}
|
||||
}
|
||||
std::sort(all_posts.begin(), all_posts.end());
|
||||
|
||||
bool optimize = true;
|
||||
|
||||
if (!optimize) { // Why do we keep this un-optimized code around?
|
||||
// For clarity, so you can see what's going on, and for easier
|
||||
// comparison with ApplyProjectionReverse which is similar to this
|
||||
// un-optimized segment. Both un-optimized and optimized versions
|
||||
// should give identical transforms (up to tiny roundoff differences).
|
||||
for (size_t i = 0; i < all_posts.size(); i++) {
|
||||
int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
|
||||
SubVector<BaseFloat> this_feat(feat_in, t);
|
||||
SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
|
||||
BaseFloat this_post = all_posts[i].second;
|
||||
SubVector<BaseFloat> this_stddev(stddevs_, gauss);
|
||||
BaseFloat this_post = post(i);
|
||||
|
||||
// The next line is equivalent to setting input_chunk to
|
||||
// -this_post * the gaussian mean / (gaussian stddev). Note: we use
|
||||
// the fact that mean * inv_var * stddev == mean / stddev.
|
||||
|
@ -196,12 +234,55 @@ void Fmpe::ApplyProjection(const MatrixBase<BaseFloat> &feat_in,
|
|||
1.0);
|
||||
// The last element of this input_chunk is the posterior itself
|
||||
// (between 0 and 1).
|
||||
input_chunk(dim) = this_post;
|
||||
input_chunk(dim) = this_post * config_.post_scale;
|
||||
|
||||
// this_intermed_feat += [appropriate chjunk of proj_] * input_chunk.
|
||||
this_intermed_feat.AddMatVec(1.0, proj_.Range(0, dim*ncontexts,
|
||||
gauss*(dim+1), dim+1),
|
||||
kNoTrans, input_chunk, 1.0);
|
||||
// this_intermed_feat += [appropriate chunk of projT_]^T * input_chunk.
|
||||
this_intermed_feat.AddMatVec(1.0, projT_.Range(gauss*(dim+1), dim+1,
|
||||
0, dim*ncontexts),
|
||||
kTrans, input_chunk, 1.0);
|
||||
}
|
||||
} else {
|
||||
size_t i = 0;
|
||||
while (i < all_posts.size()) {
|
||||
int32 gauss = all_posts[i].first.first;
|
||||
SubVector<BaseFloat> this_stddev(stddevs_, gauss),
|
||||
this_mean_invvar(gmm_.means_invvars(), gauss);
|
||||
SubMatrix<BaseFloat> this_projT_chunk(projT_, gauss*(dim+1), dim+1,
|
||||
0, dim*ncontexts);
|
||||
int32 batch_size; // number of posteriors with same Gaussian..
|
||||
for (batch_size = 0;
|
||||
batch_size+i < static_cast<int32>(all_posts.size()) &&
|
||||
all_posts[batch_size+i].first.first == gauss;
|
||||
batch_size++); // empty loop body.
|
||||
Matrix<BaseFloat> input_chunks(batch_size, dim+1);
|
||||
Matrix<BaseFloat> intermed_temp(batch_size, dim*ncontexts);
|
||||
for (int32 j = 0; j < batch_size; j++) { // set up "input_chunks"
|
||||
int32 t = all_posts[i+j].first.second;
|
||||
SubVector<BaseFloat> this_feat(feat_in, t);
|
||||
SubVector<BaseFloat> this_input_chunk(input_chunks, j);
|
||||
BaseFloat this_post = all_posts[i+j].second;
|
||||
this_input_chunk.Range(0, dim).AddVecVec(-this_post,
|
||||
this_mean_invvar,
|
||||
this_stddev, 0.0);
|
||||
this_input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat,
|
||||
this_stddev, 1.0);
|
||||
this_input_chunk(dim) = this_post * config_.post_scale;
|
||||
}
|
||||
// The next line is where most of the computation will happen,
|
||||
// during the feature computation phase. We have rearranged
|
||||
// stuff so it's a matrix-matrix operation, for greater
|
||||
// efficiency (when using optimized libraries like ATLAS).
|
||||
intermed_temp.AddMatMat(1.0, input_chunks, kNoTrans,
|
||||
this_projT_chunk, kNoTrans, 0.0);
|
||||
for (int32 j = 0; j < batch_size; j++) { // add data from
|
||||
// intermed_temp to the output "intermed_feat"
|
||||
int32 t = all_posts[i+j].first.second;
|
||||
SubVector<BaseFloat> this_intermed_feat(*intermed_feat, t);
|
||||
SubVector<BaseFloat> this_intermed_temp(intermed_temp, j);
|
||||
// this_intermed_feat += this_intermed_temp.
|
||||
this_intermed_feat.AddVec(1.0, this_intermed_temp);
|
||||
}
|
||||
i += batch_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -221,9 +302,16 @@ void Fmpe::ApplyProjectionReverse(const MatrixBase<BaseFloat> &feat_in,
|
|||
Vector<BaseFloat> post; // will be posteriors of selected Gaussians.
|
||||
Vector<BaseFloat> input_chunk(dim+1); // will be a segment of
|
||||
// the high-dimensional features.
|
||||
|
||||
// "all_posts" is a vector of ((gauss-index, time-index), gaussian
|
||||
// posterior).
|
||||
// We'll compute the posterior information, sort it, and then
|
||||
// go through it in sorted order, which maintains memory locality
|
||||
// when accessing the projection matrix.
|
||||
std::vector<std::pair<std::pair<int32,int32>, BaseFloat> > all_posts;
|
||||
|
||||
for (int32 t = 0; t < feat_in.NumRows(); t++) {
|
||||
SubVector<BaseFloat> this_feat(feat_in, t);
|
||||
SubVector<BaseFloat> this_intermed_feat_deriv(intermed_feat_deriv, t);
|
||||
gmm_.LogLikelihoodsPreselect(this_feat, gselect[t], &post);
|
||||
// At this point, post will contain log-likes of the selected
|
||||
// Gaussians.
|
||||
|
@ -232,35 +320,44 @@ void Fmpe::ApplyProjectionReverse(const MatrixBase<BaseFloat> &feat_in,
|
|||
// The next few lines (where we set up "input_chunk") are identical
|
||||
// to ApplyProjection.
|
||||
int32 gauss = gselect[t][i];
|
||||
SubVector<BaseFloat> this_stddev(stddevs_, gauss);
|
||||
BaseFloat this_post = post(i);
|
||||
input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
|
||||
this_stddev, 0.0);
|
||||
input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
|
||||
1.0);
|
||||
input_chunk(dim) = this_post;
|
||||
|
||||
// If not for accumulating the + and - parts separately, we would be
|
||||
// doing something like:
|
||||
// proj_deriv_.Range(0, dim*ncontexts, gauss*(dim+1), dim+1).AddVecVec(
|
||||
// 1.0, this_intermed_feat_deriv, input_chunk);
|
||||
|
||||
|
||||
SubMatrix<BaseFloat> plus_chunk(*proj_deriv_plus, 0, dim*ncontexts,
|
||||
gauss*(dim+1), dim+1),
|
||||
minus_chunk(*proj_deriv_minus, 0, dim*ncontexts,
|
||||
gauss*(dim+1), dim+1);
|
||||
|
||||
// This next function takes the rank-one matrix
|
||||
// (this_intermed_deriv * input_chunk') and adds the positive
|
||||
// part to proj_deriv_plus, and minus the negative part to
|
||||
// proj_deriv_minus.
|
||||
AddOuterProductPlusMinus(static_cast<BaseFloat>(1.0),
|
||||
this_intermed_feat_deriv,
|
||||
input_chunk,
|
||||
&plus_chunk, &minus_chunk);
|
||||
all_posts.push_back(std::make_pair(std::make_pair(gauss, t), post(i)));
|
||||
}
|
||||
}
|
||||
std::sort(all_posts.begin(), all_posts.end());
|
||||
for (size_t i = 0; i < all_posts.size(); i++) {
|
||||
int32 gauss = all_posts[i].first.first, t = all_posts[i].first.second;
|
||||
BaseFloat this_post = all_posts[i].second;
|
||||
SubVector<BaseFloat> this_feat(feat_in, t);
|
||||
SubVector<BaseFloat> this_intermed_feat_deriv(intermed_feat_deriv, t);
|
||||
SubVector<BaseFloat> this_stddev(stddevs_, gauss);
|
||||
input_chunk.Range(0, dim).AddVecVec(-this_post, gmm_.means_invvars().Row(gauss),
|
||||
this_stddev, 0.0);
|
||||
input_chunk.Range(0, dim).AddVecDivVec(this_post, this_feat, this_stddev,
|
||||
1.0);
|
||||
input_chunk(dim) = this_post * config_.post_scale;
|
||||
|
||||
// If not for accumulating the + and - parts separately, we would be
|
||||
// doing something like:
|
||||
// proj_deriv_.Range(0, dim*ncontexts, gauss*(dim+1), dim+1).AddVecVec(
|
||||
// 1.0, this_intermed_feat_deriv, input_chunk);
|
||||
|
||||
|
||||
SubMatrix<BaseFloat> plus_chunk(*proj_deriv_plus,
|
||||
gauss*(dim+1), dim+1,
|
||||
0, dim*ncontexts),
|
||||
minus_chunk(*proj_deriv_minus,
|
||||
gauss*(dim+1), dim+1,
|
||||
0, dim*ncontexts);
|
||||
|
||||
// This next function takes the rank-one matrix
|
||||
// (input_chunk * this_intermed_deriv'), and adds the positive
|
||||
// part to proj_deriv_plus, and minus the negative part to
|
||||
// proj_deriv_minus.
|
||||
AddOuterProductPlusMinus(static_cast<BaseFloat>(1.0),
|
||||
input_chunk,
|
||||
this_intermed_feat_deriv,
|
||||
&plus_chunk, &minus_chunk);
|
||||
}
|
||||
}
|
||||
|
||||
void Fmpe::ComputeFeatures(const MatrixBase<BaseFloat> &feat_in,
|
||||
|
@ -296,8 +393,8 @@ void Fmpe::AccStats(const MatrixBase<BaseFloat> &feat_in,
|
|||
int32 dim = FeatDim(), ncontexts = NumContexts();
|
||||
KALDI_ASSERT(feat_in.NumRows() != 0 && feat_in.NumCols() == dim);
|
||||
KALDI_ASSERT(feat_in.NumRows() == static_cast<int32>(gselect.size()));
|
||||
AssertSameDim(*proj_deriv_plus, proj_);
|
||||
AssertSameDim(*proj_deriv_minus, proj_);
|
||||
AssertSameDim(*proj_deriv_plus, projT_);
|
||||
AssertSameDim(*proj_deriv_minus, projT_);
|
||||
AssertSameDim(feat_in, feat_deriv_in);
|
||||
|
||||
// We do everything in reverse now, in reverse order.
|
||||
|
@ -326,28 +423,29 @@ Fmpe::Fmpe(const DiagGmm &gmm, const FmpeOptions &config): gmm_(gmm),
|
|||
SetContexts(config.context_expansion);
|
||||
ComputeC();
|
||||
ComputeStddevs();
|
||||
proj_.Resize(NumGauss() * (FeatDim()+1), FeatDim() * NumContexts());
|
||||
projT_.Resize(NumGauss() * (FeatDim()+1), FeatDim() * NumContexts());
|
||||
}
|
||||
|
||||
void Fmpe::Update(const FmpeUpdateOptions &config,
|
||||
MatrixBase<BaseFloat> &proj_deriv_plus,
|
||||
MatrixBase<BaseFloat> &proj_deriv_minus) {
|
||||
BaseFloat Fmpe::Update(const FmpeUpdateOptions &config,
|
||||
MatrixBase<BaseFloat> &proj_deriv_plus,
|
||||
MatrixBase<BaseFloat> &proj_deriv_minus) {
|
||||
// tot_linear_objf_impr is the change in the actual
|
||||
// objective function if it were linear, i.e.
|
||||
// objf-gradient . parameter-change
// Note: none of this is normalized by the #frames (we don't have
|
||||
// this info here), so that is done at the script level.
|
||||
BaseFloat tot_linear_objf_impr = 0.0;
|
||||
AssertSameDim(proj_deriv_plus, proj_);
|
||||
AssertSameDim(proj_deriv_minus, proj_);
|
||||
int32 changed = 0; // Keep track of how many elements change sign.
|
||||
AssertSameDim(proj_deriv_plus, projT_);
|
||||
AssertSameDim(proj_deriv_minus, projT_);
|
||||
KALDI_ASSERT(proj_deriv_plus.Min() >= 0);
|
||||
KALDI_ASSERT(proj_deriv_minus.Min() >= 0);
|
||||
BaseFloat learning_rate = config.learning_rate,
|
||||
l2_weight = config.l2_weight;
|
||||
|
||||
for (int32 i = 0; i < proj_.NumRows(); i++) {
|
||||
for (int32 j = 0; j < proj_.NumCols(); j++) {
|
||||
for (int32 i = 0; i < projT_.NumRows(); i++) {
|
||||
for (int32 j = 0; j < projT_.NumCols(); j++) {
|
||||
BaseFloat p = proj_deriv_plus(i,j), n = proj_deriv_minus(i,j),
|
||||
x = proj_(i,j);
|
||||
x = projT_(i,j);
|
||||
// Suppose the basic update (before regularization) is:
|
||||
// z <-- x + learning_rate * (p - n) / (p + n),
|
||||
// where z is the new parameter and x is the old one.
|
||||
|
@ -371,10 +469,14 @@ void Fmpe::Update(const FmpeUpdateOptions &config,
|
|||
// z is the new parameter value.
|
||||
|
||||
tot_linear_objf_impr += (z-x) * (p-n); // objf impr based on linear assumption.
|
||||
proj_(i,j) = z;
|
||||
projT_(i,j) = z;
|
||||
if (z*x < 0) changed++;
|
||||
}
|
||||
}
|
||||
KALDI_LOG << "Objf impr (assuming linear) is " << tot_linear_objf_impr;
|
||||
KALDI_LOG << ((100.0*changed)/(projT_.NumRows()*projT_.NumCols()))
|
||||
<< "% of matrix elements changed sign.";
|
||||
return tot_linear_objf_impr;
|
||||
}
|
||||
|
||||
// Note: we write the GMM first, without any other header.
|
||||
|
@ -386,7 +488,7 @@ void Fmpe::Write(std::ostream &os, bool binary) const {
|
|||
gmm_.Write(os, binary);
|
||||
config_.Write(os, binary);
|
||||
// stddevs_ are derived, don't write them.
|
||||
proj_.Write(os, binary);
|
||||
projT_.Write(os, binary);
|
||||
C_.Write(os, binary);
|
||||
// contexts_ are derived from config, don't write them.
|
||||
}
|
||||
|
@ -396,11 +498,59 @@ void Fmpe::Read(std::istream &is, bool binary) {
|
|||
gmm_.Read(is, binary);
|
||||
config_.Read(is, binary);
|
||||
ComputeStddevs(); // computed from gmm.
|
||||
proj_.Read(is, binary);
|
||||
projT_.Read(is, binary);
|
||||
C_.Read(is, binary);
|
||||
SetContexts(config_.context_expansion);
|
||||
}
|
||||
|
||||
|
||||
// Computes the weighted derivative of the per-pdf GMM objective w.r.t. the
// feature values.  For each frame, every (transition-id, weight) entry of
// "posterior" contributes the derivative of the corresponding pdf's GMM,
// scaled by its weight.  "deriv" is resized to the size of "features".
// Returns the sum, over all entries, of weight times the value returned by
// DiagGmm::ComponentPosteriors() for that frame and pdf.
BaseFloat ComputeAmGmmFeatureDeriv(const AmDiagGmm &am_gmm,
                                   const TransitionModel &trans_model,
                                   const Posterior &posterior,
                                   const MatrixBase<BaseFloat> &features,
                                   Matrix<BaseFloat> *deriv) {
  KALDI_ASSERT(posterior.size() == static_cast<size_t>(features.NumRows()));
  deriv->Resize(features.NumRows(), features.NumCols());

  BaseFloat ans = 0.0;
  Vector<BaseFloat> temp_vec(features.NumCols());
  for (size_t t = 0; t < posterior.size(); t++) {
    // Views of the t'th frame and of the matching row of the output; these
    // do not depend on which posterior entry is being processed.
    SubVector<BaseFloat> this_feat(features, t);
    SubVector<BaseFloat> this_deriv(*deriv, t);

    for (size_t k = 0; k < posterior[t].size(); k++) {
      int32 tid = posterior[t][k].first;  // transition identifier.
      int32 pdf_id = trans_model.TransitionIdToPdf(tid);
      BaseFloat weight = posterior[t][k].second;
      const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);

      Vector<BaseFloat> gauss_posteriors;
      ans += weight *
          gmm.ComponentPosteriors(this_feat, &gauss_posteriors);
      gauss_posteriors.Scale(weight);

      // To the t'th row of deriv, add means_invvars^T * gauss_posteriors.
      // Each row of means_invvars is a mean times the diagonal inverse
      // covariance, so this is a sum of those rows weighted by the (scaled)
      // posteriors.  It comes from the term
      //   feat^T * inv_var * mean
      // in the objective function.
      this_deriv.AddMatVec(1.0, gmm.means_invvars(), kTrans,
                           gauss_posteriors, 1.0);

      // Set temp_vec to inv_vars^T * gauss_posteriors, i.e. a sum of the
      // inverse variances weighted by the (scaled) Gaussian posteriors.
      temp_vec.AddMatVec(1.0, gmm.inv_vars(), kTrans,
                         gauss_posteriors, 0.0);

      // Add -(this_feat .* temp_vec) to the derivative.  This is the term
      // that comes from -0.5 * inv_var^T feat_sq in the objective function
      // (inv_var being a vector, and feat_sq the vector of squared feature
      // values).
      this_deriv.AddVecVec(-1.0, this_feat, temp_vec, 1.0);
    }
  }
  return ans;
}
|
||||
|
||||
|
||||
} // End of namespace kaldi
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#include <vector>
|
||||
|
||||
#include "gmm/am-diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
#include "util/kaldi-holder.h" // for Posterior
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
@ -104,8 +106,13 @@ class Fmpe {
|
|||
int32 NumGauss() const { return gmm_.NumGauss(); }
|
||||
int32 NumContexts() const { return static_cast<int32>(contexts_.size()); }
|
||||
|
||||
int32 ProjectionNumRows() { return FeatDim() * NumContexts(); }
|
||||
int32 ProjectionNumCols() { return (FeatDim()+1) * NumGauss(); }
|
||||
// Note: this returns the number of rows and columns in projT_,
|
||||
// which is the transpose of the high->intermediate dimensional
|
||||
// projection matrix. This is the dimension we want for the
|
||||
// stats.
|
||||
// Number of rows of projT_, i.e. (FeatDim()+1) * NumGauss().  Declared
// const, like NumGauss() and NumContexts(), since it only reads dimensions.
int32 ProjectionTNumRows() const { return (FeatDim()+1) * NumGauss(); }
|
||||
// Number of columns of projT_, i.e. FeatDim() * NumContexts().  Declared
// const, like NumGauss() and NumContexts(), since it only reads dimensions.
int32 ProjectionTNumCols() const { return FeatDim() * NumContexts(); }
|
||||
|
||||
|
||||
// Computes the fMPE feature offsets and outputs them.
|
||||
// You can add feat_in to this afterwards, if you want.
|
||||
|
@ -131,9 +138,10 @@ class Fmpe {
|
|||
void Write(std::ostream &os, bool binary) const;
|
||||
void Read(std::istream &is, bool binary);
|
||||
|
||||
void Update(const FmpeUpdateOptions &config,
|
||||
MatrixBase<BaseFloat> &proj_deriv_plus,
|
||||
MatrixBase<BaseFloat> &proj_deriv_minus);
|
||||
// Returns total objf improvement, based on linear assumption.
|
||||
BaseFloat Update(const FmpeUpdateOptions &config,
|
||||
MatrixBase<BaseFloat> &proj_deriv_plus,
|
||||
MatrixBase<BaseFloat> &proj_deriv_minus);
|
||||
|
||||
private:
|
||||
void SetContexts(std::string context_str);
|
||||
|
@ -180,8 +188,9 @@ class Fmpe {
|
|||
// variances of the GMM -- computed to avoid taking a square root
|
||||
// in the fMPE computation. Derived variable-- not stored on
|
||||
// disk.
|
||||
Matrix<BaseFloat> proj_; // The projection matrix, of dimension
|
||||
// (FeatDim() * NumContexts()) x (NumGauss() * (FeatDim()+1))
|
||||
Matrix<BaseFloat> projT_; // The transpose of the projection matrix;
|
||||
// this is of dimension
|
||||
// (NumGauss() * (FeatDim()+1)) * (FeatDim() * NumContexts()).
|
||||
|
||||
TpMatrix<BaseFloat> C_; // Cholesky factor of the variance Sigma of
|
||||
// features around their mean (as estimated from GMM)... applied
|
||||
|
@ -197,6 +206,17 @@ class Fmpe {
|
|||
|
||||
};
|
||||
|
||||
/// Computes derivatives of the likelihood of these states (weighted),
|
||||
/// w.r.t. the feature values. Used in fMPE training. Note, the
|
||||
/// weights "posterior" may be positive or negative-- for MMI, MPE,
|
||||
/// etc., they will typically be of both signs. Will resize "deriv".
|
||||
/// Returns the sum of (GMM likelihood * weight), which may be used
|
||||
/// as an approximation to the objective function.
|
||||
BaseFloat ComputeAmGmmFeatureDeriv(const AmDiagGmm &am_gmm,
|
||||
const TransitionModel &trans_model,
|
||||
const Posterior &posterior,
|
||||
const MatrixBase<BaseFloat> &features,
|
||||
Matrix<BaseFloat> *deriv);
|
||||
|
||||
|
||||
} // End namespace kaldi
|
||||
|
|
Загрузка…
Ссылка в новой задаче