git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@96 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Arnab Ghoshal 2011-06-21 06:53:48 +00:00
Родитель 295f5c39af
Коммит 8049135d15
6 изменённых файлов: 166 добавлений и 7 удалений

Просмотреть файл

@ -0,0 +1,83 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation, Saarland University
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script is like ../decode_tri.sh, but adapts to each test speaker in two
# stages:
#   1. per-speaker fMLLR, estimated from the first-pass alignments found in
#      $srcdir, with the posteriors passed through "weight-silence-post 0.01";
#   2. regression-tree MLLR, estimated from the alignments of a second decoding
#      pass that uses the fMLLR-adapted features.
# There is no SAT. Use a regression-tree with top-level speech/sil split.

if [ -f path.sh ]; then . path.sh; fi

srcdir=exp/decode_tri1   # supplies the first-pass alignments (test_*.ali).
dir=exp/decode_tri1_fmllr+regtree_mllr
mkdir -p "$dir"
model=exp/tri1/final.mdl
occs=exp/tri1/final.occs
tree=exp/tri1/tree
graphdir=exp/graph_tri1
silphones=$(cat data/silphones.csl)

regtree=$dir/regtree
maxleaves=2 # max # of regression-tree leaves.
fmllr_mincount=5000 # mincount before we add new transform.
mllr_mincount=1000 # mincount before we add new transform.

gmm-make-regtree --sil-phones="$silphones" --state-occs="$occs" \
  --max-leaves="$maxleaves" "$model" "$regtree" 2>"$dir/make_regtree.out"

scripts/mkgraph.sh "$tree" "$model" "$graphdir"

# Each test set is processed in its own background subshell; "wait" below
# blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
 (
  # Comment the two lines below to make this per-utterance.
  # NOTE: $spk2utt_opt and $utt2spk_opt are deliberately left unquoted where
  # they are used, so that they expand to nothing (rather than to an empty
  # argument) when the assignments below are commented out.
  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk

  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"

  # Stage 1: estimate fMLLR transforms from the first-pass alignments.
  ( ali-to-post "ark:$srcdir/test_${test}.ali" ark:- | \
    weight-silence-post 0.01 "$silphones" "$model" ark:- ark:- | \
    gmm-est-fmllr --fmllr-min-count="$fmllr_mincount" $spk2utt_opt "$model" \
      "$feats" ark,o:- "ark:$dir/${test}.fmllr" ) 2>"$dir/fmllr_${test}.log"

  # The base feature pipeline followed by application of the fMLLR transforms.
  # $feats already ends in " |", so this just appends one more pipeline stage.
  adapt_feats="$feats transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"

  # Second decoding pass on the fMLLR-adapted features; its alignments are the
  # supervision for the MLLR estimation below.
  gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 \
    --word-symbol-table=data/words.txt "$model" "$graphdir/HCLG.fst" \
    "$adapt_feats" "ark,t:$dir/${test}_pass2.tra" "ark,t:$dir/${test}_pass2.ali" \
    2> "$dir/pass2_${test}.log"

  # Stage 2: estimate regression-tree MLLR transforms on the adapted features.
  ( ali-to-post "ark:$dir/${test}_pass2.ali" ark:- | \
    gmm-est-regtree-mllr --mllr-min-count="$mllr_mincount" $spk2utt_opt \
      "$model" "$adapt_feats" ark:- "$regtree" "ark:$dir/${test}.mllr" ) \
    2>"$dir/mllr_${test}.log"

  # Final decoding pass using the MLLR transforms.
  gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
    --beam=20.0 --word-symbol-table=data/words.txt "$model" "$graphdir/HCLG.fst" \
    "$regtree" "$adapt_feats" "ark:$dir/${test}.mllr" "ark,t:$dir/test_${test}.tra" \
    "ark,t:$dir/test_${test}.ali" 2> "$dir/decode_${test}.log"

  # the ,p option lets it score partial output without dying..
  scripts/sym2int.pl --ignore-first-field data/words.txt "data_prep/test_${test}_trans.txt" | \
    compute-wer --mode=present ark:- "ark,p:$dir/test_${test}.tra" > "$dir/wer_${test}"
 ) &
done

wait

# Pool the error and word counts over all test sets. The d > 0 guard avoids a
# division by zero (a fatal error in gawk) when no WER line was produced, e.g.
# because every decode failed.
grep WER "$dir"/wer_* | \
  awk '{ n = n + $4; d = d + $6 }
       END {
         if (d > 0) {
           printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d);
         } else {
           print "No WER results found; cannot compute average WER." > "/dev/stderr";
           exit 1;
         }
       }' > "$dir/wer"

Просмотреть файл

@ -0,0 +1,74 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation, Saarland University
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# decode_tri_regtree_mllr.sh is as ../decode_tri.sh but estimating MLLR in test,
# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil
# split (no silence weighting).

if [ -f path.sh ]; then . path.sh; fi

srcdir=exp/decode_tri1   # supplies the first-pass alignments (test_*.ali).
dir=exp/decode_tri1_regtree_mllr
mkdir -p "$dir"
model=exp/tri1/final.mdl
occs=exp/tri1/final.occs
tree=exp/tri1/tree
graphdir=exp/graph_tri1
silphones=$(cat data/silphones.csl)

regtree=$dir/regtree
maxleaves=2 # max # of regression-tree leaves.
mincount=5000 # mincount before we add new transform.

gmm-make-regtree --sil-phones="$silphones" --state-occs="$occs" \
  --max-leaves="$maxleaves" "$model" "$regtree" 2>"$dir/make_regtree.out"

scripts/mkgraph.sh "$tree" "$model" "$graphdir"

# Each test set is processed in its own background subshell; "wait" below
# blocks until all of them have finished.
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
 (
  # Comment the two lines below to make this per-utterance.
  # NOTE: $spk2utt_opt and $utt2spk_opt are deliberately left unquoted where
  # they are used, so that they expand to nothing (rather than to an empty
  # argument) when the assignments below are commented out.
  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk

  # To deweight silence, would add the line
  #  weight-silence-post 0.0 $silphones $model ark:- ark:- | \
  # after the line with ali-to-post
  # This is useful if we don't treat silence specially when building regression tree.

  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"

  # Estimate regression-tree MLLR transforms from the first-pass alignments.
  ( ali-to-post "ark:$srcdir/test_${test}.ali" ark:- | \
    gmm-est-regtree-mllr --mllr-min-count="$mincount" $spk2utt_opt \
      "$model" "$feats" ark:- "$regtree" "ark:$dir/${test}.mllr" ) \
    2>"$dir/mllr_${test}.log"

  # Decode using the MLLR transforms.
  gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
    --beam=20.0 --word-symbol-table=data/words.txt "$model" "$graphdir/HCLG.fst" \
    "$regtree" "$feats" "ark:$dir/${test}.mllr" "ark,t:$dir/test_${test}.tra" \
    "ark,t:$dir/test_${test}.ali" 2> "$dir/decode_${test}.log"

  # the ,p option lets it score partial output without dying..
  scripts/sym2int.pl --ignore-first-field data/words.txt "data_prep/test_${test}_trans.txt" | \
    compute-wer --mode=present ark:- "ark,p:$dir/test_${test}.tra" > "$dir/wer_${test}"
 ) &
done

wait

# Pool the error and word counts over all test sets. The d > 0 guard avoids a
# division by zero (a fatal error in gawk) when no WER line was produced, e.g.
# because every decode failed.
grep WER "$dir"/wer_* | \
  awk '{ n = n + $4; d = d + $6 }
       END {
         if (d > 0) {
           printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d);
         } else {
           print "No WER results found; cannot compute average WER." > "/dev/stderr";
           exit 1;
         }
       }' > "$dir/wer"

Просмотреть файл

@ -152,6 +152,7 @@ void DecodableAmDiagGmmRegtreeMllr::InitCache() {
xformed_mean_invvars_.resize(num_pdfs);
xformed_gconsts_.resize(num_pdfs);
is_cached_.resize(num_pdfs, false);
ResetLogLikeCache();
}
@ -237,7 +238,7 @@ const Vector<BaseFloat>& DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts(
BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame,
int32 state) {
KALDI_ERR << "Function not completely implemented yet.";
// KALDI_ERR << "Function not completely implemented yet.";
KALDI_ASSERT(frame < NumFrames() && frame >= 0);
KALDI_ASSERT(state < NumIndices() && state >= 0);

Просмотреть файл

@ -175,7 +175,8 @@ class DecodableAmDiagGmmRegtreeMllr : public DecodableAmDiagGmmUnmapped {
const RegressionTree &regtree,
BaseFloat scale)
: DecodableAmDiagGmmUnmapped(am, feats), trans_model_(tm), scale_(scale),
mllr_xform_(mllr_xform), regtree_(regtree) { InitCache(); }
mllr_xform_(mllr_xform), regtree_(regtree),
data_squared_(feats.NumCols()) { InitCache(); }
~DecodableAmDiagGmmRegtreeMllr();
// Note, frames are numbered from zero but transition-ids (tid) from one.

Просмотреть файл

@ -33,7 +33,7 @@ int main(int argc, char *argv[]) {
const char *usage =
"Compute MLLR transforms per-utterance (default) or per-speaker for "
"the supplied set of speakers (spk2utt option). Note: writes RegtreeMllrDiagGmm objects\n"
"Usage: gmm-estimate-regtree-fmllr [options] <model-in> <feature-rspecifier> "
"Usage: gmm-estimate-regtree-mllr [options] <model-in> <feature-rspecifier> "
"<posteriors-rspecifier> <regression-tree> <transforms-wspecifier>\n";
ParseOptions po(usage);
@ -135,7 +135,7 @@ int main(int argc, char *argv[]) {
} // end looping over all utterances of the current speaker
BaseFloat objf_impr, t;
mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is "
KALDI_LOG << "MLLR objf improvement for speaker " << spk << " is "
<< (objf_impr/(t+1.0e-10)) << " per frame over " << t
<< " frames.";
tot_objf_impr += objf_impr;
@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
<< "Avg like per frame so far is " << (tot_like / tot_t) << '\n';
BaseFloat objf_impr, t;
mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is "
KALDI_LOG << "MLLR objf improvement for utterance " << key << " is "
<< (objf_impr/(t+1.0e-10)) << " per frame over " << t
<< " frames.";
tot_objf_impr += objf_impr;
@ -192,7 +192,7 @@ int main(int argc, char *argv[]) {
}
}
KALDI_LOG << "Total objf improvement from fMLLR is " << (tot_objf_impr/tot_t_objf)
KALDI_LOG << "Total objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf)
<< " per frame over " << tot_t_objf << " frames.";
KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior
<< " with no posteriors, " << num_other_error << " with other errors.";

Просмотреть файл

@ -147,7 +147,7 @@ void RegtreeMllrDiagGmm::Read(std::istream &in, bool binary) {
&& xform_itr->NumRows() == dim_);
}
ExpectMarker(in, binary, "<BCLASS2XFORMS");
ExpectMarker(in, binary, "<BCLASS2XFORMS>");
ReadIntegerVector(in, binary, &bclass2xforms_);
ExpectMarker(in, binary, "</MLLRXFORM>");
}