зеркало из https://github.com/mozilla/kaldi.git
Adding MLLR to RM recipe.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@96 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
295f5c39af
Коммит
8049135d15
|
@ -0,0 +1,83 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation, Saarland University
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script is as ../decode_tri.sh but estimating fMLLR and then regression-tree MLLR in test,
|
||||
# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil
|
||||
# split (silence is down-weighted to 0.01 when estimating fMLLR; no silence weighting for MLLR).
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri1
|
||||
dir=exp/decode_tri1_fmllr+regtree_mllr
|
||||
mkdir -p $dir
|
||||
model=exp/tri1/final.mdl
|
||||
occs=exp/tri1/final.occs
|
||||
tree=exp/tri1/tree
|
||||
graphdir=exp/graph_tri1
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
regtree=$dir/regtree
|
||||
maxleaves=2 # max # of regression-tree leaves.
|
||||
fmllr_mincount=5000 # mincount before we add new transform.
|
||||
mllr_mincount=1000 # mincount before we add new transform.
|
||||
gmm-make-regtree --sil-phones=$silphones --state-occs=$occs \
|
||||
--max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment out the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
|
||||
( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
|
||||
gmm-est-fmllr --fmllr-min-count=$fmllr_mincount $spk2utt_opt $model \
|
||||
"$feats" ark,o:- ark:$dir/${test}.fmllr ) 2>$dir/fmllr_${test}.log
|
||||
|
||||
adapt_feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
|
||||
|
||||
gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 \
|
||||
--word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
|
||||
"$adapt_feats" ark,t:$dir/${test}_pass2.tra ark,t:$dir/${test}_pass2.ali \
|
||||
2> $dir/pass2_${test}.log
|
||||
|
||||
( ali-to-post ark:$dir/${test}_pass2.ali ark:- | \
|
||||
gmm-est-regtree-mllr --mllr-min-count=$mllr_mincount $spk2utt_opt \
|
||||
$model "$adapt_feats" ark:- $regtree ark:$dir/${test}.mllr ) \
|
||||
2>$dir/mllr_${test}.log
|
||||
|
||||
gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
|
||||
--beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
|
||||
$regtree "$adapt_feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \
|
||||
ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Copyright 2010-2011 Microsoft Corporation, Saarland University
|
||||
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
# MERCHANTABILITY OR NON-INFRINGEMENT.
|
||||
# See the Apache 2 License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# decode_tri_regtree_mllr.sh is as ../decode_tri.sh but estimating MLLR in test,
|
||||
# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil
|
||||
# split (no silence weighting).
|
||||
|
||||
if [ -f path.sh ]; then . path.sh; fi
|
||||
srcdir=exp/decode_tri1
|
||||
dir=exp/decode_tri1_regtree_mllr
|
||||
mkdir -p $dir
|
||||
model=exp/tri1/final.mdl
|
||||
occs=exp/tri1/final.occs
|
||||
tree=exp/tri1/tree
|
||||
graphdir=exp/graph_tri1
|
||||
silphones=`cat data/silphones.csl`
|
||||
|
||||
regtree=$dir/regtree
|
||||
maxleaves=2 # max # of regression-tree leaves.
|
||||
mincount=5000 # mincount before we add new transform.
|
||||
gmm-make-regtree --sil-phones=$silphones --state-occs=$occs \
|
||||
--max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out
|
||||
|
||||
scripts/mkgraph.sh $tree $model $graphdir
|
||||
|
||||
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
|
||||
(
|
||||
# Comment out the two lines below to make this per-utterance.
|
||||
spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
|
||||
utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
|
||||
|
||||
# To deweight silence, would add the line
|
||||
# weight-silence-post 0.0 $silphones $model ark:- ark:- | \
|
||||
# after the line with ali-to-post
|
||||
# This is useful if we don't treat silence specially when building regression tree.
|
||||
|
||||
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
|
||||
( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
|
||||
gmm-est-regtree-mllr --mllr-min-count=$mincount $spk2utt_opt \
|
||||
$model "$feats" ark:- $regtree ark:$dir/${test}.mllr ) \
|
||||
2>$dir/mllr_${test}.log
|
||||
|
||||
gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
|
||||
--beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
|
||||
$regtree "$feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \
|
||||
ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
|
||||
|
||||
# the ,p option lets it score partial output without dying..
|
||||
|
||||
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
|
||||
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test}
|
||||
) &
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
grep WER $dir/wer_* | \
|
||||
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
|
||||
> $dir/wer
|
||||
|
|
@ -152,6 +152,7 @@ void DecodableAmDiagGmmRegtreeMllr::InitCache() {
|
|||
xformed_mean_invvars_.resize(num_pdfs);
|
||||
xformed_gconsts_.resize(num_pdfs);
|
||||
is_cached_.resize(num_pdfs, false);
|
||||
ResetLogLikeCache();
|
||||
}
|
||||
|
||||
|
||||
|
@ -237,7 +238,7 @@ const Vector<BaseFloat>& DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts(
|
|||
|
||||
BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame,
|
||||
int32 state) {
|
||||
KALDI_ERR << "Function not completely implemented yet.";
|
||||
// KALDI_ERR << "Function not completely implemented yet.";
|
||||
KALDI_ASSERT(frame < NumFrames() && frame >= 0);
|
||||
KALDI_ASSERT(state < NumIndices() && state >= 0);
|
||||
|
||||
|
|
|
@ -175,7 +175,8 @@ class DecodableAmDiagGmmRegtreeMllr : public DecodableAmDiagGmmUnmapped {
|
|||
const RegressionTree &regtree,
|
||||
BaseFloat scale)
|
||||
: DecodableAmDiagGmmUnmapped(am, feats), trans_model_(tm), scale_(scale),
|
||||
mllr_xform_(mllr_xform), regtree_(regtree) { InitCache(); }
|
||||
mllr_xform_(mllr_xform), regtree_(regtree),
|
||||
data_squared_(feats.NumCols()) { InitCache(); }
|
||||
~DecodableAmDiagGmmRegtreeMllr();
|
||||
|
||||
// Note, frames are numbered from zero but transition-ids (tid) from one.
|
||||
|
|
|
@ -33,7 +33,7 @@ int main(int argc, char *argv[]) {
|
|||
const char *usage =
|
||||
"Compute MLLR transforms per-utterance (default) or per-speaker for "
|
||||
"the supplied set of speakers (spk2utt option). Note: writes RegtreeMllrDiagGmm objects\n"
|
||||
"Usage: gmm-estimate-regtree-fmllr [options] <model-in> <feature-rspecifier> "
|
||||
"Usage: gmm-estimate-regtree-mllr [options] <model-in> <feature-rspecifier> "
|
||||
"<posteriors-rspecifier> <regression-tree> <transforms-wspecifier>\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
@ -135,7 +135,7 @@ int main(int argc, char *argv[]) {
|
|||
} // end looping over all utterances of the current speaker
|
||||
BaseFloat objf_impr, t;
|
||||
mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
|
||||
KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is "
|
||||
KALDI_LOG << "MLLR objf improvement for speaker " << spk << " is "
|
||||
<< (objf_impr/(t+1.0e-10)) << " per frame over " << t
|
||||
<< " frames.";
|
||||
tot_objf_impr += objf_impr;
|
||||
|
@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
|
|||
<< "Avg like per frame so far is " << (tot_like / tot_t) << '\n';
|
||||
BaseFloat objf_impr, t;
|
||||
mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
|
||||
KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is "
|
||||
KALDI_LOG << "MLLR objf improvement for utterance " << key << " is "
|
||||
<< (objf_impr/(t+1.0e-10)) << " per frame over " << t
|
||||
<< " frames.";
|
||||
tot_objf_impr += objf_impr;
|
||||
|
@ -192,7 +192,7 @@ int main(int argc, char *argv[]) {
|
|||
}
|
||||
}
|
||||
|
||||
KALDI_LOG << "Total objf improvement from fMLLR is " << (tot_objf_impr/tot_t_objf)
|
||||
KALDI_LOG << "Total objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf)
|
||||
<< " per frame over " << tot_t_objf << " frames.";
|
||||
KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior
|
||||
<< " with no posteriors, " << num_other_error << " with other errors.";
|
||||
|
|
|
@ -147,7 +147,7 @@ void RegtreeMllrDiagGmm::Read(std::istream &in, bool binary) {
|
|||
&& xform_itr->NumRows() == dim_);
|
||||
}
|
||||
|
||||
ExpectMarker(in, binary, "<BCLASS2XFORMS");
|
||||
ExpectMarker(in, binary, "<BCLASS2XFORMS>");
|
||||
ReadIntegerVector(in, binary, &bclass2xforms_);
|
||||
ExpectMarker(in, binary, "</MLLRXFORM>");
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче