diff --git a/egs/rm/s1/steps/decode_tri1_fmllr+regtree_mllr.sh b/egs/rm/s1/steps/decode_tri1_fmllr+regtree_mllr.sh new file mode 100755 index 000000000..ca02faf7b --- /dev/null +++ b/egs/rm/s1/steps/decode_tri1_fmllr+regtree_mllr.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +# Copyright 2010-2011 Microsoft Corporation, Saarland University + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# decode_tri1_fmllr+regtree_mllr.sh is as ../decode_tri.sh but, per test speaker, it first +# estimates fMLLR (silence posteriors weighted by 0.01), re-decodes with the adapted features, +# then estimates regression-tree MLLR (top-level speech/sil split) for the final decode. No SAT. + +if [ -f path.sh ]; then . path.sh; fi +srcdir=exp/decode_tri1 +dir=exp/decode_tri1_fmllr+regtree_mllr +mkdir -p $dir +model=exp/tri1/final.mdl +occs=exp/tri1/final.occs +tree=exp/tri1/tree +graphdir=exp/graph_tri1 +silphones=`cat data/silphones.csl` + +regtree=$dir/regtree +maxleaves=2 # max # of regression-tree leaves. +fmllr_mincount=5000 # mincount before we add new transform. +mllr_mincount=1000 # mincount before we add new transform. +gmm-make-regtree --sil-phones=$silphones --state-occs=$occs \ + --max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out + +scripts/mkgraph.sh $tree $model $graphdir + +for test in mar87 oct87 feb89 oct89 feb91 sep92; do + ( + # Comment the two lines below to make this per-utterance. 
+ spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt + utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk + + feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |" + + ( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \ + weight-silence-post 0.01 $silphones $model ark:- ark:- | \ + gmm-est-fmllr --fmllr-min-count=$fmllr_mincount $spk2utt_opt $model \ + "$feats" ark,o:- ark:$dir/${test}.fmllr ) 2>$dir/fmllr_${test}.log + + adapt_feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |" + + gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 \ + --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \ + "$adapt_feats" ark,t:$dir/${test}_pass2.tra ark,t:$dir/${test}_pass2.ali \ + 2> $dir/pass2_${test}.log + + ( ali-to-post ark:$dir/${test}_pass2.ali ark:- | \ + gmm-est-regtree-mllr --mllr-min-count=$mllr_mincount $spk2utt_opt \ + $model "$adapt_feats" ark:- $regtree ark:$dir/${test}.mllr ) \ + 2>$dir/mllr_${test}.log + + gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \ + --beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \ + $regtree "$adapt_feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \ + ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log + + # the ,p option lets it score partial output without dying.. 
+ + scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \ + compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test} + ) & +done + +wait + +grep WER $dir/wer_* | \ + awk '{n=n+$4; d=d+$6} END{ if (d > 0) { printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); } else { print "Average WER is undefined (no words scored)"; } }' \ + > $dir/wer + diff --git a/egs/rm/s1/steps/decode_tri1_regtree_mllr.sh b/egs/rm/s1/steps/decode_tri1_regtree_mllr.sh new file mode 100755 index 000000000..5e115abb7 --- /dev/null +++ b/egs/rm/s1/steps/decode_tri1_regtree_mllr.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Copyright 2010-2011 Microsoft Corporation, Saarland University + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +# MERCHANTABLITY OR NON-INFRINGEMENT. +# See the Apache 2 License for the specific language governing permissions and +# limitations under the License. + +# decode_tri1_regtree_mllr.sh is as ../decode_tri.sh but estimating MLLR in test, +# per speaker. There is no SAT. Use a regression-tree with top-level speech/sil +# split (no silence weighting). + +if [ -f path.sh ]; then . path.sh; fi +srcdir=exp/decode_tri1 +dir=exp/decode_tri1_regtree_mllr +mkdir -p $dir +model=exp/tri1/final.mdl +occs=exp/tri1/final.occs +tree=exp/tri1/tree +graphdir=exp/graph_tri1 +silphones=`cat data/silphones.csl` + +regtree=$dir/regtree +maxleaves=2 # max # of regression-tree leaves. +mincount=5000 # mincount before we add new transform. 
+gmm-make-regtree --sil-phones=$silphones --state-occs=$occs \ + --max-leaves=$maxleaves $model $regtree 2>$dir/make_regtree.out + +scripts/mkgraph.sh $tree $model $graphdir + +for test in mar87 oct87 feb89 oct89 feb91 sep92; do + ( + # Comment the two lines below to make this per-utterance. + spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt + utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk + + # To deweight silence, would add the line + # weight-silence-post 0.0 $silphones $model ark:- ark:- | \ + # after the line with ali-to-post + # This is useful if we don't treat silence specially when building regression tree. + + feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |" + ( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \ + gmm-est-regtree-mllr --mllr-min-count=$mincount $spk2utt_opt \ + $model "$feats" ark:- $regtree ark:$dir/${test}.mllr ) \ + 2>$dir/mllr_${test}.log + + gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \ + --beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \ + $regtree "$feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \ + ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log + + # the ,p option lets it score partial output without dying.. 
+ + scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \ + compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra > $dir/wer_${test} + ) & +done + +wait + +grep WER $dir/wer_* | \ + awk '{n=n+$4; d=d+$6} END{ if (d > 0) { printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); } else { print "Average WER is undefined (no words scored)"; } }' \ + > $dir/wer + diff --git a/src/decoder/decodable-am-diag-gmm.cc b/src/decoder/decodable-am-diag-gmm.cc index 7e89876a8..555077999 100644 --- a/src/decoder/decodable-am-diag-gmm.cc +++ b/src/decoder/decodable-am-diag-gmm.cc @@ -152,6 +152,7 @@ void DecodableAmDiagGmmRegtreeMllr::InitCache() { xformed_mean_invvars_.resize(num_pdfs); xformed_gconsts_.resize(num_pdfs); is_cached_.resize(num_pdfs, false); + ResetLogLikeCache(); } @@ -237,7 +238,7 @@ const Vector& DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts( BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame, int32 state) { - KALDI_ERR << "Function not completely implemented yet."; +// KALDI_ERR << "Function not completely implemented yet."; KALDI_ASSERT(frame < NumFrames() && frame >= 0); KALDI_ASSERT(state < NumIndices() && state >= 0); diff --git a/src/decoder/decodable-am-diag-gmm.h b/src/decoder/decodable-am-diag-gmm.h index 2cc5701d5..0f543c9db 100644 --- a/src/decoder/decodable-am-diag-gmm.h +++ b/src/decoder/decodable-am-diag-gmm.h @@ -175,7 +175,8 @@ class DecodableAmDiagGmmRegtreeMllr : public DecodableAmDiagGmmUnmapped { const RegressionTree &regtree, BaseFloat scale) : DecodableAmDiagGmmUnmapped(am, feats), trans_model_(tm), scale_(scale), - mllr_xform_(mllr_xform), regtree_(regtree) { InitCache(); } + mllr_xform_(mllr_xform), regtree_(regtree), + data_squared_(feats.NumCols()) { InitCache(); } ~DecodableAmDiagGmmRegtreeMllr(); // Note, frames are numbered from zero but transition-ids (tid) from one. 
diff --git a/src/gmmbin/gmm-est-regtree-mllr.cc b/src/gmmbin/gmm-est-regtree-mllr.cc index a2440c5e8..a7c925867 100644 --- a/src/gmmbin/gmm-est-regtree-mllr.cc +++ b/src/gmmbin/gmm-est-regtree-mllr.cc @@ -33,7 +33,7 @@ int main(int argc, char *argv[]) { const char *usage = "Compute MLLR transforms per-utterance (default) or per-speaker for " "the supplied set of speakers (spk2utt option). Note: writes RegtreeMllrDiagGmm objects\n" - "Usage: gmm-estimate-regtree-fmllr [options] " + "Usage: gmm-est-regtree-mllr [options] " " \n"; ParseOptions po(usage); @@ -135,7 +135,7 @@ int main(int argc, char *argv[]) { } // end looping over all utterances of the current speaker BaseFloat objf_impr, t; mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is " + KALDI_LOG << "MLLR objf improvement for speaker " << spk << " is " << (objf_impr/(t+1.0e-10)) << " per frame over " << t << " frames."; tot_objf_impr += objf_impr; @@ -183,7 +183,7 @@ int main(int argc, char *argv[]) { << "Avg like per frame so far is " << (tot_like / tot_t) << '\n'; BaseFloat objf_impr, t; mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t); - KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is " + KALDI_LOG << "MLLR objf improvement for utterance " << key << " is " << (objf_impr/(t+1.0e-10)) << " per frame over " << t << " frames."; tot_objf_impr += objf_impr; @@ -192,7 +192,7 @@ int main(int argc, char *argv[]) { } } - KALDI_LOG << "Total objf improvement from fMLLR is " << (tot_objf_impr/tot_t_objf) + KALDI_LOG << "Total objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf) << " per frame over " << tot_t_objf << " frames."; KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior << " with no posteriors, " << num_other_error << " with other errors."; diff --git a/src/transform/regtree-mllr-diag-gmm.cc b/src/transform/regtree-mllr-diag-gmm.cc index 8cf038a20..20ee97f1f 100644 --- 
a/src/transform/regtree-mllr-diag-gmm.cc +++ b/src/transform/regtree-mllr-diag-gmm.cc @@ -147,7 +147,7 @@ void RegtreeMllrDiagGmm::Read(std::istream &in, bool binary) { && xform_itr->NumRows() == dim_); } - ExpectMarker(in, binary, ""); ReadIntegerVector(in, binary, &bclass2xforms_); ExpectMarker(in, binary, ""); }