Adding MLLR to RM recipe.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@96 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2011-06-21 06:53:48 +00:00 · 2011-06-21 06:53:48 +00:00 · 8049135d15
--- a/egs/rm/s1/steps/decode_tri1_fmllr+regtree_mllr.sh
+++ b/egs/rm/s1/steps/decode_tri1_fmllr+regtree_mllr.sh
@ -0,0 +1,83 @@
 #!/bin/bash
 # Copyright 2010-2011 	Microsoft Corporation,  Saarland University
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #  http://www.apache.org/licenses/LICENSE-2.0
 #
 # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 # MERCHANTABLITY OR NON-INFRINGEMENT.
 # See the Apache 2 License for the specific language governing permissions and
 # limitations under the License.
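 # decode_tri1_fmllr+regtree_mllr.sh is as ../decode_tri.sh but estimating fMLLR
 # and then regression-tree MLLR in test, per speaker.  There is no SAT.  The fMLLR
 # stats down-weight silence (weight 0.01); the MLLR uses a regression-tree with
 # top-level speech/sil split (no silence weighting).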
 # Pull in the environment setup from path.sh when one is present.
 if test -f path.sh; then
   source path.sh
 fi
 # Directory with the alignments of the earlier decode, and this run's output directory.
 srcdir=exp/decode_tri1
 dir=exp/decode_tri1_fmllr+regtree_mllr
 mkdir -p ${dir}
 # Tree, model, state occupancies and decoding-graph directory.
 tree=exp/tri1/tree
 model=exp/tri1/final.mdl
 occs=exp/tri1/final.occs
 graphdir=exp/graph_tri1
 # Silence phones, as stored in data/silphones.csl.
 silphones=$(cat data/silphones.csl)
 # Where the regression tree is written, and its size limit.
 regtree=${dir}/regtree
 maxleaves=2            # upper bound on the number of regression-tree leaves
 # Minimum counts before a new transform is added (fMLLR and MLLR respectively).
 fmllr_mincount=5000
 mllr_mincount=1000
 # Build the regression tree; stderr is kept in make_regtree.out.
 gmm-make-regtree \
   --sil-phones=${silphones} \
   --state-occs=${occs} \
   --max-leaves=${maxleaves} \
   ${model} ${regtree} 2>${dir}/make_regtree.out
 # Run mkgraph.sh for this tree/model into the graph directory.
 scripts/mkgraph.sh ${tree} ${model} ${graphdir}
 # Each test set is handled in its own background subshell; the `wait` after the
 # loop blocks until all of them have finished.
 for test in mar87 oct87 feb89 oct89 feb91 sep92; do
 (
  # Comment the two lines below to make this per-utterance.
  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
  # Unadapted features: add-deltas applied to the features listed in the test-set scp.
  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
  # Step 1: estimate fMLLR transforms from the alignments in $srcdir, passing the
  # posteriors through weight-silence-post with weight 0.01.  Transforms are
  # written to ${test}.fmllr; stderr of the whole pipeline goes to fmllr_${test}.log.
  ( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
    weight-silence-post 0.01 $silphones $model ark:- ark:- | \
    gmm-est-fmllr --fmllr-min-count=$fmllr_mincount $spk2utt_opt $model \
     "$feats" ark,o:- ark:$dir/${test}.fmllr ) 2>$dir/fmllr_${test}.log
  # Adapted features: the same delta features piped through transform-feats with
  # the fMLLR transforms estimated above.
  adapt_feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/${test}.fmllr ark:- ark:- |"
  # Step 2: decode on the adapted features; this produces the "pass2"
  # transcripts and alignments used for MLLR estimation below.
  gmm-decode-faster --beam=20.0 --acoustic-scale=0.08333 \
    --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
    "$adapt_feats" ark,t:$dir/${test}_pass2.tra ark,t:$dir/${test}_pass2.ali \
    2> $dir/pass2_${test}.log
  # Step 3: estimate regression-tree MLLR transforms from the pass2 alignments,
  # on the adapted features (no silence weighting is applied here).
  ( ali-to-post ark:$dir/${test}_pass2.ali ark:- | \
    gmm-est-regtree-mllr --mllr-min-count=$mllr_mincount $spk2utt_opt \
      $model "$adapt_feats" ark:- $regtree ark:$dir/${test}.mllr ) \
      2>$dir/mllr_${test}.log
  # Step 4: final decode using the MLLR transforms on top of the adapted features.
  gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
    --beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
    $regtree "$adapt_feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \
    ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
  # Score the final transcripts against the reference transcriptions (mapped to
  # integer ids by sym2int.pl); the result is written to wer_${test}.
  # the ,p option lets it score partial output without dying..
  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
 ) &
 done
 wait
 # Pool the per-test-set results into $dir/wer.  Fields 4 and 6 of every "WER"
 # line printed by grep are summed into n and d (presumably the error and
 # reference-word counts of compute-wer's output -- confirm against its format).
 # If nothing was summed (e.g. every decode failed, or no wer_* file exists),
 # d is 0; report that on stderr and exit non-zero instead of dividing by zero.
 grep WER $dir/wer_* | \
  awk '{ n = n + $4; d = d + $6 }
       END {
         if (d > 0) {
           printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d);
         } else {
           print "No WER lines found in per-test results; average WER not computed" | "cat 1>&2";
           exit 1;
         }
       }' \
   > $dir/wer
--- a/egs/rm/s1/steps/decode_tri1_regtree_mllr.sh
+++ b/egs/rm/s1/steps/decode_tri1_regtree_mllr.sh
@ -0,0 +1,74 @@
 #!/bin/bash
 # Copyright 2010-2011 	Microsoft Corporation,  Saarland University
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #  http://www.apache.org/licenses/LICENSE-2.0
 #
 # THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 # WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 # MERCHANTABLITY OR NON-INFRINGEMENT.
 # See the Apache 2 License for the specific language governing permissions and
 # limitations under the License.
 # decode_tri1_regtree_mllr.sh is as ../decode_tri.sh but estimating MLLR in test,
 # per speaker.  There is no SAT.  Use a regression-tree with top-level speech/sil
 # split (no silence weighting).
 # Pull in the environment setup from path.sh when one is present.
 if test -f path.sh; then
   source path.sh
 fi
 # Directory with the alignments of the earlier decode, and this run's output directory.
 srcdir=exp/decode_tri1
 dir=exp/decode_tri1_regtree_mllr
 mkdir -p ${dir}
 # Tree, model, state occupancies and decoding-graph directory.
 tree=exp/tri1/tree
 model=exp/tri1/final.mdl
 occs=exp/tri1/final.occs
 graphdir=exp/graph_tri1
 # Silence phones, as stored in data/silphones.csl.
 silphones=$(cat data/silphones.csl)
 # Where the regression tree is written, and its size limit.
 regtree=${dir}/regtree
 maxleaves=2            # upper bound on the number of regression-tree leaves
 mincount=5000          # minimum count before a new transform is added
 # Build the regression tree; stderr is kept in make_regtree.out.
 gmm-make-regtree \
   --sil-phones=${silphones} \
   --state-occs=${occs} \
   --max-leaves=${maxleaves} \
   ${model} ${regtree} 2>${dir}/make_regtree.out
 # Run mkgraph.sh for this tree/model into the graph directory.
 scripts/mkgraph.sh ${tree} ${model} ${graphdir}
 # Each test set is handled in its own background subshell; the `wait` after the
 # loop blocks until all of them have finished.
 for test in mar87 oct87 feb89 oct89 feb91 sep92; do
 (
  # Comment the two lines below to make this per-utterance.
  spk2utt_opt=--spk2utt=ark:data/test_${test}.spk2utt
  utt2spk_opt=--utt2spk=ark:data/test_${test}.utt2spk
  # To deweight silence, would add the line
  #   weight-silence-post 0.0 $silphones $model ark:- ark:- | \
  # after the line with ali-to-post
  # This is useful if we don't treat silence specially when building regression tree.
  # Features: add-deltas applied to the features listed in the test-set scp.
  feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
  # Step 1: estimate regression-tree MLLR transforms from the alignments in
  # $srcdir; transforms go to ${test}.mllr, stderr to mllr_${test}.log.
  ( ali-to-post ark:$srcdir/test_${test}.ali ark:- | \
    gmm-est-regtree-mllr --mllr-min-count=$mincount $spk2utt_opt \
      $model "$feats" ark:- $regtree ark:$dir/${test}.mllr ) \
      2>$dir/mllr_${test}.log
  # Step 2: decode with the estimated MLLR transforms, writing text-mode
  # transcripts and alignments for this test set.
  gmm-decode-faster-regtree-mllr $utt2spk_opt --acoustic-scale=0.08333 \
    --beam=20.0 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst \
    $regtree "$feats" ark:$dir/${test}.mllr ark,t:$dir/test_${test}.tra \
    ark,t:$dir/test_${test}.ali  2> $dir/decode_${test}.log
  # Score the transcripts against the reference transcriptions (mapped to
  # integer ids by sym2int.pl); the result is written to wer_${test}.
  # the ,p option lets it score partial output without dying..
  scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
    compute-wer --mode=present ark:-  ark,p:$dir/test_${test}.tra > $dir/wer_${test}
 ) &
 done
 wait
 # Pool the per-test-set results into $dir/wer.  Fields 4 and 6 of every "WER"
 # line printed by grep are summed into n and d (presumably the error and
 # reference-word counts of compute-wer's output -- confirm against its format).
 # If nothing was summed (e.g. every decode failed, or no wer_* file exists),
 # d is 0; report that on stderr and exit non-zero instead of dividing by zero.
 grep WER $dir/wer_* | \
  awk '{ n = n + $4; d = d + $6 }
       END {
         if (d > 0) {
           printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d);
         } else {
           print "No WER lines found in per-test results; average WER not computed" | "cat 1>&2";
           exit 1;
         }
       }' \
   > $dir/wer
--- a/src/decoder/decodable-am-diag-gmm.cc
+++ b/src/decoder/decodable-am-diag-gmm.cc
@ -152,6 +152,7 @@ void DecodableAmDiagGmmRegtreeMllr::InitCache() {
  xformed_mean_invvars_.resize(num_pdfs);
  xformed_gconsts_.resize(num_pdfs);
  is_cached_.resize(num_pdfs, false);
  ResetLogLikeCache();
 }
@ -237,7 +238,7 @@ const Vector<BaseFloat>& DecodableAmDiagGmmRegtreeMllr::GetXformedGconsts(
 BaseFloat DecodableAmDiagGmmRegtreeMllr::LogLikelihoodZeroBased(int32 frame,
                                                                int32 state) {
-  KALDI_ERR << "Function not completely implemented yet.";
+//  KALDI_ERR << "Function not completely implemented yet.";
  KALDI_ASSERT(frame < NumFrames() && frame >= 0);
  KALDI_ASSERT(state < NumIndices() && state >= 0);
--- a/src/decoder/decodable-am-diag-gmm.h
+++ b/src/decoder/decodable-am-diag-gmm.h
@ -175,7 +175,8 @@ class DecodableAmDiagGmmRegtreeMllr : public DecodableAmDiagGmmUnmapped {
                                const RegressionTree &regtree,
                                BaseFloat scale)
      : DecodableAmDiagGmmUnmapped(am, feats), trans_model_(tm), scale_(scale),
-        mllr_xform_(mllr_xform), regtree_(regtree) { InitCache(); }
+        mllr_xform_(mllr_xform), regtree_(regtree),
        data_squared_(feats.NumCols()) { InitCache(); }
  ~DecodableAmDiagGmmRegtreeMllr();
  // Note, frames are numbered from zero but transition-ids (tid) from one.
--- a/src/gmmbin/gmm-est-regtree-mllr.cc
+++ b/src/gmmbin/gmm-est-regtree-mllr.cc
@ -33,7 +33,7 @@ int main(int argc, char *argv[]) {
    const char *usage =
        "Compute MLLR transforms per-utterance (default) or per-speaker for "
        "the supplied set of speakers (spk2utt option).  Note: writes RegtreeMllrDiagGmm objects\n"
-        "Usage: gmm-estimate-regtree-fmllr  [options] <model-in> <feature-rspecifier> "
+        "Usage: gmm-estimate-regtree-mllr  [options] <model-in> <feature-rspecifier> "
        "<posteriors-rspecifier> <regression-tree> <transforms-wspecifier>\n";
    ParseOptions po(usage);
@ -135,7 +135,7 @@ int main(int argc, char *argv[]) {
        }  // end looping over all utterances of the current speaker
        BaseFloat objf_impr, t;
        mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
-        KALDI_LOG << "fMLLR objf improvement for speaker " << spk << " is "
+        KALDI_LOG << "MLLR objf improvement for speaker " << spk << " is "
                  << (objf_impr/(t+1.0e-10)) << " per frame over " << t
                  << " frames.";
        tot_objf_impr += objf_impr;
@ -183,7 +183,7 @@ int main(int argc, char *argv[]) {
            << "Avg like per frame so far is " << (tot_like / tot_t) << '\n';
        BaseFloat objf_impr, t;
        mllr_accs.Update(regtree, opts, &mllr_xforms, &objf_impr, &t);
-        KALDI_LOG << "fMLLR objf improvement for utterance " << key << " is "
+        KALDI_LOG << "MLLR objf improvement for utterance " << key << " is "
                  << (objf_impr/(t+1.0e-10)) << " per frame over " << t
                  << " frames.";
        tot_objf_impr += objf_impr;
@ -192,7 +192,7 @@ int main(int argc, char *argv[]) {
      }
    }
-    KALDI_LOG << "Total objf improvement from fMLLR is " << (tot_objf_impr/tot_t_objf)
+    KALDI_LOG << "Total objf improvement from MLLR is " << (tot_objf_impr/tot_t_objf)
              << " per frame over " << tot_t_objf << " frames.";
    KALDI_LOG << "Done " << num_done << " files, " << num_no_posterior
        << " with no posteriors, " << num_other_error << " with other errors.";
--- a/src/transform/regtree-mllr-diag-gmm.cc
+++ b/src/transform/regtree-mllr-diag-gmm.cc
@ -147,7 +147,7 @@ void RegtreeMllrDiagGmm::Read(std::istream &in, bool binary) {
                 && xform_itr->NumRows() == dim_);
  }
-  ExpectMarker(in, binary, "<BCLASS2XFORMS");
+  ExpectMarker(in, binary, "<BCLASS2XFORMS>");
  ReadIntegerVector(in, binary, &bclass2xforms_);
  ExpectMarker(in, binary, "</MLLRXFORM>");
 }