Finishing the scripts for the ASRU papers.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@119 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2011-07-04 22:23:02 +00:00
Родитель b4bd583a07
Коммит a70d3f856b
25 изменённых файлов: 679 добавлений и 582 удалений

Просмотреть файл

@ -59,13 +59,16 @@ exp/decode_sgmma_fmllrbasis_utt/wer:Average WER is 3.191574 (400 / 12533)
# sgmmb is SGMM with speaker vectors.
exp/decode_sgmmb/wer:Average WER is 2.760712 (346 / 12533)
exp/decode_sgmmb_fmllr/wer:Average WER is 2.585175 (324 / 12533)
exp/decode_sgmmb_utt/wer:Average WER is 2.808585 (352 / 12533)
exp/decode_sgmmb/wer:Average WER is 2.760712 (346 / 12533)
# sgmmc is like sgmmb but with gender dependency
exp/decode_sgmmc/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.457512 (308 / 12533)
# "norm" is normalizing weights per gender..
exp/decode_sgmmc_norm/wer:Average WER is 2.696880 (338 / 12533)
exp/decode_sgmmc_fmllr_norm/wer:Average WER is 2.425596 (304 / 12533)
# sgmmc is like sgmmb but with gender dependency [doesn't help here]
exp/decode_sgmmc/wer:Average WER is 2.776670 (348 / 12533)
exp/decode_sgmmc_fmllr/wer:Average WER is 2.601133 (326 / 12533)

Просмотреть файл

@ -40,7 +40,7 @@ preselectmap=exp/ubmb/preselect.map
mincount=1000 # min occupancy to extimate fMLLR transform
iters=10 # number of iters of fMLLR estimation
if [ ! -f $fmllr_model ]; then
if [ ! -f $fmllr_model -o $model -nt $fmllr_model ]; then
if [ ! -f $model ]; then
echo "Cannot find $model. Maybe training didn't finish?"
exit 1;

Просмотреть файл

@ -0,0 +1,114 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation, Arnab Ghoshal
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# SGMM decoding with adaptation.
#
# SGMM decoding; use a different acoustic scale from normal (0.1 vs 0.08333)
# (1) decode with "alignment model"
# (2) get GMM posteriors with "alignment model" and estimate speaker
# vectors with final model
# (3) decode with final model.
# (4) get GMM posteriors from this decoded output and estimate fMLLR transforms
# with this final model
# (5) decode with the final model using both the speaker vectors and fMLLR
if [ -f path.sh ]; then . path.sh; fi
dir=exp/decode_sgmmc_fmllr_norm
tree=exp/sgmmc/tree
occs=exp/sgmmc/final.occs
modelin=exp/sgmmc/final.mdl
alimodelin=exp/sgmmc/final.alimdl
model=exp/sgmmc/final.mdl.norm
alimodel=exp/sgmmc/final.alimdl.norm
fmllr_model=exp/sgmmc/final_fmllr.mdl.norm
graphdir=exp/graph_sgmmc
silphonelist=`cat data/silphones.csl`
preselectmap=exp/ubmb/preselect.map
mincount=1000 # min occupancy to estimate fMLLR transform
iters=10 # number of iters of fMLLR estimation
mkdir -p $dir
# Create the weight-normalized models and the fMLLR pre-transform model.
sgmm-normalize $modelin ark:$preselectmap $model 2>$dir/normalize.log
sgmm-normalize $alimodelin ark:$preselectmap $alimodel 2>>$dir/normalize.log
sgmm-comp-prexform $model $occs $fmllr_model 2>$dir/prexform.log
scripts/mkgraph.sh $tree $model $graphdir
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
spk2utt_opt="--spk2utt=ark:data/test_${test}.spk2utt"
utt2spk_opt="--utt2spk=ark:data/test_${test}.utt2spk"
# Map utterance -> gender -> preselected UBM Gaussian subset.
scripts/compose_maps.pl data/test_${test}.utt2spk data/spk2gender.map | \
scripts/compose_maps.pl - $preselectmap | \
gzip -c > $dir/preselect_${test}.gz
# Note: the log must be per test set; these subshells run in parallel,
# so a single shared gselect.log would be clobbered.
sgmm-gselect "--preselect=ark:gunzip -c $dir/preselect_${test}.gz|" \
$model "$feats" ark,t:- 2>$dir/gselect_${test}.log | \
gzip -c > $dir/${test}_gselect.gz || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/${test}_gselect.gz|"
# Use smaller beam for the first pass decoding.
sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $alimodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pass1.tra ark,t:$dir/test_${test}.pass1.ali 2> $dir/pass1_${test}.log
# Estimate the speaker vectors
( ali-to-post ark:$dir/test_${test}.pass1.ali ark:- | \
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
ark:$dir/test_${test}.vecs1 ) 2>$dir/vecs1_${test}.log
( ali-to-post ark:$dir/test_${test}.pass1.ali ark:- | \
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
sgmm-est-spkvecs "$gselect_opt" --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
# Second-pass decoding with the speaker vectors.
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs2 --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pass2.tra ark,t:$dir/test_${test}.pass2.ali 2> $dir/pass2_${test}.log
# Estimate the fMLLR transforms.
( ali-to-post ark:$dir/test_${test}.pass2.ali ark:- | \
weight-silence-post 0.01 $silphonelist $model ark:- ark:- | \
sgmm-post-to-gpost --spk-vecs=ark:$dir/test_${test}.vecs2 $utt2spk_opt \
"$gselect_opt" $model "$feats" ark,s,cs:- ark:- | \
sgmm-est-fmllr-gpost --fmllr-iters=$iters --fmllr-min-count=$mincount \
--spk-vecs=ark:$dir/test_${test}.vecs2 "$spk2utt_opt" $fmllr_model \
"$feats" ark,s,cs:- ark:$dir/test_${test}.fmllr ) \
2>$dir/est_fmllr_${test}.log
adapt_feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- | transform-feats $utt2spk_opt ark:$dir/test_${test}.fmllr ark:- ark:- |"
# Now decode with fMLLR-adapted features. Gaussian selection is also done
# with the adapted features. This causes a small improvement in WER on RM.
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt --spk-vecs=ark:$dir/test_${test}.vecs2 $fmllr_model $graphdir/HCLG.fst "$adapt_feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
) &
done
wait
# Pool numerator/denominator error counts over all test sets.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -0,0 +1,85 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# SGMM decoding with adaptation [with gender-dependent UBM].
#
# SGMM decoding; use a different acoustic scale from normal (0.1 vs 0.08333)
# (1) decode with "alignment model"
# (2) get GMM posteriors with "alignment model" and estimate speaker
# vectors with final model
# (3) decode with final model.
if [ -f path.sh ]; then . path.sh; fi
dir=exp/decode_sgmmc_norm
tree=exp/sgmmc/tree
modelin=exp/sgmmc/final.mdl
alimodelin=exp/sgmmc/final.alimdl
model=exp/sgmmc/final.mdl.norm
alimodel=exp/sgmmc/final.alimdl.norm
graphdir=exp/graph_sgmmc
silphonelist=`cat data/silphones.csl`
preselectmap=exp/ubmb/preselect.map
mkdir -p $dir
# Create the weight-normalized versions of the model and alignment model.
sgmm-normalize $modelin ark:$preselectmap $model 2>$dir/normalize.log
sgmm-normalize $alimodelin ark:$preselectmap $alimodel 2>>$dir/normalize.log
scripts/mkgraph.sh $tree $model $graphdir
for test in mar87 oct87 feb89 oct89 feb91 sep92; do
(
feats="ark:add-deltas --print-args=false scp:data/test_${test}.scp ark:- |"
spk2utt_opt="--spk2utt=ark:data/test_${test}.spk2utt"
utt2spk_opt="--utt2spk=ark:data/test_${test}.utt2spk"
# Map utterance -> gender -> preselected UBM Gaussian subset.
scripts/compose_maps.pl data/test_${test}.utt2spk data/spk2gender.map | \
scripts/compose_maps.pl - $preselectmap | \
gzip -c > $dir/preselect_${test}.gz
# Note: the log must be per test set; these subshells run in parallel,
# so a single shared gselect.log would be clobbered.
sgmm-gselect "--preselect=ark:gunzip -c $dir/preselect_${test}.gz|" \
$model "$feats" ark,t:- 2>$dir/gselect_${test}.log | \
gzip -c > $dir/${test}_gselect.gz || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/${test}_gselect.gz|"
# Use smaller beam first time.
sgmm-decode-faster "$gselect_opt" --beam=15.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $alimodel $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.pre_tra ark,t:$dir/test_${test}.pre_ali 2> $dir/predecode_${test}.log
# Estimate the speaker vectors (two passes).
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
ark:$dir/test_${test}.vecs1 ) 2>$dir/vecs1_${test}.log
( ali-to-post ark:$dir/test_${test}.pre_ali ark:- | \
weight-silence-post 0.01 $silphonelist $alimodel ark:- ark:- | \
sgmm-est-spkvecs "$gselect_opt" --spk-vecs=ark:$dir/test_${test}.vecs1 $spk2utt_opt \
$model "$feats" ark,s,cs:- ark:$dir/test_${test}.vecs2 ) 2>$dir/vecs2_${test}.log
# Final decoding pass with the speaker vectors.
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/test_${test}.vecs2 --beam=20.0 --acoustic-scale=0.1 --word-symbol-table=data/words.txt $model $graphdir/HCLG.fst "$feats" ark,t:$dir/test_${test}.tra ark,t:$dir/test_${test}.ali 2> $dir/decode_${test}.log
# the ,p option lets it score partial output without dying..
scripts/sym2int.pl --ignore-first-field data/words.txt data_prep/test_${test}_trans.txt | \
compute-wer --mode=present ark:- ark,p:$dir/test_${test}.tra >& $dir/wer_${test}
) &
done
wait
# Pool numerator/denominator error counts over all test sets.
grep WER $dir/wer_* | \
awk '{n=n+$4; d=d+$6} END{ printf("Average WER is %f (%d / %d) \n", 100.0*n/d, n, d); }' \
> $dir/wer

Просмотреть файл

@ -16,8 +16,9 @@
# Train gender-dependent UBM from a trained HMM/GMM system.
# Instead of 400 UBM Gaussians, use 250 UBM Gaussians per gender, for
# a total of 500.
# We're aiming for 500 UBM Gaussians total.
# Because RM is unbalanced (55 female, 109 male), we train 200
# UBM Gaussians for female and 300 for male.
if [ -f path.sh ]; then . path.sh; fi
@ -25,13 +26,19 @@ dir=exp/ubmb
mkdir -p $dir
srcdir=exp/tri1
if [ ! -f $dir/0.m.ubm ]; then
init-ubm --intermediate-numcomps=2000 --ubm-numcomps=250 --verbose=2 \
--fullcov-ubm=true $srcdir/final.mdl $srcdir/final.occs \
$dir/0.m.ubm 2> $dir/cluster.log || exit 1;
fi
rm -f $dir/.error
init-ubm --intermediate-numcomps=2000 --ubm-numcomps=300 --verbose=2 \
--fullcov-ubm=true $srcdir/final.mdl $srcdir/final.occs \
$dir/0.m.ubm 2> $dir/cluster.log || touch $dir/.error &
init-ubm --intermediate-numcomps=2000 --ubm-numcomps=200 --verbose=2 \
--fullcov-ubm=true $srcdir/final.mdl $srcdir/final.occs \
$dir/0.f.ubm 2> $dir/cluster.log || touch $dir/.error &
wait;
[ -f $dir/.error ] && echo "Error clustering UBM Gaussians" && exit 1;
cp $dir/0.m.ubm $dir/0.f.ubm
cp data/train.scp $dir/train.scp
scripts/compose_maps.pl data/train.utt2spk data/spk2gender.map | grep -w m | \

Просмотреть файл

@ -82,10 +82,12 @@ system:
[spk;+fmllr] 8.3 11.3 | [per-speaker adaptation; +fMLLR]
sgmm3b 7.8 10.4 | [ SGMM with speaker vectors, on SI-284]
[utt] 7.8 10.4 | [per-utterance adaptation]
[spk;+fmllr] 7.8 10.2 | [per-speaker adaptation, with fMLLR]
sgmm3c 7.7 9.9 | [ as sgmm3b but gender-dep. UBM]
[utt] 7.7 10.1 | [per-utterance adaptation]
[fmllr] 7.7 9.7 | [per-spk, with fMLLR]
[spk;+fmllr] 7.8 10.0 | [per-speaker adaptation, with fMLLR]
sgmm3c 7.5 9.5 | [ as sgmm3b but gender-dep. UBM]
[+norm] 7.5 9.6 | [normalizing weights per gender]
[utt] 7.7 9.6 | [per-utterance adaptation]
[fmllr] 7.6 9.2 | [per-spk, with fMLLR]
[+norm] 7.5 9.3 | [normalizing weights per gender]
# Raw results:
exp/decode_mono_tgpr_eval92/wer:%WER 31.38 [ 1770 / 5641, 108 ins, 386 del, 1276 sub ]
@ -266,14 +268,27 @@ exp/decode_sgmm2b_tgpr_utt_eval93/wer:%WER 13.72 [ 472 / 3439, 60 ins, 68 del, 3
exp/decode_sgmm2b_fmllr_tgpr_eval92/wer:%WER 9.93 [ 560 / 5641, 130 ins, 42 del, 388 sub ]
exp/decode_sgmm2b_fmllr_tgpr_eval93/wer:%WER 13.49 [ 464 / 3439, 54 ins, 72 del, 338 sub ]
exp/decode_sgmm3b_fmllr_tgpr_eval92/wer:%WER 7.36 [ 415 / 5641, 110 ins, 14 del, 291 sub ]
exp/decode_sgmm3b_fmllr_tgpr_eval93/wer:%WER 9.94 [ 342 / 3439, 56 ins, 49 del, 237 sub ]
exp/decode_sgmm3b_tgpr_eval92/wer:%WER 7.68 [ 433 / 5641, 117 ins, 15 del, 301 sub ]
exp/decode_sgmm3b_tgpr_eval93/wer:%WER 10.32 [ 355 / 3439, 58 ins, 55 del, 242 sub ]
exp/decode_sgmm3b_tgpr_utt_eval92/wer:%WER 7.59 [ 428 / 5641, 111 ins, 17 del, 300 sub ]
exp/decode_sgmm3b_tgpr_utt_eval93/wer:%WER 9.94 [ 342 / 3439, 52 ins, 52 del, 238 sub ]
exp/decode_sgmm3b_fmllr_tgpr_eval92/wer:%WER 7.73 [ 436 / 5641, 118 ins, 15 del, 303 sub ]
exp/decode_sgmm3b_fmllr_tgpr_eval93/wer:%WER 10.00 [ 344 / 3439, 57 ins, 47 del, 240 sub ]
exp/decode_sgmm3b_tgpr_eval92/wer:%WER 7.78 [ 439 / 5641, 118 ins, 15 del, 306 sub ]
exp/decode_sgmm3b_tgpr_eval93/wer:%WER 10.35 [ 356 / 3439, 58 ins, 47 del, 251 sub ]
exp/decode_sgmm3b_tgpr_utt_eval92/wer:%WER 7.80 [ 440 / 5641, 119 ins, 13 del, 308 sub ]
exp/decode_sgmm3b_tgpr_utt_eval93/wer:%WER 10.38 [ 357 / 3439, 55 ins, 50 del, 252 sub ]
exp/decode_sgmm3c_fmllr_tgpr_eval92/wer:%WER 7.55 [ 426 / 5641, 111 ins, 14 del, 301 sub ]
exp/decode_sgmm3c_fmllr_tgpr_eval93/wer:%WER 9.16 [ 315 / 3439, 54 ins, 41 del, 220 sub ]
exp/decode_sgmm3c_fmllr_tgpr_norm_eval92/wer:%WER 7.46 [ 421 / 5641, 111 ins, 13 del, 297 sub ]
exp/decode_sgmm3c_fmllr_tgpr_norm_eval93/wer:%WER 9.25 [ 318 / 3439, 54 ins, 41 del, 223 sub ]
exp/decode_sgmm3c_tgpr_eval92/wer:%WER 7.52 [ 424 / 5641, 113 ins, 13 del, 298 sub ]
exp/decode_sgmm3c_tgpr_eval93/wer:%WER 9.51 [ 327 / 3439, 55 ins, 42 del, 230 sub ]
exp/decode_sgmm3c_tgpr_norm_eval92/wer:%WER 7.48 [ 422 / 5641, 111 ins, 14 del, 297 sub ]
exp/decode_sgmm3c_tgpr_norm_eval93/wer:%WER 9.62 [ 331 / 3439, 55 ins, 43 del, 233 sub ]
exp/decode_sgmm3c_tgpr_utt_eval92/wer:%WER 7.69 [ 434 / 5641, 110 ins, 17 del, 307 sub ]
exp/decode_sgmm3c_tgpr_utt_eval93/wer:%WER 9.62 [ 331 / 3439, 55 ins, 46 del, 230 sub ]
################
# Results below this point may be out of date.
===========
# Some notes on tuning the SGMM systems on half the SI-84 data (sgmm2a and sgmm2b).
# We ended up with 400 UBM components, and acwt 1/11 (unadapted) and 1/12 (adapted..

Просмотреть файл

@ -418,6 +418,8 @@ steps/train_sgmm3c.sh || exit 1;
scripts/decode.sh --per-spk exp/decode_sgmm3c_tgpr_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov${year}.scp
scripts/decode.sh exp/decode_sgmm3c_tgpr_utt_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c.sh data/eval_nov${year}.scp
scripts/decode.sh --per-spk exp/decode_sgmm3c_fmllr_tgpr_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_fmllr.sh data/eval_nov${year}.scp
scripts/decode.sh --per-spk exp/decode_sgmm3c_tgpr_norm_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_norm.sh data/eval_nov${year}.scp
scripts/decode.sh --per-spk exp/decode_sgmm3c_fmllr_tgpr_norm_eval${year} exp/graph_sgmm3c_tg_pruned/HCLG.fst steps/decode_sgmm3c_fmllr_norm.sh data/eval_nov${year}.scp
done
)&

Просмотреть файл

@ -0,0 +1,127 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script does the decoding of a single batch of test data (on one core),
# with the gender-normalized SGMM models, speaker vectors and fMLLR.
# It requires arguments. It takes the graphdir and decoding directory,
# and the job number which can actually be any string (even ""); it expects
# a file $decode_dir/${job_number}.scp to exist, and puts its output in
# $decode_dir/${job_number}.tra
if [ $# != 3 ]; then
echo "Usage: steps/decode_sgmm3c_fmllr_norm.sh <graph> <decode-dir> <job-number>"
exit 1;
fi
. path.sh || exit 1;
acwt=0.08333
prebeam=12.0
beam=13.0
max_active=7000
silphones=`cat data/silphones.csl`
model=exp/sgmm3c/final.mdl.norm
occs=exp/sgmm3c/final.occs
alimodel=exp/sgmm3c/final.alimdl.norm
preselectmap=exp/ubm3b/preselect.map
fmllr_model=exp/sgmm3c/final_fmllr.mdl.norm
graph=$1
dir=$2
job=$3
scp=$dir/$job.scp
feats="ark:add-deltas --print-args=false scp:$scp ark:- |"
# Create the fMLLR pre-transform model if it is absent, or stale
# (older than the model it is derived from).
if [ ! -f $fmllr_model -o $model -nt $fmllr_model ]; then
if [ ! -f $model ]; then
echo "Cannot find $model. Maybe training didn't finish?"
exit 1;
fi
sgmm-comp-prexform $model $occs $fmllr_model
fi
filenames="$scp $alimodel $fmllr_model $model $graph data/words.txt"
for file in $filenames; do
if [ ! -f $file ] ; then
echo "No such file $file";
exit 1;
fi
done
if [ -f $dir/$job.spk2utt ]; then
if [ ! -f $dir/$job.utt2spk ]; then
echo "spk2utt but not utt2spk file present!"
exit 1
fi
spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
fi
# Map utterance -> gender -> preselected UBM Gaussian subset, restricted
# to the utterances in this job's scp.
cat data/eval*.utt2spk | \
scripts/compose_maps.pl - data/spk2gender.map | \
scripts/compose_maps.pl - $preselectmap | \
scripts/filter_scp.pl $scp - | \
gzip -c > $dir/preselect.$job.gz
echo running on `hostname` > $dir/decode${job}.log
sgmm-gselect "--preselect=ark:gunzip -c $dir/preselect.$job.gz|" \
$model "$feats" ark,t:- 2>$dir/gselect${job}.log | \
gzip -c > $dir/gselect${job}.gz || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/gselect${job}.gz|"
# First pass: decode with the alignment model (smaller beam).
sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
--acoustic-scale=$acwt \
--word-symbol-table=data/words.txt $alimodel $graph "$feats" \
ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>$dir/predecode${job}.log || exit 1;
# Estimate the speaker vectors (two passes).
( ali-to-post ark:$dir/${job}.pre_ali ark:- | \
weight-silence-post 0.01 $silphones $alimodel ark:- ark:- | \
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
ark:$dir/${job}.vecs1 ) 2>$dir/vecs1${job}.log || exit 1;
( ali-to-post ark:$dir/${job}.pre_ali ark:- | \
weight-silence-post 0.01 $silphones $alimodel ark:- ark:- | \
sgmm-est-spkvecs "$gselect_opt" --spk-vecs=ark,t:$dir/${job}.vecs1 $spk2utt_opt $model \
"$feats" ark,s,cs:- ark:$dir/${job}.vecs2 ) 2>$dir/vecs2.${job}.log || exit 1;
# second pass of decoding: have spk-vecs but not fMLLR
sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
$utt2spk_opt --spk-vecs=ark:$dir/${job}.vecs2 \
--acoustic-scale=$acwt \
--word-symbol-table=data/words.txt $model $graph "$feats" \
ark,t:$dir/$job.pre2_tra ark,t:$dir/$job.pre2_ali 2>$dir/pre2decode${job}.log || exit 1;
# Estimate fMLLR transforms.
( ali-to-post ark:$dir/$job.pre2_ali ark:- | \
weight-silence-post 0.01 $silphones $model ark:- ark:- | \
sgmm-post-to-gpost --spk-vecs=ark:$dir/${job}.vecs2 $utt2spk_opt "$gselect_opt" $model "$feats" ark,s,cs:- ark:- | \
sgmm-est-fmllr-gpost --spk-vecs=ark:$dir/${job}.vecs2 $spk2utt_opt $fmllr_model "$feats" ark,s,cs:- \
ark:$dir/$job.fmllr ) 2>$dir/est_fmllr${job}.log || exit 1;
feats="ark:add-deltas --print-args=false scp:$scp ark:- | transform-feats $utt2spk_opt ark:$dir/$job.fmllr ark:- ark:- |"
# Final pass: decode with fMLLR-adapted features, speaker vectors, full beam.
sgmm-decode-faster "$gselect_opt" $utt2spk_opt --spk-vecs=ark:$dir/${job}.vecs2 \
--beam=$beam --acoustic-scale=$acwt --word-symbol-table=data/words.txt \
$fmllr_model $graph "$feats" \
ark,t:$dir/${job}.tra ark,t:$dir/${job}.ali 2> $dir/decode${job}.log || exit 1;

Просмотреть файл

@ -0,0 +1,100 @@
#!/bin/bash
# Copyright 2010-2011 Microsoft Corporation
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.
# This script does the decoding of a single batch of test data (on one core),
# with the gender-normalized SGMM models and speaker vectors (no fMLLR).
# It requires arguments. It takes the graphdir and decoding directory,
# and the job number which can actually be any string (even ""); it expects
# a file $decode_dir/${job_number}.scp to exist, and puts its output in
# $decode_dir/${job_number}.tra
if [ $# != 3 ]; then
echo "Usage: steps/decode_sgmm3c_norm.sh <graph> <decode-dir> <job-number>"
exit 1;
fi
. path.sh || exit 1;
acwt=0.08333
prebeam=12.0
beam=13.0
max_active=7000
silphones=`cat data/silphones.csl`
model=exp/sgmm3c/final.mdl.norm
alimodel=exp/sgmm3c/final.alimdl.norm
preselectmap=exp/ubm3b/preselect.map
graph=$1
dir=$2
job=$3
scp=$dir/$job.scp
feats="ark:add-deltas --print-args=false scp:$scp ark:- |"
filenames="$scp $model $graph data/words.txt"
for file in $filenames; do
if [ ! -f $file ] ; then
echo "No such file $file";
exit 1;
fi
done
if [ -f $dir/$job.spk2utt ]; then
if [ ! -f $dir/$job.utt2spk ]; then
echo "spk2utt but not utt2spk file present!"
exit 1
fi
spk2utt_opt=--spk2utt=ark:$dir/$job.spk2utt
utt2spk_opt=--utt2spk=ark:$dir/$job.utt2spk
fi
# Map utterance -> gender -> preselected UBM Gaussian subset, restricted
# to the utterances in this job's scp.
cat data/eval*.utt2spk | \
scripts/compose_maps.pl - data/spk2gender.map | \
scripts/compose_maps.pl - $preselectmap | \
scripts/filter_scp.pl $scp - | \
gzip -c > $dir/preselect.$job.gz
echo running on `hostname` > $dir/decode${job}.log
sgmm-gselect "--preselect=ark:gunzip -c $dir/preselect.$job.gz|" \
$model "$feats" ark,t:- 2>$dir/gselect${job}.log | \
gzip -c > $dir/gselect${job}.gz || exit 1;
gselect_opt="--gselect=ark:gunzip -c $dir/gselect${job}.gz|"
# First pass: decode with the alignment model (smaller beam).
sgmm-decode-faster "$gselect_opt" --beam=$prebeam --max-active=$max_active \
--acoustic-scale=$acwt \
--word-symbol-table=data/words.txt $alimodel $graph "$feats" \
ark,t:$dir/$job.pre_tra ark,t:$dir/$job.pre_ali 2>$dir/predecode${job}.log || exit 1;
# Estimate the speaker vectors (two passes).
( ali-to-post ark:$dir/${job}.pre_ali ark:- | \
weight-silence-post 0.01 $silphones $alimodel ark:- ark:- | \
sgmm-post-to-gpost "$gselect_opt" $alimodel "$feats" ark,s,cs:- ark:- | \
sgmm-est-spkvecs-gpost $spk2utt_opt $model "$feats" ark,s,cs:- \
ark:$dir/${job}.vecs1 ) 2>$dir/vecs1.${job}.log || exit 1;
( ali-to-post ark:$dir/${job}.pre_ali ark:- | \
weight-silence-post 0.01 $silphones $alimodel ark:- ark:- | \
sgmm-est-spkvecs "$gselect_opt" --spk-vecs=ark,t:$dir/${job}.vecs1 $spk2utt_opt $model \
"$feats" ark,s,cs:- ark:$dir/${job}.vecs2 ) 2>$dir/vecs2.${job}.log || exit 1;
# Final pass: decode with the speaker vectors, full beam.
sgmm-decode-faster "$gselect_opt" --beam=$beam --max-active=$max_active \
$utt2spk_opt --spk-vecs=ark:$dir/${job}.vecs2 \
--acoustic-scale=$acwt \
--word-symbol-table=data/words.txt $model $graph "$feats" \
ark,t:$dir/$job.tra ark,t:$dir/$job.ali 2>$dir/decode${job}.log || exit 1;

Просмотреть файл

@ -256,3 +256,7 @@ rm $dir/$x.?.aliacc
( cd $dir; rm final.alimdl 2>/dev/null; ln -s $x.alimdl final.alimdl; )
# Compute normalized models
sgmm-normalize $dir/final.mdl $preselectmap $dir/final.mdl.norm 2>$dir/normalize.log
sgmm-normalize $dir/final.alimdl $preselectmap $dir/final.alimdl.norm 2>>$dir/normalize.log

Просмотреть файл

@ -14,7 +14,6 @@
documentation for acoustic modeling code.
TODO items (Dan):
Remove unused ET stuff.
Remove non-Kaldi code from decoder/
Rename to branches/kaldi-1.0

Просмотреть файл

@ -9,7 +9,7 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
gmm-acc-stats gmm-init-lvtln gmm-est-lvtln-trans gmm-train-lvtln-special \
gmm-acc-mllt gmm-mixup gmm-init-model \
gmm-acc-hlda gmm-est-hlda gmm-transform-means gmm-init-et gmm-est-et \
gmm-et-acc-a gmm-et-est-a gmm-et-acc-b gmm-copy-et gmm-et-est-b gmm-et-get-b \
gmm-et-acc-a gmm-et-est-a gmm-copy-et gmm-et-get-b \
gmm-make-regtree gmm-decode-faster-regtree-fmllr gmm-post-to-gpost \
gmm-est-fmllr-gpost gmm-est-fmllr gmm-est-regtree-fmllr-ali \
gmm-est-regtree-mllr gmm-decode-kaldi gmm-compute-likes \

Просмотреть файл

@ -30,8 +30,8 @@ int main(int argc, char *argv[]) {
const char *usage =
"Accumulate stats for GMM training.\n"
"Usage: gmm-estimate [options] <model-in> <stats-in> <model-out>\n"
"e.g.: gmm-estimate 1.mdl 1.acc 2.mdl\n";
"Usage: gmm-est [options] <model-in> <stats-in> <model-out>\n"
"e.g.: gmm-est 1.mdl 1.acc 2.mdl\n";
bool binary_write = false;
TransitionUpdateConfig tcfg;

Просмотреть файл

@ -1,222 +0,0 @@
// gmmbin/gmm-et-acc-b.cc
// Copyright 2009-2011 Microsoft Corporation
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <string>
using std::string;
#include <vector>
using std::vector;

#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/am-diag-gmm.h"
#include "hmm/transition-model.h"
#include "transform/exponential-transform.h"

namespace kaldi {

// Accumulates B-matrix statistics for one utterance.
// "xform" is the speaker's (or utterance's) estimated transform, "feats" are
// the un-transformed features, "t" is the warp factor, and the stats are
// added into *accs_b.
static void ProcessUtterance(const ExponentialTransform &et,
                             const GauPost &gpost,
                             const Matrix<BaseFloat> &xform,
                             const Matrix<BaseFloat> &feats,  // un-transformed feats.
                             const TransitionModel &trans_model,
                             const AmDiagGmm &am_gmm,
                             BaseFloat t,
                             ExponentialTransformAccsB *accs_b) {
  // First work out Ds from the transform and the warp factor.
  int32 dim = et.Dim();
  Matrix<BaseFloat> Ds(dim, dim+1);
  et.ComputeDs(xform, t, &Ds);
  for (size_t i = 0; i < gpost.size(); i++) {
    SubVector<BaseFloat> feat(feats, i);
    Vector<BaseFloat> t_data(feat);  // transformed feature.
    ApplyAffineTransform(xform, &t_data);
    for (size_t j = 0; j < gpost[i].size(); j++) {
      int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first);
      const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
      const Vector<BaseFloat> &posteriors (gpost[i][j].second);
      accs_b->AccumulateFromPosteriors(gmm, t_data, posteriors, Ds);
    }
  }
}

}  // end namespace kaldi

int main(int argc, char *argv[]) {
  try {
    typedef kaldi::int32 int32;
    using namespace kaldi;
    // Note: the usage previously listed an extra <align-model> argument and
    // said "A matrix"; this program reads seven arguments and accumulates
    // B-matrix stats (ExponentialTransformAccsB).
    const char *usage =
        "Accumulate statistics for estimating the B matrix of exponential transform, \n"
        " per-utterance (default) or per-speaker for \n"
        " the supplied set of speakers (spk2utt option).\n"
        "Note: the Gaussian-level posteriors are expected to have been obtained with\n"
        "an alignment model, in the unadapted space.\n"
        "Usage: gmm-et-acc-b [options] <model> <exponential-transform> <feature-rspecifier> "
        "<gpost-rspecifier> <transform-rspecifier> <warp-rspecifier> <accs-filename>\n";

    ParseOptions po(usage);
    string spk2utt_rspecifier;
    bool binary = false;
    po.Register("spk2utt", &spk2utt_rspecifier, "rspecifier for speaker to "
                "utterance-list map");
    po.Register("binary", &binary, "Write output in binary mode");
    po.Read(argc, argv);

    if (po.NumArgs() != 7) {
      po.PrintUsage();
      exit(1);
    }

    string model_rxfilename = po.GetArg(1),
        et_rxfilename = po.GetArg(2),
        feature_rspecifier = po.GetArg(3),
        gpost_rspecifier = po.GetArg(4),
        transform_rspecifier = po.GetArg(5),
        warps_rspecifier = po.GetArg(6),
        accs_wxfilename = po.GetArg(7);

    RandomAccessGauPostReader gpost_reader(gpost_rspecifier);
    RandomAccessBaseFloatMatrixReader transform_reader(transform_rspecifier);
    RandomAccessBaseFloatReader warps_reader(warps_rspecifier);

    AmDiagGmm am_gmm;
    TransitionModel trans_model;
    {
      bool binary;
      Input is(model_rxfilename, &binary);
      trans_model.Read(is.Stream(), binary);
      am_gmm.Read(is.Stream(), binary);
    }

    ExponentialTransform et;
    {
      bool binary;
      Input ki(et_rxfilename, &binary);
      et.Read(ki.Stream(), binary);
    }

    int32 dim = et.Dim();
    ExponentialTransformAccsB accs_b(dim);

    int32 num_done = 0, num_no_gpost = 0, num_other_error = 0;
    if (spk2utt_rspecifier != "") {  // per-speaker adaptation
      SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
      RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
      for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
        string spk = spk2utt_reader.Key();
        if (!transform_reader.HasKey(spk)) {
          KALDI_WARN << "Could not read transform for speaker " << spk;
          num_other_error++;
          continue;  // must not call Value(spk) below on a missing key.
        }
        if (!warps_reader.HasKey(spk)) {
          KALDI_WARN << "Could not read warp factor for speaker " << spk;
          num_other_error++;
          continue;
        }
        const Matrix<BaseFloat> &xform(transform_reader.Value(spk));
        BaseFloat t = warps_reader.Value(spk);
        const vector<string> &uttlist = spk2utt_reader.Value();
        for (vector<string>::const_iterator utt_itr = uttlist.begin(),
            itr_end = uttlist.end(); utt_itr != itr_end; ++utt_itr) {
          if (!feature_reader.HasKey(*utt_itr)) {
            KALDI_WARN << "Did not find features for utterance " << *utt_itr;
            continue;
          }
          if (!gpost_reader.HasKey(*utt_itr)) {
            KALDI_WARN << "Did not find gpost for utterance "
                       << *utt_itr;
            num_no_gpost++;
            continue;
          }
          const Matrix<BaseFloat> &feats = feature_reader.Value(*utt_itr);
          const GauPost &gpost = gpost_reader.Value(*utt_itr);
          if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
            KALDI_WARN << "gpost has wrong size " << gpost.size()
                       << " vs. " << feats.NumRows();
            num_other_error++;
            continue;
          }
          ProcessUtterance(et, gpost, xform, feats, trans_model,
                           am_gmm, t, &accs_b);
          num_done++;
          if (num_done % 50 == 0)
            KALDI_VLOG(1) << "Done " << num_done << " utterances.";
        }  // end looping over all utterances of the current speaker
      }  // end looping over speakers
    } else {  // per-utterance adaptation
      SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
      for (; !feature_reader.Done(); feature_reader.Next()) {
        string utt = feature_reader.Key();
        if (!transform_reader.HasKey(utt)) {
          KALDI_WARN << "Could not read transform for utterance " << utt;
          num_other_error++;
          continue;  // must not call Value(utt) below on a missing key.
        }
        if (!warps_reader.HasKey(utt)) {
          KALDI_WARN << "Could not read warp factor for utterance " << utt;
          num_other_error++;
          continue;
        }
        if (!gpost_reader.HasKey(utt)) {
          KALDI_WARN << "Did not find gpost for utterance "
                     << utt;
          num_no_gpost++;
          continue;
        }
        const Matrix<BaseFloat> &feats = feature_reader.Value();
        const GauPost &gpost = gpost_reader.Value(utt);
        const Matrix<BaseFloat> &xform(transform_reader.Value(utt));
        BaseFloat t = warps_reader.Value(utt);
        if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
          KALDI_WARN << "gpost has wrong size " << gpost.size()
                     << " vs. " << feats.NumRows();
          num_other_error++;
          continue;
        }
        ProcessUtterance(et, gpost, xform, feats, trans_model,
                         am_gmm, t, &accs_b);
        num_done++;
        if (num_done % 50 == 0)
          KALDI_LOG << "Done " << num_done << " utterances";
      }
    }
    KALDI_LOG << "Done " << num_done << " files, " << num_no_gpost
              << " with no gposts, " << num_other_error << " with other errors.";

    Output ko(accs_wxfilename, binary);
    accs_b.Write(ko.Stream(), binary);
    KALDI_LOG << "Written accs.";
    return 0;
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
}

Просмотреть файл

@ -1,89 +0,0 @@
// gmmbin/gmm-et-est-b.cc
// Copyright 2009-2011 Microsoft Corporation
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "transform/exponential-transform.h"
// Command-line tool: updates matrix B of the exponential transform from
// stats accumulated by gmm-et-acc-b, writing out the updated transform and
// the mean-transforming matrix M.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    using kaldi::int32;
    const char *usage =
        "Update matrix B of exponential transform (uses stats from gmm-et-acc-b)\n"
        " [Use matrix-out with gmm-transform-means to transform model means.]\n"
        "Usage: gmm-et-est-b [options] <et-in> <et-out> <matrix-out> <b-stats1> <b-stats2> ... \n"
        "e.g.: \n"
        " gmm-et-est-b 1.et 2.et 2.mat 1.et_acc_b\n";  // example fixed to match the 4-argument usage.
    bool binary = true;
    ParseOptions po(usage);
    ExponentialTransformUpdateAOptions update_a_opts;
    po.Register("binary", &binary, "Write output in binary mode");
    update_a_opts.Register(&po);
    po.Read(argc, argv);
    // Need at least <et-in>, <et-out>, <matrix-out> and one stats file.
    if (po.NumArgs() < 4) {
      po.PrintUsage();
      exit(1);
    }
    std::string et_rxfilename = po.GetArg(1);
    std::string et_wxfilename = po.GetArg(2);
    std::string mat_wxfilename = po.GetArg(3);
    // Read the exponential transform to be updated.
    ExponentialTransform et;
    {
      bool binary_in;
      Input ki(et_rxfilename, &binary_in);
      et.Read(ki.Stream(), binary_in);
    }
    // Sum the B-update stats over all supplied accumulator files.
    ExponentialTransformAccsB stats;
    for (int32 i = 4; i <= po.NumArgs(); i++) {
      std::string stats_rxfilename = po.GetArg(i);
      bool binary_in;
      Input ki(stats_rxfilename, &binary_in);
      stats.Read(ki.Stream(), binary_in, true); // true == add
    }
    int32 dim = et.Dim();
    Matrix<BaseFloat> M(dim, dim); // to transform model means.
    stats.Update(&et, NULL, NULL, &M);
    {
      Output ko(et_wxfilename, binary);
      et.Write(ko.Stream(), binary);
    }
    {
      Output ko(mat_wxfilename, binary);
      M.Write(ko.Stream(), binary);
    }
    // Log message corrected: this program writes the transform and the
    // matrix, not accumulators.
    KALDI_LOG << "Written transform and matrix.";
    return 0;
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
}

Просмотреть файл

@ -1137,11 +1137,11 @@ void Matrix<Real>::Read(std::istream & is, bool binary, bool add) {
std::string str;
is >> str; // get a token
if (is.fail()) { specific_error << ": Expected \"[\", got EOF"; goto bad; }
if ((str.compare("DM") == 0) || (str.compare("FM") == 0)) { // Back compatibility.
is >> str; // get #rows
is >> str; // get #cols
is >> str; // get "["
}
//if ((str.compare("DM") == 0) || (str.compare("FM") == 0)) { // Back compatibility.
// is >> str; // get #rows
// is >> str; // get #cols
// is >> str; // get "["
//}
if (str == "[]") { Resize(0, 0); return; } // Be tolerant of variants.
else if (str != "[") {
specific_error << ": Expected \"[\", got \"" << str << '"';

Просмотреть файл

@ -789,10 +789,10 @@ void Vector<Real>::Read(std::istream & is, bool binary, bool add) {
} else { // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n"
std::string s;
is >> s;
if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back compatibility.
is >> s; // get dimension
is >> s; // get "["
}
//if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) { // Back compatibility.
// is >> s; // get dimension
// is >> s; // get "["
//}
if (is.fail()) { specific_error << "EOF while trying to read vector."; goto bad; }
if (s.compare("[]") == 0) { Resize(0); return; } // tolerate this variant.
if (s.compare("[")) { specific_error << "Expected \"[\" but got " << s; goto bad; }

Просмотреть файл

@ -618,6 +618,94 @@ void AmSgmm::ComputeNormalizers() {
KALDI_LOG << "Done computing normalizers";
}
void AmSgmm::ComputeNormalizersNormalized(const std::vector<std::vector<int32> > &normalize_sets) {
{ // Check sets in normalize_sets are disjoint and cover all Gaussians.
std::set<int32> all;
for(int32 i = 0; i < normalize_sets.size(); i++)
for(int32 j = 0; static_cast<size_t>(j) < normalize_sets[i].size(); j++) {
int32 n = normalize_sets[i][j];
KALDI_ASSERT(all.count(n) == 0 && n >= 0 && n < NumGauss());
all.insert(n);
}
KALDI_ASSERT(all.size() == NumGauss());
}
KALDI_LOG << "Computing normalizers [normalized]";
BaseFloat DLog2pi = FeatureDim() * log(2 * M_PI);
Vector<BaseFloat> mu_jmi(FeatureDim());
Vector<BaseFloat> SigmaInv_mu(FeatureDim());
Vector<BaseFloat> log_det_Sigma(NumGauss());
for (int32 i = 0; i < NumGauss(); i++) {
try {
log_det_Sigma(i) = - SigmaInv_[i].LogPosDefDet();
} catch(...) {
KALDI_WARN << "Covariance is not positive definite, setting to unit";
SigmaInv_[i].SetUnit();
log_det_Sigma(i) = 0.0;
}
}
double entropy_count = 0, entropy_sum = 0;
n_.resize(NumStates());
for (int32 j = 0; j < NumStates(); ++j) {
Vector<BaseFloat> log_w_jm(NumGauss());
n_[j].Resize(NumGauss(), NumSubstates(j));
for (int32 m = 0; m < NumSubstates(j); m++) {
BaseFloat logc = log(c_[j](m));
// (in logs): w_jm = softmax([w_{k1}^T ... w_{kD}^T] * v_{jkm}) eq.(7)
log_w_jm.AddMatVec(1.0, w_, kNoTrans, v_[j].Row(m), 0.0);
log_w_jm.Add((-1.0) * log_w_jm.LogSumExp());
for(int32 n = 0; n < normalize_sets.size(); n++) {
const std::vector<int32> &this_set(normalize_sets[n]);
double sum = 0.0;
for(int32 p = 0; p < this_set.size(); p++)
sum += exp(log_w_jm(this_set[p]));
double offset = -log(sum); // add "offset", to normalize weights.
for(int32 p = 0; p < this_set.size(); p++)
log_w_jm(this_set[p]) += offset;
}
for (int32 i = 0; i < NumGauss(); ++i) {
// mu_jmi = M_{i} * v_{jm}
mu_jmi.AddMatVec(1.0, M_[i], kNoTrans, v_[j].Row(m), 0.0);
// mu_{jmi} * \Sigma_{i}^{-1} * mu_{jmi}
SigmaInv_mu.AddSpVec(1.0, SigmaInv_[i], mu_jmi, 0.0);
BaseFloat mu_SigmaInv_mu = VecVec(mu_jmi, SigmaInv_mu);
// Suggestion: Both mu_jmi and SigmaInv_mu could
// have been computed at once for i ,
// if M[i] was concatenated to single matrix over i indeces
// eq.(31)
n_[j](i, m) = logc + log_w_jm(i) - 0.5 * (log_det_Sigma(i) + DLog2pi
+ mu_SigmaInv_mu);
{ // Mainly diagnostic code. Not necessary.
BaseFloat tmp = n_[j](i, m);
if (!KALDI_ISFINITE(tmp)) { // NaN or inf
KALDI_LOG << "Warning: normalizer for j = " << j << ", m = " << m
<< ", i = " << i << " is infinite or NaN " << tmp << "= "
<< (logc) << "+" << (log_w_jm(i)) << "+" << (-0.5 *
log_det_Sigma(i)) << "+" << (-0.5 * DLog2pi)
<< "+" << (mu_SigmaInv_mu) << ", setting to finite.";
n_[j](i, m) = -1.0e+40; // future work(arnab): get rid of magic number
}
}
}
}
}
KALDI_LOG << "Done computing normalizers (normalized over subsets)";
}
void AmSgmm::ComputeFmllrPreXform(const Vector<BaseFloat> &state_occs,
Matrix<BaseFloat> *xform, Matrix<BaseFloat> *inv_xform,
Vector<BaseFloat> *diag_mean_scatter) const {

Просмотреть файл

@ -215,6 +215,12 @@ class AmSgmm {
/// for each Gaussian component and all substates. Eq. (31)
void ComputeNormalizers();
/// Computes the normalizers, while normalizing the weights to one
/// among each of the sets in "normalize_sets": these sets should
/// be disjoint and their union should be all the indices 0 ... I-1.
void ComputeNormalizersNormalized(const std::vector<std::vector<int32> > &normalize_sets);
/// Computes the LDA-like pre-transform and its inverse as well as the
/// eigenvalues of the scatter of the means used in FMLLR estimation.
void ComputeFmllrPreXform(const Vector<BaseFloat> &state_occs,

Просмотреть файл

@ -7,7 +7,7 @@ BINFILES = init-ubm sgmm-align sgmm-align-compiled sgmm-acc-stats-ali sgmm-sum-a
sgmm-est sgmm-decode-faster sgmm-init sgmm-gselect sgmm-acc-stats \
sgmm-est-spkvecs sgmm-post-to-gpost sgmm-acc-stats-gpost sgmm-est-spkvecs-gpost \
sgmm-comp-prexform sgmm-est-fmllr-gpost sgmm-acc-fmllrbasis-ali sgmm-est-fmllrbasis \
sgmm-calc-distances
sgmm-calc-distances sgmm-normalize
OBJFILES =

Просмотреть файл

@ -31,7 +31,7 @@ int main(int argc, char *argv[]) {
typedef kaldi::int32 int32;
const char *usage =
"Estimate SGMM model parameters from accumulated stats.\n"
"Usage: sgmm-estimate [options] <model-in> <stats-in> <model-out>\n";
"Usage: sgmm-est [options] <model-in> <stats-in> <model-out>\n";
bool binary_write = false;
std::string update_flags_str = "vMNwcS";

Просмотреть файл

@ -0,0 +1,83 @@
// sgmmbin/sgmm-normalize.cc
// Copyright 2009-2011 Microsoft Corporation
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "sgmm/am-sgmm.h"
#include "hmm/transition-model.h"
// Command-line tool: reads an SGMM, recomputes its normalizers with the
// weights renormalized within the given Gaussian-index sets (typically one
// per gender), and writes the model back out including normalizers.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;
    const char *usage =
        "Renormalize SGMM so that within certain subsets of UBM Gaussians (typically \n"
        "corresponding to gender), probabilities sum to one; write it out, including\n"
        "normalizers.\n"  // fixed: missing "\n" made this line run into the next in --help output.
        "Note: gaussians-rspecifier will normally be \"ark:foo\" where foo looks like\n"
        " m 0 1 2 3 4 5\n"
        " f 6 7 8 9 10\n"
        "Usage: sgmm-normalize [options] <model-in> <gaussians-rspecifier> <model-out>\n";
    bool binary_write = false;
    ParseOptions po(usage);
    po.Register("binary", &binary_write, "Write output in binary mode");
    po.Read(argc, argv);
    if (po.NumArgs() != 3) {
      po.PrintUsage();
      exit(1);
    }
    std::string model_in_filename = po.GetArg(1),
        gaussians_rspecifier = po.GetArg(2),
        model_out_filename = po.GetArg(3);
    // Read transition model and SGMM from the single input file.
    AmSgmm am_sgmm;
    TransitionModel trans_model;
    {
      bool binary;
      Input is(model_in_filename, &binary);
      trans_model.Read(is.Stream(), binary);
      am_sgmm.Read(is.Stream(), binary);
    }
    // Each archive entry is one set of Gaussian indices; the sets should be
    // disjoint and together cover all Gaussians (checked inside the call).
    std::vector<std::vector<int32> > norm_sets;
    SequentialInt32VectorReader vec_reader(gaussians_rspecifier);
    for (; !vec_reader.Done(); vec_reader.Next())
      norm_sets.push_back(vec_reader.Value());
    am_sgmm.ComputeNormalizersNormalized(norm_sets);
    {
      Output os(model_out_filename, binary_write);
      trans_model.Write(os.Stream(), binary_write);
      am_sgmm.Write(os.Stream(), binary_write, kSgmmWriteAll);
    }
    KALDI_LOG << "Written model to " << model_out_filename;
    return 0;
  } catch(const std::exception& e) {
    std::cerr << e.what();
    return -1;
  }
}

Просмотреть файл

@ -18,6 +18,7 @@
#include "util/common-utils.h"
#include "gmm/diag-gmm.h"
#include "transform/exponential-transform.h"
#include "transform/mllt.h"
namespace kaldi {
@ -102,7 +103,7 @@ void UnitTestExponentialTransformUpdate(EtNormalizeType norm_type,
double objf_change_tot = 0.0;
like_tot = 0.0;
ExponentialTransformAccsA accs_a(dim);
ExponentialTransformAccsB accs_b(dim);
MlltAccs accs_b(dim);
for (int32 k = 0; k < nblocks; k++) {
Matrix<BaseFloat> &cur_xform(cur_xforms[k]);
FmllrOptions opts;
@ -146,8 +147,7 @@ void UnitTestExponentialTransformUpdate(EtNormalizeType norm_type,
if (update_b && j%2 == 1)
accs_b.AccumulateFromPosteriors(gmm,
xformed_row,
posteriors,
cur_Ds[k]);
posteriors);
}
}
}
@ -161,16 +161,17 @@ void UnitTestExponentialTransformUpdate(EtNormalizeType norm_type,
}
if (update_b && j%2 == 1) {
BaseFloat count, objf_impr;
Matrix<BaseFloat> M(dim, dim); // to transform GMM means.
accs_b.Update(&et, &objf_impr, &count, &M);
TestIo(accs_b);
Matrix<BaseFloat> C(dim, dim); // to transform GMM means.
C.SetUnit();
accs_b.Update(&C, &objf_impr, &count);
et.ApplyC(C);
TestIo(et);
KALDI_LOG << "Count is " << count << " and objf impr is " << objf_impr << " updating B";
// update the GMM means:
Matrix<BaseFloat> means;
gmm.GetMeans(&means);
Matrix<BaseFloat> new_means(means.NumRows(), means.NumCols());
new_means.AddMatMat(1.0, means, kNoTrans, M, kTrans, 0.0);
new_means.AddMatMat(1.0, means, kNoTrans, C, kTrans, 0.0);
gmm.SetMeans(new_means);
gmm.ComputeGconsts();
}

Просмотреть файл

@ -408,157 +408,6 @@ void ExponentialTransformAccsA::Init(int32 dim) {
Ahat_.Resize(dim, dim+1);
}
// Sets up empty accumulators for the B update: zero occupancy count and one
// dim x dim symmetric stats matrix per feature dimension.
void ExponentialTransformAccsB::Init(int32 dim) {
  beta_ = 0.0;
  G_.resize(dim);
  for (int32 row = 0; row < dim; row++) {
    G_[row].Resize(dim);
  }
}
// Accumulates MLLT-style statistics for estimating matrix B, from one frame.
// "t_data" is the feature vector AFTER the speaker transform W_s has been
// applied; "Ds" is the speaker-specific diagonal (or mean-only) transform,
// which is undone on the features before accumulating (it is treated here as
// a model-space transform).  "posteriors" holds per-Gaussian occupation
// probabilities for this frame.
void
ExponentialTransformAccsB::
AccumulateFromPosteriors(const DiagGmm &gmm,
                         const VectorBase<BaseFloat> &t_data,
                         const VectorBase<BaseFloat> &posteriors,
                         const MatrixBase<BaseFloat> &Ds) {
  int32 dim = G_.size();
  KALDI_ASSERT(dim == gmm.Dim() &&
               dim == t_data.Dim());
  KALDI_ASSERT(posteriors.Dim() == gmm.NumGauss());
  Vector<BaseFloat> tmp_data(dim);
  KALDI_ASSERT(Ds.NumRows() == dim && Ds.NumCols() == dim+1);
  KALDI_ASSERT(dim < 2 || (fabs(Ds(1, 0)) < 0.01 && Ds(0, 0) != 0.0)); // quick check it's diagonal.
  // Apply the reverse transformation of Ds to the features.
  // If d_i is the i'th diagonal element and b_i is the i'th offset element,
  // it transforms y_i = x_i d_i + b_i.
  // The reverse transformation is: x_i = (y_i - b_i) / d_i
  for (int32 i = 0; i < dim; i++)
    tmp_data(i) = (t_data(i) - Ds(i, dim)) / Ds(i, i);
  // Note on transforming means and variances from the adapted to the
  // speaker-independent space...
  // the transformation on means is as above:
  // mu_i -> (mu_i - b_i) / d_i
  // and the transformation on variances is:
  // sigma_i^2 -> sigma_i^2 / d_i^2.
  Matrix<BaseFloat> gmm_means;
  gmm.GetMeans(&gmm_means);
  const Matrix<BaseFloat> &inv_vars = gmm.inv_vars();
  double this_beta = 0.0;  // occupancy accumulated from this frame.
  Vector<double> offset(dim);   // data minus adapted mean, per Gaussian.
  SpMatrix<double> offset2(dim);  // outer product of "offset" with itself.
  for (int32 i = 0; i < gmm.NumGauss(); i++) {
    BaseFloat gamma = posteriors(i);
    if (gamma < 1.0e-05) continue;  // skip negligible posteriors for speed.
    this_beta += gamma;
    offset.CopyFromVec(tmp_data);
    for (int32 j = 0; j < dim; j++) {
      BaseFloat adapted_mean = (gmm_means(i, j) - Ds(j, dim)) / Ds(j, j);
      // adapted_mean is viewing Ds as a model-space transform.
      offset(j) -= adapted_mean;
    }
    offset2.SetZero();
    offset2.AddVec2(1.0, offset);
    for (int32 j = 0; j < dim; j++) {
      BaseFloat adapted_inv_var = inv_vars(i, j) * Ds(j, j) * Ds(j, j);
      // was: G_[j].AddVec2(gamma * adapted_inv_var, offset);
      // This should be more efficient (offset2 computed once per Gaussian).
      G_[j].AddSp(gamma * adapted_inv_var, offset2);
    }
  }
  beta_ += this_beta;
}
// Does the MLLT-style update of matrix B: estimates a square transform C by
// an iterative row-by-row update (Gales' semi-tied covariance update), then
// applies it to the exponential transform's A and B matrices.  Outputs the
// d x d part of C in "Cpart"; the caller should apply this to model means
// (e.g. via gmm-transform-means).  "objf_impr_out"/"count_out", if non-NULL,
// get the total auxiliary-function improvement and frame count.
void ExponentialTransformAccsB::Update(ExponentialTransform *et,
                                       BaseFloat *objf_impr_out,
                                       BaseFloat *count_out,
                                       MatrixBase<BaseFloat> *Cpart) {
  int32 dim = G_.size();
  KALDI_ASSERT(beta_ > 2*dim);  // insist on a reasonable amount of data.
  KALDI_ASSERT(dim > 0 && et->Dim() == dim);
  BaseFloat objf_impr = 0.0;
  Matrix<double> transform(dim, dim);
  transform.SetUnit();  // start the iterative update from the identity.
  // Pre-invert the per-row quadratic stats G_i; each row update below needs
  // G_i^{-1}, which does not change across iterations.
  std::vector<SpMatrix<double> > Ginv(dim);
  for (int32 i = 0; i < dim; i++) {
    Ginv[i].Resize(dim);
    Ginv[i].CopyFromSp(G_[i]);
    Ginv[i].Invert();
  }
  for (int32 iter = 0; iter < 100; iter++) {
    for (int32 i = 0; i < dim; i++) { // for each row...
      SubVector<double> row(transform, i);
      Vector<double> cofactor(dim); // actually cofactor times a constant.
      {
        // Row i of the inverse-transpose is proportional to the cofactor
        // vector of row i of "transform".
        Matrix<double> inv(transform);
        inv.Invert();
        inv.Transpose();
        cofactor.CopyFromVec(inv.Row(i));
      }
      // Auxf is beta * log(cofactor . row) - 0.5 * row^T G_i row
      double old_auxf = beta_ * log(std::abs(VecVec(cofactor, row)))
          -0.5 * VecSpVec(row, G_[i], row);
      // see Gales, "Semi-tied covariance matrices for hidden
      // markov models", feb. 1998 (techreport version of IEEE paper),
      // eq. 22,
      // a_i <== c_i G_i^{-1} \sqrt( beta / c_i^T G_i^{-1} c_i )
      // [notation is not exactly as in original].
      row.AddSpVec(sqrt(beta_ / VecSpVec(cofactor, Ginv[i], cofactor)),
                   Ginv[i], cofactor, 0.0);
      double new_auxf = beta_ * log(std::abs(VecVec(cofactor, row)))
          -0.5 * VecSpVec(row, G_[i], row);
      if (new_auxf < old_auxf - 0.0001 * beta_)
        KALDI_ERR << "Auxf decreased in MLLR update.";
      objf_impr += new_auxf - old_auxf;
    }
  }
  if (et->norm_type_ == kEtNormalizeMeanAndVar)
    KALDI_WARN << "Update for B is not guaranteed to improve objective "
        "when both mean and variance normalization is being done.";
  KALDI_LOG << "Updating matrix B: auxf improvement is "
            << (objf_impr/beta_) << " per frame over " << beta_
            << " frames.\n";
  if (objf_impr_out)
    *objf_impr_out = objf_impr;
  if (count_out)
    *count_out = beta_;
  KALDI_ASSERT(Cpart != NULL && Cpart->NumRows() == dim
               && Cpart->NumCols() == dim);
  Cpart->CopyFromMat(transform); // this is what we would apply to the
  // means.
  // HERE: actually apply to A and B.
  // We will do (with M the transform we just estimated):
  // B' <-- M B
  // A' <-- M A M^{-1}
  // This way we will have exp(tA') B' = (I + t M A M^{-1} + .. ) M B
  // = M ( I + t A + .. ) B.
  // To do this we need to extend M with one more row 0 0 ... 0 1.
  Matrix<BaseFloat> Cfull(dim+1, dim+1);
  SubMatrix<BaseFloat> Cfull_part(Cfull, 0, dim, 0, dim);
  Cfull_part.CopyFromMat(transform);
  Cfull(dim, dim) = 1.0;  // the extra row/column: last diagonal element is 1.
  Matrix<BaseFloat> Cfull_inv(Cfull);
  Cfull_inv.Invert();
  Matrix<BaseFloat> tmp(dim+1, dim+1), new_A(dim+1, dim+1), new_B(dim+1, dim+1);
  // tmp <- A M^{-1}
  tmp.AddMatMat(1.0, et->A_, kNoTrans, Cfull_inv, kNoTrans, 0.0);
  // new_A <-- M tmp
  new_A.AddMatMat(1.0, Cfull, kNoTrans, tmp, kNoTrans, 0.0);
  et->A_.CopyFromMat(new_A);
  // new_B <-- M B
  new_B.AddMatMat(1.0, Cfull, kNoTrans, et->B_, kNoTrans, 0.0);
  et->B_.CopyFromMat(new_B);
}
void ExponentialTransform::Write(std::ostream &os, bool binary) const {
WriteMarker(os, binary, "<ExponentialTransform>");
WriteMarker(os, binary, "<A>");
@ -585,42 +434,6 @@ void ExponentialTransform::Read(std::istream &is, bool binary) {
}
// Writes the B-update accumulators (count beta_ and the per-dimension
// quadratic stats G_) in Kaldi's marker-delimited format.  Must stay in
// sync with Read() below.
void ExponentialTransformAccsB::Write(std::ostream &os, bool binary) const {
  WriteMarker(os, binary, "<ExponentialTransformAccsB>");
  WriteMarker(os, binary, "<Beta>");
  WriteBasicType(os, binary, beta_);
  WriteMarker(os, binary, "<Dim>");
  int32 dim = G_.size();
  WriteBasicType(os, binary, dim);
  WriteMarker(os, binary, "<G>");
  for (int32 i = 0; i < dim; i++)
    G_[i].Write(os, binary);
  WriteMarker(os, binary, "</ExponentialTransformAccsB>");
}
// Reads accumulators previously written by Write().  If "add" is true, the
// stats are summed with any already present (used when combining accs from
// parallel jobs); dimensions must then match.
// NOTE(review): the stream parameter is an std::istream despite being named
// "os"; renaming would be a code change so it is left as-is here.
void ExponentialTransformAccsB::Read(std::istream &os, bool binary, bool add) {
  if (G_.empty()) add = false; // don't add to nonexistent stats...
  ExpectMarker(os, binary, "<ExponentialTransformAccsB>");
  ExpectMarker(os, binary, "<Beta>");
  double beta;
  ReadBasicType(os, binary, &beta);
  if (add) beta_ += beta;
  else beta_ = beta;
  ExpectMarker(os, binary, "<Dim>");
  int32 dim;
  ReadBasicType(os, binary, &dim);
  if (!add) G_.resize(dim);
  else {
    // When adding, the stats being read must have the same dimension as the
    // stats already accumulated.
    if (static_cast<size_t>(dim) != G_.size())
      KALDI_ERR << "Reading accs for updating B in exponential transform, "
                << "dim mismatch " << dim << " vs. " << G_.size();
  }
  ExpectMarker(os, binary, "<G>");
  for (size_t i = 0; i < G_.size(); i++)
    G_[i].Read(os, binary, add);  // per-matrix "add" handles the summation.
  ExpectMarker(os, binary, "</ExponentialTransformAccsB>");
}
void ExponentialTransformAccsA::Write(std::ostream &os, bool binary) const {
WriteMarker(os, binary, "<ExponentialTransformAccsA>");

Просмотреть файл

@ -119,45 +119,6 @@ class ExponentialTransform {
};
// This is an MLLT type of update.
// Accumulator class for the MLLT-style update of matrix B of the
// exponential transform.  Typical flow: Init/ctor, one
// AccumulateFromPosteriors() call per frame, optional Write/Read (with
// add=true to sum accs across jobs), then Update().
class ExponentialTransformAccsB {
 public:
  ExponentialTransformAccsB() { } // typically only used prior to Read().
  // NOTE(review): non-explicit single-arg constructor permits implicit
  // int32 -> ExponentialTransformAccsB conversion; consider "explicit".
  ExponentialTransformAccsB(int32 dim) { Init(dim); }
  // Allocates beta_ and the dim quadratic-stats matrices, all zeroed.
  void Init(int32 dim);
  // AccumulateFromPosteriors is as in the base class, except we
  // supply the transform D_s (expected to be a diagonal or mean-only
  // transform), which is treated as a model-space transform here.
  // Here, "t_data" is the data transformed by the transform W_s.
  // Be careful-- this is different from the accumulation for A, in which
  // the fMLLR stats are accumulated given the original data.
  void AccumulateFromPosteriors(const DiagGmm &gmm,
                                const VectorBase<BaseFloat> &t_data,
                                const VectorBase<BaseFloat> &posteriors,
                                const MatrixBase<BaseFloat> &Ds);
  // The Update function does the MLLT update for B. It sets "Cpart"
  // (the first d x d block of C) to the transform that we would have
  // to apply to the model means.
  void Update(ExponentialTransform *et,
              BaseFloat *objf_impr,
              BaseFloat *count,
              MatrixBase<BaseFloat> *Cpart);
  void Write(std::ostream &os, bool binary) const;
  void Read(std::istream &is, bool binary, bool add = false);
 private:
  double beta_;  // total accumulated occupancy (frame count).
  std::vector<SpMatrix<double> > G_;  // one dim x dim stats matrix per feature dimension.
};
struct ExponentialTransformUpdateAOptions {