diff --git a/egs/librispeech/s5/local/online/run_nnet2.sh b/egs/librispeech/s5/local/online/run_nnet2.sh
index 9ba7c6eaf..647d8095b 100755
--- a/egs/librispeech/s5/local/online/run_nnet2.sh
+++ b/egs/librispeech/s5/local/online/run_nnet2.sh
@@ -147,7 +147,7 @@ if [ $stage -le 13 ]; then
   done
 fi
 
-exit 0;
+#exit 0;
 ###### Comment out the "exit 0" above to run the multi-threaded decoding. #####
 
 if [ $stage -le 14 ]; then
@@ -166,8 +166,8 @@ if [ $stage -le 15 ]; then
   test=dev_clean
   steps/online/nnet2/decode.sh --threaded true --do-endpointing true \
     --config conf/decode.config --cmd "$decode_cmd" --nj 30 \
-    --per-utt true exp/tri6b/graph_pp_tgsmall data/$test \
-    ${dir}_online/decode_pp_${test}_tgsmall_utt_threaded_ep || exit 1;
+    --per-utt true exp/tri6b/graph_tgsmall data/$test \
+    ${dir}_online/decode_${test}_tgsmall_utt_threaded_ep || exit 1;
 fi
 
 exit 0;
diff --git a/egs/wsj/s5/local/online/run_nnet2.sh b/egs/wsj/s5/local/online/run_nnet2.sh
index 433377bff..a7fac1bec 100755
--- a/egs/wsj/s5/local/online/run_nnet2.sh
+++ b/egs/wsj/s5/local/online/run_nnet2.sh
@@ -134,6 +134,34 @@ if [ $stage -le 13 ]; then
   done
 fi
 
+if [ $stage -le 14 ]; then
+  # this does offline decoding, as stage 10, except we estimate the iVectors per
+  # speaker, excluding silence (based on alignments from a GMM decoding), with a
+  # different script.  This is just to demonstrate that script.
+
+  rm exp/nnet2_online/.error 2>/dev/null
+  for year in eval92 dev93; do
+    steps/online/nnet2/extract_ivectors.sh --cmd "$train_cmd" --nj 8 \
+      data/test_${year}_hires data/lang exp/nnet2_online/extractor \
+      exp/tri4b/decode_tgpr_$year exp/nnet2_online/ivectors_spk_test_${year} || touch exp/nnet2_online/.error &
+  done
+  wait
+  [ -f exp/nnet2_online/.error ] && echo "$0: Error getting iVectors" && exit 1;
+
+  for lm_suffix in bd_tgpr; do # just use the bd decoding, to avoid wasting time.
+    graph_dir=exp/tri4b/graph_${lm_suffix}
+    # use already-built graphs.
+    for year in eval92 dev93; do
+      steps/nnet2/decode.sh --nj 8 --cmd "$decode_cmd" \
+          --online-ivector-dir exp/nnet2_online/ivectors_spk_test_$year \
+         $graph_dir data/test_${year}_hires $dir/decode_${lm_suffix}_${year}_spk || touch exp/nnet2_online/.error &
+    done
+  done
+  wait
+  [ -f exp/nnet2_online/.error ] && echo "$0: Error decoding" && exit 1;
+fi
+
+
 
 
 exit 0;
diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh
new file mode 100755
index 000000000..5009716cb
--- /dev/null
+++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+
+# Copyright     2013  Daniel Povey
+# Apache 2.0.
+
+
+# This script computes iVectors in the same format as extract_ivectors_online.sh,
+# except that they are actually not really computed online, they are first computed
+# per speaker and just duplicated many times.
+#
+# This setup also makes it possible to use a previous decoding or alignment, to
+# down-weight silence in the stats (default is --silence-weight 0.0).
+#
+# This is for when you use the "online-decoding" setup in an offline task, and
+# you want the best possible results.  
+
+
+# Begin configuration section.
+nj=30
+cmd="run.pl"
+stage=0
+num_gselect=5 # Gaussian-selection using diagonal model: number of Gaussians to select
+min_post=0.025 # Minimum posterior to use (posteriors below this are pruned out)
+ivector_period=10
+posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
+                    # inter-frame correlations.  Making this small during iVector
+                    # extraction is equivalent to scaling up the prior, and
+                    # will tend to produce smaller iVectors where data-counts are
+                    # small.  It's not so important that this match the value
+                    # used when training the iVector extractor, but more important
+                    # that this match the value used when you do real online decoding
+                    # with the neural nets trained with these iVectors.
+max_count=100       # Interpret this as a number of frames times posterior scale...
+                    # this config ensures that once the count exceeds this (i.e.
+                    # 1000 frames, or 10 seconds, by default), we start to scale
+                    # down the stats, accentuating the prior term.   This seems quite
+                    # important for some reason.
+compress=true       # If true, compress the iVectors stored on disk (it's lossy
+                    # compression, as used for feature matrices).
+silence_weight=0.0
+acwt=0.1  # used if input is a decode dir, to get best path from lattices.
+mdl=final  # change this if decode directory did not have ../final.mdl present.
+
+# End configuration section.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+
+if [ $# != 4 ] && [ $# != 5 ]; then
+  echo "Usage: $0 [options] <data> <lang> <extractor-dir> [<alignment-dir>|<decode-dir>] <ivector-dir>"
+  echo " e.g.: $0 data/test data/lang exp/nnet2_online/extractor exp/tri3/decode_test exp/nnet2_online/ivectors_test"
+  echo "main options (for others, see top of script file)"
+  echo "  --config <config-file>                           # config containing options"
+  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
+  echo "  --nj <n|30>                                      # Number of jobs for iVector extraction (the silence"
+  echo "                                                   # weights use num_jobs of <alignment-dir>|<decode-dir>)."
+  echo "  --stage <stage|0>                                # To control partial reruns"
+  echo "  --num-gselect <n|5>                              # Number of Gaussians to select using"
+  echo "                                                   # diagonal model."
+  echo "  --min-post <float;default=0.025>                 # Pruning threshold for posteriors"
+  echo "  --ivector-period <int;default=10>                # How often to extract an iVector (frames)"
+  echo "  --silence-weight <float;default=0.0>             # Weight given to silence frames in the stats"
+  echo "                                                   # (only used if <alignment-dir>|<decode-dir> given)."
+  echo "  --acwt <float;default=0.1>                       # Acoustic scale for best path, if <decode-dir> given."
+  echo "  --mdl <name;default=final>                       # Basename of the .mdl used with the ali/lattices."
+  exit 1;
+fi
+
+if [ $# -eq 4 ]; then
+  data=$1
+  lang=$2
+  srcdir=$3
+  dir=$4
+else # 5 arguments
+  data=$1
+  lang=$2
+  srcdir=$3
+  ali_or_decode_dir=$4
+  dir=$5
+fi
+
+for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \
+  $lang/phones.txt $srcdir/online_cmvn.conf $srcdir/final.mat; do
+  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
+done
+
+mkdir -p $dir/log 
+silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
+
+if [ ! -z "$ali_or_decode_dir" ]; then
+
+  nj_orig=$(cat $ali_or_decode_dir/num_jobs) || exit 1;
+  
+  if [ -f $ali_or_decode_dir/ali.1.gz ]; then
+    if [ ! -f $ali_or_decode_dir/${mdl}.mdl ]; then
+      echo "$0: expected $ali_or_decode_dir/${mdl}.mdl to exist."
+      exit 1;
+    fi
+
+    if [ $stage -le 0 ]; then
+      rm $dir/weights.*.gz 2>/dev/null
+      # Convert alignments to per-frame weights; silence phones get weight $silence_weight.
+      $cmd JOB=1:$nj_orig  $dir/log/ali_to_post.JOB.log \
+        gunzip -c $ali_or_decode_dir/ali.JOB.gz \| \
+        ali-to-post ark:- ark:- \| \
+        weight-silence-post $silence_weight $silphonelist $ali_or_decode_dir/${mdl}.mdl ark:- ark:- \| \
+        post-to-weights ark:- "ark:|gzip -c >$dir/weights.JOB.gz" || exit 1;
+      # Note: the model above is ${mdl}.mdl (honours --mdl), matching the existence check.
+      # put all the weights in one archive.
+      for j in $(seq $nj_orig); do gunzip -c $dir/weights.$j.gz; done | gzip -c >$dir/weights.gz || exit 1;
+      rm $dir/weights.*.gz || exit 1;
+    fi
+
+  elif [ -f $ali_or_decode_dir/lat.1.gz ]; then
+    if [ ! -f $ali_or_decode_dir/../${mdl}.mdl ]; then
+      echo "$0: expected $ali_or_decode_dir/../${mdl}.mdl to exist."
+      exit 1;
+    fi
+
+
+    if [ $stage -le 0 ]; then
+      rm $dir/weights.*.gz 2>/dev/null
+
+      $cmd JOB=1:$nj_orig  $dir/log/lat_to_post.JOB.log \
+        lattice-best-path --acoustic-scale=$acwt "ark:gunzip -c $ali_or_decode_dir/lat.JOB.gz|" ark:/dev/null ark:- \| \
+        ali-to-post ark:- ark:- \| \
+        weight-silence-post $silence_weight $silphonelist $ali_or_decode_dir/../${mdl}.mdl ark:- ark:- \| \
+        post-to-weights ark:- "ark:|gzip -c >$dir/weights.JOB.gz" || exit 1;
+
+      # put all the weights in one archive.
+      for j in $(seq $nj_orig); do gunzip -c $dir/weights.$j.gz; done | gzip -c >$dir/weights.gz || exit 1;
+      rm $dir/weights.*.gz || exit 1;
+    fi
+  else
+    echo "$0: expected ali.1.gz or lat.1.gz to exist in $ali_or_decode_dir";
+    exit 1;
+  fi
+
+fi
+
+# Now work out the per-speaker iVectors.
+
+sdata=$data/split$nj;
+utils/split_data.sh $data $nj || exit 1;
+
+echo $ivector_period > $dir/ivector_period || exit 1;
+splice_opts=$(cat $srcdir/splice_opts)
+
+
+gmm_feats="ark,s,cs:apply-cmvn-online --spk2utt=ark:$sdata/JOB/spk2utt --config=$srcdir/online_cmvn.conf $srcdir/global_cmvn.stats scp:$sdata/JOB/feats.scp ark:- | splice-feats $splice_opts ark:- ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+feats="ark,s,cs:splice-feats $splice_opts scp:$sdata/JOB/feats.scp ark:- | transform-feats $srcdir/final.mat ark:- ark:- |"
+
+
+if [ $stage -le 1 ]; then
+  if [ ! -z "$ali_or_decode_dir" ]; then
+    $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
+      gmm-global-get-post --n=$num_gselect --min-post=$min_post $srcdir/final.dubm "$gmm_feats" ark:- \| \
+      weight-post ark:- "ark,s,cs:gunzip -c $dir/weights.gz|" ark:- \| \
+      ivector-extract --acoustic-weight=$posterior_scale --compute-objf-change=true \
+        --max-count=$max_count --spk2utt=ark:$sdata/JOB/spk2utt \
+      $srcdir/final.ie "$feats" ark,s,cs:- ark,t:$dir/ivectors_spk.JOB.ark || exit 1;
+  else
+    $cmd JOB=1:$nj $dir/log/extract_ivectors.JOB.log \
+      gmm-global-get-post --n=$num_gselect --min-post=$min_post $srcdir/final.dubm "$gmm_feats" ark:- \| \
+      ivector-extract --acoustic-weight=$posterior_scale --compute-objf-change=true \
+        --max-count=$max_count --spk2utt=ark:$sdata/JOB/spk2utt \
+      $srcdir/final.ie "$feats" ark,s,cs:- ark,t:$dir/ivectors_spk.JOB.ark || exit 1;
+  fi
+fi
+
+# get an utterance-level set of iVectors (just duplicate the speaker-level ones).  
+if [ $stage -le 2 ]; then
+  for j in $(seq $nj); do 
+    utils/apply_map.pl -f 2 $dir/ivectors_spk.$j.ark <$sdata/$j/utt2spk >$dir/ivectors_utt.$j.ark || exit 1;
+  done
+fi
+
+ivector_dim=$[$(head -n 1 $dir/ivectors_spk.1.ark | wc -w) - 3] || exit 1;
+echo  "$0: iVector dim is $ivector_dim"
+
+base_feat_dim=$(feat-to-dim scp:$data/feats.scp -) || exit 1;
+
+start_dim=$base_feat_dim
+end_dim=$[$base_feat_dim+$ivector_dim-1]
+
+
+if [ $stage -le 3 ]; then
+  # here, we are just using the original features in $sdata/JOB/feats.scp for
+  # their number of rows; we use the select-feats command to remove those
+  # features and retain only the iVector features.
+  $cmd JOB=1:$nj $dir/log/duplicate_feats.JOB.log \
+    append-vector-to-feats scp:$sdata/JOB/feats.scp ark:$dir/ivectors_utt.JOB.ark ark:- \| \
+    select-feats "$start_dim-$end_dim" ark:- ark:- \| \
+    subsample-feats --n=$ivector_period ark:- ark:- \| \
+    copy-feats --compress=$compress ark:- \
+    ark,scp:$dir/ivector_online.JOB.ark,$dir/ivector_online.JOB.scp || exit 1;
+fi
+
+if [ $stage -le 4 ]; then
+  echo "$0: combining iVectors across jobs"
+  for j in $(seq $nj); do cat $dir/ivector_online.$j.scp; done >$dir/ivector_online.scp || exit 1;
+fi
+
+echo "$0: done extracting (pseudo-online) iVectors"
diff --git a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
index 1a75eb1f2..6f2269f25 100755
--- a/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
+++ b/egs/wsj/s5/steps/online/nnet2/extract_ivectors_online.sh
@@ -32,9 +32,6 @@ posterior_scale=0.1 # Scale on the acoustic posteriors, intended to account for
                     # used when training the iVector extractor, but more important
                     # that this match the value used when you do real online decoding
                     # with the neural nets trained with these iVectors.
-#utts_per_spk_max=-1 # This option is no longer supported, you should use
-                    # steps/online/nnet2/copy_data_dir.sh with the --utts-per-spk-max
-                    # option to make a copy of the data dir.
 compress=true       # If true, compress the iVectors stored on disk (it's lossy
                     # compression, as used for feature matrices).
 
@@ -58,10 +55,6 @@ if [ $# != 3 ]; then
   echo "                                                   # diagonal model."
   echo "  --min-post <float;default=0.025>                 # Pruning threshold for posteriors"
   echo "  --ivector-period <int;default=10>                # How often to extract an iVector (frames)"
-  echo "  --utts-per-spk-max <int;default=-1>   # Controls splitting into 'fake speakers'."
-  echo "                                        # Set to 1 if compatibility with utterance-by-utterance"
-  echo "                                        # decoding is the only factor, and to larger if you care "
-  echo "                                        # also about adaptation over several utterances."
   exit 1;
 fi
 
@@ -71,7 +64,7 @@ dir=$3
 
 for f in $data/feats.scp $srcdir/final.ie $srcdir/final.dubm $srcdir/global_cmvn.stats $srcdir/splice_opts \
      $srcdir/online_cmvn.conf $srcdir/final.mat; do
-  [ ! -f $f ] && echo "No such file $f" && exit 1;
+  [ ! -f $f ] && echo "$0: No such file $f" && exit 1;
 done
 
 # Set various variables.
@@ -86,7 +79,7 @@ splice_opts=$(cat $srcdir/splice_opts)
 # the program ivector-extract-online2 does a bunch of stuff in memory and is
 # config-driven...  this was easier in this case because the same code is
 # involved in online decoding.  We need to create a config file for iVector
-# extration.
+# extraction.
 
 ieconf=$dir/conf/ivector_extractor.conf
 echo -n >$ieconf
@@ -104,15 +97,6 @@ echo "--posterior-scale=$posterior_scale" >>$ieconf
 echo "--max-remembered-frames=1000" >>$ieconf # the default
 
 
-ns=$(wc -l <$data/spk2utt)
-if [ "$ns" == 1 -a "$utts_per_spk_max" != 1 -a "$utts_per_spk_max" != -1 ]; then
-  echo "$0: you seem to have just one speaker in your database.  This is probably not a good idea."
-  echo "  see http://kaldi.sourceforge.net/data_prep.html (search for 'bold') for why"
-  echo "  Setting --utts-per-spk-max to 1."
-  utts_per_spk_max=1
-fi
-
-
 
 for n in $(seq $nj); do
   # This will do nothing unless the directory $dir/storage exists;
diff --git a/src/bin/ali-to-post.cc b/src/bin/ali-to-post.cc
index 7449da805..589d9d64a 100644
--- a/src/bin/ali-to-post.cc
+++ b/src/bin/ali-to-post.cc
@@ -34,7 +34,7 @@ int main(int argc, char *argv[]) {
   try {
     const char *usage =
         "Convert alignments to posteriors\n"
-        "Usage:  ali-to-post [options] alignments-rspecifier posteriors-wspecifier\n"
+        "Usage:  ali-to-post [options] <alignments-rspecifier> <posteriors-wspecifier>\n"
         "e.g.:\n"
         " ali-to-post ark:1.ali ark:1.post\n";
 
diff --git a/src/bin/copy-matrix.cc b/src/bin/copy-matrix.cc
index 0738d8dde..d7b8181c6 100644
--- a/src/bin/copy-matrix.cc
+++ b/src/bin/copy-matrix.cc
@@ -38,10 +38,14 @@ int main(int argc, char *argv[]) {
         "See also: copy-feats\n";
     
     bool binary = true;
+    BaseFloat scale = 1.0;
     ParseOptions po(usage);
 
-    po.Register("binary", &binary, "Write in binary mode (only relevant if output is a wxfilename)");
-
+    po.Register("binary", &binary,
+                "Write in binary mode (only relevant if output is a wxfilename)");
+    po.Register("scale", &scale,
+                "This option can be used to scale the matrices being copied.");
+    
     po.Read(argc, argv);
 
     if (po.NumArgs() != 2) {
@@ -68,6 +72,7 @@ int main(int argc, char *argv[]) {
     if (!in_is_rspecifier) {
       Matrix<BaseFloat> mat;
       ReadKaldiObject(matrix_in_fn, &mat);
+      if (scale != 1.0) mat.Scale(scale);
       Output ko(matrix_out_fn, binary);
       mat.Write(ko.Stream(), binary);
       KALDI_LOG << "Copied matrix to " << matrix_out_fn;
@@ -76,8 +81,15 @@ int main(int argc, char *argv[]) {
       int num_done = 0;
       BaseFloatMatrixWriter writer(matrix_out_fn);
       SequentialBaseFloatMatrixReader reader(matrix_in_fn);
-      for (; !reader.Done(); reader.Next(), num_done++)
-        writer.Write(reader.Key(), reader.Value());
+      for (; !reader.Done(); reader.Next(), num_done++) {
+        if (scale != 1.0) {
+          Matrix<BaseFloat> mat(reader.Value());
+          mat.Scale(scale);
+          writer.Write(reader.Key(), mat);
+        } else {
+          writer.Write(reader.Key(), reader.Value());
+        }
+      }
       KALDI_LOG << "Copied " << num_done << " matrices.";
       return (num_done != 0 ? 0 : 1);
     }
diff --git a/src/featbin/append-feats.cc b/src/featbin/append-feats.cc
index 9ec34941a..cf373d7a3 100644
--- a/src/featbin/append-feats.cc
+++ b/src/featbin/append-feats.cc
@@ -50,9 +50,9 @@ int main(int argc, char *argv[]) {
       exit(1);
     }
 
-    std::string rspecifier1 = po.GetArg(1);
-    std::string rspecifier2 = po.GetArg(2);
-    std::string wspecifier = po.GetArg(3);
+    std::string rspecifier1 = po.GetArg(1),
+        rspecifier2 = po.GetArg(2),
+        wspecifier = po.GetArg(3);
 
     BaseFloatMatrixWriter feats_writer(wspecifier);
     SequentialBaseFloatMatrixReader feats_reader1(rspecifier1);
diff --git a/src/featbin/paste-feats.cc b/src/featbin/paste-feats.cc
index 50b16de04..5eab09d96 100644
--- a/src/featbin/paste-feats.cc
+++ b/src/featbin/paste-feats.cc
@@ -78,8 +78,8 @@ int main(int argc, char *argv[]) {
         "Usage: paste-feats <in-rspecifier1> <in-rspecifier2> [<in-rspecifier3> ...] <out-wspecifier>\n"
         " or: paste-feats <in-rxfilename1> <in-rxfilename2> [<in-rxfilename3> ...] <out-wxfilename>\n"
         " e.g. paste-feats ark:feats1.ark \"ark:select-feats 0-3 ark:feats2.ark ark:- |\" ark:feats-out.ark\n"
-        "  or: paste-feats foo.mat bar.mat baz.mat\n";
-    
+        "  or: paste-feats foo.mat bar.mat baz.mat\n"
+        "See also: copy-feats, copy-matrix, append-vector-to-feats, concat-feats\n";
     
     ParseOptions po(usage);
 
diff --git a/src/ivector/ivector-extractor-test.cc b/src/ivector/ivector-extractor-test.cc
index d79abbaf7..3b804da2e 100644
--- a/src/ivector/ivector-extractor-test.cc
+++ b/src/ivector/ivector-extractor-test.cc
@@ -107,8 +107,9 @@ void TestIvectorExtraction(const IvectorExtractor &extractor,
   utt_stats.AccStats(feats, post);
 
   OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(),
-                                            extractor.PriorOffset());
-
+                                            extractor.PriorOffset(),
+                                            0.0);
+  
   for (int32 t = 0; t < num_frames; t++) {
     online_stats.AccStats(extractor, feats.Row(t), post[t]);
   }
diff --git a/src/ivector/ivector-extractor.cc b/src/ivector/ivector-extractor.cc
index b1f4624ec..7c26e73a4 100644
--- a/src/ivector/ivector-extractor.cc
+++ b/src/ivector/ivector-extractor.cc
@@ -259,13 +259,13 @@ void IvectorExtractor::GetIvectorDistMean(
   for (int32 i = 0; i < I; i++) {
     double gamma = utt_stats.gamma_(i);
     if (gamma != 0.0) {
-      Vector<double> x(utt_stats.X_.Row(i)); // == \gamma(i) \m_i
+      SubVector<double> x(utt_stats.X_, i); // == \gamma(i) \m_i
       // next line: a += \gamma_i \M_i^T \Sigma_i^{-1} \m_i
       linear->AddMatVec(1.0, Sigma_inv_M_[i], kTrans, x, 1.0); 
     }
   }
   SubVector<double> q_vec(quadratic->Data(), IvectorDim()*(IvectorDim()+1)/2);
-  q_vec.AddMatVec(1.0, U_, kTrans, Vector<double>(utt_stats.gamma_), 1.0);
+  q_vec.AddMatVec(1.0, U_, kTrans, utt_stats.gamma_, 1.0);
 }
 
 void IvectorExtractor::GetIvectorDistPrior(
@@ -543,24 +543,55 @@ void OnlineIvectorEstimationStats::AccStats(
     quadratic_term_vec.AddVec(weight, U_g);
     tot_weight += weight;
   }
+  if (max_count_ != 0.0) {
+    // see comments in header RE max_count for explanation.
+    double old_num_frames = num_frames_,
+        new_num_frames = num_frames_ + tot_weight;
+    double old_prior_scale = std::max(old_num_frames, max_count_) / max_count_,
+        new_prior_scale = std::max(new_num_frames, max_count_) / max_count_;
+    // The prior_scales are the inverses of the scales we would put on the stats
+    // if we were implementing this by scaling the stats.  Instead we
+    // scale the prior term.
+    double prior_scale_change = new_prior_scale - old_prior_scale;
+    if (prior_scale_change != 0.0) {
+      linear_term_(0) += prior_offset_ * prior_scale_change;
+      quadratic_term_.AddToDiag(prior_scale_change);
+    }
+  }
+  
   num_frames_ += tot_weight;
 }
 
 void OnlineIvectorEstimationStats::Scale(double scale) {
   KALDI_ASSERT(scale >= 0.0 && scale <= 1.0);
+  double old_num_frames = num_frames_;
   num_frames_ *= scale;
   quadratic_term_.Scale(scale);
   linear_term_.Scale(scale);
 
   // Scale back up the prior term, by adding in whatever we scaled down.
-  linear_term_(0) += prior_offset_ * (1.0 - scale);
-  quadratic_term_.AddToDiag(1.0 - scale);
+  if (max_count_ == 0.0) {
+    linear_term_(0) += prior_offset_ * (1.0 - scale);
+    quadratic_term_.AddToDiag(1.0 - scale);
+  } else {
+    double new_num_frames = num_frames_;
+    double old_prior_scale =
+        scale * std::max(old_num_frames, max_count_) / max_count_,
+        new_prior_scale = std::max(new_num_frames, max_count_) / max_count_;
+    // old_prior_scale is the scale the prior term currently has in the stats,
+    // i.e. the previous scale times "scale" as we just scaled the stats.
+    // new_prior_scale is the scale we want the prior term to have.
+    linear_term_(0) += prior_offset_ * (new_prior_scale - old_prior_scale);
+    quadratic_term_.AddToDiag(new_prior_scale - old_prior_scale);
+  }
 }
 
 void OnlineIvectorEstimationStats::Write(std::ostream &os, bool binary) const {
-  WriteToken(os, binary, "<OnlineIvectorEstimationStats>");  // magic string.
+  WriteToken(os, binary, "<OnlineIvectorEstimationStats>");
   WriteToken(os, binary, "<PriorOffset>");
   WriteBasicType(os, binary, prior_offset_);
+  WriteToken(os, binary, "<MaxCount>");
+  WriteBasicType(os, binary, max_count_);
   WriteToken(os, binary, "<NumFrames>");
   WriteBasicType(os, binary, num_frames_);
   WriteToken(os, binary, "<QuadraticTerm>");
@@ -571,11 +602,20 @@ void OnlineIvectorEstimationStats::Write(std::ostream &os, bool binary) const {
 }
 
 void OnlineIvectorEstimationStats::Read(std::istream &is, bool binary) {
-  ExpectToken(is, binary, "<OnlineIvectorEstimationStats>");  // magic string.
+  ExpectToken(is, binary, "<OnlineIvectorEstimationStats>");
   ExpectToken(is, binary, "<PriorOffset>");
   ReadBasicType(is, binary, &prior_offset_);
-  ExpectToken(is, binary, "<NumFrames>");
-  ReadBasicType(is, binary, &num_frames_);
+  std::string tok;
+  ReadToken(is, binary, &tok);
+  if (tok == "<MaxCount>") {
+    ReadBasicType(is, binary, &max_count_);
+    ExpectToken(is, binary, "<NumFrames>");
+    ReadBasicType(is, binary, &num_frames_);
+  } else {
+    KALDI_ASSERT(tok == "<NumFrames>");
+    max_count_ = 0.0;
+    ReadBasicType(is, binary, &num_frames_);
+  }
   ExpectToken(is, binary, "<QuadraticTerm>");
   quadratic_term_.Read(is, binary);
   ExpectToken(is, binary, "<LinearTerm>");
@@ -638,8 +678,9 @@ double OnlineIvectorEstimationStats::DefaultObjf() const {
 }
 
 OnlineIvectorEstimationStats::OnlineIvectorEstimationStats(int32 ivector_dim,
-                                                           BaseFloat prior_offset):
-    prior_offset_(prior_offset), num_frames_(0.0),
+                                                           BaseFloat prior_offset,
+                                                           BaseFloat max_count):
+    prior_offset_(prior_offset), max_count_(max_count), num_frames_(0.0),
     quadratic_term_(ivector_dim), linear_term_(ivector_dim) {
   if (ivector_dim != 0) {
     linear_term_(0) += prior_offset;
@@ -650,6 +691,7 @@ OnlineIvectorEstimationStats::OnlineIvectorEstimationStats(int32 ivector_dim,
 OnlineIvectorEstimationStats::OnlineIvectorEstimationStats(
     const OnlineIvectorEstimationStats &other):
     prior_offset_(other.prior_offset_),
+    max_count_(other.max_count_),
     num_frames_(other.num_frames_),
     quadratic_term_(other.quadratic_term_),
     linear_term_(other.linear_term_) { }
@@ -733,6 +775,12 @@ void IvectorExtractorUtteranceStats::AccStats(
   }
 }
 
+void IvectorExtractorUtteranceStats::Scale(double scale) {
+  gamma_.Scale(scale);
+  X_.Scale(scale);
+  for (size_t i = 0; i < S_.size(); i++)
+    S_[i].Scale(scale);
+}
 
 IvectorExtractorStats::IvectorExtractorStats(
     const IvectorExtractor &extractor,
@@ -1534,6 +1582,7 @@ double EstimateIvectorsOnline(
     const IvectorExtractor &extractor,
     int32 ivector_period,
     int32 num_cg_iters,
+    BaseFloat max_count,
     Matrix<BaseFloat> *ivectors) {
   
   KALDI_ASSERT(ivector_period > 0);
@@ -1544,7 +1593,8 @@ double EstimateIvectorsOnline(
   ivectors->Resize(num_ivectors, extractor.IvectorDim());
 
   OnlineIvectorEstimationStats online_stats(extractor.IvectorDim(),
-                                            extractor.PriorOffset());
+                                            extractor.PriorOffset(),
+                                            max_count);
 
   double ans = 0.0;
   
diff --git a/src/ivector/ivector-extractor.h b/src/ivector/ivector-extractor.h
index 46c5b2322..602e636ce 100644
--- a/src/ivector/ivector-extractor.h
+++ b/src/ivector/ivector-extractor.h
@@ -45,13 +45,23 @@ namespace kaldi {
 // "acoustic_weight" is not read by any class declared in this header; it has to
 // be applied by calling IvectorExtractorUtteranceStats::Scale() before
 // obtaining the iVector.
+// The same is true of max_count: it has to be applied by programs themselves
+// e.g. see ../ivectorbin/ivector-extract.cc.
 struct IvectorEstimationOptions {
   double acoustic_weight;
-  IvectorEstimationOptions(): acoustic_weight(1.0) {}
+  double max_count;
+  IvectorEstimationOptions(): acoustic_weight(1.0), max_count(0.0) {}
   void Register(OptionsItf *po) {
     po->Register("acoustic-weight", &acoustic_weight,
                  "Weight on part of auxf that involves the data (e.g. 0.2); "
                  "if this weight is small, the prior will have more effect.");
+    po->Register("max-count", &max_count,
+                 "Maximum frame count (affects prior scaling): if >0, the prior "
+                 "term will be scaled up after the frame count exceeds this "
+                 "value.  Note that this count is considered after posterior "
+                 "scaling (e.g. --acoustic-weight option, or scale argument to "
+                 "scale-post), so you would normally use a cutoff 10 times "
+                 "smaller than the corresponding number of frames.");
   }
 };
 
@@ -301,8 +311,12 @@ class IvectorExtractor {
  */
 class OnlineIvectorEstimationStats {
  public:
+  // Search above for max_count to see an explanation; if nonzero, it will
+  // put a higher weight on the prior (vs. the stats) once the count passes
+  // that value.
   OnlineIvectorEstimationStats(int32 ivector_dim,
-                               BaseFloat prior_offset);
+                               BaseFloat prior_offset,
+                               BaseFloat max_count);
 
   OnlineIvectorEstimationStats(const OnlineIvectorEstimationStats &other);
 
@@ -360,6 +374,7 @@ class OnlineIvectorEstimationStats {
   
   friend class IvectorExtractor;
   double prior_offset_;
+  double max_count_;
   double num_frames_;  // num frames (weighted, if applicable).
   SpMatrix<double> quadratic_term_;
   Vector<double> linear_term_;
@@ -368,8 +383,10 @@ class OnlineIvectorEstimationStats {
 
 // This code obtains periodically (for each "ivector_period" frames, e.g. 10
 // frames), an estimate of the iVector including all frames up to that point.
-// This emulates what you could do in an online/streaming algorithm; its use
-// is for neural network training in a way that's matched to online decoding.
+// This emulates what you could do in an online/streaming algorithm; its use is
+// for neural network training in a way that's matched to online decoding.
+// [note: I don't believe we are currently using the program,
+// ivector-extract-online.cc, that calls this function, in any of the scripts.].
 // Caution: this program outputs the raw iVectors, where the first component
 // will generally be very positive.  You probably want to subtract PriorOffset()
 // from the first element of each row of the output before writing it out.
@@ -384,6 +401,7 @@ double EstimateIvectorsOnline(
     const IvectorExtractor &extractor,
     int32 ivector_period,
     int32 num_cg_iters,
+    BaseFloat max_count,
     Matrix<BaseFloat> *ivectors);
 
 
diff --git a/src/ivectorbin/ivector-extract-online.cc b/src/ivectorbin/ivector-extract-online.cc
index 519671367..3c1795d6b 100644
--- a/src/ivectorbin/ivector-extract-online.cc
+++ b/src/ivectorbin/ivector-extract-online.cc
@@ -49,6 +49,7 @@ int main(int argc, char *argv[]) {
     ParseOptions po(usage);
     int32 num_cg_iters = 15;
     int32 ivector_period = 10;
+    BaseFloat max_count = 0.0;
     g_num_threads = 8;
 
     po.Register("num-cg-iters", &num_cg_iters,
@@ -60,6 +61,12 @@ int main(int argc, char *argv[]) {
     po.Register("num-threads", &g_num_threads,
                 "Number of threads to use for computing derived variables "
                 "of iVector extractor, at process start-up.");
+    po.Register("max-count", &max_count,
+                "If >0, when the count of posteriors exceeds max-count we will "
+                "start using a stronger prior term.  Can make iVectors from "
+                "longer than normal utterances look more 'typical'.  Interpret "
+                "this value as a number of frames multiplied by your "
+                "posterior scale (so typically 0.1 times a number of frames).");
     po.Read(argc, argv);
     
     if (po.NumArgs() != 4) {
@@ -107,7 +114,7 @@ int main(int argc, char *argv[]) {
       double objf_impr_per_frame;
       objf_impr_per_frame = EstimateIvectorsOnline(feats, posterior, extractor,
                                                    ivector_period, num_cg_iters,
-                                                   &ivectors);
+                                                   max_count, &ivectors);
       
       BaseFloat offset = extractor.PriorOffset();
       for (int32 i = 0 ; i < ivectors.NumRows(); i++)
diff --git a/src/ivectorbin/ivector-extract.cc b/src/ivectorbin/ivector-extract.cc
index 69c034e95..220677d9a 100644
--- a/src/ivectorbin/ivector-extract.cc
+++ b/src/ivectorbin/ivector-extract.cc
@@ -63,13 +63,13 @@ class IvectorExtractTask {
   }
   ~IvectorExtractTask() {
     if (tot_auxf_change_ != NULL) {
-      int32 T = posterior_.size();
+      double T = TotalPosterior(posterior_);
       *tot_auxf_change_ += auxf_change_;
       KALDI_VLOG(2) << "Auxf change for utterance " << utt_ << " was "
                     << (auxf_change_ / T) << " per frame over " << T
-                    << " frames.";
+                    << " frames (weighted)";
     }
-    // We actually write out the offset of the iVector's from the mean of the
+    // We actually write out the offset of the iVectors from the mean of the
     // prior distribution; this is the form we'll need it in for scoring.  (most
     // formulations of iVectors have zero-mean priors so this is not normally an
     // issue).
@@ -89,11 +89,124 @@ class IvectorExtractTask {
   double auxf_change_;
 };
 
+// Per-speaker mode of ivector-extract (used when --spk2utt is supplied).
+// Accumulates stats over each speaker's usable utterances, scales them down
+// if they exceed opts.max_count, and writes one iVector (minus the prior
+// offset) per speaker.  Returns 0 if any speaker was processed, else 1.
+int32 RunPerSpeaker(const std::string &ivector_extractor_rxfilename,
+                   const IvectorEstimationOptions &opts,
+                   bool compute_objf_change,
+                   const std::string &spk2utt_rspecifier,
+                   const std::string &feature_rspecifier,
+                   const std::string &posterior_rspecifier,
+                   const std::string &ivector_wspecifier) {
+  IvectorExtractor extractor;
+  ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
+  SequentialTokenVectorReader spk2utt_reader(spk2utt_rspecifier);
+  RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
+  RandomAccessPosteriorReader posterior_reader(posterior_rspecifier);
+  BaseFloatVectorWriter ivector_writer(ivector_wspecifier);
+
+  double tot_auxf_change = 0.0, tot_post = 0.0, tot_norm = 0.0;
+  int32 num_utt_done = 0, num_utt_err = 0, num_spk_done = 0, num_spk_err = 0;
 
+  for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
+    std::string spk = spk2utt_reader.Key();
+    const std::vector<std::string> &utts = spk2utt_reader.Value();
+
+    // Second-order stats are not requested for this accumulator.
+    bool need_2nd_order_stats = false;
+    IvectorExtractorUtteranceStats utt_stats(extractor.NumGauss(),
+                                             extractor.FeatDim(),
+                                             need_2nd_order_stats);
+
+    for (size_t i = 0; i < utts.size(); i++) {
+      const std::string &utt = utts[i];
+      if (!feature_reader.HasKey(utt)) {
+        KALDI_WARN << "No features present for utterance " << utt;
+        num_utt_err++;
+        continue;
+      }
+      const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
+      if (!posterior_reader.HasKey(utt)) {
+        KALDI_WARN << "No posteriors present for utterance " << utt;
+        num_utt_err++;
+        continue;
+      }
+      Posterior posterior = posterior_reader.Value(utt);
+      // Cast so that the comparison is not between signed and unsigned.
+      if (static_cast<int32>(posterior.size()) != feats.NumRows()) {
+        KALDI_WARN << "Posterior has wrong size " << posterior.size()
+                   << " vs. feats " << feats.NumRows() << " for " << utt;
+        num_utt_err++;
+        continue;
+      }
+      ScalePosterior(opts.acoustic_weight, &posterior);
+      num_utt_done++;
+      utt_stats.AccStats(feats, posterior);
+    }
+
+    if (utt_stats.NumFrames() == 0.0) {
+      KALDI_WARN << "No stats accumulated for speaker " << spk;
+      num_spk_err++;
+      continue;
+    }
+    // Limit the (weighted) count to opts.max_count by scaling the stats down.
+    if (opts.max_count > 0 && utt_stats.NumFrames() > opts.max_count) {
+      double scale = opts.max_count / utt_stats.NumFrames();
+      utt_stats.Scale(scale);
+      KALDI_LOG << "Scaling stats for speaker " << spk << " by scale "
+                << scale << " due to --max-count=" << opts.max_count;
+    }
+
+    // The first element starts at the prior offset before estimation.
+    Vector<double> ivector(extractor.IvectorDim());
+    ivector(0) = extractor.PriorOffset();
+    if (compute_objf_change) {
+      double old_auxf = extractor.GetAuxf(utt_stats, ivector);
+      extractor.GetIvectorDistribution(utt_stats, &ivector, NULL);
+      double new_auxf = extractor.GetAuxf(utt_stats, ivector);
+      double auxf_change = new_auxf - old_auxf;
+      KALDI_LOG << "Auxf change for speaker " << spk << " was "
+                << (auxf_change / utt_stats.NumFrames()) << " per frame, over "
+                << utt_stats.NumFrames() << " frames (weighted).";
+      tot_auxf_change += auxf_change;
+    } else {
+      extractor.GetIvectorDistribution(utt_stats, &ivector, NULL);
+    }
+    // Write out the offset of the iVector from the prior mean: the form needed
+    // for scoring and as a feature for neural nets.
+    ivector(0) -= extractor.PriorOffset();
+    const double norm = ivector.Norm(2.0);  // computed once, used twice.
+    KALDI_LOG << "Ivector norm for speaker " << spk << " was " << norm;
+
+    tot_norm += norm * utt_stats.NumFrames();
+    tot_post += utt_stats.NumFrames();
+    num_spk_done++;
+    Vector<BaseFloat> ivector_flt(ivector);
+    ivector_writer.Write(spk, ivector_flt);
+  }
+
+  KALDI_LOG << "Done " << num_spk_done << " speakers; " << num_spk_err
+            << " with errors.  " << num_utt_done << " utterances "
+            << "were processed, " << num_utt_err << " with errors.";
+  if (tot_post != 0.0) {
+    if (compute_objf_change) {
+      KALDI_LOG << "Overall weighted-average objective function improvement was "
+                << (tot_auxf_change / tot_post) << " over " << tot_post
+                << " frames (weighted)";
+    }
+    KALDI_LOG << "Average iVector norm (weighted by frames) was "
+              << (tot_norm / tot_post) << " over " << tot_post
+              << " frames (weighted)";
+  }
+  return (num_spk_done != 0 ? 0 : 1);
+}
 
 }
 
 
+
 int main(int argc, char *argv[]) {
   using namespace kaldi;
   typedef kaldi::int32 int32;
@@ -102,7 +215,7 @@ int main(int argc, char *argv[]) {
     const char *usage =
         "Extract iVectors for utterances, using a trained iVector extractor,\n"
         "and features and Gaussian-level posteriors\n"
-        "Usage:  ivector-extract [options] <model-in> <feature-rspecifier>"
+        "Usage:  ivector-extract [options] <model-in> <feature-rspecifier> "
         "<posteriors-rspecifier> <ivector-wspecifier>\n"
         "e.g.: \n"
         " fgmm-global-gselect-to-post 1.ubm '$feats' 'ark:gunzip -c gselect.1.gz|' ark:- | \\\n"
@@ -110,13 +223,21 @@ int main(int argc, char *argv[]) {
 
     ParseOptions po(usage);
     bool compute_objf_change = true;
-    IvectorExtractorStatsOptions stats_opts;
+    IvectorEstimationOptions opts;
+    std::string spk2utt_rspecifier;
     TaskSequencerConfig sequencer_config;
     po.Register("compute-objf-change", &compute_objf_change,
                 "If true, compute the change in objective function from using "
                 "nonzero iVector (a potentially useful diagnostic).  Combine "
                 "with --verbose=2 for per-utterance information");
-    stats_opts.Register(&po);
+    po.Register("spk2utt", &spk2utt_rspecifier, "Supply this option if you "
+                "want iVectors to be output at the per-speaker level, estimated "
+                "using stats accumulated from multiple utterances.  Note: this "
+                "is not the normal way iVectors are obtained for speaker-id. "
+                "This option will cause the program to ignore the --num-threads "
+                "option.");
+    
+    opts.Register(&po);
     sequencer_config.Register(&po);
     
     po.Read(argc, argv);
@@ -128,63 +249,87 @@ int main(int argc, char *argv[]) {
 
     std::string ivector_extractor_rxfilename = po.GetArg(1),
         feature_rspecifier = po.GetArg(2),
-        posteriors_rspecifier = po.GetArg(3),
+        posterior_rspecifier = po.GetArg(3),
         ivectors_wspecifier = po.GetArg(4);
 
-    // g_num_threads affects how ComputeDerivedVars is called when we read the
-    // extractor.
-    g_num_threads = sequencer_config.num_threads; 
-    IvectorExtractor extractor;
-    ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
 
-    double tot_auxf_change = 0.0;
-    int64 tot_t = 0;
-    int32 num_done = 0, num_err = 0;
+    if (spk2utt_rspecifier.empty()) {
+      // g_num_threads affects how ComputeDerivedVars is called when we read the
+      // extractor.
+      g_num_threads = sequencer_config.num_threads; 
+      IvectorExtractor extractor;
+      ReadKaldiObject(ivector_extractor_rxfilename, &extractor);
+
+      double tot_auxf_change = 0.0, tot_t = 0.0;
+      int32 num_done = 0, num_err = 0;
     
-    SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
-    RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
-    BaseFloatVectorWriter ivector_writer(ivectors_wspecifier);
+      SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+      RandomAccessPosteriorReader posterior_reader(posterior_rspecifier);
+      BaseFloatVectorWriter ivector_writer(ivectors_wspecifier);
+    
+      {
+        TaskSequencer<IvectorExtractTask> sequencer(sequencer_config);
+        for (; !feature_reader.Done(); feature_reader.Next()) {
+          std::string utt = feature_reader.Key();
+          if (!posterior_reader.HasKey(utt)) {
+            KALDI_WARN << "No posteriors for utterance " << utt;
+            num_err++;
+            continue;
+          }
+          const Matrix<BaseFloat> &mat = feature_reader.Value();
+          Posterior posterior = posterior_reader.Value(utt);
+          
+          if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
+            KALDI_WARN << "Size mismatch between posterior " << posterior.size()
+                       << " and features " << mat.NumRows() << " for utterance "
+                       << utt;
+            num_err++;
+            continue;
+          }
 
-    {
-      TaskSequencer<IvectorExtractTask> sequencer(sequencer_config);
-      for (; !feature_reader.Done(); feature_reader.Next()) {
-        std::string key = feature_reader.Key();
-        if (!posteriors_reader.HasKey(key)) {
-          KALDI_WARN << "No posteriors for utterance " << key;
-          num_err++;
-          continue;
+          double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
+
+          double this_t = opts.acoustic_weight * TotalPosterior(posterior),
+              max_count_scale = 1.0;
+          if (opts.max_count > 0 && this_t > opts.max_count) {
+            max_count_scale = opts.max_count / this_t;
+            KALDI_LOG << "Scaling stats for utterance " << utt << " by scale "
+                      << max_count_scale << " due to --max-count="
+                      << opts.max_count;
+            this_t = opts.max_count;
+          }
+          ScalePosterior(opts.acoustic_weight * max_count_scale,
+                         &posterior);
+          // note: now, this_t == sum of posteriors.
+          
+          sequencer.Run(new IvectorExtractTask(extractor, utt, mat, posterior,
+                                               &ivector_writer, auxf_ptr));
+          
+          tot_t += this_t;
+          num_done++;
         }
-        const Matrix<BaseFloat> &mat = feature_reader.Value();
-        const Posterior &posterior = posteriors_reader.Value(key);
-
-        if (static_cast<int32>(posterior.size()) != mat.NumRows()) {
-          KALDI_WARN << "Size mismatch between posterior " << posterior.size()
-                     << " and features " << mat.NumRows() << " for utterance "
-                     << key;
-          num_err++;
-          continue;
-        }
-
-        double *auxf_ptr = (compute_objf_change ? &tot_auxf_change : NULL );
-
-        sequencer.Run(new IvectorExtractTask(extractor, key, mat, posterior,
-                                             &ivector_writer, auxf_ptr));
-                      
-        tot_t += posterior.size();
-        num_done++;
+        // Destructor of "sequencer" will wait for any remaining tasks.
       }
-      // Destructor of "sequencer" will wait for any remaining tasks.
+
+      KALDI_LOG << "Done " << num_done << " files, " << num_err
+                << " with errors.  Total (weighted) frames " << tot_t;
+      if (compute_objf_change)
+        KALDI_LOG << "Overall average objective-function change from estimating "
+                  << "ivector was " << (tot_auxf_change / tot_t) << " per frame "
+                  << " over " << tot_t << " (weighted) frames.";
+
+      return (num_done != 0 ? 0 : 1);
+    } else {
+      KALDI_ASSERT(sequencer_config.num_threads == 1 &&
+                   "--spk2utt option is incompatible with --num-threads option");
+      return RunPerSpeaker(ivector_extractor_rxfilename,
+                           opts,
+                           compute_objf_change,
+                           spk2utt_rspecifier,
+                           feature_rspecifier,
+                           posterior_rspecifier,
+                           ivectors_wspecifier);
     }
-
-    KALDI_LOG << "Done " << num_done << " files, " << num_err
-              << " with errors.  Total frames " << tot_t;
-
-    if (compute_objf_change)
-      KALDI_LOG << "Overall average objective-function change from estimating "
-                << "ivector was " << (tot_auxf_change / tot_t) << " per frame "
-                << " over " << tot_t << " frames.";
-
-    return (num_done != 0 ? 0 : 1);
   } catch(const std::exception &e) {
     std::cerr << e.what();
     return -1;
diff --git a/src/matrix/kaldi-vector.cc b/src/matrix/kaldi-vector.cc
index 4b101c7ff..d61e4e598 100644
--- a/src/matrix/kaldi-vector.cc
+++ b/src/matrix/kaldi-vector.cc
@@ -1010,7 +1010,7 @@ void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal> &v)
   MatrixIndexT dim = dim_;
   if (alpha != 1.0)
     for (MatrixIndexT i = 0; i < dim; i++)
-      data[i] += alpha*other_data[i];
+      data[i] += alpha * other_data[i];
   else
     for (MatrixIndexT i = 0; i < dim; i++)
       data[i] += other_data[i];
diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc
index 2f5bce7e8..02c6126b9 100644
--- a/src/online2/online-ivector-feature.cc
+++ b/src/online2/online-ivector-feature.cc
@@ -32,6 +32,7 @@ void OnlineIvectorExtractionInfo::Init(
   num_gselect = config.num_gselect;
   min_post = config.min_post;
   posterior_scale = config.posterior_scale;
+  max_count = config.max_count;
   use_most_recent_ivector = config.use_most_recent_ivector;
   greedy_ivector_extractor = config.greedy_ivector_extractor;
   if (greedy_ivector_extractor && !use_most_recent_ivector) {
@@ -161,7 +162,7 @@ void OnlineIvectorFeature::UpdateStatsUntilFrame(int32 frame) {
   
   Vector<BaseFloat> feat(feat_dim),  // features given to iVector extractor
       log_likes(info_.diag_ubm.NumGauss());
-
+  
   for (; num_frames_stats_ <= frame; num_frames_stats_++) {
     int32 t = num_frames_stats_;  // Frame whose stats we want to get.
     lda_normalized_->GetFrame(t, &feat);
@@ -262,8 +263,10 @@ OnlineIvectorFeature::OnlineIvectorFeature(
     const OnlineIvectorExtractionInfo &info,
     OnlineFeatureInterface *base_feature):
     info_(info), base_(base_feature),
-    ivector_stats_(info_.extractor.IvectorDim(), info_.extractor.PriorOffset()),
-    num_frames_stats_(0) {
+    ivector_stats_(info_.extractor.IvectorDim(),
+                   info_.extractor.PriorOffset(),
+                   info_.max_count),
+    num_frames_stats_(0), tot_ubm_loglike_(0.0) {
   info.Check();
   KALDI_ASSERT(base_feature != NULL);
   splice_ = new OnlineSpliceFrames(info_.splice_opts, base_);
diff --git a/src/online2/online-ivector-feature.h b/src/online2/online-ivector-feature.h
index 06981f91d..209519d88 100644
--- a/src/online2/online-ivector-feature.h
+++ b/src/online2/online-ivector-feature.h
@@ -70,6 +70,12 @@ struct OnlineIvectorExtractionConfig {
   BaseFloat posterior_scale;  // Scale on posteriors used for iVector
                               // extraction; can be interpreted as the inverse
                               // of a scale on the log-prior.
+  BaseFloat max_count;  // Maximum stats count we allow before we start scaling
+                        // down stats (if nonzero); this prevents us getting
+                        // atypical-looking iVectors for very long utterances.
+                        // Interpret this as a number of frames times
+                        // posterior_scale, typically 1/10 of a frame count.
+  
 
   // If use_most_recent_ivector is true, we always return the most recent
   // available iVector rather than the one for the current frame.  This means
@@ -91,6 +97,7 @@ struct OnlineIvectorExtractionConfig {
 
   OnlineIvectorExtractionConfig(): ivector_period(10), num_gselect(5),
                                    min_post(0.025), posterior_scale(0.1),
+                                   max_count(0.0),
                                    use_most_recent_ivector(true),
                                    greedy_ivector_extractor(false),
                                    max_remembered_frames(1000) { }
@@ -122,6 +129,11 @@ struct OnlineIvectorExtractionConfig {
                  "iVector extraction");
     po->Register("posterior-scale", &posterior_scale, "Scale for posteriors in "
                  "iVector extraction (may be viewed as inverse of prior scale)");
+    po->Register("max-count", &max_count, "Maximum data count we allow before "
+                 "we start scaling the stats down (if nonzero)... helps to make "
+                 "iVectors from long utterances look more typical.  Interpret "
+                 "as a frame-count times --posterior-scale, typically 1/10 of "
+                 "a number of frames.  Suggest 100.");
     po->Register("use-most-recent-ivector", &use_most_recent_ivector, "If true, "
                  "always use most recent available iVector, rather than the "
                  "one for the designated frame.");
@@ -156,6 +168,7 @@ struct OnlineIvectorExtractionInfo {
   int32 num_gselect;
   BaseFloat min_post;
   BaseFloat posterior_scale;
+  BaseFloat max_count;
   bool use_most_recent_ivector;
   bool greedy_ivector_extractor;
   BaseFloat max_remembered_frames;
@@ -191,7 +204,8 @@ struct OnlineIvectorExtractorAdaptationState {
   OnlineIvectorExtractorAdaptationState(const OnlineIvectorExtractionInfo &info):
       cmvn_state(info.global_cmvn_stats),
       ivector_stats(info.extractor.IvectorDim(),
-                    info.extractor.PriorOffset()) { }
+                    info.extractor.PriorOffset(),
+                    info.max_count) { }
   
   /// Copy constructor
   OnlineIvectorExtractorAdaptationState(
diff --git a/src/online2bin/apply-cmvn-online.cc b/src/online2bin/apply-cmvn-online.cc
index 7909e0290..2745df5d4 100644
--- a/src/online2bin/apply-cmvn-online.cc
+++ b/src/online2bin/apply-cmvn-online.cc
@@ -31,7 +31,7 @@ int main(int argc, char *argv[]) {
         "Apply online cepstral mean (and possibly variance) computation online,\n"
         "using the same code as used for online decoding in the 'new' setup in\n"
         "online2/ and online2bin/.  If the --spk2utt option is used, it uses\n"
-        "prior utterances from the same speaker to back off two at the utterance\n"
+        "prior utterances from the same speaker to back off to at the utterance\n"
         "beginning.  See also apply-cmvn-sliding.\n"
         "\n"
         "Usage: apply-cmvn-online [options] <global-cmvn-stats> <feature-rspecifier> "
diff --git a/src/online2bin/ivector-extract-online2.cc b/src/online2bin/ivector-extract-online2.cc
index bc616ddb9..3251d93b5 100644
--- a/src/online2bin/ivector-extract-online2.cc
+++ b/src/online2bin/ivector-extract-online2.cc
@@ -84,7 +84,7 @@ int main(int argc, char *argv[]) {
     RandomAccessBaseFloatMatrixReader feature_reader(feature_rspecifier);
     BaseFloatMatrixWriter ivector_writer(ivectors_wspecifier);
     
-
+    
     for (; !spk2utt_reader.Done(); spk2utt_reader.Next()) {
       std::string spk = spk2utt_reader.Key();
       const std::vector<std::string> &uttlist = spk2utt_reader.Value();
@@ -98,12 +98,12 @@ int main(int argc, char *argv[]) {
           continue;
         }
         const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
-
+        
         OnlineMatrixFeature matrix_feature(feats);
 
         OnlineIvectorFeature ivector_feature(ivector_info,
                                              &matrix_feature);
-
+        
         ivector_feature.SetAdaptationState(adaptation_state);
 
         int32 T = feats.NumRows(),
@@ -130,9 +130,9 @@ int main(int argc, char *argv[]) {
                       << ", UBM loglike/frame was "
                       << ivector_feature.UbmLogLikePerFrame()
                       << ", iVector length (at utterance end) was "
-                      << ivectors.Row(n-1).Norm(2.0)
-                      << ", objf improvement from iVector estimation was "
-                      << tot_objf_impr;
+                      << ivectors.Row(num_ivectors-1).Norm(2.0)
+                      << ", objf improvement/frame from iVector estimation was "
+                      << ivector_feature.ObjfImprPerFrame();
 
         ivector_feature.GetAdaptationState(&adaptation_state);
         ivector_writer.Write(utt, ivectors);