trunk: Added FFV installation script (from Bagher) and modified sacc one; improvement to sgmm2 efficiency when spk-vec dim is large; additional documentation on I/O; minor fixes.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@2736 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2013-07-27 16:42:20 +00:00 · 2013-07-27 16:42:20 +00:00 · 8b1bfb1d8d
--- a/egs/babel/s5/local/make_pitch.sh
+++ b/egs/babel/s5/local/make_pitch.sh
@ -52,7 +52,17 @@ scp=$data/wav.scp

 [ ! -s $KALDI_ROOT ] && KALDI_ROOT=../../.. 

-sacc_dir=$KALDI_ROOT/tools/sacc/SAcC_GLNXA64/
+( # Backward compatibility: expose an old tools/sacc install at its new location.
+ cd $KALDI_ROOT/tools
+ if [ -d sacc ] && [ ! -d pitch_trackers/sacc ]; then
+   echo "Linking sacc directory to new location."
+   mkdir -p pitch_trackers
+   cd pitch_trackers
+   ln -s ../sacc .  # creates pitch_trackers/sacc -> ../sacc (i.e. tools/sacc)
+ fi
+)
+
+sacc_dir=$KALDI_ROOT/tools/pitch_trackers/sacc/SAcC_GLNXA64/
 # make $sacc_dir an absolute pathname.
 sacc_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $sacc_dir ${PWD}`

--- a/egs/wsj/s5/utils/filter_scp.pl
+++ b/egs/wsj/s5/utils/filter_scp.pl
@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/perl
 # Copyright 2010-2012 Microsoft Corporation
 #                     Johns Hopkins University (author: Daniel Povey)

--- a/src/doc/io.dox
+++ b/src/doc/io.dox
@ -2,6 +2,7 @@


 // Copyright 2009-2011 Microsoft Corporation
+//                2013 Johns Hopkins University (author: Daniel Povey)

 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -20,7 +21,9 @@
 namespace kaldi {
 /** \page io Kaldi I/O mechanisms

-  This page gives an overview of input-output mechanisms in Kaldi.
+ This page gives an overview of input-output mechanisms in Kaldi.
+ This section of the documentation is oriented towards the code-level mechanisms
+ for I/O; for documentation more oriented towards the command-line, see \ref io_tut.

 \section io_sec_style The input/output style of Kaldi classes

@ -234,8 +237,9 @@ namespace kaldi {
  For a list of classes and types that relate to Tables, see \ref table_group.

  A Table can be accessed in three ways: using a TableWriter, a
-   SequentialTableReader, and a RandomAccessTableReader.  The names are
-  self-explanatory.  These are all templates; they are templated not on the
+   SequentialTableReader, and a RandomAccessTableReader (there is also
+  RandomAccessTableReaderMapped, which is a special case we will introduce later).  
+  These are all templates; they are templated not on the
  object in the table, but on a Holder type (see below, \ref io_sec_holders) that
  tells the Table code how to read and write that type of object.  To open
  a Table type, you must provide a string called a wspecifier or rspecifier (see below, \ref
@ -564,6 +568,23 @@ namespace kaldi {
    If there is a gap in the second archive, it can use the fact that the second archive is sorted
    to avoid searching till the end of the file (this is the job of the "s" option).

+ \subsection io_sec_mapped Mapped table reading (RandomAccessTableReaderMapped)
+
+  In order to condense a particular code pattern that was recurring in many programs, we have introduced the template type 
+ RandomAccessTableReaderMapped.  Unlike RandomAccessTableReader, this takes two initializer arguments, for instance:
+\verbatim
+   std::string rspecifier, utt2spk_map_rspecifier; // get these from somewhere.
+   RandomAccessTableReaderMapped<BaseFloatMatrixHolder> transform_reader(rspecifier,
+                                                                         utt2spk_map_rspecifier);
+\endverbatim
+  If utt2spk_map_rspecifier is the empty string, this will behave just like a
+  regular RandomAccessTableReader.  If it is nonempty, e.g. ark:data/train/utt2spk,
+  it will read an utterance-to-speaker map from that location and whenever a particular
+  string e.g. utt1 is queried, it will use that map to convert the utterance-id
+  to a speaker-id (e.g. spk1) and use that as the key to query the table being
+  read from rspecifier.  The utterance-to-speaker map is also an archive
+  because it happens that the Table code is the easiest way to read in such maps.
+

 */

--- a/src/doc/io_tut.dox
+++ b/src/doc/io_tut.dox
@ -0,0 +1,192 @@
+// doc/io_tut.dox
+
+// Copyright 2013  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+namespace kaldi {
+/** \page io_tut  Kaldi I/O from a command-line perspective.
+
+ This page describes the I/O mechanisms in Kaldi from the perspective of
+ a user of the command line tools.  See \ref io for a more code-level overview.
+
+ \section io_tut_overview Overview
+
+ \subsection io_tut_nontable Non-table I/O
+
+ We first describe "non-table" I/O.  This refers to files or streams containing just
+ one or two objects (e.g. acoustic model files; transformation matrices), rather than a
+ collection of objects indexed by strings.  
+
+   - Kaldi file formats are binary by default but programs will output non-binary
+     if you supply the flag --binary=false. 
+   - Many objects have corresponding "copy" programs, e.g. copy-matrix or gmm-copy,
+      which can be used with the --binary=false flag to convert to text form, e.g.
+     ``copy-matrix --binary=false foo.mat -''.
+   - There is typically a one-to-one correspondence between a file on disk and a C++ object
+     in memory, e.g. a matrix of floats, although some files contain more than one object
+     (Case in point: for acoustic model files, typically a TransitionModel object and then
+      an acoustic model).
+   - Kaldi programs typically know which type of object they are expecting to read, rather
+     than working it out from the stream.
+   - Similarly to perl, a filename can be replaced with - (for standard input/output) or
+     a string such as ``|gzip -c >foo.gz'' or ``gunzip -c foo.gz|''
+   - For reading files, we also support things like foo:1045, meaning character-offset
+     1045 within file foo.
+   - In order to refer to the above concept, we generally use the special terms 'rxfilename' for
+     a string describing a stream to be read (i.e. a file, stream or the standard input),
+     and 'wxfilename' for an output stream.  See \ref io_sec_xfilename.
+
+ To illustrate the concepts above, make sure $KALDI_ROOT/src/bin is on your path,
+  where $KALDI_ROOT is the top of the repository, and type the following:
+\verbatim
+  echo '[ 0 1 ]' | copy-matrix - -
+\endverbatim
+It will print out a log message and some binary data corresponding to that matrix.
+\verbatim
+  echo '[ 0 1 ]' | copy-matrix --binary=false - -
+\endverbatim
+The output will look like this:
+\verbatim
+# copy-matrix --binary=false - - 
+copy-matrix --binary=false - - 
+ [
+  0 1 ]
+LOG (copy-matrix:main():copy-matrix.cc:68) Copied matrix to -
+\endverbatim
+Although it looks like the matrix and log messages are mixed up, the log messages
+are on the standard error and would not be passed into a pipe; to avoid seeing
+the log messages you could redirect stderr to /dev/null by adding 2>/dev/null to the
+command line.
+
+Kaldi programs may be connected using pipes or by using the
+stream-as-a-file mechanism of Kaldi I/O.  Here is a pipe example:
+\verbatim
+ echo '[ 0 1 ]' | copy-matrix - - | copy-matrix --binary=false - -
+\endverbatim
+This outputs the matrix in text form (the first copy-matrix command converts
+to binary form and the second to text form, which is of course pointless).
+You could accomplish the same thing in a more convoluted way by doing this:
+\verbatim
+  copy-matrix 'echo [ 0 1 ]|' '|copy-matrix --binary=false - -'
+\endverbatim
+There is no reason to do this here, but it can sometimes be useful when
+programs have multiple inputs or outputs so the stdin or stdout is
+already being used.  It is particularly useful with tables (see next section).
+
+
+ \subsection io_tut_table Table I/O
+ 
+ Kaldi has special I/O mechanisms for dealing with collections of objects
+ indexed by strings.  Examples of this are feature matrices indexed by 
+ utterance-ids, or speaker-adaptation transformation matrices indexed
+ by speaker-ids.  The strings that index the collection must be nonempty
+ and whitespace free.   See \ref io_sec_tables for a more in-depth
+ discussion.
+
+ A Table may exist in two forms: an "archive" or a "script file".  The
+ difference is that the archive actually contains the data, while
+ a script file points to the location of the data.
+
+ Programs that read from Tables expect a string we call an "rspecifier" that
+ says how to read the indexed data, and programs that write to Tables expect
+ a string we call a "wspecifier" to write it.  These are strings that specify
+ whether to expect a script file or an archive, and the file location, along
+ with various options.  Common types of
+ rspecifiers include ``ark:-'', meaning read the data as an archive
+ from the standard input, or ``scp:foo.scp'', meaning the script file
+ foo.scp says where to read the data from.  Points to bear in
+ mind are:
+
+   - The part after the colon is interpreted as a wxfilename or rxfilename (as
+     in \ref io_tut_nontable), meaning that things like pipes and standard 
+     input/output are supported.
+   - A Table always contains just one type of object (e.g., a matrix of floats).
+   - You may see options on rspecifiers and wspecifiers, principally:
+      - In rspecifiers, ark,s,cs:- means that when we read (from the standard input in this case)
+        we expect the keys to be in sorted order (,s), and we assert that the program will
+        access them in sorted order (,cs); the program will crash if these conditions
+        do not hold.
+        This allows Kaldi to emulate random access without using up a lot of memory. 
+      - For data that isn't too large and for which it's inconvenient to ensure sorted order
+        (e.g. transforms for speaker adaptation), there is little harm in omitting the ,s,cs.
+      - Typically programs that take multiple rspecifiers will iterate over the objects in the
+        first one (sequential access) and do random access on the later ones, so ,s,cs is
+        generally not needed for the first rspecifier.
+      - In scp,p:foo.scp, the ,p means we should not crash if some of the
+        referenced files do not exist (for archives, ,p will prevent a crash if
+        the archive is corrupted or truncated).
+      - For writing, the option ,t means text mode, e.g. in ark,t:-.
+        The --binary command-line option has no effect for archives.
+   - The script-file format is, on each line, ``<key> <rxfilename|wxfilename>'', e.g.
+      ``utt1 /foo/bar/utt1.mat''.  It is OK for the rxfilename or wxfilename to contain
+      spaces, e.g.: ``utt1 gunzip -c /foo/bar/utt1.mat.gz|''.
+   - The archive format is: <key1> <object1> <newline> <key2> <object2> <newline> ...
+   - Archives may be concatenated and they will still be valid archives, but be careful about
+     the order of concatenation, e.g. avoid ``cat a/b/*.ark''
+     if you need the sorted order.
+   - Although not often used, script files may be used for output, e.g. if we write to
+     the wspecifier scp:foo.scp, and the program tries to write to key utt1,
+     it looks for a line like utt1 some_file.mat in foo.scp.  It will crash
+     if there is no such line.
+   - It is possible to write to both an archive and script at the same time,
+     e.g. ark,scp:foo.ark,foo.scp.  The script file will be written with offsets
+     like utt1 foo.ark:1016. This is useful when data is to be accessed in random order
+     or in parts, but you don't want to produce lots of small files.
+   - It is possible to trick the archive mechanism into operating on single files.  For instance,
+\verbatim
+     echo '[ 0 1 ]' | copy-matrix 'scp:echo foo -|' 'scp,t:echo foo -|'
+\endverbatim
+     This deserves a little explanation.  Firstly, the rspecifier ``scp:echo foo -|'' is equivalent
+     to scp:bar.scp if the file bar.scp contained just the line ``foo -''.  This
+     tells it to read the object indexed by "foo" from the standard input.  Similarly, for
+     the wspecifier ``scp,t:echo foo -|'', it writes the data for ``foo'' to the standard
+     output.  This trick should not be overused.  In this particular case, it is unnecessary
+     because we have made the copy-matrix program support regular files (rxfilenames|wxfilenames),
+     as well as tables so you could have written just ``copy-matrix - -''.  If you have to use
+     this trick too much, it's better to modify the program concerned.
+   - In certain cases the archive-reading code allows for limited type conversion, e.g.
+     between float and double for matrices, or Lattice and CompactLattice for lattices.
+
+  \subsection io_tut_maps Utterance-to-speaker and speaker-to-utterance maps.
+
+  Many Kaldi programs take utterance-to-speaker and speaker-to-utterances maps-- files 
+  called ``utt2spk'' or ``spk2utt''.  These are generally specified by command-line options
+   --utt2spk and --spk2utt.  The utt2spk map has the format
+\verbatim
+utt1 spk_of_utt1
+utt2 spk_of_utt2
+...
+\endverbatim
+   and the spk2utt map has the format
+\verbatim
+spk1 utt1_of_spk1 utt2_of_spk1 utt3_of_spk1
+spk2 utt1_of_spk2 utt2_of_spk2
+...
+\endverbatim
+ These files are used for speaker adaptation, e.g. for finding which speaker corresponds
+ to an utterance, or to iterate over speakers.
+ For reasons that relate mostly to the way the Kaldi example scripts are set up
+ and the way we split data up into multiple pieces, it's important to ensure
+ that the speakers in the utterance-to-speaker map are in sorted order (see \ref data_prep).
+ Anyway, these files are actually treated as archives, and for this reason
+ you will see command-line options like --utt2spk=ark:data/train/utt2spk.  
+ At the code level, the utt2spk file is treated as a table containing a string, and the spk2utt
+ file is treated as a table containing a list of strings.
+
+ 
+*/
+
+}
--- a/src/doc/mainpage.dox
+++ b/src/doc/mainpage.dox
@ -50,6 +50,7 @@
   - \ref matrix 
   - \ref matrixwrap 
   - \ref io 
+   - \ref io_tut 
   - \ref error 
   - \ref parse_options 
   - \ref util
--- a/src/doc/tree_externals.dox
+++ b/src/doc/tree_externals.dox
@ -383,7 +383,7 @@ TE -1 3 ( CE 12 CE 13 CE 14 )

 \section tree_ilabel The ilabel_info object

-The final graph (HCLG in the standard notation, see \ref graph) has symbols
+The CLG graph (see \ref graph) has symbols
 on its input side that represent context-dependent phones (as well as
 disambiguation symbols and possibly epsilon symbols).  In the graph, as always,
 these are represented by integer labels.  We use an object that, in code
@ -402,7 +402,7 @@ input label the corresponding phonetic context window (see above,
 have
 \code 
 // not valid C++
- ilabel_info[1500] == { 4, 1500, 12 };
+ ilabel_info[1500] == { 4, 30, 12 };
 \endcode
 In the monophone case, we would have things like:
 \code
--- a/src/sgmm2/am-sgmm2.cc
+++ b/src/sgmm2/am-sgmm2.cc
@ -385,17 +385,12 @@ void AmSgmm2::InitializeFromFullGmm(const FullGmm &full_gmm,
  full_ubm_.CopyFromFullGmm(full_gmm);
  diag_ubm_.CopyFromFullGmm(full_gmm);
  if (phn_subspace_dim < 1 || phn_subspace_dim > full_gmm.Dim() + 1) {
-    KALDI_WARN << "Initial phone-subspace dimension must be in [1, "
-               << full_gmm.Dim() + 1 << "]. Changing from " << phn_subspace_dim
-               << " to " << full_gmm.Dim() + 1;
+    KALDI_WARN << "Initial phone-subspace dimension must be >= 1, value is "
+               << phn_subspace_dim << "; setting to " << full_gmm.Dim() + 1;
    phn_subspace_dim = full_gmm.Dim() + 1;
  }
-  if (spk_subspace_dim < 0 || spk_subspace_dim > full_gmm.Dim()) {
-    KALDI_WARN << "Initial spk-subspace dimension must be in [1, "
-               << full_gmm.Dim() << "]. Changing from " << spk_subspace_dim
-               << " to " << full_gmm.Dim();
-    spk_subspace_dim = full_gmm.Dim();
-  }
+  KALDI_ASSERT(spk_subspace_dim >= 0);
+  
  w_.Resize(0, 0);
  N_.clear();
  c_.clear();
@ -1118,7 +1113,7 @@ void AmSgmm2::ComputeH(std::vector< SpMatrix<double> > *H_i) const;

 // Initializes the matrices M_{i} and w_i
 void AmSgmm2::InitializeMw(int32 phn_subspace_dim,
-                          const Matrix<BaseFloat> &norm_xform) {
+                           const Matrix<BaseFloat> &norm_xform) {
  int32 ddim = full_ubm_.Dim();
  KALDI_ASSERT(phn_subspace_dim <= ddim + 1);
  KALDI_ASSERT(phn_subspace_dim <= norm_xform.NumCols() + 1);
@ -1134,8 +1129,14 @@ void AmSgmm2::InitializeMw(int32 phn_subspace_dim,
    thisM.Resize(ddim, phn_subspace_dim);
    // Eq. (27): M_{i} = [ \bar{\mu}_{i} (J)_{1:D, 1:(S-1)}]
    thisM.CopyColFromVec(mean, 0);
-    thisM.Range(0, ddim, 1, phn_subspace_dim-1).CopyFromMat(
-        norm_xform.Range(0, ddim, 0, phn_subspace_dim-1), kNoTrans);
+    int32 nonrandom_dim = std::min(phn_subspace_dim - 1, ddim),
+        random_dim = phn_subspace_dim - 1 - nonrandom_dim;
+    thisM.Range(0, ddim, 1, nonrandom_dim).CopyFromMat(
+        norm_xform.Range(0, ddim, 0, nonrandom_dim), kNoTrans);
+    // The following extension to the original paper allows us to
+    // initialize the model with a larger dimension of phone-subspace vector.
+    if (random_dim > 0)
+      thisM.Range(0, ddim, nonrandom_dim + 1, random_dim).SetRandn();
  }
 }

@ -1144,16 +1145,22 @@ void AmSgmm2::InitializeNu(int32 spk_subspace_dim,
                          const Matrix<BaseFloat> &norm_xform,
                          bool speaker_dependent_weights) {
  int32 ddim = full_ubm_.Dim();
-  KALDI_ASSERT(spk_subspace_dim <= ddim);
-  KALDI_ASSERT(spk_subspace_dim <= norm_xform.NumCols());
-  KALDI_ASSERT(ddim <= norm_xform.NumRows());
-
+  
  int32 num_gauss = full_ubm_.NumGauss();
  N_.resize(num_gauss);
  for (int32 i = 0; i < num_gauss; i++) {
    N_[i].Resize(ddim, spk_subspace_dim);
    // Eq. (28): N_{i} = [ (J)_{1:D, 1:T)}]
-    N_[i].CopyFromMat(norm_xform.Range(0, ddim, 0, spk_subspace_dim), kNoTrans);
+    
+    int32 nonrandom_dim = std::min(spk_subspace_dim, ddim),
+        random_dim = spk_subspace_dim - nonrandom_dim;
+
+    N_[i].Range(0, ddim, 0, nonrandom_dim).
+        CopyFromMat(norm_xform.Range(0, ddim, 0, nonrandom_dim), kNoTrans);
+    // The following extension to the original paper allows us to
+    // initialize the model with a larger dimension of speaker-subspace vector.
+    if (random_dim > 0)
+      N_[i].Range(0, ddim, nonrandom_dim, random_dim).SetRandn();
  }
  if (speaker_dependent_weights) {
    u_.Resize(num_gauss, spk_subspace_dim); // will set to zero.
--- a/src/sgmm2/am-sgmm2.h
+++ b/src/sgmm2/am-sgmm2.h
@ -169,6 +169,7 @@ class Sgmm2PerSpkDerivedVars {
    log_b_is.Resize(0);
    log_d_jms.resize(0);
  }
+  bool Empty() const { return v_s.Dim() == 0; }  // true when the speaker vector v_s has dimension zero
  // caution: after SetSpeakerVector you typically want to
  // use the function AmSgmm::ComputePerSpkDerivedVars
  const Vector<BaseFloat> &GetSpeakerVector() { return v_s; }
--- a/src/sgmm2/estimate-am-sgmm2.cc
+++ b/src/sgmm2/estimate-am-sgmm2.cc
@ -584,7 +584,8 @@ void MleAmSgmm2Accs::CommitStatsForSpk(const AmSgmm2 &model,
    for (int32 i = 0; i < num_gaussians_; i++)
      // Accumulate Statistics R_{ki}
      if (gamma_s_(i) != 0.0)
-        R_[i].AddVec2(static_cast<BaseFloat>(gamma_s_(i)), v_s);
+        R_[i].AddVec2(gamma_s_(i),
+                      Vector<double>(v_s));
  }
  if (a_s_.Dim() != 0) {
    Vector<BaseFloat> tmp(gamma_s_);
@ -592,7 +593,8 @@ void MleAmSgmm2Accs::CommitStatsForSpk(const AmSgmm2 &model,
    tmp.AddVecVec(-1.0, Vector<BaseFloat>(a_s_), spk_vars.b_is, 1.0);
    t_.AddVecVec(1.0, tmp, v_s); // eq. 53 of techreport.
    for (int32 i = 0; i < num_gaussians_; i++) {
-      U_[i].AddVec2(a_s_(i) * spk_vars.b_is(i), v_s); // eq. 54 of techreport.
+      U_[i].AddVec2(a_s_(i) * spk_vars.b_is(i),
+                    Vector<double>(v_s)); // eq. 54 of techreport.
    }
  }
  gamma_s_.SetZero();
--- a/src/sgmm2bin/sgmm2-acc-stats-gpost.cc
+++ b/src/sgmm2bin/sgmm2-acc-stats-gpost.cc
@ -68,7 +68,8 @@ int main(int argc, char *argv[]) {
    RandomAccessSgmm2GauPostReader gpost_reader(gpost_rspecifier);
    RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier,
                                                           utt2spk_rspecifier);
-
+    RandomAccessTokenReader utt2spk_map(utt2spk_rspecifier);
+    
    AmSgmm2 am_sgmm;
    TransitionModel trans_model;
    {
@ -87,8 +88,37 @@ int main(int argc, char *argv[]) {
    kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
    
    int32 num_done = 0, num_err = 0;
+    std::string cur_spk;
+    Sgmm2PerSpkDerivedVars spk_vars;
+    
    for (; !feature_reader.Done(); feature_reader.Next()) {
      std::string utt = feature_reader.Key();
+      std::string spk = utt;
+
+      if (!utt2spk_rspecifier.empty()) {
+        if (!utt2spk_map.HasKey(utt)) {
+          KALDI_WARN << "utt2spk map does not have value for " << utt
+                     << ", ignoring this utterance.";
+          continue;
+        } else { spk = utt2spk_map.Value(utt); }
+      }
+      if (spk != cur_spk || spk_vars.Empty()) {
+        spk_vars.Clear();
+        if (spkvecs_reader.IsOpen()) {
+          if (spkvecs_reader.HasKey(utt)) {
+            spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
+            am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
+          } else {
+            KALDI_WARN << "Cannot find speaker vector for " << utt;
+            num_err++;
+            continue;
+          }
+        } // else spk_vars is "empty"
+      }
+      if (spk != cur_spk)
+        sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+      cur_spk = spk;      
+      
      const Matrix<BaseFloat> &mat = feature_reader.Value();
      if (!gpost_reader.HasKey(utt) ||
          gpost_reader.Value(utt).size() != mat.NumRows()) {
@ -98,19 +128,7 @@ int main(int argc, char *argv[]) {
        continue;
      }
      const Sgmm2GauPost &gpost = gpost_reader.Value(utt);
-
-      Sgmm2PerSpkDerivedVars spk_vars;
-      if (spkvecs_reader.IsOpen()) {
-        if (spkvecs_reader.HasKey(utt)) {
-          spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
-          am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
-        } else {
-          KALDI_WARN << "Cannot find speaker vector for " << utt;
-          num_err++;
-          continue;
-        }
-      } // else spk_vars is "empty"
-
+      
      num_done++;
      BaseFloat tot_weight = 0.0;

@ -130,15 +148,14 @@ int main(int argc, char *argv[]) {
                                             pdf_id, &spk_vars);
          tot_weight += weight;
        }
-
-        sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars); // no harm doing it per
-        // utterance.
-
-        tot_t += tot_weight;
-        if (num_done % 50 == 0)
-          KALDI_LOG << "Processed " << num_done << " utterances";
      }
+
+      tot_t += tot_weight;
+      if (num_done % 50 == 0)
+        KALDI_LOG << "Processed " << num_done << " utterances";      
    }
+    sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars); // for last speaker
+    
    KALDI_LOG << "Overall number of frames is " << tot_t;

    KALDI_LOG << "Done " << num_done << " files, " << num_err;
--- a/src/sgmm2bin/sgmm2-acc-stats.cc
+++ b/src/sgmm2bin/sgmm2-acc-stats.cc
@ -85,7 +85,8 @@ int main(int argc, char *argv[]) {
      RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
      RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier,
                                                             utt2spk_rspecifier);
-    
+      RandomAccessTokenReader utt2spk_map(utt2spk_rspecifier);
+      
      AmSgmm2 am_sgmm;
      TransitionModel trans_model;
      {
@ -103,9 +104,37 @@ int main(int argc, char *argv[]) {
      double tot_t = 0;

      kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
-
+      std::string cur_spk;
+      Sgmm2PerSpkDerivedVars spk_vars;
+              
      for (; !feature_reader.Done(); feature_reader.Next()) {
        std::string utt = feature_reader.Key();
+        std::string spk = utt;
+        if (!utt2spk_rspecifier.empty()) {
+          if (!utt2spk_map.HasKey(utt)) {
+            KALDI_WARN << "utt2spk map does not have value for " << utt
+                       << ", ignoring this utterance.";
+            continue;
+          } else { spk = utt2spk_map.Value(utt); }
+        }
+        if (spk != cur_spk || spk_vars.Empty()) {
+          spk_vars.Clear();
+          if (spkvecs_reader.IsOpen()) {
+            if (spkvecs_reader.HasKey(utt)) {
+              spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
+              am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
+            } else {
+              KALDI_WARN << "Cannot find speaker vector for " << utt;
+              num_err++;
+              continue;
+            }
+          } // else spk_vars is "empty"
+        }
+        
+        if (spk != cur_spk)
+          sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);        
+        cur_spk = spk;
+        
        const Matrix<BaseFloat> &features = feature_reader.Value();
        if (!posteriors_reader.HasKey(utt) ||
            posteriors_reader.Value(utt).size() != features.NumRows()) {
@ -125,17 +154,6 @@ int main(int argc, char *argv[]) {
        const std::vector<std::vector<int32> > &gselect =
            gselect_reader.Value(utt);

-        Sgmm2PerSpkDerivedVars spk_vars;
-        if (spkvecs_reader.IsOpen()) {
-          if (spkvecs_reader.HasKey(utt)) {
-            spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
-            am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
-          } else {
-            KALDI_WARN << "Cannot find speaker vector for " << utt;
-            num_err++;
-            continue;
-          }
-        } // else spk_vars is "empty"
        num_done++;
      
        BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
@ -155,8 +173,6 @@ int main(int argc, char *argv[]) {
            tot_weight += weight;
          }
        }
-
-        sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars); // no harm doing it per utterance.
        
        KALDI_VLOG(2) << "Average like for this file is "
                      << (tot_like_this_file/tot_weight) << " over "
@ -170,12 +186,15 @@ int main(int argc, char *argv[]) {
                    << " over " << tot_weight <<" frames.";
        }
      }
+      sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars); // commit stats for
+      // last speaker.
+      
      KALDI_LOG << "Overall like per frame (Gaussian only) = "
                << (tot_like/tot_t) << " over " << tot_t << " frames.";

      KALDI_LOG << "Done " << num_done << " files, " << num_err
                << " with errors.";
-    }    
+    } 

    {
      Output ko(accs_wxfilename, binary);
--- a/src/sgmm2bin/sgmm2-acc-stats2.cc
+++ b/src/sgmm2bin/sgmm2-acc-stats2.cc
@ -77,6 +77,7 @@ int main(int argc, char *argv[]) {
    RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
    RandomAccessBaseFloatVectorReaderMapped spkvecs_reader(spkvecs_rspecifier,
                                                           utt2spk_rspecifier);
+    RandomAccessTokenReader utt2spk_map(utt2spk_rspecifier);    
    
    AmSgmm2 am_sgmm;
    TransitionModel trans_model;
@ -114,8 +115,38 @@ int main(int argc, char *argv[]) {
    kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;

    int32 num_done = 0, num_err = 0;
+    std::string cur_spk;
+    Sgmm2PerSpkDerivedVars spk_vars;
+    
    for (; !feature_reader.Done(); feature_reader.Next()) {
      std::string utt = feature_reader.Key();
+      std::string spk = utt;
+      if (!utt2spk_rspecifier.empty()) {
+        if (!utt2spk_map.HasKey(utt)) {
+          KALDI_WARN << "utt2spk map does not have value for " << utt
+                     << ", ignoring this utterance.";
+          continue;
+        } else { spk = utt2spk_map.Value(utt); }
+      }
+      if (spk != cur_spk || spk_vars.Empty()) {
+        spk_vars.Clear();
+        if (spkvecs_reader.IsOpen()) {
+          if (spkvecs_reader.HasKey(utt)) {
+            spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
+            am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
+          } else {
+            KALDI_WARN << "Cannot find speaker vector for " << utt;
+            num_err++;
+            continue;
+          }
+        } // else spk_vars is "empty"
+      }
+      if (spk != cur_spk) {
+        num_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+        den_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+      }
+      cur_spk = spk;
+      
      const Matrix<BaseFloat> &features = feature_reader.Value();
      if (!posteriors_reader.HasKey(utt) ||
          posteriors_reader.Value(utt).size() != features.NumRows()) {
@ -124,6 +155,7 @@ int main(int argc, char *argv[]) {
        num_err++;
        continue;
      }
+      
      const Posterior &posterior = posteriors_reader.Value(utt);
      if (!gselect_reader.HasKey(utt)
          && gselect_reader.Value(utt).size() != features.NumRows()) {
@ -134,18 +166,6 @@ int main(int argc, char *argv[]) {
      const std::vector<std::vector<int32> > &gselect =
          gselect_reader.Value(utt);

-      Sgmm2PerSpkDerivedVars spk_vars;
-      if (spkvecs_reader.IsOpen()) {
-        if (spkvecs_reader.HasKey(utt)) {
-          spk_vars.SetSpeakerVector(spkvecs_reader.Value(utt));
-          am_sgmm.ComputePerSpkDerivedVars(&spk_vars);
-        } else {
-          KALDI_WARN << "Cannot find speaker vector for " << utt;
-          num_err++;
-          continue;
-        }
-      } // else spk_vars is "empty"
-
      num_done++;
      BaseFloat tot_like_this_file = 0.0, tot_weight_this_file = 0.0,
          tot_abs_weight_this_file = 0.0;
@ -172,8 +192,10 @@ int main(int argc, char *argv[]) {
          tot_abs_weight_this_file += abs_weight;
        }
      }
-      num_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);  // no harm doing it per utterance.
+      // Commit stats for the last speaker.
+      num_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
      den_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+      
        
      tot_like += tot_like_this_file;
      tot_weight += tot_weight_this_file;
@ -182,6 +204,10 @@ int main(int argc, char *argv[]) {
      if (num_done % 50 == 0)
        KALDI_LOG << "Processed " << num_done << " utterances.";
    }
+    // Commit stats for last speaker.
+    num_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+    den_sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars);
+    
    KALDI_LOG << "Overall weighted acoustic likelihood per frame was "
              << (tot_like/tot_frames) << " over " << tot_frames << " frames; "
              << "average weight per frame is " << (tot_weight/tot_frames)
@ -208,5 +234,3 @@ int main(int argc, char *argv[]) {
    return -1;
  }
 }
-
-
--- a/src/util/kaldi-io.cc
+++ b/src/util/kaldi-io.cc
@ -44,7 +44,7 @@ std::string PrintableRxfilename(std::string rxfilename) {


 std::string PrintableWxfilename(std::string wxfilename) {
-  if (wxfilename == "" || wxfilename == "-") return "standard input";
+  if (wxfilename == "" || wxfilename == "-") return "standard output";
  else {
    // If this call to Escape later causes compilation issues,
    // just replace it with "return rxfilename"; it's only a
--- a/tools/extras/install_ffv.sh
+++ b/tools/extras/install_ffv.sh
@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Make sure we are in the tools/ directory.
+if [ `basename $PWD` == extras ]; then
+  cd ..
+fi
+
+! [ `basename $PWD` == tools ] && \
+   echo "You must call this script from the tools/ directory" && exit 1;
+
+mkdir -p pitch_trackers
+cd pitch_trackers
+
+echo "Installing a package for FFV feature extraction."
+
+if [ -s ffv-1.0.1.tar.gz ]; then
+  echo "*ffv-1.0.1.tar.gz already exists, not getting it."
+else
+  ! wget -t 2 http://www.cs.cmu.edu/~kornel/software/ffv-1.0.1.tar.gz && \
+    echo "Error wgetting ffv-1.0.1.tar.gz" && exit 1;
+fi
+
+if [ -d ffv-1.0.1 ]; then
+  echo "*It looks like ffv-1.0.1.tar.gz has already been unpacked, not unpacking it."
+else 
+  ! tar -zxvf ffv-1.0.1.tar.gz && \
+  echo "Error unpacking  ffv-1.0.1.tar.gz [e.g. unpack not installed?]" && exit 1;
+fi
+cd ffv-1.0.1
+
+if [ -f Makefile ]; then  # only generate a Makefile when the unpacked package has none
+  echo "Makefile already exists, not creating it."
+else
+  echo "Makefile does not exist, creating it."
+  cat<<'EOF' > ./Makefile  # quoted 'EOF' keeps ${CC}, $@, $^ literal; recipe lines must begin with a TAB
+  CC     = gcc
+  # CFLAGS = -c -O3 -Wall -pedantic -std=c99 
+  CFLAGS = -c -g -Wall -pedantic -std=c99
+  LIBS   = -lm
+
+  LIBOBJECTS = \
+  	\
+	windowpair.o \
+	filterbank.o \
+	dcorrxform.o \
+	ffv.o \
+	mutils.o \
+	sutils.o
+
+  all : ffv 
+
+  ffv : ffv_main.o ${LIBOBJECTS}
+	${CC} -o $@ $^ ${LIBS}
+
+  %.o : %.c
+	${CC} ${CFLAGS} $<
+
+  clean :
+	rm -f *.o ffv
+EOF
+chmod +x Makefile 
+fi
+! make && echo "Error building ffv" && exit 1;  # do not report success when the build fails
+cd ..
+
+echo "Installing ffv package is done."
+exit 0;
+
+
+
--- a/tools/extras/install_sacc.sh
+++ b/tools/extras/install_sacc.sh
@ -8,8 +8,9 @@ fi
 ! [ `basename $PWD` == tools ] && \
   echo "You must call this script from the tools/ directory" && exit 1;

-mkdir -p sacc
-cd sacc
+mkdir -p pitch_trackers/sacc
+
+cd pitch_trackers/sacc
 if [ -s SAcC_GLNXA64.zip ]; then
  echo "*SAcC_GLNXA64.zip already exists, not getting it."
 else