Changed the code to use the "pruned" lattice-determinization, to avoid the blowup that sometimes happens during determinization.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@928 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2012-05-17 22:21:31 +00:00 · 2012-05-17 22:21:31 +00:00 · dacaf6a439
--- a/egs/wsj/s3/local/wsj_data_prep.sh
+++ b/egs/wsj/s3/local/wsj_data_prep.sh
@ -209,7 +209,9 @@ gzip -f lm_tgpr_5k.arpa || exit 1;


 if [ ! -f wsj0-train-spkrinfo.txt ]; then
-  wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt
+  ! wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt && \
+    echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
+    wget https://sourceforge.net/projects/kaldi/upload/wsj0-train-spkrinfo.txt
 fi

 if [ ! -f wsj0-train-spkrinfo.txt ]; then
--- a/egs/wsj/s5/cmd.sh
+++ b/egs/wsj/s5/cmd.sh
@ -7,7 +7,7 @@

 train_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
 decode_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
-train_cmd=run.pl
+#train_cmd=run.pl
 #decode_cmd=run.pl


--- a/egs/wsj/s5/steps/decode_fmllr.sh
+++ b/egs/wsj/s5/steps/decode_fmllr.sh
@ -157,7 +157,7 @@ fi
 if [ $stage -le 3 ]; then
  echo "$0: estimating fMLLR transforms a second time."
  $cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
-    lattice-determinize --acoustic-scale=$acwt --prune=true --beam=4.0 \
+    lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
    "ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
    lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
    weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
@ -181,7 +181,7 @@ if [ $stage -le 4 ]; then
  echo "$0: doing a final pass of acoustic rescoring."
  $cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
    gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
-    lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$lattice_beam ark:- \
+    lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
    "ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
 fi

--- a/egs/wsj/s5/steps/decode_sgmm.sh
+++ b/egs/wsj/s5/steps/decode_sgmm.sh
@ -117,7 +117,7 @@ if [ $stage -le 3 ]; then
  $cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
    gunzip -c $dir/pre_lat.JOB.gz \| \
    lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
-    lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
+    lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
    lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
    weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \| \
    sgmm-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- ark:- \| \
@ -133,7 +133,7 @@ if [ $stage -le 4 ]; then
    sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
      "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
    lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
-    lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
+    lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
    lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
    weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
    sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
@ -155,7 +155,7 @@ if $use_fmllr; then
      sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
      "$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
      lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
-      lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
+      lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
      lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
      weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
      sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
@ -172,7 +172,7 @@ if [ $stage -le 6 ]; then
  $cmd JOB=1:$nj $dir/log/rescore.JOB.log \
    sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
    $srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
-    lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$lat_beam ark:- \
+    lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \
    "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
 fi
 rm $dir/pre_lat.*.gz
--- a/egs/yesno/s3/run.sh
+++ b/egs/yesno/s3/run.sh
@ -35,5 +35,5 @@ scripts/mkgraph.sh --mono data/lang_test_tg exp/mono0a exp/mono0a/graph_tgpr

 # Decoding
 decode_cmd="scripts/run.pl"
-scripts/decode.sh --num-jobs 1 --cmd "$decode_cmd" --opts "--beam 10.0 --lattice-beam 2.0" \
+scripts/decode.sh --num-jobs 1 --cmd "$decode_cmd" --opts "--beam 10.0 --lattice-beam 5.0" \
   steps/decode_deltas.sh exp/mono0a/graph_tgpr data/${test_base_name} exp/mono0a/decode_${test_base_name}
--- a/src/decoder/lattice-biglm-faster-decoder.h
+++ b/src/decoder/lattice-biglm-faster-decoder.h
@ -191,25 +191,26 @@ class LatticeBiglmFasterDecoder {
    Lattice raw_fst;
    if(!GetRawLattice(&raw_fst)) return false;
    Invert(&raw_fst); // make it so word labels are on the input.
-    BaseFloat cur_beam = config_.lattice_beam;
-    fst::DeterminizeLatticeOptions lat_opts;
+    if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
+      KALDI_WARN << "Topological sorting of state-level lattice failed "
+          "(probably your lexicon has empty words or your LM has epsilon cycles; this "
+          " is a bad idea.)";
+    // (The topological sort helps in the phase where we get backward-costs.)
+    fst::ILabelCompare<LatticeArc> ilabel_comp;
+    ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
+    // lattice-determinization more efficient.
+    
+    LatticeWeight beam(config_.lattice_beam, 0);
+    fst::DeterminizeLatticePrunedOptions lat_opts;
    lat_opts.max_mem = config_.max_mem;
    lat_opts.max_loop = config_.max_loop;
-    for (int32 i = 0; i < 20; i++) {
-      if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
-        if (config_.prune_lattice)
-          fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
-        return true;
-      } else {
-        cur_beam *= config_.beam_ratio;
-        KALDI_WARN << "Failed to determinize lattice (presumably max-states "
-                   << "reached), reducing lattice-beam to " << cur_beam
-                   << " and re-trying.";
-        Lattice tmp_fst(raw_fst);
-        Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
-      }
-    }
-    return false; // fell off loop-- shouldn't really happen.
+    lat_opts.max_arcs = config_.max_arcs;
+    
+    DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
+    raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
+    Connect(ofst); // Remove unreachable states... there might be
+    // a small number of these, in some cases.
+    return true;
  }
  
 private:
--- a/src/decoder/lattice-faster-decoder.h
+++ b/src/decoder/lattice-faster-decoder.h
@ -24,6 +24,7 @@
 #include "fst/fstlib.h"
 #include "itf/decodable-itf.h"
 #include "fstext/fstext-lib.h"
+#include "fstext/determinize-lattice-pruned.h"
 #include "lat/kaldi-lattice.h"

 namespace kaldi {
@ -35,10 +36,9 @@ struct LatticeFasterDecoderConfig {
  int32 prune_interval;
  bool determinize_lattice; // not inspected by this class... used in
  // command-line program.
-  bool prune_lattice;
  int32 max_mem; // max memory usage in determinization
  int32 max_loop;
-  BaseFloat beam_ratio;
+  int32 max_arcs; // max #arcs in lattice.
  BaseFloat beam_delta; // has nothing to do with beam_ratio
  BaseFloat hash_ratio;
  LatticeFasterDecoderConfig(): beam(16.0),
@ -46,10 +46,9 @@ struct LatticeFasterDecoderConfig {
                                lattice_beam(10.0),
                                prune_interval(25),
                                determinize_lattice(true),
-                                prune_lattice(true),
                                max_mem(50000000), // 50 MB (probably corresponds to 100 really)
                                max_loop(500000),
-                                beam_ratio(0.9),
+                                max_arcs(-1),
                                beam_delta(0.5),
                                hash_ratio(2.0) { }
  void Register(ParseOptions *po) {
@ -58,17 +57,15 @@ struct LatticeFasterDecoderConfig {
    po->Register("lattice-beam", &lattice_beam, "Lattice generation beam");
    po->Register("prune-interval", &prune_interval, "Interval (in frames) at which to prune tokens");
    po->Register("determinize-lattice", &determinize_lattice, "If true, determinize the lattice (in a special sense, keeping only best pdf-sequence for each word-sequence).");
-    po->Register("prune-lattice", &prune_lattice, "If true, prune lattice using the lattice-beam (recommended)");
-    po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be double this)");
+    po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be many times this)");
    po->Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
-    po->Register("beam-ratio", &beam_ratio, "Ratio by which to decrease lattice-beam if we reach the max-arcs.");
+    po->Register("max-arcs", &max_arcs, "If >0, maximum #arcs allowed in output lattice (total, not per state)");
    po->Register("beam-delta", &beam_delta, "Increment used in decoding");
    po->Register("hash-ratio", &hash_ratio, "Setting used in decoder to control hash behavior");
  }
  void Check() const {
    KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 
-                 && prune_interval > 0 && beam_ratio > 0.0 && beam_ratio < 1.0
-                 && beam_delta > 0.0 && hash_ratio >= 1.0);
+                 && prune_interval > 0 && beam_delta > 0.0 && hash_ratio >= 1.0);
  }
 };

@ -217,26 +214,26 @@ class LatticeFasterDecoder {
    Lattice raw_fst;
    if(!GetRawLattice(&raw_fst)) return false;
    Invert(&raw_fst); // make it so word labels are on the input.
-    BaseFloat cur_beam = config_.lattice_beam;
-    fst::DeterminizeLatticeOptions lat_opts;
+    if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
+      KALDI_WARN << "Topological sorting of state-level lattice failed "
+          "(probably your lexicon has empty words or your LM has epsilon cycles; this "
+          " is a bad idea.)";
+    // (The topological sort helps in the phase where we get backward-costs.)
+    fst::ILabelCompare<LatticeArc> ilabel_comp;
+    ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
+    // lattice-determinization more efficient.
+    
+    LatticeWeight beam(config_.lattice_beam, 0);
+    fst::DeterminizeLatticePrunedOptions lat_opts;
    lat_opts.max_mem = config_.max_mem;
    lat_opts.max_loop = config_.max_loop;
-    for (int32 i = 0; i < 20; i++) {
-      if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
-        raw_fst.DeleteStates(); // Free memory prior to next stage.
-        if (config_.prune_lattice)
-          fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
-        return true;
-      } else {
-        cur_beam *= config_.beam_ratio;
-        KALDI_WARN << "Failed to determinize lattice (presumably max-states "
-                   << "reached), reducing lattice-beam to " << cur_beam
-                   << " and re-trying.";
-        Lattice tmp_fst(raw_fst);
-        Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
-      }
-    }
-    return false; // fell off loop-- shouldn't really happen.
+    lat_opts.max_arcs = config_.max_arcs;
+    
+    DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
+    raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
+    Connect(ofst); // Remove unreachable states... there might be
+    // a small number of these, in some cases.
+    return true;
  }
  
 private:
--- a/src/decoder/lattice-simple-decoder.h
+++ b/src/decoder/lattice-simple-decoder.h
@ -23,6 +23,7 @@
 #include "fst/fstlib.h"
 #include "itf/decodable-itf.h"
 #include "fstext/fstext-lib.h"
+#include "fstext/determinize-lattice-pruned.h"
 #include "lat/kaldi-lattice.h"

 #include <algorithm>
@ -44,28 +45,27 @@ struct LatticeSimpleDecoderConfig {
  bool prune_lattice;
  int32 max_mem;
  int32 max_loop;
+  int32 max_arcs;
  BaseFloat beam_ratio;
  LatticeSimpleDecoderConfig(): beam(16.0),
                                lattice_beam(10.0),
                                prune_interval(25),
                                determinize_lattice(true),
-                                prune_lattice(true),
-                                max_mem(50000000), // 50 MB (probably corresponds to 100 really) 
+                                max_mem(50000000), // 50 MB (probably corresponds to 500, really)
                                max_loop(500000),
+                                max_arcs(-1),
                                beam_ratio(0.9) { }
  void Register(ParseOptions *po) {
    po->Register("beam", &beam, "Decoding beam.");
    po->Register("lattice-beam", &lattice_beam, "Lattice generation beam");
    po->Register("prune-interval", &prune_interval, "Interval (in frames) at which to prune tokens");
    po->Register("determinize-lattice", &determinize_lattice, "If true, determinize the lattice (in a special sense, keeping only best pdf-sequence for each word-sequence).");
-    po->Register("prune-lattice", &prune_lattice, "If true, prune lattice using the lattice-beam (recommended)");
-    po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be double this)");
+    po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be many times this)");
    po->Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
-    po->Register("beam-ratio", &beam_ratio, "Ratio by which to decrease lattice-beam if we reach the max-arcs.");
+    po->Register("max-arcs", &max_arcs, "If >0, maximum #arcs allowed in output lattice (total, not per state)");
  }
  void Check() const {
-    KALDI_ASSERT(beam > 0.0 && lattice_beam > 0.0 && prune_interval > 0
-                 && beam_ratio > 0.0 && beam_ratio < 1.0);
+    KALDI_ASSERT(beam > 0.0 && lattice_beam > 0.0 && prune_interval > 0);
  }
 };

@ -211,80 +211,27 @@ class LatticeSimpleDecoder {
    Lattice raw_fst;
    if(!GetRawLattice(&raw_fst)) return false;
    Invert(&raw_fst); // make it so word labels are on the input.
-    BaseFloat cur_beam = config_.lattice_beam;
-    fst::DeterminizeLatticeOptions lat_opts;
+    if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
+      KALDI_WARN << "Topological sorting of state-level lattice failed "
+          "(probably your lexicon has empty words or your LM has epsilon cycles; this "
+          " is a bad idea.)";
+    // (The topological sort helps in the phase where we get backward-costs.)
+    fst::ILabelCompare<LatticeArc> ilabel_comp;
+    ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
+    // lattice-determinization more efficient.
+    
+    LatticeWeight beam(config_.lattice_beam, 0);
+    fst::DeterminizeLatticePrunedOptions lat_opts;
    lat_opts.max_mem = config_.max_mem;
    lat_opts.max_loop = config_.max_loop;
-    for (int32 i = 0; i < 20; i++) {
-      if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
-        raw_fst.DeleteStates(); // save memory.
-        if (config_.prune_lattice)
-          fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
-        return true;
-      } else {
-        cur_beam *= config_.beam_ratio;
-        KALDI_WARN << "Failed to determinize lattice (presumably max-states "
-                   << "reached), reducing lattice-beam to " << cur_beam
-                   << " and re-trying.";
-        Lattice tmp_fst(raw_fst);
-        Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
-      }
-    }
-    return false; // fell off loop-- shouldn't really happen.
-  }
-  
-  
-  /*
-  bool GetOutput(bool is_final, fst::MutableFst<fst::StdArc> *fst_out) {  
-    // GetOutput gets the decoding output.  If is_final == true, it limits itself to final states;
-    // otherwise it gets the most likely token not taking into account final-probs.
-    // fst_out will be empty (Start() == kNoStateId) if nothing was available.
-    // It returns true if it got output (thus, fst_out will be nonempty).
-    fst_out->DeleteStates();
-    Token *best_tok = NULL;
-    if (!is_final) {
-      for (unordered_map<StateId, Token*>::iterator iter = cur_toks_.begin();
-          iter != cur_toks_.end();
-          ++iter)
-        if (best_tok == NULL || *best_tok < *(iter->second) )
-          best_tok = iter->second;
-    } else {
-      Weight best_weight = Weight::Zero();
-      for (unordered_map<StateId, Token*>::iterator iter = cur_toks_.begin();
-          iter != cur_toks_.end();
-          ++iter) {
-        Weight this_weight = Times(iter->second->arc_.weight, fst_.Final(iter->first));
-        if (this_weight != Weight::Zero() &&
-           this_weight.Value() < best_weight.Value()) {
-          best_weight = this_weight;
-          best_tok = iter->second;
-        }
-      }
-    }
-    if (best_tok == NULL) return false;  // No output.
-
-    std::vector<Arc> arcs_reverse;  // arcs in reverse order.
-    for (Token *tok = best_tok; tok != NULL; tok = tok->prev_)
-      arcs_reverse.push_back(tok->arc_);
-    KALDI_ASSERT(arcs_reverse.back().nextstate == fst_.Start());
-    arcs_reverse.pop_back();  // that was a "fake" token... gives no info.
-
-    StateId cur_state = fst_out->AddState();
-    fst_out->SetStart(cur_state);
-    for (ssize_t i = static_cast<ssize_t>(arcs_reverse.size())-1; i >= 0; i--) {
-      Arc arc = arcs_reverse[i];
-      arc.nextstate = fst_out->AddState();
-      fst_out->AddArc(cur_state, arc);
-      cur_state = arc.nextstate;
-    }
-    if (is_final)
-      fst_out->SetFinal(cur_state, fst_.Final(best_tok->arc_.nextstate));
-    else
-      fst_out->SetFinal(cur_state, Weight::One());
-    RemoveEpsLocal(fst_out);
+    lat_opts.max_arcs = config_.max_arcs;
+    
+    DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
+    raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
+    Connect(ofst); // Remove unreachable states... there might be
+    // a small number of these, in some cases.
    return true;
-    }
-  */
+  }

 private:
  struct Token;
--- a/src/fstext/determinize-lattice-inl.h
+++ b/src/fstext/determinize-lattice-inl.h
@ -1,6 +1,6 @@
 // fstext/determinize-lattice-inl.h

-// Copyright 2009-2011  Microsoft Corporation
+// Copyright 2009-2012  Microsoft Corporation  Daniel Povey

 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -365,7 +365,9 @@ template<class Weight, class IntType> class LatticeDeterminizer {
                      DeterminizeLatticeOptions opts):
      num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), opts_(opts),
      equal_(opts_.delta), determinized_(false),
-      minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {    
+      minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
+    KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
+    // work correctly otherwise.
  }

  // frees all except output_arcs_, which contains the important info
--- a/src/fstext/determinize-lattice-pruned-inl.h
+++ b/src/fstext/determinize-lattice-pruned-inl.h
@ -191,7 +191,9 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
                            DeterminizeLatticePrunedOptions opts):
      num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), beam_(beam), opts_(opts),
      equal_(opts_.delta), determinized_(false),
-      minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {    
+      minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
+    KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
+    // work correctly otherwise.
  }

  void FreeOutputStates() {
@ -306,34 +308,26 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
    // in "output_arcs_".  Must be called after Initialize().  To get the
    // output, call one of the Output routines.

-    bool ans = true;
-    
    InitializeDeterminization(); // some start-up tasks.
    while (!queue_.empty()) {
      Task *task = queue_.top();
-      // First assess whether we've either reached the specified beam,
-      // or reached some kind of user-specified maximum.  The condition for
+      // Note: the queue contains only tasks that are "within the beam".
+      // We also have to check whether we have reached one of the user-specified
+      // maximums, of estimated memory, arcs, or states.  The condition for
      // ending is:
-      //  weight is < cutoff-weight, OR
      // num-states is more than user specified, OR
      // num-arcs is more than user specified, OR
      // memory passed a user-specified threshold and cleanup failed
      //  to get it below that threshold.
      size_t num_states = output_states_.size();
-      if (fst::Compare(task->priority_weight, cutoff_) < 0 || 
-          (opts_.max_states > 0 && num_states > opts_.max_states) || 
+      if ((opts_.max_states > 0 && num_states > opts_.max_states) || 
          (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) || 
          (num_states % 100 == 0 && !CheckMemoryUsage())) {
-        if (fst::Compare(task->priority_weight, cutoff_) >= 0) { // We didn't terminate because
-          // of the lattice-beam, but for some other reason.  This is probably
-          // going to be unusual, so let's inform the user.
-          KALDI_VLOG(1) << "Lattice determinization terminated but not "
-                        << " because of lattice-beam.  (#states, #arcs) is ( " 
-                        << output_states_.size() << ", " << num_arcs_
-                        << " ), versus limits ( " << opts_.max_states << ", "
-                        << opts_.max_arcs << " (else, may be memory limit).";
-          ans = false;
-        }
+        KALDI_VLOG(1) << "Lattice determinization terminated but not "
+                      << " because of lattice-beam.  (#states, #arcs) is ( " 
+                      << output_states_.size() << ", " << num_arcs_
+                      << " ), versus limits ( " << opts_.max_states << ", "
+                      << opts_.max_arcs << " (else, may be memory limit).";
        break;
        // we terminate the determinization here-- whatever we already expanded is
        // what we'll return...  because we expanded stuff in order of total
@ -345,7 +339,9 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
      delete task;
    }
    determinized_ = true;
-    return ans;
+    return (queue_.empty()); // return success if queue was empty, i.e. we processed
+    // all tasks and did not break out of the loop early due to reaching a memory,
+    // arc or state limit.
  }
 private:
  
@ -487,7 +483,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
    if (iter != minimal_hash_.end()) { // Found a matching subset.
      OutputStateId state_id = iter->second;
      const OutputState &state = *(output_states_[state_id]);
-      // Below is just a check the algorithm is right...
+      // Below is just a check that the algorithm is working...
      if (fst::Compare(forward_weight, state.forward_weight) > 0
          && !ApproxEqual(forward_weight, state.forward_weight,
                          0.1)) { // TODO:  remove this once we're sure it's working...
@ -704,7 +700,8 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
    // processes final-weights for this subset.  state.minimal_subset_ may be
    // empty if the graphs is not connected/trimmed, I think, do don't check
    // that it's nonempty.
-    StringId final_string = NULL;  // = NULL to keep compiler happy.
+    StringId final_string = repository_.EmptyString();  // set it to keep the
+    // compiler happy; if it doesn't get set in the loop, we won't use the value anyway.
    Weight final_weight = Weight::Zero();
    bool is_final = false;
    typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
@ -903,7 +900,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
    typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
    PairIter cur = all_elems.begin(), end = all_elems.end();
    while (cur != end) {
-      // The old code (non-pruned) called ProcessTransition; here,
+      // The old code (non-pruned) called ProcessTransition; here, instead,
      // we'll put the calls into a priority queue.
      Task *task = new Task;
      // Process ranges that share the same input symbol.
@ -930,11 +927,15 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
          output_states_[output_state_id]->forward_weight,
          task->priority_weight);

-      MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
-      
-      queue_.push(task); // Push the task onto the queue.  The queue keeps it      
-      // in prioritized order, so we always process the one with the "best"
-      // weight (highest in the semiring).
+      if (fst::Compare(task->priority_weight, cutoff_) < 0) {
+        // This task would never get done as it's below the pruning cutoff.
+        delete task;
+      } else {
+        MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
+        queue_.push(task); // Push the task onto the queue.  The queue keeps it      
+        // in prioritized order, so we always process the one with the "best"
+        // weight (highest in the semiring).
+      }
    }
    all_elems.clear(); // as it's a reference to a class variable; we want it to stay
    // empty.
@ -1022,13 +1023,11 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
         a lookaside buffer anyway, so this isn't a problem-- it will get populated
         later if it needs to be.
      */
-      Element elem;
-      elem.state = start_id;
-      elem.weight = Weight::One();
-      elem.string = repository_.EmptyString();  // Id of empty sequence.
-      vector<Element> subset;
-      subset.push_back(elem);
-      EpsilonClosure(&subset); // follow through epsilon-inputs links
+      vector<Element> subset(1);
+      subset[0].state = start_id;
+      subset[0].weight = Weight::One();
+      subset[0].string = repository_.EmptyString();  // Id of empty sequence.
+      EpsilonClosure(&subset); // follow through epsilon-input links
      ConvertToMinimal(&subset); // remove all but final states and
      // states with input-labels on arcs out of them.
      // Weight::One() is the "forward-weight" of this determinized state...
@ -1056,7 +1055,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
    // output we may have to ignore some of these.
    Weight forward_weight; // Represents minimal cost from start-state
    // to this state.  Used in prioritization of tasks, and pruning.
-    // Note: we know this minimal cost from when we fist create the OutputState;
+    // Note: we know this minimal cost from when we first create the OutputState;
    // this is because of the priority-queue we use, that ensures that the
    // "best" path into the state will be expanded first.
    OutputState(const vector<Element> &minimal_subset,
--- a/src/latbin/Makefile
+++ b/src/latbin/Makefile
@ -12,7 +12,7 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
           lattice-add-trans-probs lattice-difference lattice-word-align \
           nbest-to-linear nbest-to-lattice lattice-1best linear-to-nbest \
           lattice-mbr-decode lattice-align-words lattice-to-mpe-post \
-           lattice-copy-backoff nbest-to-ctm
+           lattice-copy-backoff nbest-to-ctm lattice-determinize-pruned


 OBJFILES =
--- a/src/latbin/lattice-determinize.cc
+++ b/src/latbin/lattice-determinize.cc
@ -1,6 +1,6 @@
 // latbin/lattice-determinize.cc

-// Copyright 2009-2011  Microsoft Corporation
+// Copyright 2009-2012  Microsoft Corporation  Daniel Povey

 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -114,7 +114,7 @@ int main(int argc, char *argv[]) {
    po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling]-- also used to handle determinization failures, set --prune=false to disable routine pruning");
    po.Register("delta", &delta, "Tolerance used in determinization");
    po.Register("prune", &prune, "If true, prune determinized lattices with the --beam option.");
-    po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be twice this)");
+    po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be many times this)");
    po.Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
    po.Register("beam-ratio", &beam_ratio, "Ratio by which to decrease beam if we reach the max-arcs.");
    po.Register("num-loops", &num_loops, "Number of times to decrease beam by beam-ratio if determinization fails.");
--- a/src/latbin/lattice-difference.cc
+++ b/src/latbin/lattice-difference.cc
@ -85,9 +85,9 @@ int main(int argc, char *argv[]) {
      }
    }
    
-    KALDI_LOG << "Total " << n_done << " lattices written."
+    KALDI_LOG << "Total " << n_done << " lattices written; "
              << n_only_transcription
-              << " lattices contain only transcription; "
+              << " lattices had empty difference; "
              << n_no_lat << " missing lattices in second archive ";
    return (n_done != 0 ? 0 : 1);
  } catch(const std::exception& e) {
--- a/src/latbin/lattice-prune.cc
+++ b/src/latbin/lattice-prune.cc
@ -82,7 +82,7 @@ int main(int argc, char *argv[]) {
      n_arcs_out += pruned_narcs;
      n_states_out += pruned_nstates;
      KALDI_LOG << "For utterance " << key << ", pruned #states from "
-                << nstates << " to " << pruned_nstates << " and #arcs from"
+                << nstates << " to " << pruned_nstates << " and #arcs from "
                << narcs << " to " << pruned_narcs;
      fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &pruned_lat);
      CompactLattice pruned_clat;