Changed the code to use the "pruned" lattice-determinization, to avoid the blowup that sometimes happens.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@928 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2012-05-17 22:21:31 +00:00
Родитель 57ee24d47a
Коммит dacaf6a439
14 изменённых файлов: 122 добавлений и 174 удалений

Просмотреть файл

@ -209,7 +209,9 @@ gzip -f lm_tgpr_5k.arpa || exit 1;
if [ ! -f wsj0-train-spkrinfo.txt ]; then
wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt
! wget http://www.ldc.upenn.edu/Catalog/docs/LDC93S6A/wsj0-train-spkrinfo.txt && \
echo "Getting wsj0-train-spkrinfo.txt from backup location" && \
wget https://sourceforge.net/projects/kaldi/upload/wsj0-train-spkrinfo.txt
fi
if [ ! -f wsj0-train-spkrinfo.txt ]; then

Просмотреть файл

@ -7,7 +7,7 @@
train_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
decode_cmd="queue.pl -q all.q@a*.clsp.jhu.edu"
train_cmd=run.pl
#train_cmd=run.pl
#decode_cmd=run.pl

Просмотреть файл

@ -157,7 +157,7 @@ fi
if [ $stage -le 3 ]; then
echo "$0: estimating fMLLR transforms a second time."
$cmd JOB=1:$nj $dir/log/fmllr_pass2.JOB.log \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=4.0 \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=4.0 \
"ark:gunzip -c $dir/lat.tmp.JOB.gz|" ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post $silence_weight $silphonelist $adapt_model ark:- ark:- \| \
@ -181,7 +181,7 @@ if [ $stage -le 4 ]; then
echo "$0: doing a final pass of acoustic rescoring."
$cmd JOB=1:$nj $dir/log/acoustic_rescore.JOB.log \
gmm-rescore-lattice $final_model "ark:gunzip -c $dir/lat.tmp.JOB.gz|" "$feats" ark:- \| \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$lattice_beam ark:- \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lattice_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" '&&' rm $dir/lat.tmp.JOB.gz || exit 1;
fi

Просмотреть файл

@ -117,7 +117,7 @@ if [ $stage -le 3 ]; then
$cmd JOB=1:$nj $dir/log/vecs_pass1.JOB.log \
gunzip -c $dir/pre_lat.JOB.gz \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.alimdl ark:- ark:- \| \
sgmm-post-to-gpost "$gselect_opt" $srcdir/final.alimdl "$feats" ark:- ark:- \| \
@ -133,7 +133,7 @@ if [ $stage -le 4 ]; then
sgmm-rescore-lattice --spk-vecs=ark:$dir/pre_vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-spkvecs --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/pre_vecs.JOB \
@ -155,7 +155,7 @@ if $use_fmllr; then
sgmm-rescore-lattice --spk-vecs=ark:$dir/vecs.JOB --utt2spk=ark:$sdata/JOB/utt2spk \
"$gselect_opt" $srcdir/final.mdl ark:- "$feats" ark:- \| \
lattice-prune --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$vecs_beam ark:- ark:- \| \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$vecs_beam ark:- ark:- \| \
lattice-to-post --acoustic-scale=$acwt ark:- ark:- \| \
weight-silence-post 0.0 $silphonelist $srcdir/final.mdl ark:- ark:- \| \
sgmm-est-fmllr --spk2utt=ark:$sdata/JOB/spk2utt "$gselect_opt" --spk-vecs=ark:$dir/vecs.JOB \
@ -172,7 +172,7 @@ if [ $stage -le 6 ]; then
$cmd JOB=1:$nj $dir/log/rescore.JOB.log \
sgmm-rescore-lattice "$gselect_opt" --utt2spk=ark:$sdata/JOB/utt2spk --spk-vecs=ark:$dir/vecs.JOB \
$srcdir/final.mdl "ark:gunzip -c $dir/pre_lat.JOB.gz|" "$feats" ark:- \| \
lattice-determinize --acoustic-scale=$acwt --prune=true --beam=$lat_beam ark:- \
lattice-determinize-pruned --acoustic-scale=$acwt --beam=$lat_beam ark:- \
"ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
fi
rm $dir/pre_lat.*.gz

Просмотреть файл

@ -35,5 +35,5 @@ scripts/mkgraph.sh --mono data/lang_test_tg exp/mono0a exp/mono0a/graph_tgpr
# Decoding
decode_cmd="scripts/run.pl"
scripts/decode.sh --num-jobs 1 --cmd "$decode_cmd" --opts "--beam 10.0 --lattice-beam 2.0" \
scripts/decode.sh --num-jobs 1 --cmd "$decode_cmd" --opts "--beam 10.0 --lattice-beam 5.0" \
steps/decode_deltas.sh exp/mono0a/graph_tgpr data/${test_base_name} exp/mono0a/decode_${test_base_name}

Просмотреть файл

@ -191,25 +191,26 @@ class LatticeBiglmFasterDecoder {
Lattice raw_fst;
if(!GetRawLattice(&raw_fst)) return false;
Invert(&raw_fst); // make it so word labels are on the input.
BaseFloat cur_beam = config_.lattice_beam;
fst::DeterminizeLatticeOptions lat_opts;
if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
KALDI_WARN << "Topological sorting of state-level lattice failed "
"(probably your lexicon has empty words or your LM has epsilon cycles; this "
" is a bad idea.)";
// (in phase where we get backward-costs).
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
// lattice-determinization more efficient.
LatticeWeight beam(config_.lattice_beam, 0);
fst::DeterminizeLatticePrunedOptions lat_opts;
lat_opts.max_mem = config_.max_mem;
lat_opts.max_loop = config_.max_loop;
for (int32 i = 0; i < 20; i++) {
if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
if (config_.prune_lattice)
fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
return true;
} else {
cur_beam *= config_.beam_ratio;
KALDI_WARN << "Failed to determinize lattice (presumably max-states "
<< "reached), reducing lattice-beam to " << cur_beam
<< " and re-trying.";
Lattice tmp_fst(raw_fst);
Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
}
}
return false; // fell off loop-- shouldn't really happen.
lat_opts.max_arcs = config_.max_arcs;
DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
Connect(ofst); // Remove unreachable states... there might be
// a small number of these, in some cases.
return true;
}
private:

Просмотреть файл

@ -24,6 +24,7 @@
#include "fst/fstlib.h"
#include "itf/decodable-itf.h"
#include "fstext/fstext-lib.h"
#include "fstext/determinize-lattice-pruned.h"
#include "lat/kaldi-lattice.h"
namespace kaldi {
@ -35,10 +36,9 @@ struct LatticeFasterDecoderConfig {
int32 prune_interval;
bool determinize_lattice; // not inspected by this class... used in
// command-line program.
bool prune_lattice;
int32 max_mem; // max memory usage in determinization
int32 max_loop;
BaseFloat beam_ratio;
int32 max_arcs; // max #arcs in lattice.
BaseFloat beam_delta; // has nothing to do with beam_ratio
BaseFloat hash_ratio;
LatticeFasterDecoderConfig(): beam(16.0),
@ -46,10 +46,9 @@ struct LatticeFasterDecoderConfig {
lattice_beam(10.0),
prune_interval(25),
determinize_lattice(true),
prune_lattice(true),
max_mem(50000000), // 50 MB (probably corresponds to 100 really)
max_loop(500000),
beam_ratio(0.9),
max_arcs(-1),
beam_delta(0.5),
hash_ratio(2.0) { }
void Register(ParseOptions *po) {
@ -58,17 +57,15 @@ struct LatticeFasterDecoderConfig {
po->Register("lattice-beam", &lattice_beam, "Lattice generation beam");
po->Register("prune-interval", &prune_interval, "Interval (in frames) at which to prune tokens");
po->Register("determinize-lattice", &determinize_lattice, "If true, determinize the lattice (in a special sense, keeping only best pdf-sequence for each word-sequence).");
po->Register("prune-lattice", &prune_lattice, "If true, prune lattice using the lattice-beam (recommended)");
po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be double this)");
po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be many times this)");
po->Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
po->Register("beam-ratio", &beam_ratio, "Ratio by which to decrease lattice-beam if we reach the max-arcs.");
po->Register("max-arcs", &max_arcs, "If >0, maximum #arcs allowed in output lattice (total, not per state)");
po->Register("beam-delta", &beam_delta, "Increment used in decoding");
po->Register("hash-ratio", &hash_ratio, "Setting used in decoder to control hash behavior");
}
void Check() const {
KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0
&& prune_interval > 0 && beam_ratio > 0.0 && beam_ratio < 1.0
&& beam_delta > 0.0 && hash_ratio >= 1.0);
&& prune_interval > 0 && beam_delta > 0.0 && hash_ratio >= 1.0);
}
};
@ -217,26 +214,26 @@ class LatticeFasterDecoder {
Lattice raw_fst;
if(!GetRawLattice(&raw_fst)) return false;
Invert(&raw_fst); // make it so word labels are on the input.
BaseFloat cur_beam = config_.lattice_beam;
fst::DeterminizeLatticeOptions lat_opts;
if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
KALDI_WARN << "Topological sorting of state-level lattice failed "
"(probably your lexicon has empty words or your LM has epsilon cycles; this "
" is a bad idea.)";
// (in phase where we get backward-costs).
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
// lattice-determinization more efficient.
LatticeWeight beam(config_.lattice_beam, 0);
fst::DeterminizeLatticePrunedOptions lat_opts;
lat_opts.max_mem = config_.max_mem;
lat_opts.max_loop = config_.max_loop;
for (int32 i = 0; i < 20; i++) {
if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
raw_fst.DeleteStates(); // Free memory prior to next stage.
if (config_.prune_lattice)
fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
return true;
} else {
cur_beam *= config_.beam_ratio;
KALDI_WARN << "Failed to determinize lattice (presumably max-states "
<< "reached), reducing lattice-beam to " << cur_beam
<< " and re-trying.";
Lattice tmp_fst(raw_fst);
Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
}
}
return false; // fell off loop-- shouldn't really happen.
lat_opts.max_arcs = config_.max_arcs;
DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
Connect(ofst); // Remove unreachable states... there might be
// a small number of these, in some cases.
return true;
}
private:

Просмотреть файл

@ -23,6 +23,7 @@
#include "fst/fstlib.h"
#include "itf/decodable-itf.h"
#include "fstext/fstext-lib.h"
#include "fstext/determinize-lattice-pruned.h"
#include "lat/kaldi-lattice.h"
#include <algorithm>
@ -44,28 +45,27 @@ struct LatticeSimpleDecoderConfig {
bool prune_lattice;
int32 max_mem;
int32 max_loop;
int32 max_arcs;
BaseFloat beam_ratio;
LatticeSimpleDecoderConfig(): beam(16.0),
lattice_beam(10.0),
prune_interval(25),
determinize_lattice(true),
prune_lattice(true),
max_mem(50000000), // 50 MB (probably corresponds to 100 really)
max_mem(50000000), // 50 MB (probably corresponds to 500, really)
max_loop(500000),
max_arcs(-1),
beam_ratio(0.9) { }
void Register(ParseOptions *po) {
po->Register("beam", &beam, "Decoding beam.");
po->Register("lattice-beam", &lattice_beam, "Lattice generation beam");
po->Register("prune-interval", &prune_interval, "Interval (in frames) at which to prune tokens");
po->Register("determinize-lattice", &determinize_lattice, "If true, determinize the lattice (in a special sense, keeping only best pdf-sequence for each word-sequence).");
po->Register("prune-lattice", &prune_lattice, "If true, prune lattice using the lattice-beam (recommended)");
po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be double this)");
po->Register("max-mem", &max_mem, "Maximum approximate memory consumption (in bytes) to use in determinization (probably real consumption would be many times this)");
po->Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
po->Register("beam-ratio", &beam_ratio, "Ratio by which to decrease lattice-beam if we reach the max-arcs.");
po->Register("max-arcs", &max_arcs, "If >0, maximum #arcs allowed in output lattice (total, not per state)");
}
void Check() const {
KALDI_ASSERT(beam > 0.0 && lattice_beam > 0.0 && prune_interval > 0
&& beam_ratio > 0.0 && beam_ratio < 1.0);
KALDI_ASSERT(beam > 0.0 && lattice_beam > 0.0 && prune_interval > 0);
}
};
@ -211,80 +211,27 @@ class LatticeSimpleDecoder {
Lattice raw_fst;
if(!GetRawLattice(&raw_fst)) return false;
Invert(&raw_fst); // make it so word labels are on the input.
BaseFloat cur_beam = config_.lattice_beam;
fst::DeterminizeLatticeOptions lat_opts;
if (!TopSort(&raw_fst)) // topological sort makes lattice-determinization more efficient
KALDI_WARN << "Topological sorting of state-level lattice failed "
"(probably your lexicon has empty words or your LM has epsilon cycles; this "
" is a bad idea.)";
// (in phase where we get backward-costs).
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&raw_fst, ilabel_comp); // sort on ilabel; makes
// lattice-determinization more efficient.
LatticeWeight beam(config_.lattice_beam, 0);
fst::DeterminizeLatticePrunedOptions lat_opts;
lat_opts.max_mem = config_.max_mem;
lat_opts.max_loop = config_.max_loop;
for (int32 i = 0; i < 20; i++) {
if (DeterminizeLattice(raw_fst, ofst, lat_opts, NULL)) {
raw_fst.DeleteStates(); // save memory.
if (config_.prune_lattice)
fst::PruneCompactLattice(LatticeWeight(cur_beam, 0), ofst);
return true;
} else {
cur_beam *= config_.beam_ratio;
KALDI_WARN << "Failed to determinize lattice (presumably max-states "
<< "reached), reducing lattice-beam to " << cur_beam
<< " and re-trying.";
Lattice tmp_fst(raw_fst);
Prune(tmp_fst, &raw_fst, LatticeWeight(cur_beam, 0));
}
}
return false; // fell off loop-- shouldn't really happen.
}
/*
bool GetOutput(bool is_final, fst::MutableFst<fst::StdArc> *fst_out) {
// GetOutput gets the decoding output. If is_final == true, it limits itself to final states;
// otherwise it gets the most likely token not taking into account final-probs.
// fst_out will be empty (Start() == kNoStateId) if nothing was available.
// It returns true if it got output (thus, fst_out will be nonempty).
fst_out->DeleteStates();
Token *best_tok = NULL;
if (!is_final) {
for (unordered_map<StateId, Token*>::iterator iter = cur_toks_.begin();
iter != cur_toks_.end();
++iter)
if (best_tok == NULL || *best_tok < *(iter->second) )
best_tok = iter->second;
} else {
Weight best_weight = Weight::Zero();
for (unordered_map<StateId, Token*>::iterator iter = cur_toks_.begin();
iter != cur_toks_.end();
++iter) {
Weight this_weight = Times(iter->second->arc_.weight, fst_.Final(iter->first));
if (this_weight != Weight::Zero() &&
this_weight.Value() < best_weight.Value()) {
best_weight = this_weight;
best_tok = iter->second;
}
}
}
if (best_tok == NULL) return false; // No output.
std::vector<Arc> arcs_reverse; // arcs in reverse order.
for (Token *tok = best_tok; tok != NULL; tok = tok->prev_)
arcs_reverse.push_back(tok->arc_);
KALDI_ASSERT(arcs_reverse.back().nextstate == fst_.Start());
arcs_reverse.pop_back(); // that was a "fake" token... gives no info.
StateId cur_state = fst_out->AddState();
fst_out->SetStart(cur_state);
for (ssize_t i = static_cast<ssize_t>(arcs_reverse.size())-1; i >= 0; i--) {
Arc arc = arcs_reverse[i];
arc.nextstate = fst_out->AddState();
fst_out->AddArc(cur_state, arc);
cur_state = arc.nextstate;
}
if (is_final)
fst_out->SetFinal(cur_state, fst_.Final(best_tok->arc_.nextstate));
else
fst_out->SetFinal(cur_state, Weight::One());
RemoveEpsLocal(fst_out);
lat_opts.max_arcs = config_.max_arcs;
DeterminizeLatticePruned(raw_fst, beam, ofst, lat_opts);
raw_fst.DeleteStates(); // Free memory-- raw_fst no longer needed.
Connect(ofst); // Remove unreachable states... there might be
// a small number of these, in some cases.
return true;
}
*/
}
private:
struct Token;

Просмотреть файл

@ -1,6 +1,6 @@
// fstext/determinize-lattice-inl.h
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2012 Microsoft Corporation Daniel Povey
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -365,7 +365,9 @@ template<class Weight, class IntType> class LatticeDeterminizer {
DeterminizeLatticeOptions opts):
num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), opts_(opts),
equal_(opts_.delta), determinized_(false),
minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
// work correctly otherwise.
}
// frees all except output_arcs_, which contains the important info

Просмотреть файл

@ -191,7 +191,9 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
DeterminizeLatticePrunedOptions opts):
num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), beam_(beam), opts_(opts),
equal_(opts_.delta), determinized_(false),
minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
// work correctly otherwise.
}
void FreeOutputStates() {
@ -306,34 +308,26 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
// in "output_arcs_". Must be called after Initialize(). To get the
// output, call one of the Output routines.
bool ans = true;
InitializeDeterminization(); // some start-up tasks.
while (!queue_.empty()) {
Task *task = queue_.top();
// First assess whether we've either reached the specified beam,
// or reached some kind of user-specified maximum. The condition for
// Note: the queue contains only tasks that are "within the beam".
// We also have to check whether we have reached one of the user-specified
// maximums, of estimated memory, arcs, or states. The condition for
// ending is:
// weight is < cutoff-weight, OR
// num-states is more than user specified, OR
// num-arcs is more than user specified, OR
// memory passed a user-specified threshold and cleanup failed
// to get it below that threshold.
size_t num_states = output_states_.size();
if (fst::Compare(task->priority_weight, cutoff_) < 0 ||
(opts_.max_states > 0 && num_states > opts_.max_states) ||
if ((opts_.max_states > 0 && num_states > opts_.max_states) ||
(opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) ||
(num_states % 100 == 0 && !CheckMemoryUsage())) {
if (fst::Compare(task->priority_weight, cutoff_) >= 0) { // We didn't terminate because
// of the lattice-beam, but for some other reason. This is probably
// going to be unusual, so let's inform the user.
KALDI_VLOG(1) << "Lattice determinization terminated but not "
<< " because of lattice-beam. (#states, #arcs) is ( "
<< output_states_.size() << ", " << num_arcs_
<< " ), versus limits ( " << opts_.max_states << ", "
<< opts_.max_arcs << " (else, may be memory limit).";
ans = false;
}
KALDI_VLOG(1) << "Lattice determinization terminated but not "
<< " because of lattice-beam. (#states, #arcs) is ( "
<< output_states_.size() << ", " << num_arcs_
<< " ), versus limits ( " << opts_.max_states << ", "
<< opts_.max_arcs << " (else, may be memory limit).";
break;
// we terminate the determinization here-- whatever we already expanded is
// what we'll return... because we expanded stuff in order of total
@ -345,7 +339,9 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
delete task;
}
determinized_ = true;
return ans;
return (queue_.empty()); // return success if queue was empty, i.e. we processed
// all tasks and did not break out of the loop early due to reaching a memory,
// arc or state limit.
}
private:
@ -487,7 +483,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
if (iter != minimal_hash_.end()) { // Found a matching subset.
OutputStateId state_id = iter->second;
const OutputState &state = *(output_states_[state_id]);
// Below is just a check the algorithm is right...
// Below is just a check that the algorithm is working...
if (fst::Compare(forward_weight, state.forward_weight) > 0
&& !ApproxEqual(forward_weight, state.forward_weight,
0.1)) { // TODO: remove this once we're sure it's working...
@ -704,7 +700,8 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
// processes final-weights for this subset. state.minimal_subset_ may be
// empty if the graph is not connected/trimmed, I think, so don't check
// that it's nonempty.
StringId final_string = NULL; // = NULL to keep compiler happy.
StringId final_string = repository_.EmptyString(); // set it to keep the
// compiler happy; if it doesn't get set in the loop, we won't use the value anyway.
Weight final_weight = Weight::Zero();
bool is_final = false;
typename vector<Element>::const_iterator iter = minimal_subset.begin(), end = minimal_subset.end();
@ -903,7 +900,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
typedef typename vector<pair<Label, Element> >::const_iterator PairIter;
PairIter cur = all_elems.begin(), end = all_elems.end();
while (cur != end) {
// The old code (non-pruned) called ProcessTransition; here,
// The old code (non-pruned) called ProcessTransition; here, instead,
// we'll put the calls into a priority queue.
Task *task = new Task;
// Process ranges that share the same input symbol.
@ -930,11 +927,15 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
output_states_[output_state_id]->forward_weight,
task->priority_weight);
MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
queue_.push(task); // Push the task onto the queue. The queue keeps it
// in prioritized order, so we always process the one with the "best"
// weight (highest in the semiring).
if (fst::Compare(task->priority_weight, cutoff_) < 0) {
// This task would never get done as it's below the pruning cutoff.
delete task;
} else {
MakeSubsetUnique(&(task->subset)); // remove duplicate Elements with the same state.
queue_.push(task); // Push the task onto the queue. The queue keeps it
// in prioritized order, so we always process the one with the "best"
// weight (highest in the semiring).
}
}
all_elems.clear(); // as it's a reference to a class variable; we want it to stay
// empty.
@ -1022,13 +1023,11 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
a lookaside buffer anyway, so this isn't a problem-- it will get populated
later if it needs to be.
*/
Element elem;
elem.state = start_id;
elem.weight = Weight::One();
elem.string = repository_.EmptyString(); // Id of empty sequence.
vector<Element> subset;
subset.push_back(elem);
EpsilonClosure(&subset); // follow through epsilon-inputs links
vector<Element> subset(1);
subset[0].state = start_id;
subset[0].weight = Weight::One();
subset[0].string = repository_.EmptyString(); // Id of empty sequence.
EpsilonClosure(&subset); // follow through epsilon-input links
ConvertToMinimal(&subset); // remove all but final states and
// states with input-labels on arcs out of them.
// Weight::One() is the "forward-weight" of this determinized state...
@ -1056,7 +1055,7 @@ template<class Weight, class IntType> class LatticeDeterminizerPruned {
// output we may have to ignore some of these.
Weight forward_weight; // Represents minimal cost from start-state
// to this state. Used in prioritization of tasks, and pruning.
// Note: we know this minimal cost from when we fist create the OutputState;
// Note: we know this minimal cost from when we first create the OutputState;
// this is because of the priority-queue we use, that ensures that the
// "best" path into the state will be expanded first.
OutputState(const vector<Element> &minimal_subset,

Просмотреть файл

@ -12,7 +12,7 @@ BINFILES = lattice-best-path lattice-prune lattice-equivalent lattice-to-nbest \
lattice-add-trans-probs lattice-difference lattice-word-align \
nbest-to-linear nbest-to-lattice lattice-1best linear-to-nbest \
lattice-mbr-decode lattice-align-words lattice-to-mpe-post \
lattice-copy-backoff nbest-to-ctm
lattice-copy-backoff nbest-to-ctm lattice-determinize-pruned
OBJFILES =

Просмотреть файл

@ -1,6 +1,6 @@
// latbin/lattice-determinize.cc
// Copyright 2009-2011 Microsoft Corporation
// Copyright 2009-2012 Microsoft Corporation Daniel Povey
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -114,7 +114,7 @@ int main(int argc, char *argv[]) {
po.Register("beam", &beam, "Pruning beam [applied after acoustic scaling]-- also used to handle determinization failures, set --prune=false to disable routine pruning");
po.Register("delta", &delta, "Tolerance used in determinization");
po.Register("prune", &prune, "If true, prune determinized lattices with the --beam option.");
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be twice this)");
po.Register("max-mem", &max_mem, "Maximum approximate memory usage in determinization (real usage might be many times this)");
po.Register("max-loop", &max_loop, "Option to detect a certain type of failure in lattice determinization (not critical)");
po.Register("beam-ratio", &beam_ratio, "Ratio by which to decrease beam if we reach the max-arcs.");
po.Register("num-loops", &num_loops, "Number of times to decrease beam by beam-ratio if determinization fails.");

Просмотреть файл

@ -85,9 +85,9 @@ int main(int argc, char *argv[]) {
}
}
KALDI_LOG << "Total " << n_done << " lattices written."
KALDI_LOG << "Total " << n_done << " lattices written; "
<< n_only_transcription
<< " lattices contain only transcription; "
<< " lattices had empty difference; "
<< n_no_lat << " missing lattices in second archive ";
return (n_done != 0 ? 0 : 1);
} catch(const std::exception& e) {

Просмотреть файл

@ -82,7 +82,7 @@ int main(int argc, char *argv[]) {
n_arcs_out += pruned_narcs;
n_states_out += pruned_nstates;
KALDI_LOG << "For utterance " << key << ", pruned #states from "
<< nstates << " to " << pruned_nstates << " and #arcs from"
<< nstates << " to " << pruned_nstates << " and #arcs from "
<< narcs << " to " << pruned_narcs;
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale), &pruned_lat);
CompactLattice pruned_clat;