WARNING: this change list renamed GauPost to GaussPost, and also made it pdf-id indexed instead of transition-id indexed. Details: 1) Modified the posterior-related programs that use TransitionIdToPdf to get a pdf-id and afterwards use only the pdf-id. We merge the posteriors that correspond to the same pdf-id to avoid redundant computation. 2) Modified phone lattice determinization: added a wrapper for the lattice-type determinization to reduce redundant code in the decoding binaries.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3588 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Guoguo Chen 2014-02-25 04:00:06 +00:00
Родитель cd36df98b5
Коммит 725f4abd68
35 изменённых файлов: 381 добавлений и 289 удалений

Просмотреть файл

@ -1,6 +1,7 @@
// bin/acc-lda.cc // bin/acc-lda.cc
// Copyright 2009-2011 Microsoft Corporation, Go-Vivace Inc. // Copyright 2009-2011 Microsoft Corporation, Go-Vivace Inc.
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -95,14 +96,15 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (int32 i = 0; i < feats.NumRows(); i++) { for (int32 i = 0; i < feats.NumRows(); i++) {
SubVector<BaseFloat> feat(feats, i); SubVector<BaseFloat> feat(feats, i);
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 tid = post[i][j].first; int32 pdf_id = pdf_post[i][j].first;
BaseFloat weight = RandPrune(post[i][j].second, rand_prune); BaseFloat weight = RandPrune(pdf_post[i][j].second, rand_prune);
if (weight != 0.0) { if (weight != 0.0) {
int32 pdf = trans_model.TransitionIdToPdf(tid); lda.Accumulate(feat, pdf_id, weight);
lda.Accumulate(feat, pdf, weight);
} }
} }
} }

Просмотреть файл

@ -790,28 +790,16 @@ void DecodeUtteranceLatticeFasterClass::operator () () {
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt_; KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt_;
fst::Connect(lat_); fst::Connect(lat_);
if (determinize_) { if (determinize_) {
Invert(lat_);
if (!TopSort(lat_)) {
// Cannot topologically sort the lattice -- determinization will fail.
KALDI_WARN << "Topological sorting of state-level lattice failed "
<< "(probably your lexicon has empty words or your LM has "
<< "epsilon cycles).";
delete lat_; // Delete it here.
success_ = false;
return;
}
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(lat_, ilabel_comp);
clat_ = new CompactLattice; clat_ = new CompactLattice;
if (!DeterminizeLatticePhonePruned(*trans_model_, if (!DeterminizeLatticePhonePrunedWrapper(
lat_, *trans_model_,
decoder_->GetOptions().lattice_beam, lat_,
clat_, decoder_->GetOptions().lattice_beam,
decoder_->GetOptions().det_opts)) clat_,
decoder_->GetOptions().det_opts))
KALDI_WARN << "Determinization finished earlier than the beam for " KALDI_WARN << "Determinization finished earlier than the beam for "
<< "utterance " << utt_; << "utterance " << utt_;
delete lat_; delete lat_;
fst::Connect(clat_);
// We'll write the lattice without acoustic scaling. // We'll write the lattice without acoustic scaling.
if (acoustic_scale_ != 0.0) if (acoustic_scale_ != 0.0)
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat_); fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat_);
@ -971,25 +959,15 @@ bool DecodeUtteranceLatticeFaster(
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt; KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
fst::Connect(&lat); fst::Connect(&lat);
if (determinize) { if (determinize) {
Invert(&lat);
if (!TopSort(&lat)) {
// Cannot topologically sort the lattice -- determinization will fail.
KALDI_WARN << "Topological sorting of state-level lattice failed "
<< "(probably your lexicon has empty words or your LM has "
<< "epsilon cycles).";
return false;
}
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&lat, ilabel_comp);
CompactLattice clat; CompactLattice clat;
if (!DeterminizeLatticePhonePruned(trans_model, if (!DeterminizeLatticePhonePrunedWrapper(
&lat, trans_model,
decoder.GetOptions().lattice_beam, &lat,
&clat, decoder.GetOptions().lattice_beam,
decoder.GetOptions().det_opts)) &clat,
decoder.GetOptions().det_opts))
KALDI_WARN << "Determinization finished earlier than the beam for " KALDI_WARN << "Determinization finished earlier than the beam for "
<< "utterance " << utt; << "utterance " << utt;
fst::Connect(&clat);
// We'll write the lattice without acoustic scaling. // We'll write the lattice without acoustic scaling.
if (acoustic_scale != 0.0) if (acoustic_scale != 0.0)
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat); fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);

Просмотреть файл

@ -656,25 +656,15 @@ bool DecodeUtteranceLatticeSimple(
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt; KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
fst::Connect(&lat); fst::Connect(&lat);
if (determinize) { if (determinize) {
Invert(&lat);
if (!TopSort(&lat)) {
// Cannot topologically sort the lattice -- determinization will fail.
KALDI_WARN << "Topological sorting of state-level lattice failed "
<< "(probably your lexicon has empty words or your LM has "
<< "epsilon cycles).";
return false;
}
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&lat, ilabel_comp);
CompactLattice clat; CompactLattice clat;
if (!DeterminizeLatticePhonePruned(trans_model, if (!DeterminizeLatticePhonePrunedWrapper(
&lat, trans_model,
decoder.GetOptions().lattice_beam, &lat,
&clat, decoder.GetOptions().lattice_beam,
decoder.GetOptions().det_opts)) &clat,
decoder.GetOptions().det_opts))
KALDI_WARN << "Determinization finished earlier than the beam for " KALDI_WARN << "Determinization finished earlier than the beam for "
<< "utterance " << utt; << "utterance " << utt;
fst::Connect(&clat);
// We'll write the lattice without acoustic scaling. // We'll write the lattice without acoustic scaling.
if (acoustic_scale != 0.0) if (acoustic_scale != 0.0)
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat); fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);

Просмотреть файл

@ -938,25 +938,15 @@ bool DecodeUtteranceLatticeTracking(
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt; KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
fst::Connect(&lat); fst::Connect(&lat);
if (determinize) { if (determinize) {
Invert(&lat);
if (!TopSort(&lat)) {
// Cannot topologically sort the lattice -- determinization will fail.
KALDI_WARN << "Topological sorting of state-level lattice failed "
<< "(probably your lexicon has empty words or your LM has "
<< "epsilon cycles).";
return false;
}
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&lat, ilabel_comp);
CompactLattice clat; CompactLattice clat;
if (!DeterminizeLatticePhonePruned(trans_model, if (!DeterminizeLatticePhonePrunedWrapper(
&lat, trans_model,
decoder.GetOptions().lattice_beam, &lat,
&clat, decoder.GetOptions().lattice_beam,
decoder.GetOptions().det_opts)) &clat,
decoder.GetOptions().det_opts))
KALDI_WARN << "Determinization finished earlier than the beam for " KALDI_WARN << "Determinization finished earlier than the beam for "
<< "utterance " << utt; << "utterance " << utt;
fst::Connect(&clat);
// We'll write the lattice without acoustic scaling. // We'll write the lattice without acoustic scaling.
if (acoustic_scale != 0.0) if (acoustic_scale != 0.0)
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat); fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-acc-hlda.cc // gmmbin/gmm-acc-hlda.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -106,11 +107,12 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 pdf_id = pdf_posterior[i][j].first;
pdf_id = trans_model.TransitionIdToPdf(tid); BaseFloat weight = pdf_posterior[i][j].second;
BaseFloat weight = posterior[i][j].second;
Vector<BaseFloat> posteriors; Vector<BaseFloat> posteriors;
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id); const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-acc-mllt.cc // gmmbin/gmm-acc-mllt.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -90,11 +91,12 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 pdf_id = pdf_posterior[i][j].first;
pdf_id = trans_model.TransitionIdToPdf(tid); BaseFloat weight = pdf_posterior[i][j].second;
BaseFloat weight = posterior[i][j].second;
tot_like_this_file += mllt_accs.AccumulateFromGmm(am_gmm.GetPdf(pdf_id), tot_like_this_file += mllt_accs.AccumulateFromGmm(am_gmm.GetPdf(pdf_id),
mat.Row(i), mat.Row(i),

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-acc-stats-twofeats.cc // gmmbin/gmm-acc-stats-twofeats.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -111,12 +112,13 @@ int main(int argc, char *argv[]) {
BaseFloat tot_like_this_file = 0.0, BaseFloat tot_like_this_file = 0.0,
tot_weight_this_file = 0.0; tot_weight_this_file = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { // Accumulates for GMM.
int32 tid = posterior[i][j].first, // transition identifier. for (size_t j = 0; j <pdf_posterior[i].size(); j++) {
pdf_id = trans_model.TransitionIdToPdf(tid); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat weight = posterior[i][j].second; BaseFloat weight = pdf_posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
tot_like_this_file += weight * tot_like_this_file += weight *
gmm_accs.AccumulateForGmmTwofeats(am_gmm, gmm_accs.AccumulateForGmmTwofeats(am_gmm,
mat1.Row(i), mat1.Row(i),
@ -125,6 +127,13 @@ int main(int argc, char *argv[]) {
weight); weight);
tot_weight_this_file += weight; tot_weight_this_file += weight;
} }
// Accumulates for transitions.
for (size_t j = 0; j < posterior[i].size(); j++) {
int32 tid = posterior[i][j].first;
BaseFloat weight = posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
}
} }
KALDI_LOG << "Average like for this file is " KALDI_LOG << "Average like for this file is "
<< (tot_like_this_file/tot_weight_this_file) << " over " << (tot_like_this_file/tot_weight_this_file) << " over "

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-acc-stats.cc // gmmbin/gmm-acc-stats.cc
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -98,16 +99,24 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { // Accumulates for GMM.
int32 tid = posterior[i][j].first, // transition identifier. for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
pdf_id = trans_model.TransitionIdToPdf(tid); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat weight = posterior[i][j].second; BaseFloat weight = pdf_posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
tot_like_this_file += gmm_accs.AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, weight) tot_like_this_file += gmm_accs.AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, weight)
* weight; * weight;
tot_weight += weight; tot_weight += weight;
} }
// Accumulates for transitions.
for (size_t j = 0; j < posterior[i].size(); j++) {
int32 tid = posterior[i][j].first;
BaseFloat weight = posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
}
} }
if (num_done % 50 == 0) { if (num_done % 50 == 0) {
KALDI_LOG << "Processed " << num_done << " utterances; for utterance " KALDI_LOG << "Processed " << num_done << " utterances; for utterance "

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2012 Cisco Systems (author: Neha Agrawal) // Copyright 2012 Cisco Systems (author: Neha Agrawal)
// Johns Hopkins University (author: Daniel Povey) // Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -119,11 +120,12 @@ int main(int argc, char *argv[]) {
} }
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for ( size_t i = 0; i < posterior.size(); i++ ) { for ( size_t i = 0; i < posterior.size(); i++ ) {
for ( size_t j = 0; j < posterior[i].size(); j++ ) { for ( size_t j = 0; j < pdf_posterior[i].size(); j++ ) {
int32 pdf_id = int32 pdf_id = pdf_posterior[i][j].first;
trans_model.TransitionIdToPdf(posterior[i][j].first); BaseFloat weight = pdf_posterior[i][j].second;
BaseFloat weight = posterior[i][j].second;
file_like += map_accs.AccumulateForGmm(copy_am_gmm, file_like += map_accs.AccumulateForGmm(copy_am_gmm,
feats.Row(i), feats.Row(i),
pdf_id, weight); pdf_id, weight);
@ -183,11 +185,12 @@ int main(int argc, char *argv[]) {
} }
num_done++; num_done++;
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for ( size_t i = 0; i < posterior.size(); i++ ) { for ( size_t i = 0; i < posterior.size(); i++ ) {
for ( size_t j = 0; j < posterior[i].size(); j++ ) { for ( size_t j = 0; j < pdf_posterior[i].size(); j++ ) {
int32 pdf_id = int32 pdf_id = pdf_posterior[i][j].first;
trans_model.TransitionIdToPdf(posterior[i][j].first); BaseFloat prob = pdf_posterior[i][j].second;
BaseFloat prob = posterior[i][j].second;
file_like += map_accs.AccumulateForGmm(copy_am_gmm,feats.Row(i), file_like += map_accs.AccumulateForGmm(copy_am_gmm,feats.Row(i),
pdf_id, prob); pdf_id, prob);
file_t += prob; file_t += prob;

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-basis-fmllr-accs-gpost.cc // gmmbin/gmm-basis-fmllr-accs-gpost.cc
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao) // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -32,13 +33,13 @@ using std::vector;
namespace kaldi { namespace kaldi {
void AccumulateForUtterance(const Matrix<BaseFloat> &feats, void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const GauPost &gpost, const GaussPost &gpost,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
for (size_t i = 0; i < gpost.size(); i++) { for (size_t i = 0; i < gpost.size(); i++) {
for (size_t j = 0; j < gpost[i].size(); j++) { for (size_t j = 0; j < gpost[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first); int32 pdf_id = gpost[i][j].first;
const Vector<BaseFloat> & posterior(gpost[i][j].second); const Vector<BaseFloat> & posterior(gpost[i][j].second);
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
feats.Row(i), posterior); feats.Row(i), posterior);
@ -89,7 +90,7 @@ int main(int argc, char *argv[]) {
am_gmm.Read(ki.Stream(), binary); am_gmm.Read(ki.Stream(), binary);
} }
RandomAccessGauPostReader gpost_reader(gpost_rspecifier); RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
BasisFmllrAccus basis_accs(am_gmm.Dim()); BasisFmllrAccus basis_accs(am_gmm.Dim());
int32 num_done = 0, num_no_post = 0, num_other_error = 0; int32 num_done = 0, num_no_post = 0, num_other_error = 0;
@ -115,9 +116,9 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(utt); const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;
@ -143,10 +144,10 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(); const Matrix<BaseFloat> &feats = feature_reader.Value();
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-basis-fmllr-accs.cc // gmmbin/gmm-basis-fmllr-accs.cc
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao) // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t i = 0; i < post.size(); i++) { for (size_t i = 0; i < post.size(); i++) {
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first); int32 pdf_id = pdf_post[i][j].first;
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
feats.Row(i), feats.Row(i),
post[i][j].second); pdf_post[i][j].second);
} }
} }
} }

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-basis-fmllr-gpost.cc // gmmbin/gmm-est-basis-fmllr-gpost.cc
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao) // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -32,13 +33,13 @@ using std::vector;
namespace kaldi { namespace kaldi {
void AccumulateForUtterance(const Matrix<BaseFloat> &feats, void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const GauPost &gpost, const GaussPost &gpost,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
for (size_t i = 0; i < gpost.size(); i++) { for (size_t i = 0; i < gpost.size(); i++) {
for (size_t j = 0; j < gpost[i].size(); j++) { for (size_t j = 0; j < gpost[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first); int32 pdf_id = gpost[i][j].first;
const Vector<BaseFloat> & posterior(gpost[i][j].second); const Vector<BaseFloat> & posterior(gpost[i][j].second);
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
feats.Row(i), posterior); feats.Row(i), posterior);
@ -102,7 +103,7 @@ int main(int argc, char *argv[]) {
basis_est.ReadBasis(ki.Stream(), binary, false); basis_est.ReadBasis(ki.Stream(), binary, false);
} }
RandomAccessGauPostReader gpost_reader(gpost_rspecifier); RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
double tot_impr = 0.0, tot_t = 0.0; double tot_impr = 0.0, tot_t = 0.0;
@ -134,9 +135,9 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(utt); const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;
@ -178,10 +179,10 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(); const Matrix<BaseFloat> &feats = feature_reader.Value();
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-basis-fmllr.cc // gmmbin/gmm-est-basis-fmllr.cc
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao) // Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t i = 0; i < post.size(); i++) { for (size_t i = 0; i < post.size(); i++) {
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first); int32 pdf_id = pdf_post[i][j].first;
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
feats.Row(i), feats.Row(i),
post[i][j].second); pdf_post[i][j].second);
} }
} }
} }

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2011 Microsoft Corporation; Saarland University // Copyright 2009-2011 Microsoft Corporation; Saarland University
// 2013 Johns Hopkins University (author: Daniel Povey) // 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -32,13 +33,13 @@ using std::vector;
namespace kaldi { namespace kaldi {
void AccumulateForUtterance(const Matrix<BaseFloat> &feats, void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const GauPost &gpost, const GaussPost &gpost,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
for (size_t i = 0; i < gpost.size(); i++) { for (size_t i = 0; i < gpost.size(); i++) {
for (size_t j = 0; j < gpost[i].size(); j++) { for (size_t j = 0; j < gpost[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first); int32 pdf_id = gpost[i][j].first;
const Vector<BaseFloat> & posterior(gpost[i][j].second); const Vector<BaseFloat> & posterior(gpost[i][j].second);
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
feats.Row(i), posterior); feats.Row(i), posterior);
@ -89,7 +90,7 @@ int main(int argc, char *argv[]) {
am_gmm.Read(ki.Stream(), binary); am_gmm.Read(ki.Stream(), binary);
} }
RandomAccessGauPostReader gpost_reader(gpost_rspecifier); RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
double tot_impr = 0.0, tot_t = 0.0; double tot_impr = 0.0, tot_t = 0.0;
@ -117,9 +118,9 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(utt); const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost vector has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost vector has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;
@ -153,10 +154,10 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(); const Matrix<BaseFloat> &feats = feature_reader.Value();
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "GauPost has wrong size " << (gpost.size()) KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
<< " vs. " << (feats.NumRows()); << " vs. " << (feats.NumRows());
num_other_error++; num_other_error++;
continue; continue;

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-fmllr-raw-gpost.cc // gmmbin/gmm-est-fmllr-raw-gpost.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey) // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -29,12 +30,12 @@ namespace kaldi {
void AccStatsForUtterance(const TransitionModel &trans_model, void AccStatsForUtterance(const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
const GauPost &gpost, const GaussPost &gpost,
const Matrix<BaseFloat> &feats, const Matrix<BaseFloat> &feats,
FmllrRawAccs *accs) { FmllrRawAccs *accs) {
for (size_t t = 0; t < gpost.size(); t++) { for (size_t t = 0; t < gpost.size(); t++) {
for (size_t i = 0; i < gpost[t].size(); i++) { for (size_t i = 0; i < gpost[t].size(); i++) {
int32 pdf = trans_model.TransitionIdToPdf(gpost[t][i].first); int32 pdf = gpost[t][i].first;
const Vector<BaseFloat> &posterior(gpost[t][i].second); const Vector<BaseFloat> &posterior(gpost[t][i].second);
accs->AccumulateFromPosteriors(am_gmm.GetPdf(pdf), accs->AccumulateFromPosteriors(am_gmm.GetPdf(pdf),
feats.Row(t), posterior); feats.Row(t), posterior);
@ -94,7 +95,7 @@ int main(int argc, char *argv[]) {
Matrix<BaseFloat> full_lda_mat; Matrix<BaseFloat> full_lda_mat;
ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat); ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat);
RandomAccessGauPostReader gpost_reader(gpost_rspecifier); RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
BaseFloatMatrixWriter transform_writer(transform_wspecifier); BaseFloatMatrixWriter transform_writer(transform_wspecifier);
double tot_auxf_impr = 0.0, tot_count = 0.0; double tot_auxf_impr = 0.0, tot_count = 0.0;
@ -121,7 +122,7 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(utt); const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "Size mismatch between gposteriors " << gpost.size() KALDI_WARN << "Size mismatch between gposteriors " << gpost.size()
<< " and features " << feats.NumRows(); << " and features " << feats.NumRows();
@ -155,7 +156,7 @@ int main(int argc, char *argv[]) {
continue; continue;
} }
const Matrix<BaseFloat> &feats = feature_reader.Value(); const Matrix<BaseFloat> &feats = feature_reader.Value();
const GauPost &gpost = gpost_reader.Value(utt); const GaussPost &gpost = gpost_reader.Value(utt);
if (static_cast<int32>(gpost.size()) != feats.NumRows()) { if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
KALDI_WARN << "Size mismatch between posteriors " << gpost.size() KALDI_WARN << "Size mismatch between posteriors " << gpost.size()

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-fmllr-raw.cc // gmmbin/gmm-est-fmllr-raw.cc
// Copyright 2013 Johns Hopkins University (author: Daniel Povey) // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -32,13 +33,14 @@ void AccStatsForUtterance(const TransitionModel &trans_model,
const Posterior &post, const Posterior &post,
const Matrix<BaseFloat> &feats, const Matrix<BaseFloat> &feats,
FmllrRawAccs *accs) { FmllrRawAccs *accs) {
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t t = 0; t < post.size(); t++) { for (size_t t = 0; t < post.size(); t++) {
for (size_t i = 0; i < post[t].size(); i++) { for (size_t i = 0; i < pdf_post[t].size(); i++) {
int32 pdf = trans_model.TransitionIdToPdf(post[t][i].first); int32 pdf = pdf_post[t][i].first;
BaseFloat weight = post[t][i].second; BaseFloat weight = pdf_post[t][i].second;
accs->AccumulateForGmm(am_gmm.GetPdf(pdf), accs->AccumulateForGmm(am_gmm.GetPdf(pdf),
feats.Row(t), feats.Row(t), weight);
weight);
} }
} }
} }

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2011 Microsoft Corporation; Saarland University // Copyright 2009-2011 Microsoft Corporation; Saarland University
// 2013 Johns Hopkins University (author: Daniel Povey) // 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
const TransitionModel &trans_model, const TransitionModel &trans_model,
const AmDiagGmm &am_gmm, const AmDiagGmm &am_gmm,
FmllrDiagGmmAccs *spk_stats) { FmllrDiagGmmAccs *spk_stats) {
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t i = 0; i < post.size(); i++) { for (size_t i = 0; i < post.size(); i++) {
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first); int32 pdf_id = pdf_post[i][j].first;
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id), spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
feats.Row(i), feats.Row(i),
post[i][j].second); pdf_post[i][j].second);
} }
} }
} }

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-regtree-fmllr.cc // gmmbin/gmm-est-regtree-fmllr.cc
// Copyright 2009-2011 Saarland University; Microsoft Corporation // Copyright 2009-2011 Saarland University; Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -117,10 +118,12 @@ int main(int argc, char *argv[]) {
} }
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat prob = posterior[i][j].second; BaseFloat prob = pdf_posterior[i][j].second;
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
feats.Row(i), pdf_id, feats.Row(i), pdf_id,
prob); prob);
@ -168,10 +171,12 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
fmllr_accs.SetZero(); fmllr_accs.SetZero();
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat prob = posterior[i][j].second; BaseFloat prob = pdf_posterior[i][j].second;
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm, file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
feats.Row(i), pdf_id, feats.Row(i), pdf_id,
prob); prob);

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-est-regtree-mllr.cc // gmmbin/gmm-est-regtree-mllr.cc
// Copyright 2009-2011 Saarland University; Microsoft Corporation // Copyright 2009-2011 Saarland University; Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -117,10 +118,12 @@ int main(int argc, char *argv[]) {
} }
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat prob = posterior[i][j].second; BaseFloat prob = pdf_posterior[i][j].second;
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm, file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
feats.Row(i), pdf_id, feats.Row(i), pdf_id,
prob); prob);
@ -168,10 +171,12 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat file_like = 0.0, file_t = 0.0; BaseFloat file_like = 0.0, file_t = 0.0;
mllr_accs.SetZero(); mllr_accs.SetZero();
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat prob = posterior[i][j].second; BaseFloat prob = pdf_posterior[i][j].second;
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm, file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
feats.Row(i), pdf_id, feats.Row(i), pdf_id,
prob); prob);

Просмотреть файл

@ -100,25 +100,15 @@ bool DecodeUtterance(LatticeBiglmFasterDecoder &decoder, // not const but is rea
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt; KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
fst::Connect(&lat); fst::Connect(&lat);
if (determinize) { if (determinize) {
Invert(&lat);
if (!TopSort(&lat)) {
// Cannot topologically sort the lattice -- determinization will fail.
KALDI_WARN << "Topological sorting of state-level lattice failed "
<< "(probably your lexicon has empty words or your LM has "
<< "epsilon cycles).";
return false;
}
fst::ILabelCompare<LatticeArc> ilabel_comp;
ArcSort(&lat, ilabel_comp);
CompactLattice clat; CompactLattice clat;
if (!DeterminizeLatticePhonePruned(trans_model, if (!DeterminizeLatticePhonePrunedWrapper(
&lat, trans_model,
decoder.GetOptions().lattice_beam, &lat,
&clat, decoder.GetOptions().lattice_beam,
decoder.GetOptions().det_opts)) &clat,
decoder.GetOptions().det_opts))
KALDI_WARN << "Determinization finished earlier than the beam for " KALDI_WARN << "Determinization finished earlier than the beam for "
<< "utterance " << utt; << "utterance " << utt;
fst::Connect(&clat);
// We'll write the lattice without acoustic scaling. // We'll write the lattice without acoustic scaling.
if (acoustic_scale != 0.0) if (acoustic_scale != 0.0)
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat); fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);

Просмотреть файл

@ -1,6 +1,7 @@
// gmmbin/gmm-post-to-gpost.cc // gmmbin/gmm-post-to-gpost.cc
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -69,7 +70,7 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier); SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier); RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
GauPostWriter gpost_writer(gpost_wspecifier); GaussPostWriter gpost_writer(gpost_wspecifier);
int32 num_done = 0, num_no_posterior = 0, num_other_error = 0; int32 num_done = 0, num_no_posterior = 0, num_other_error = 0;
for (; !feature_reader.Done(); feature_reader.Next()) { for (; !feature_reader.Done(); feature_reader.Next()) {
@ -79,7 +80,7 @@ int main(int argc, char *argv[]) {
} else { } else {
const Matrix<BaseFloat> &mat = feature_reader.Value(); const Matrix<BaseFloat> &mat = feature_reader.Value();
const Posterior &posterior = posteriors_reader.Value(key); const Posterior &posterior = posteriors_reader.Value(key);
GauPost gpost(posterior.size()); GaussPost gpost(posterior.size());
if (posterior.size() != mat.NumRows()) { if (posterior.size() != mat.NumRows()) {
KALDI_WARN << "Posterior vector has wrong size "<< (posterior.size()) << " vs. "<< (mat.NumRows()); KALDI_WARN << "Posterior vector has wrong size "<< (posterior.size()) << " vs. "<< (mat.NumRows());
@ -90,12 +91,13 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
gpost[i].reserve(posterior[i].size()); gpost[i].reserve(pdf_posterior[i].size());
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 pdf_id = pdf_posterior[i][j].first;
pdf_id = trans_model.TransitionIdToPdf(tid); BaseFloat weight = pdf_posterior[i][j].second;
BaseFloat weight = posterior[i][j].second;
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id); const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
Vector<BaseFloat> this_post_vec; Vector<BaseFloat> this_post_vec;
BaseFloat like = BaseFloat like =
@ -106,7 +108,7 @@ int main(int argc, char *argv[]) {
this_post_vec(k) = RandPrune(this_post_vec(k), this_post_vec(k) = RandPrune(this_post_vec(k),
rand_prune); rand_prune);
if (!this_post_vec.IsZero()) if (!this_post_vec.IsZero())
gpost[i].push_back(std::make_pair(tid, this_post_vec)); gpost[i].push_back(std::make_pair(pdf_id, this_post_vec));
tot_like_this_file += like * weight; tot_like_this_file += like * weight;
tot_weight += weight; tot_weight += weight;
} }

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2013 Johns Hopkins University (author: Daniel Povey) // 2013 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -136,13 +137,13 @@ bool PosteriorHolder::Read(std::istream &is) {
} }
// static // static
bool GauPostHolder::Write(std::ostream &os, bool binary, const T &t) { bool GaussPostHolder::Write(std::ostream &os, bool binary, const T &t) {
InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. InitKaldiOutputStream(os, binary); // Puts binary header if binary mode.
try { try {
// We don't bother making this a one-line format. // We don't bother making this a one-line format.
int32 sz = t.size(); int32 sz = t.size();
WriteBasicType(os, binary, sz); WriteBasicType(os, binary, sz);
for (GauPost::const_iterator iter = t.begin(); iter != t.end(); ++iter) { for (GaussPost::const_iterator iter = t.begin(); iter != t.end(); ++iter) {
int32 sz2 = iter->size(); int32 sz2 = iter->size();
WriteBasicType(os, binary, sz2); WriteBasicType(os, binary, sz2);
for (std::vector<std::pair<int32, Vector<BaseFloat> > >::const_iterator iter2=iter->begin(); for (std::vector<std::pair<int32, Vector<BaseFloat> > >::const_iterator iter2=iter->begin();
@ -161,7 +162,7 @@ bool GauPostHolder::Write(std::ostream &os, bool binary, const T &t) {
} }
} }
bool GauPostHolder::Read(std::istream &is) { bool GaussPostHolder::Read(std::istream &is) {
t_.clear(); t_.clear();
bool is_binary; bool is_binary;
@ -175,7 +176,7 @@ bool GauPostHolder::Read(std::istream &is) {
if (sz < 0) if (sz < 0)
KALDI_ERR << "Reading posteriors: got negative size\n"; KALDI_ERR << "Reading posteriors: got negative size\n";
t_.resize(sz); t_.resize(sz);
for (GauPost::iterator iter = t_.begin(); iter != t_.end(); ++iter) { for (GaussPost::iterator iter = t_.begin(); iter != t_.end(); ++iter) {
int32 sz2; int32 sz2;
ReadBasicType(is, is_binary, &sz2); ReadBasicType(is, is_binary, &sz2);
if (sz2 < 0) if (sz2 < 0)
@ -270,6 +271,27 @@ void AlignmentToPosterior(const std::vector<int32> &ali,
} }
} }
/// Ordering functor over (transition-id, weight) posterior entries: an entry
/// orders before another when its transition-id maps to a smaller pdf-id under
/// the supplied transition model.  Weights are ignored, so entries whose
/// transition-ids share a pdf-id compare as equivalent.
struct ComparePosteriorByPdfs {
  // Pointer to the caller's transition model; it is not owned, so the model
  // must outlive this functor.
  const TransitionModel *tmodel_;
  ComparePosteriorByPdfs(const TransitionModel &tmodel): tmodel_(&tmodel) {}
  // Declared const so the comparator can be invoked through const copies or
  // const references (it never modifies its own state).
  bool operator() (const std::pair<int32, BaseFloat> &a,
                   const std::pair<int32, BaseFloat> &b) const {
    return tmodel_->TransitionIdToPdf(a.first)
        < tmodel_->TransitionIdToPdf(b.first);
  }
};
/// Reorders, in place, the entries of every element of *post so that entries
/// whose transition-ids map to the same pdf-id become adjacent.  Entries are
/// ordered by increasing pdf-id; the relative order of entries that share a
/// pdf-id is not specified (std::sort is not a stable sort).
void SortPosteriorByPdfs(const TransitionModel &tmodel,
                         Posterior *post) {
  ComparePosteriorByPdfs compare(tmodel);
  for (size_t i = 0; i < post->size(); i++) {
    // Call std::sort explicitly: an unqualified sort() is only found through
    // argument-dependent lookup or a using-directive, and ADL does not apply
    // on standard libraries whose vector iterators are plain pointers.
    std::sort((*post)[i].begin(), (*post)[i].end(), compare);
  }
}
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
const Posterior &post_in, const Posterior &post_in,
Posterior *post_out) { Posterior *post_out) {

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2011 Microsoft Corporation // Copyright 2009-2011 Microsoft Corporation
// 2013-2014 Johns Hopkins University (author: Daniel Povey) // 2013-2014 Johns Hopkins University (author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
@ -40,10 +41,14 @@ namespace kaldi {
/// is a probability (typically between zero and one). /// is a probability (typically between zero and one).
typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior; typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
/// GauPost is a typedef for storing Gaussian-level posteriors for an utterance. /// GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
/// the "int32" is a transition-id, and the Vector<BaseFloat> is a vector of /// the "int32" is a transition-id, and the Vector<BaseFloat> is a vector of
/// Gaussian posteriors. /// Gaussian posteriors.
typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GauPost; /// WARNING: We changed "int32" from transition-id to pdf-id, and the change is
/// applied for all programs using GaussPost. This is for efficiency purpose. We
/// also changed the name slightly from GauPost to GaussPost to reduce the
/// chance that the change will go un-noticed in downstream code.
typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
// PosteriorHolder is a holder for Posterior, which is // PosteriorHolder is a holder for Posterior, which is
@ -75,19 +80,19 @@ class PosteriorHolder {
}; };
// GauPostHolder is a holder for GauPost, which is // GaussPostHolder is a holder for GaussPost, which is
// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > // std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >
// This is used for storing posteriors of transition id's for an // This is used for storing posteriors of transition id's for an
// utterance. // utterance.
class GauPostHolder { class GaussPostHolder {
public: public:
typedef GauPost T; typedef GaussPost T;
GauPostHolder() { } GaussPostHolder() { }
static bool Write(std::ostream &os, bool binary, const T &t); static bool Write(std::ostream &os, bool binary, const T &t);
void Clear() { GauPost tmp; std::swap(tmp, t_); } void Clear() { GaussPost tmp; std::swap(tmp, t_); }
// Reads into the holder. // Reads into the holder.
bool Read(std::istream &is); bool Read(std::istream &is);
@ -99,7 +104,7 @@ class GauPostHolder {
const T &Value() const { return t_; } const T &Value() const { return t_; }
private: private:
KALDI_DISALLOW_COPY_AND_ASSIGN(GauPostHolder); KALDI_DISALLOW_COPY_AND_ASSIGN(GaussPostHolder);
T t_; T t_;
}; };
@ -112,10 +117,10 @@ typedef SequentialTableReader<PosteriorHolder> SequentialPosteriorReader;
typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader; typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
// typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GauPost; // typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
typedef TableWriter<GauPostHolder> GauPostWriter; typedef TableWriter<GaussPostHolder> GaussPostWriter;
typedef SequentialTableReader<GauPostHolder> SequentialGauPostReader; typedef SequentialTableReader<GaussPostHolder> SequentialGaussPostReader;
typedef RandomAccessTableReader<GauPostHolder> RandomAccessGauPostReader; typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
/// Scales the BaseFloat (weight) element in the posterior entries. /// Scales the BaseFloat (weight) element in the posterior entries.
@ -146,6 +151,11 @@ int32 MergePosteriors(const Posterior &post1,
void AlignmentToPosterior(const std::vector<int32> &ali, void AlignmentToPosterior(const std::vector<int32> &ali,
Posterior *post); Posterior *post);
/// Sorts posterior entries so that transition-ids with same pdf-id are next to
/// each other.  The sort is done in place and separately for each element of
/// *post (i.e. each frame's list of entries); entries are compared only by the
/// pdf-id that "tmodel" assigns to their transition-id, and the relative order
/// of entries sharing a pdf-id is not specified.
void SortPosteriorByPdfs(const TransitionModel &tmodel,
Posterior *post);
/// Converts a posterior over transition-ids to be a posterior /// Converts a posterior over transition-ids to be a posterior
/// over pdf-ids. /// over pdf-ids.
void ConvertPosteriorToPdfs(const TransitionModel &tmodel, void ConvertPosteriorToPdfs(const TransitionModel &tmodel,

Просмотреть файл

@ -1464,6 +1464,30 @@ bool DeterminizeLatticePhonePruned(
beam, ofst, opts); beam, ofst, opts);
} }
// Convenience entry point for Lattice-typed FSTs: prepares "ifst" (invert,
// topological sort, arc-sort on input labels), runs
// DeterminizeLatticePhonePruned() on it and finally trims "ofst" with
// Connect().  "ifst" is modified.  The return value is whatever
// DeterminizeLatticePhonePruned() returned.
bool DeterminizeLatticePhonePrunedWrapper(
    const kaldi::TransitionModel &trans_model,
    MutableFst<kaldi::LatticeArc> *ifst,
    double beam,
    MutableFst<kaldi::CompactLatticeArc> *ofst,
    DeterminizeLatticePhonePrunedOptions opts) {
  // Exchange the input and output labels of the lattice.
  Invert(ifst);

  // TopSort() is only attempted when the FST is not already flagged as
  // topologically sorted; if it is attempted and fails we cannot go on.
  const bool already_sorted = (ifst->Properties(fst::kTopSorted, true) != 0);
  if (!already_sorted && !TopSort(ifst)) {
    // Cannot topologically sort the lattice -- determinization will fail.
    KALDI_ERR << "Topological sorting of state-level lattice failed (probably"
              << " your lexicon has empty words or your LM has epsilon cycles"
              << ").";
  }

  // Arcs leaving each state are ordered by input label.
  ArcSort(ifst, ILabelCompare<kaldi::LatticeArc>());

  const bool determinized =
      DeterminizeLatticePhonePruned<kaldi::LatticeWeight, kaldi::int32>(
          trans_model, ifst, beam, ofst, opts);

  // Clean up the determinized output before handing it back.
  Connect(ofst);
  return determinized;
}
// Instantiate the templates for the types we might need. // Instantiate the templates for the types we might need.
// Note: there are actually four templates, each of which // Note: there are actually four templates, each of which
// we instantiate for a single type. // we instantiate for a single type.

Просмотреть файл

@ -30,6 +30,7 @@
#include "fstext/lattice-weight.h" #include "fstext/lattice-weight.h"
#include "hmm/transition-model.h" #include "hmm/transition-model.h"
#include "itf/options-itf.h" #include "itf/options-itf.h"
#include "lat/kaldi-lattice.h"
namespace fst { namespace fst {
@ -256,6 +257,19 @@ bool DeterminizeLatticePhonePruned(
DeterminizeLatticePhonePrunedOptions opts DeterminizeLatticePhonePrunedOptions opts
= DeterminizeLatticePhonePrunedOptions()); = DeterminizeLatticePhonePrunedOptions());
/** This function is a wrapper of DeterminizeLatticePhonePruned() that works for
    Lattice type FSTs.  Unlike other determinization routines, the function
    requires "ifst" to have transition-id's on the input side and words on the
    output side.

    Before determinizing, the wrapper inverts "ifst", topologically sorts it
    unless it is already marked as top-sorted, and arc-sorts it on input
    labels; after determinizing it calls Connect() on "ofst".  Callers therefore
    do not need to do any of these steps themselves, and should be aware that
    "ifst" is modified.  A failed topological sort is reported with KALDI_ERR.

    @param trans_model  Transition model, passed on to
                        DeterminizeLatticePhonePruned().
    @param ifst         Input lattice; modified in place.
    @param prune        Pruning beam, passed on to
                        DeterminizeLatticePhonePruned().
    @param ofst         Receives the determinized compact lattice.
    @param opts         Determinization options (defaults if omitted).
    @return  The value returned by DeterminizeLatticePhonePruned(); the calling
             binaries treat "false" as "determinization finished earlier than
             the beam".
*/
bool DeterminizeLatticePhonePrunedWrapper(
const kaldi::TransitionModel &trans_model,
MutableFst<kaldi::LatticeArc> *ifst,
double prune,
MutableFst<kaldi::CompactLatticeArc> *ofst,
DeterminizeLatticePhonePrunedOptions opts
= DeterminizeLatticePhonePrunedOptions());
/// @} end "addtogroup fst_extensions" /// @} end "addtogroup fst_extensions"
} // end namespace fst } // end namespace fst

Просмотреть файл

@ -40,60 +40,40 @@ class DeterminizeLatticeTask {
bool minimize, bool minimize,
Lattice *lat, Lattice *lat,
CompactLatticeWriter *clat_writer, CompactLatticeWriter *clat_writer,
int32 *num_warn, int32 *num_warn):
int32 *num_fail):
trans_model_(&trans_model), opts_(opts), key_(key), trans_model_(&trans_model), opts_(opts), key_(key),
acoustic_scale_(acoustic_scale), beam_(beam), minimize_(minimize), acoustic_scale_(acoustic_scale), beam_(beam), minimize_(minimize),
lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn), lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn) { }
num_fail_(num_fail), skip_writting_(false) { }
void operator () () { void operator () () {
// Put word labels on the input side.
Invert(lat_);
// We apply the acoustic scale before determinization and will undo it // We apply the acoustic scale before determinization and will undo it
// afterward, since it can affect the result. // afterward, since it can affect the result.
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale_), lat_); fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale_), lat_);
if (!TopSort(lat_)) { if (!DeterminizeLatticePhonePrunedWrapper(
KALDI_WARN << "Could not topologically sort lattice: this probably means " *trans_model_, lat_, beam_, &det_clat_, opts_)) {
"it has bad properties e.g. epsilon cycles. Your LM or lexicon might " KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
"be broken, e.g. LM with epsilon cycles or lexicon with empty words."; "(partial output will be pruned tighter than the specified beam.)";
(*num_fail_)++; (*num_warn_)++;
skip_writting_ = true;
delete lat_;
lat_ = NULL;
} else {
fst::ArcSort(lat_, fst::ILabelCompare<LatticeArc>());
if (!DeterminizeLatticePhonePruned(
*trans_model_, lat_, beam_, &det_clat_, opts_)) {
KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
"(partial output will be pruned tighter than the specified beam.)";
(*num_warn_)++;
}
delete lat_;
lat_ = NULL;
if (minimize_) {
PushCompactLatticeStrings(&det_clat_);
PushCompactLatticeWeights(&det_clat_);
MinimizeCompactLattice(&det_clat_);
}
// Invert the original acoustic scaling
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
&det_clat_);
} }
delete lat_;
lat_ = NULL;
if (minimize_) {
PushCompactLatticeStrings(&det_clat_);
PushCompactLatticeWeights(&det_clat_);
MinimizeCompactLattice(&det_clat_);
}
// Invert the original acoustic scaling
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
&det_clat_);
} }
~DeterminizeLatticeTask() { ~DeterminizeLatticeTask() {
if (!skip_writting_) { KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates()
KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates() << " for key " << key_;
<< " for key " << key_; clat_writer_->Write(key_, det_clat_);
clat_writer_->Write(key_, det_clat_);
}
} }
private: private:
const TransitionModel *trans_model_; const TransitionModel *trans_model_;
@ -109,8 +89,6 @@ class DeterminizeLatticeTask {
CompactLattice det_clat_; CompactLattice det_clat_;
CompactLatticeWriter *clat_writer_; CompactLatticeWriter *clat_writer_;
int32 *num_warn_; int32 *num_warn_;
int32 *num_fail_;
bool skip_writting_;
}; };
@ -175,7 +153,7 @@ int main(int argc, char *argv[]) {
TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_opts); TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_opts);
int32 n_done = 0, n_warn = 0, n_fail = 0; int32 n_done = 0, n_warn = 0;
if (acoustic_scale == 0.0) if (acoustic_scale == 0.0)
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)"; KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
@ -190,7 +168,7 @@ int main(int argc, char *argv[]) {
DeterminizeLatticeTask *task = new DeterminizeLatticeTask( DeterminizeLatticeTask *task = new DeterminizeLatticeTask(
trans_model, determinize_opts, key, acoustic_scale, beam, minimize, trans_model, determinize_opts, key, acoustic_scale, beam, minimize,
lat, &compact_lat_writer, &n_warn, &n_fail); lat, &compact_lat_writer, &n_warn);
sequencer.Run(task); sequencer.Run(task);
n_done++; n_done++;
@ -198,7 +176,7 @@ int main(int argc, char *argv[]) {
sequencer.Wait(); sequencer.Wait();
KALDI_LOG << "Done " << n_done << " lattices, determinization finished " KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
<< "earlier than specified by the beam on " << n_warn << " of " << "earlier than specified by the beam on " << n_warn << " of "
<< "these, failed for " << n_fail; << "these.";
return (n_done != 0 ? 0 : 1); return (n_done != 0 ? 0 : 1);
} catch(const std::exception &e) { } catch(const std::exception &e) {
std::cerr << e.what(); std::cerr << e.what();

Просмотреть файл

@ -77,7 +77,7 @@ int main(int argc, char *argv[]) {
// Writes as compact lattice. // Writes as compact lattice.
CompactLatticeWriter compact_lat_writer(lats_wspecifier); CompactLatticeWriter compact_lat_writer(lats_wspecifier);
int32 n_done = 0, n_warn = 0, n_fail = 0; int32 n_done = 0, n_warn = 0;
if (acoustic_scale == 0.0) if (acoustic_scale == 0.0)
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)"; KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
@ -89,23 +89,11 @@ int main(int argc, char *argv[]) {
KALDI_VLOG(2) << "Processing lattice " << key; KALDI_VLOG(2) << "Processing lattice " << key;
Invert(&lat); // so word labels are on the input side.
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat); fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat);
if (!TopSort(&lat)) {
KALDI_WARN << "Could not topologically sort lattice: this probably "
"means it has bad properties e.g. epsilon cycles. Your LM or "
"lexicon might be broken, e.g. LM with epsilon cycles or lexicon "
"with empty words.";
n_fail++;
continue;
}
fst::ArcSort(&lat, fst::ILabelCompare<LatticeArc>());
CompactLattice det_clat; CompactLattice det_clat;
if (!DeterminizeLatticePhonePruned( if (!DeterminizeLatticePhonePrunedWrapper(
trans_model, lat, beam, &det_clat, opts)) { trans_model, &lat, beam, &det_clat, opts)) {
KALDI_WARN << "For key " << key << ", determinization did not succeed" KALDI_WARN << "For key " << key << ", determinization did not succeed"
"(partial output will be pruned tighter than the specified beam.)"; "(partial output will be pruned tighter than the specified beam.)";
n_warn++; n_warn++;
@ -124,7 +112,7 @@ int main(int argc, char *argv[]) {
KALDI_LOG << "Done " << n_done << " lattices, determinization finished " KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
<< "earlier than specified by the beam on " << n_warn << " of " << "earlier than specified by the beam on " << n_warn << " of "
<< "these, failed for " << n_fail; << "these.";
return (n_done != 0 ? 0 : 1); return (n_done != 0 ? 0 : 1);
} catch(const std::exception &e) { } catch(const std::exception &e) {
std::cerr << e.what(); std::cerr << e.what();

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal), // Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal),
// Johns Hopkins University (Author: Daniel Povey) // Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -160,21 +161,28 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
am_sgmm.ComputePerFrameVars(features.Row(i), gselect[i], spk_vars, am_sgmm.ComputePerFrameVars(features.Row(i), gselect[i], spk_vars,
&per_frame_vars); &per_frame_vars);
// Accumulates for SGMM.
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 pdf_id = pdf_posterior[i][j].first;
pdf_id = trans_model.TransitionIdToPdf(tid); BaseFloat weight = pdf_posterior[i][j].second;
BaseFloat weight = posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars, tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
pdf_id, weight, &spk_vars) pdf_id, weight, &spk_vars)
* weight; * weight;
tot_weight += weight; tot_weight += weight;
} }
// Accumulates for transitions.
for (size_t j = 0; j < posterior[i].size(); j++) {
int32 tid = posterior[i][j].first;
BaseFloat weight = posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
}
} }
KALDI_VLOG(2) << "Average like for this file is " KALDI_VLOG(2) << "Average like for this file is "

Просмотреть файл

@ -1,6 +1,7 @@
// sgmm2bin/sgmm2-est-fmllr.cc // sgmm2bin/sgmm2-est-fmllr.cc
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -42,6 +43,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
FmllrSgmm2Accs *spk_stats) { FmllrSgmm2Accs *spk_stats) {
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars; kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t t = 0; t < post.size(); t++) { for (size_t t = 0; t < post.size(); t++) {
// per-frame vars only used for computing posteriors... use the // per-frame vars only used for computing posteriors... use the
// transformed feats for this, if available. // transformed feats for this, if available.
@ -49,12 +52,12 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
*spk_vars, &per_frame_vars); *spk_vars, &per_frame_vars);
for (size_t j = 0; j < post[t].size(); j++) { for (size_t j = 0; j < pdf_post[t].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[t][j].first); int32 pdf_id = pdf_post[t][j].first;
Matrix<BaseFloat> posteriors; Matrix<BaseFloat> posteriors;
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
spk_vars, &posteriors); spk_vars, &posteriors);
posteriors.Scale(post[t][j].second); posteriors.Scale(pdf_post[t][j].second);
spk_stats->AccumulateFromPosteriors(am_sgmm, *spk_vars, feats.Row(t), spk_stats->AccumulateFromPosteriors(am_sgmm, *spk_vars, feats.Row(t),
gselect[t], posteriors, pdf_id); gselect[t], posteriors, pdf_id);
} }

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2012 Saarland University Microsoft Corporation // Copyright 2009-2012 Saarland University Microsoft Corporation
// Johns Hopkins University (Author: Daniel Povey) // Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -42,14 +43,16 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars; kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
KALDI_ASSERT(gselect.size() == feats.NumRows()); KALDI_ASSERT(gselect.size() == feats.NumRows());
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t i = 0; i < post.size(); i++) { for (size_t i = 0; i < post.size(); i++) {
am_sgmm.ComputePerFrameVars(feats.Row(i), gselect[i], am_sgmm.ComputePerFrameVars(feats.Row(i), gselect[i],
*spk_vars, &per_frame_vars); *spk_vars, &per_frame_vars);
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first); int32 pdf_id = pdf_post[i][j].first;
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id,
post[i][j].second, spk_vars); pdf_post[i][j].second, spk_vars);
} }
} }
} }

Просмотреть файл

@ -2,6 +2,7 @@
// Copyright 2009-2012 Saarland University Microsoft Corporation // Copyright 2009-2012 Saarland University Microsoft Corporation
// Johns Hopkins University (Author: Daniel Povey) // Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -94,7 +95,7 @@ int main(int argc, char *argv[]) {
num_err++; num_err++;
continue; continue;
} }
const Posterior &posterior = posteriors_reader.Value(utt); Posterior posterior = posteriors_reader.Value(utt);
if (!gselect_reader.HasKey(utt) || if (!gselect_reader.HasKey(utt) ||
gselect_reader.Value(utt).size() != mat.NumRows()) { gselect_reader.Value(utt).size() != mat.NumRows()) {
@ -123,6 +124,10 @@ int main(int argc, char *argv[]) {
Sgmm2GauPost gpost(posterior.size()); // posterior.size() == T. Sgmm2GauPost gpost(posterior.size()); // posterior.size() == T.
SortPosteriorByPdfs(trans_model, &posterior);
int32 prev_pdf_id = -1;
BaseFloat prev_like = 0;
Matrix<BaseFloat> prev_posterior;
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
am_sgmm.ComputePerFrameVars(mat.Row(i), gselect[i], am_sgmm.ComputePerFrameVars(mat.Row(i), gselect[i],
spk_vars, &per_frame_vars); spk_vars, &per_frame_vars);
@ -130,18 +135,25 @@ int main(int argc, char *argv[]) {
gpost[i].gselect = gselect[i]; gpost[i].gselect = gselect[i];
gpost[i].tids.resize(posterior[i].size()); gpost[i].tids.resize(posterior[i].size());
gpost[i].posteriors.resize(posterior[i].size()); gpost[i].posteriors.resize(posterior[i].size());
prev_pdf_id = -1; // Only cache for the same frame.
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 tid = posterior[i][j].first, // transition identifier.
pdf_id = trans_model.TransitionIdToPdf(tid); pdf_id = trans_model.TransitionIdToPdf(tid);
BaseFloat weight = posterior[i][j].second; BaseFloat weight = posterior[i][j].second;
gpost[i].tids[j] = tid; gpost[i].tids[j] = tid;
tot_like_this_file += if (pdf_id != prev_pdf_id) {
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, // First time see this pdf-id for this frame, update the cached
&spk_vars, // variables.
&(gpost[i].posteriors[j])) prev_pdf_id = pdf_id;
* weight; prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
&spk_vars,
&prev_posterior);
}
gpost[i].posteriors[j] = prev_posterior;
tot_like_this_file += prev_like * weight;
tot_weight += weight; tot_weight += weight;
gpost[i].posteriors[j].Scale(weight); gpost[i].posteriors[j].Scale(weight);
} }

Просмотреть файл

@ -1,6 +1,7 @@
// sgmmbin/sgmm-acc-stats.cc // sgmmbin/sgmm-acc-stats.cc
// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal), // Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal),
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -138,6 +139,8 @@ int main(int argc, char *argv[]) {
num_done++; num_done++;
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0; BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
Posterior pdf_posterior;
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
if (posterior[i].empty()) if (posterior[i].empty())
continue; continue;
@ -147,18 +150,25 @@ int main(int argc, char *argv[]) {
am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0, am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0,
&per_frame_vars); &per_frame_vars);
for (size_t j = 0; j < posterior[i].size(); j++) { // Accumulates for SGMM.
int32 tid = posterior[i][j].first, // transition identifier. for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
pdf_id = trans_model.TransitionIdToPdf(tid); int32 pdf_id = pdf_posterior[i][j].first;
BaseFloat weight = posterior[i][j].second; BaseFloat weight = pdf_posterior[i][j].second;
if (acc_flags & kaldi::kSgmmTransitions)
trans_model.Accumulate(weight, tid, &transition_accs);
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars, tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
spk_vars.v_s, pdf_id, spk_vars.v_s, pdf_id,
weight, acc_flags) weight, acc_flags)
* weight; * weight;
tot_weight += weight; tot_weight += weight;
} }
// Accumulates for transitions.
for (size_t j = 0; j < posterior[i].size(); j++) {
if (acc_flags & kaldi::kSgmmTransitions) {
int32 tid = posterior[i][j].first;
BaseFloat weight = posterior[i][j].second;
trans_model.Accumulate(weight, tid, &transition_accs);
}
}
} }
sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance. sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance.

Просмотреть файл

@ -1,6 +1,7 @@
// sgmmbin/sgmm-est-fmllr.cc // sgmmbin/sgmm-est-fmllr.cc
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -43,6 +44,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
FmllrSgmmAccs *spk_stats) { FmllrSgmmAccs *spk_stats) {
kaldi::SgmmPerFrameDerivedVars per_frame_vars; kaldi::SgmmPerFrameDerivedVars per_frame_vars;
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t t = 0; t < post.size(); t++) { for (size_t t = 0; t < post.size(); t++) {
std::vector<int32> this_gselect; std::vector<int32> this_gselect;
if (!gselect.empty()) { if (!gselect.empty()) {
@ -57,12 +60,12 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
0.0 /*fMLLR logdet*/, &per_frame_vars); 0.0 /*fMLLR logdet*/, &per_frame_vars);
for (size_t j = 0; j < post[t].size(); j++) { for (size_t j = 0; j < pdf_post[t].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[t][j].first); int32 pdf_id = pdf_post[t][j].first;
Matrix<BaseFloat> posteriors; Matrix<BaseFloat> posteriors;
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
&posteriors); &posteriors);
posteriors.Scale(post[t][j].second); posteriors.Scale(pdf_post[t][j].second);
spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(t), spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(t),
this_gselect, this_gselect,
posteriors, pdf_id); posteriors, pdf_id);

Просмотреть файл

@ -1,6 +1,7 @@
// sgmmbin/sgmm-est-spkvecs.cc // sgmmbin/sgmm-est-spkvecs.cc
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -41,6 +42,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
MleSgmmSpeakerAccs *spk_stats) { MleSgmmSpeakerAccs *spk_stats) {
kaldi::SgmmPerFrameDerivedVars per_frame_vars; kaldi::SgmmPerFrameDerivedVars per_frame_vars;
Posterior pdf_post;
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
for (size_t i = 0; i < post.size(); i++) { for (size_t i = 0; i < post.size(); i++) {
std::vector<int32> this_gselect; std::vector<int32> this_gselect;
if (!gselect.empty()) if (!gselect.empty())
@ -49,9 +52,9 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
am_sgmm.GaussianSelection(gselect_opts, feats.Row(i), &this_gselect); am_sgmm.GaussianSelection(gselect_opts, feats.Row(i), &this_gselect);
am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars); am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars);
for (size_t j = 0; j < post[i].size(); j++) { for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first); int32 pdf_id = pdf_post[i][j].first;
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, post[i][j].second); spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, pdf_post[i][j].second);
} }
} }
} }

Просмотреть файл

@ -1,6 +1,7 @@
// sgmmbin/sgmm-post-to-gpost.cc // sgmmbin/sgmm-post-to-gpost.cc
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) // Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
// 2014 Guoguo Chen
// See ../../COPYING for clarification regarding multiple authors // See ../../COPYING for clarification regarding multiple authors
// //
@ -87,7 +88,7 @@ int main(int argc, char *argv[]) {
num_no_posterior++; num_no_posterior++;
} else { } else {
const Matrix<BaseFloat> &mat = feature_reader.Value(); const Matrix<BaseFloat> &mat = feature_reader.Value();
const Posterior &posterior = posteriors_reader.Value(utt); Posterior posterior = posteriors_reader.Value(utt);
bool have_gselect = !gselect_rspecifier.empty() bool have_gselect = !gselect_rspecifier.empty()
&& gselect_reader.HasKey(utt) && gselect_reader.HasKey(utt)
@ -123,6 +124,10 @@ int main(int argc, char *argv[]) {
SgmmGauPost gpost(posterior.size()); // posterior.size() == T. SgmmGauPost gpost(posterior.size()); // posterior.size() == T.
SortPosteriorByPdfs(trans_model, &posterior);
int32 prev_pdf_id = -1;
BaseFloat prev_like = 0;
Matrix<BaseFloat> prev_posterior;
for (size_t i = 0; i < posterior.size(); i++) { for (size_t i = 0; i < posterior.size(); i++) {
std::vector<int32> this_gselect; std::vector<int32> this_gselect;
@ -134,16 +139,23 @@ int main(int argc, char *argv[]) {
gpost[i].tids.resize(posterior[i].size()); gpost[i].tids.resize(posterior[i].size());
gpost[i].posteriors.resize(posterior[i].size()); gpost[i].posteriors.resize(posterior[i].size());
prev_pdf_id = -1; // Only cache for the same frame.
for (size_t j = 0; j < posterior[i].size(); j++) { for (size_t j = 0; j < posterior[i].size(); j++) {
int32 tid = posterior[i][j].first, // transition identifier. int32 tid = posterior[i][j].first, // transition identifier.
pdf_id = trans_model.TransitionIdToPdf(tid); pdf_id = trans_model.TransitionIdToPdf(tid);
BaseFloat weight = posterior[i][j].second; BaseFloat weight = posterior[i][j].second;
gpost[i].tids[j] = tid; gpost[i].tids[j] = tid;
tot_like_this_file += if (pdf_id != prev_pdf_id) {
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id, // First time see this pdf-id for this frame, update the cached
&(gpost[i].posteriors[j])) // variables.
* weight; prev_pdf_id = pdf_id;
prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
&prev_posterior);
}
gpost[i].posteriors[j] = prev_posterior;
tot_like_this_file += prev_like * weight;
tot_weight += weight; tot_weight += weight;
gpost[i].posteriors[j].Scale(weight); gpost[i].posteriors[j].Scale(weight);
} }