зеркало из https://github.com/mozilla/kaldi.git
WARNING: this change list renamed GauPost to GaussPost, and also made it pdf-id indexed instead of transition-id indexed. Details: 1) Modified the posterior-related programs that use TransitionIdToPdf to get a pdf-id and afterwards use only the pdf-id. We merge the posteriors that correspond to the same pdf-id to avoid redundant computation. 2) Modified phone lattice determinization and added a wrapper for the lattice-type determinization to reduce redundant code in the decoding binaries.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3588 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
cd36df98b5
Коммит
725f4abd68
|
@ -1,6 +1,7 @@
|
||||||
// bin/acc-lda.cc
|
// bin/acc-lda.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation, Go-Vivace Inc.
|
// Copyright 2009-2011 Microsoft Corporation, Go-Vivace Inc.
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -95,14 +96,15 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (int32 i = 0; i < feats.NumRows(); i++) {
|
for (int32 i = 0; i < feats.NumRows(); i++) {
|
||||||
SubVector<BaseFloat> feat(feats, i);
|
SubVector<BaseFloat> feat(feats, i);
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 tid = post[i][j].first;
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
BaseFloat weight = RandPrune(post[i][j].second, rand_prune);
|
BaseFloat weight = RandPrune(pdf_post[i][j].second, rand_prune);
|
||||||
if (weight != 0.0) {
|
if (weight != 0.0) {
|
||||||
int32 pdf = trans_model.TransitionIdToPdf(tid);
|
lda.Accumulate(feat, pdf_id, weight);
|
||||||
lda.Accumulate(feat, pdf, weight);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -790,28 +790,16 @@ void DecodeUtteranceLatticeFasterClass::operator () () {
|
||||||
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt_;
|
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt_;
|
||||||
fst::Connect(lat_);
|
fst::Connect(lat_);
|
||||||
if (determinize_) {
|
if (determinize_) {
|
||||||
Invert(lat_);
|
|
||||||
if (!TopSort(lat_)) {
|
|
||||||
// Cannot topologically sort the lattice -- determinization will fail.
|
|
||||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
|
||||||
<< "(probably your lexicon has empty words or your LM has "
|
|
||||||
<< "epsilon cycles).";
|
|
||||||
delete lat_; // Delete it here.
|
|
||||||
success_ = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
fst::ILabelCompare<LatticeArc> ilabel_comp;
|
|
||||||
ArcSort(lat_, ilabel_comp);
|
|
||||||
clat_ = new CompactLattice;
|
clat_ = new CompactLattice;
|
||||||
if (!DeterminizeLatticePhonePruned(*trans_model_,
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
lat_,
|
*trans_model_,
|
||||||
decoder_->GetOptions().lattice_beam,
|
lat_,
|
||||||
clat_,
|
decoder_->GetOptions().lattice_beam,
|
||||||
decoder_->GetOptions().det_opts))
|
clat_,
|
||||||
|
decoder_->GetOptions().det_opts))
|
||||||
KALDI_WARN << "Determinization finished earlier than the beam for "
|
KALDI_WARN << "Determinization finished earlier than the beam for "
|
||||||
<< "utterance " << utt_;
|
<< "utterance " << utt_;
|
||||||
delete lat_;
|
delete lat_;
|
||||||
fst::Connect(clat_);
|
|
||||||
// We'll write the lattice without acoustic scaling.
|
// We'll write the lattice without acoustic scaling.
|
||||||
if (acoustic_scale_ != 0.0)
|
if (acoustic_scale_ != 0.0)
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat_);
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat_);
|
||||||
|
@ -971,25 +959,15 @@ bool DecodeUtteranceLatticeFaster(
|
||||||
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
||||||
fst::Connect(&lat);
|
fst::Connect(&lat);
|
||||||
if (determinize) {
|
if (determinize) {
|
||||||
Invert(&lat);
|
|
||||||
if (!TopSort(&lat)) {
|
|
||||||
// Cannot topologically sort the lattice -- determinization will fail.
|
|
||||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
|
||||||
<< "(probably your lexicon has empty words or your LM has "
|
|
||||||
<< "epsilon cycles).";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
fst::ILabelCompare<LatticeArc> ilabel_comp;
|
|
||||||
ArcSort(&lat, ilabel_comp);
|
|
||||||
CompactLattice clat;
|
CompactLattice clat;
|
||||||
if (!DeterminizeLatticePhonePruned(trans_model,
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
&lat,
|
trans_model,
|
||||||
decoder.GetOptions().lattice_beam,
|
&lat,
|
||||||
&clat,
|
decoder.GetOptions().lattice_beam,
|
||||||
decoder.GetOptions().det_opts))
|
&clat,
|
||||||
|
decoder.GetOptions().det_opts))
|
||||||
KALDI_WARN << "Determinization finished earlier than the beam for "
|
KALDI_WARN << "Determinization finished earlier than the beam for "
|
||||||
<< "utterance " << utt;
|
<< "utterance " << utt;
|
||||||
fst::Connect(&clat);
|
|
||||||
// We'll write the lattice without acoustic scaling.
|
// We'll write the lattice without acoustic scaling.
|
||||||
if (acoustic_scale != 0.0)
|
if (acoustic_scale != 0.0)
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
||||||
|
|
|
@ -656,25 +656,15 @@ bool DecodeUtteranceLatticeSimple(
|
||||||
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
||||||
fst::Connect(&lat);
|
fst::Connect(&lat);
|
||||||
if (determinize) {
|
if (determinize) {
|
||||||
Invert(&lat);
|
|
||||||
if (!TopSort(&lat)) {
|
|
||||||
// Cannot topologically sort the lattice -- determinization will fail.
|
|
||||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
|
||||||
<< "(probably your lexicon has empty words or your LM has "
|
|
||||||
<< "epsilon cycles).";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
fst::ILabelCompare<LatticeArc> ilabel_comp;
|
|
||||||
ArcSort(&lat, ilabel_comp);
|
|
||||||
CompactLattice clat;
|
CompactLattice clat;
|
||||||
if (!DeterminizeLatticePhonePruned(trans_model,
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
&lat,
|
trans_model,
|
||||||
decoder.GetOptions().lattice_beam,
|
&lat,
|
||||||
&clat,
|
decoder.GetOptions().lattice_beam,
|
||||||
decoder.GetOptions().det_opts))
|
&clat,
|
||||||
|
decoder.GetOptions().det_opts))
|
||||||
KALDI_WARN << "Determinization finished earlier than the beam for "
|
KALDI_WARN << "Determinization finished earlier than the beam for "
|
||||||
<< "utterance " << utt;
|
<< "utterance " << utt;
|
||||||
fst::Connect(&clat);
|
|
||||||
// We'll write the lattice without acoustic scaling.
|
// We'll write the lattice without acoustic scaling.
|
||||||
if (acoustic_scale != 0.0)
|
if (acoustic_scale != 0.0)
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
||||||
|
|
|
@ -938,25 +938,15 @@ bool DecodeUtteranceLatticeTracking(
|
||||||
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
||||||
fst::Connect(&lat);
|
fst::Connect(&lat);
|
||||||
if (determinize) {
|
if (determinize) {
|
||||||
Invert(&lat);
|
|
||||||
if (!TopSort(&lat)) {
|
|
||||||
// Cannot topologically sort the lattice -- determinization will fail.
|
|
||||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
|
||||||
<< "(probably your lexicon has empty words or your LM has "
|
|
||||||
<< "epsilon cycles).";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
fst::ILabelCompare<LatticeArc> ilabel_comp;
|
|
||||||
ArcSort(&lat, ilabel_comp);
|
|
||||||
CompactLattice clat;
|
CompactLattice clat;
|
||||||
if (!DeterminizeLatticePhonePruned(trans_model,
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
&lat,
|
trans_model,
|
||||||
decoder.GetOptions().lattice_beam,
|
&lat,
|
||||||
&clat,
|
decoder.GetOptions().lattice_beam,
|
||||||
decoder.GetOptions().det_opts))
|
&clat,
|
||||||
|
decoder.GetOptions().det_opts))
|
||||||
KALDI_WARN << "Determinization finished earlier than the beam for "
|
KALDI_WARN << "Determinization finished earlier than the beam for "
|
||||||
<< "utterance " << utt;
|
<< "utterance " << utt;
|
||||||
fst::Connect(&clat);
|
|
||||||
// We'll write the lattice without acoustic scaling.
|
// We'll write the lattice without acoustic scaling.
|
||||||
if (acoustic_scale != 0.0)
|
if (acoustic_scale != 0.0)
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-acc-hlda.cc
|
// gmmbin/gmm-acc-hlda.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -106,11 +107,12 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
|
||||||
|
|
||||||
Vector<BaseFloat> posteriors;
|
Vector<BaseFloat> posteriors;
|
||||||
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
|
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-acc-mllt.cc
|
// gmmbin/gmm-acc-mllt.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -90,11 +91,12 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
|
||||||
|
|
||||||
tot_like_this_file += mllt_accs.AccumulateFromGmm(am_gmm.GetPdf(pdf_id),
|
tot_like_this_file += mllt_accs.AccumulateFromGmm(am_gmm.GetPdf(pdf_id),
|
||||||
mat.Row(i),
|
mat.Row(i),
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-acc-stats-twofeats.cc
|
// gmmbin/gmm-acc-stats-twofeats.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -111,12 +112,13 @@ int main(int argc, char *argv[]) {
|
||||||
BaseFloat tot_like_this_file = 0.0,
|
BaseFloat tot_like_this_file = 0.0,
|
||||||
tot_weight_this_file = 0.0;
|
tot_weight_this_file = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
// Accumulates for GMM.
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
for (size_t j = 0; j <pdf_posterior[i].size(); j++) {
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
trans_model.Accumulate(weight, tid, &transition_accs);
|
|
||||||
tot_like_this_file += weight *
|
tot_like_this_file += weight *
|
||||||
gmm_accs.AccumulateForGmmTwofeats(am_gmm,
|
gmm_accs.AccumulateForGmmTwofeats(am_gmm,
|
||||||
mat1.Row(i),
|
mat1.Row(i),
|
||||||
|
@ -125,6 +127,13 @@ int main(int argc, char *argv[]) {
|
||||||
weight);
|
weight);
|
||||||
tot_weight_this_file += weight;
|
tot_weight_this_file += weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accumulates for transitions.
|
||||||
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
|
int32 tid = posterior[i][j].first;
|
||||||
|
BaseFloat weight = posterior[i][j].second;
|
||||||
|
trans_model.Accumulate(weight, tid, &transition_accs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
KALDI_LOG << "Average like for this file is "
|
KALDI_LOG << "Average like for this file is "
|
||||||
<< (tot_like_this_file/tot_weight_this_file) << " over "
|
<< (tot_like_this_file/tot_weight_this_file) << " over "
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-acc-stats.cc
|
// gmmbin/gmm-acc-stats.cc
|
||||||
|
|
||||||
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
// Copyright 2009-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -98,16 +99,24 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
// Accumulates for GMM.
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
trans_model.Accumulate(weight, tid, &transition_accs);
|
|
||||||
tot_like_this_file += gmm_accs.AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, weight)
|
tot_like_this_file += gmm_accs.AccumulateForGmm(am_gmm, mat.Row(i), pdf_id, weight)
|
||||||
* weight;
|
* weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accumulates for transitions.
|
||||||
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
|
int32 tid = posterior[i][j].first;
|
||||||
|
BaseFloat weight = posterior[i][j].second;
|
||||||
|
trans_model.Accumulate(weight, tid, &transition_accs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (num_done % 50 == 0) {
|
if (num_done % 50 == 0) {
|
||||||
KALDI_LOG << "Processed " << num_done << " utterances; for utterance "
|
KALDI_LOG << "Processed " << num_done << " utterances; for utterance "
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2012 Cisco Systems (author: Neha Agrawal)
|
// Copyright 2012 Cisco Systems (author: Neha Agrawal)
|
||||||
// Johns Hopkins University (author: Daniel Povey)
|
// Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -119,11 +120,12 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for ( size_t i = 0; i < posterior.size(); i++ ) {
|
for ( size_t i = 0; i < posterior.size(); i++ ) {
|
||||||
for ( size_t j = 0; j < posterior[i].size(); j++ ) {
|
for ( size_t j = 0; j < pdf_posterior[i].size(); j++ ) {
|
||||||
int32 pdf_id =
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
trans_model.TransitionIdToPdf(posterior[i][j].first);
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
|
||||||
file_like += map_accs.AccumulateForGmm(copy_am_gmm,
|
file_like += map_accs.AccumulateForGmm(copy_am_gmm,
|
||||||
feats.Row(i),
|
feats.Row(i),
|
||||||
pdf_id, weight);
|
pdf_id, weight);
|
||||||
|
@ -183,11 +185,12 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for ( size_t i = 0; i < posterior.size(); i++ ) {
|
for ( size_t i = 0; i < posterior.size(); i++ ) {
|
||||||
for ( size_t j = 0; j < posterior[i].size(); j++ ) {
|
for ( size_t j = 0; j < pdf_posterior[i].size(); j++ ) {
|
||||||
int32 pdf_id =
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
trans_model.TransitionIdToPdf(posterior[i][j].first);
|
BaseFloat prob = pdf_posterior[i][j].second;
|
||||||
BaseFloat prob = posterior[i][j].second;
|
|
||||||
file_like += map_accs.AccumulateForGmm(copy_am_gmm,feats.Row(i),
|
file_like += map_accs.AccumulateForGmm(copy_am_gmm,feats.Row(i),
|
||||||
pdf_id, prob);
|
pdf_id, prob);
|
||||||
file_t += prob;
|
file_t += prob;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-basis-fmllr-accs-gpost.cc
|
// gmmbin/gmm-basis-fmllr-accs-gpost.cc
|
||||||
|
|
||||||
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -32,13 +33,13 @@ using std::vector;
|
||||||
|
|
||||||
namespace kaldi {
|
namespace kaldi {
|
||||||
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const GauPost &gpost,
|
const GaussPost &gpost,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
for (size_t i = 0; i < gpost.size(); i++) {
|
for (size_t i = 0; i < gpost.size(); i++) {
|
||||||
for (size_t j = 0; j < gpost[i].size(); j++) {
|
for (size_t j = 0; j < gpost[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first);
|
int32 pdf_id = gpost[i][j].first;
|
||||||
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
||||||
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i), posterior);
|
feats.Row(i), posterior);
|
||||||
|
@ -89,7 +90,7 @@ int main(int argc, char *argv[]) {
|
||||||
am_gmm.Read(ki.Stream(), binary);
|
am_gmm.Read(ki.Stream(), binary);
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomAccessGauPostReader gpost_reader(gpost_rspecifier);
|
RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
|
||||||
BasisFmllrAccus basis_accs(am_gmm.Dim());
|
BasisFmllrAccus basis_accs(am_gmm.Dim());
|
||||||
|
|
||||||
int32 num_done = 0, num_no_post = 0, num_other_error = 0;
|
int32 num_done = 0, num_no_post = 0, num_other_error = 0;
|
||||||
|
@ -115,9 +116,9 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -143,10 +144,10 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
|
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-basis-fmllr-accs.cc
|
// gmmbin/gmm-basis-fmllr-accs.cc
|
||||||
|
|
||||||
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t i = 0; i < post.size(); i++) {
|
for (size_t i = 0; i < post.size(); i++) {
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first);
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i),
|
feats.Row(i),
|
||||||
post[i][j].second);
|
pdf_post[i][j].second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-basis-fmllr-gpost.cc
|
// gmmbin/gmm-est-basis-fmllr-gpost.cc
|
||||||
|
|
||||||
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -32,13 +33,13 @@ using std::vector;
|
||||||
|
|
||||||
namespace kaldi {
|
namespace kaldi {
|
||||||
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const GauPost &gpost,
|
const GaussPost &gpost,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
for (size_t i = 0; i < gpost.size(); i++) {
|
for (size_t i = 0; i < gpost.size(); i++) {
|
||||||
for (size_t j = 0; j < gpost[i].size(); j++) {
|
for (size_t j = 0; j < gpost[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first);
|
int32 pdf_id = gpost[i][j].first;
|
||||||
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
||||||
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i), posterior);
|
feats.Row(i), posterior);
|
||||||
|
@ -102,7 +103,7 @@ int main(int argc, char *argv[]) {
|
||||||
basis_est.ReadBasis(ki.Stream(), binary, false);
|
basis_est.ReadBasis(ki.Stream(), binary, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomAccessGauPostReader gpost_reader(gpost_rspecifier);
|
RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
|
||||||
|
|
||||||
double tot_impr = 0.0, tot_t = 0.0;
|
double tot_impr = 0.0, tot_t = 0.0;
|
||||||
|
|
||||||
|
@ -134,9 +135,9 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -178,10 +179,10 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
|
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-basis-fmllr.cc
|
// gmmbin/gmm-est-basis-fmllr.cc
|
||||||
|
|
||||||
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
// Copyright 2012 Carnegie Mellon University (author: Yajie Miao)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t i = 0; i < post.size(); i++) {
|
for (size_t i = 0; i < post.size(); i++) {
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first);
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i),
|
feats.Row(i),
|
||||||
post[i][j].second);
|
pdf_post[i][j].second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation; Saarland University
|
// Copyright 2009-2011 Microsoft Corporation; Saarland University
|
||||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -32,13 +33,13 @@ using std::vector;
|
||||||
|
|
||||||
namespace kaldi {
|
namespace kaldi {
|
||||||
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const GauPost &gpost,
|
const GaussPost &gpost,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
for (size_t i = 0; i < gpost.size(); i++) {
|
for (size_t i = 0; i < gpost.size(); i++) {
|
||||||
for (size_t j = 0; j < gpost[i].size(); j++) {
|
for (size_t j = 0; j < gpost[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(gpost[i][j].first);
|
int32 pdf_id = gpost[i][j].first;
|
||||||
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
const Vector<BaseFloat> & posterior(gpost[i][j].second);
|
||||||
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateFromPosteriors(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i), posterior);
|
feats.Row(i), posterior);
|
||||||
|
@ -89,7 +90,7 @@ int main(int argc, char *argv[]) {
|
||||||
am_gmm.Read(ki.Stream(), binary);
|
am_gmm.Read(ki.Stream(), binary);
|
||||||
}
|
}
|
||||||
|
|
||||||
RandomAccessGauPostReader gpost_reader(gpost_rspecifier);
|
RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
|
||||||
|
|
||||||
double tot_impr = 0.0, tot_t = 0.0;
|
double tot_impr = 0.0, tot_t = 0.0;
|
||||||
|
|
||||||
|
@ -117,9 +118,9 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost vector has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost vector has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -153,10 +154,10 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
|
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "GauPost has wrong size " << (gpost.size())
|
KALDI_WARN << "GaussPost has wrong size " << (gpost.size())
|
||||||
<< " vs. " << (feats.NumRows());
|
<< " vs. " << (feats.NumRows());
|
||||||
num_other_error++;
|
num_other_error++;
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-fmllr-raw-gpost.cc
|
// gmmbin/gmm-est-fmllr-raw-gpost.cc
|
||||||
|
|
||||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -29,12 +30,12 @@ namespace kaldi {
|
||||||
|
|
||||||
void AccStatsForUtterance(const TransitionModel &trans_model,
|
void AccStatsForUtterance(const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
const GauPost &gpost,
|
const GaussPost &gpost,
|
||||||
const Matrix<BaseFloat> &feats,
|
const Matrix<BaseFloat> &feats,
|
||||||
FmllrRawAccs *accs) {
|
FmllrRawAccs *accs) {
|
||||||
for (size_t t = 0; t < gpost.size(); t++) {
|
for (size_t t = 0; t < gpost.size(); t++) {
|
||||||
for (size_t i = 0; i < gpost[t].size(); i++) {
|
for (size_t i = 0; i < gpost[t].size(); i++) {
|
||||||
int32 pdf = trans_model.TransitionIdToPdf(gpost[t][i].first);
|
int32 pdf = gpost[t][i].first;
|
||||||
const Vector<BaseFloat> &posterior(gpost[t][i].second);
|
const Vector<BaseFloat> &posterior(gpost[t][i].second);
|
||||||
accs->AccumulateFromPosteriors(am_gmm.GetPdf(pdf),
|
accs->AccumulateFromPosteriors(am_gmm.GetPdf(pdf),
|
||||||
feats.Row(t), posterior);
|
feats.Row(t), posterior);
|
||||||
|
@ -94,7 +95,7 @@ int main(int argc, char *argv[]) {
|
||||||
Matrix<BaseFloat> full_lda_mat;
|
Matrix<BaseFloat> full_lda_mat;
|
||||||
ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat);
|
ReadKaldiObject(full_lda_mat_rxfilename, &full_lda_mat);
|
||||||
|
|
||||||
RandomAccessGauPostReader gpost_reader(gpost_rspecifier);
|
RandomAccessGaussPostReader gpost_reader(gpost_rspecifier);
|
||||||
BaseFloatMatrixWriter transform_writer(transform_wspecifier);
|
BaseFloatMatrixWriter transform_writer(transform_wspecifier);
|
||||||
|
|
||||||
double tot_auxf_impr = 0.0, tot_count = 0.0;
|
double tot_auxf_impr = 0.0, tot_count = 0.0;
|
||||||
|
@ -121,7 +122,7 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
const Matrix<BaseFloat> &feats = feature_reader.Value(utt);
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "Size mismatch between gposteriors " << gpost.size()
|
KALDI_WARN << "Size mismatch between gposteriors " << gpost.size()
|
||||||
<< " and features " << feats.NumRows();
|
<< " and features " << feats.NumRows();
|
||||||
|
@ -155,7 +156,7 @@ int main(int argc, char *argv[]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
const Matrix<BaseFloat> &feats = feature_reader.Value();
|
||||||
const GauPost &gpost = gpost_reader.Value(utt);
|
const GaussPost &gpost = gpost_reader.Value(utt);
|
||||||
|
|
||||||
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
if (static_cast<int32>(gpost.size()) != feats.NumRows()) {
|
||||||
KALDI_WARN << "Size mismatch between posteriors " << gpost.size()
|
KALDI_WARN << "Size mismatch between posteriors " << gpost.size()
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-fmllr-raw.cc
|
// gmmbin/gmm-est-fmllr-raw.cc
|
||||||
|
|
||||||
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
// Copyright 2013 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -32,13 +33,14 @@ void AccStatsForUtterance(const TransitionModel &trans_model,
|
||||||
const Posterior &post,
|
const Posterior &post,
|
||||||
const Matrix<BaseFloat> &feats,
|
const Matrix<BaseFloat> &feats,
|
||||||
FmllrRawAccs *accs) {
|
FmllrRawAccs *accs) {
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t t = 0; t < post.size(); t++) {
|
for (size_t t = 0; t < post.size(); t++) {
|
||||||
for (size_t i = 0; i < post[t].size(); i++) {
|
for (size_t i = 0; i < pdf_post[t].size(); i++) {
|
||||||
int32 pdf = trans_model.TransitionIdToPdf(post[t][i].first);
|
int32 pdf = pdf_post[t][i].first;
|
||||||
BaseFloat weight = post[t][i].second;
|
BaseFloat weight = pdf_post[t][i].second;
|
||||||
accs->AccumulateForGmm(am_gmm.GetPdf(pdf),
|
accs->AccumulateForGmm(am_gmm.GetPdf(pdf),
|
||||||
feats.Row(t),
|
feats.Row(t), weight);
|
||||||
weight);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation; Saarland University
|
// Copyright 2009-2011 Microsoft Corporation; Saarland University
|
||||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -36,12 +37,14 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
const TransitionModel &trans_model,
|
const TransitionModel &trans_model,
|
||||||
const AmDiagGmm &am_gmm,
|
const AmDiagGmm &am_gmm,
|
||||||
FmllrDiagGmmAccs *spk_stats) {
|
FmllrDiagGmmAccs *spk_stats) {
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t i = 0; i < post.size(); i++) {
|
for (size_t i = 0; i < post.size(); i++) {
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first);
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
spk_stats->AccumulateForGmm(am_gmm.GetPdf(pdf_id),
|
||||||
feats.Row(i),
|
feats.Row(i),
|
||||||
post[i][j].second);
|
pdf_post[i][j].second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-regtree-fmllr.cc
|
// gmmbin/gmm-est-regtree-fmllr.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Saarland University; Microsoft Corporation
|
// Copyright 2009-2011 Saarland University; Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -117,10 +118,12 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat prob = posterior[i][j].second;
|
BaseFloat prob = pdf_posterior[i][j].second;
|
||||||
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
|
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
|
||||||
feats.Row(i), pdf_id,
|
feats.Row(i), pdf_id,
|
||||||
prob);
|
prob);
|
||||||
|
@ -168,10 +171,12 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
fmllr_accs.SetZero();
|
fmllr_accs.SetZero();
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat prob = posterior[i][j].second;
|
BaseFloat prob = pdf_posterior[i][j].second;
|
||||||
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
|
file_like += fmllr_accs.AccumulateForGmm(regtree, am_gmm,
|
||||||
feats.Row(i), pdf_id,
|
feats.Row(i), pdf_id,
|
||||||
prob);
|
prob);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-est-regtree-mllr.cc
|
// gmmbin/gmm-est-regtree-mllr.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Saarland University; Microsoft Corporation
|
// Copyright 2009-2011 Saarland University; Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -117,10 +118,12 @@ int main(int argc, char *argv[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat prob = posterior[i][j].second;
|
BaseFloat prob = pdf_posterior[i][j].second;
|
||||||
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
|
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
|
||||||
feats.Row(i), pdf_id,
|
feats.Row(i), pdf_id,
|
||||||
prob);
|
prob);
|
||||||
|
@ -168,10 +171,12 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat file_like = 0.0, file_t = 0.0;
|
BaseFloat file_like = 0.0, file_t = 0.0;
|
||||||
mllr_accs.SetZero();
|
mllr_accs.SetZero();
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(posterior[i][j].first);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat prob = posterior[i][j].second;
|
BaseFloat prob = pdf_posterior[i][j].second;
|
||||||
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
|
file_like += mllr_accs.AccumulateForGmm(regtree, am_gmm,
|
||||||
feats.Row(i), pdf_id,
|
feats.Row(i), pdf_id,
|
||||||
prob);
|
prob);
|
||||||
|
|
|
@ -100,25 +100,15 @@ bool DecodeUtterance(LatticeBiglmFasterDecoder &decoder, // not const but is rea
|
||||||
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
KALDI_ERR << "Unexpected problem getting lattice for utterance " << utt;
|
||||||
fst::Connect(&lat);
|
fst::Connect(&lat);
|
||||||
if (determinize) {
|
if (determinize) {
|
||||||
Invert(&lat);
|
|
||||||
if (!TopSort(&lat)) {
|
|
||||||
// Cannot topologically sort the lattice -- determinization will fail.
|
|
||||||
KALDI_WARN << "Topological sorting of state-level lattice failed "
|
|
||||||
<< "(probably your lexicon has empty words or your LM has "
|
|
||||||
<< "epsilon cycles).";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
fst::ILabelCompare<LatticeArc> ilabel_comp;
|
|
||||||
ArcSort(&lat, ilabel_comp);
|
|
||||||
CompactLattice clat;
|
CompactLattice clat;
|
||||||
if (!DeterminizeLatticePhonePruned(trans_model,
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
&lat,
|
trans_model,
|
||||||
decoder.GetOptions().lattice_beam,
|
&lat,
|
||||||
&clat,
|
decoder.GetOptions().lattice_beam,
|
||||||
decoder.GetOptions().det_opts))
|
&clat,
|
||||||
|
decoder.GetOptions().det_opts))
|
||||||
KALDI_WARN << "Determinization finished earlier than the beam for "
|
KALDI_WARN << "Determinization finished earlier than the beam for "
|
||||||
<< "utterance " << utt;
|
<< "utterance " << utt;
|
||||||
fst::Connect(&clat);
|
|
||||||
// We'll write the lattice without acoustic scaling.
|
// We'll write the lattice without acoustic scaling.
|
||||||
if (acoustic_scale != 0.0)
|
if (acoustic_scale != 0.0)
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale), &clat);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// gmmbin/gmm-post-to-gpost.cc
|
// gmmbin/gmm-post-to-gpost.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -69,7 +70,7 @@ int main(int argc, char *argv[]) {
|
||||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||||
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
|
RandomAccessPosteriorReader posteriors_reader(posteriors_rspecifier);
|
||||||
|
|
||||||
GauPostWriter gpost_writer(gpost_wspecifier);
|
GaussPostWriter gpost_writer(gpost_wspecifier);
|
||||||
|
|
||||||
int32 num_done = 0, num_no_posterior = 0, num_other_error = 0;
|
int32 num_done = 0, num_no_posterior = 0, num_other_error = 0;
|
||||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||||
|
@ -79,7 +80,7 @@ int main(int argc, char *argv[]) {
|
||||||
} else {
|
} else {
|
||||||
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
||||||
const Posterior &posterior = posteriors_reader.Value(key);
|
const Posterior &posterior = posteriors_reader.Value(key);
|
||||||
GauPost gpost(posterior.size());
|
GaussPost gpost(posterior.size());
|
||||||
|
|
||||||
if (posterior.size() != mat.NumRows()) {
|
if (posterior.size() != mat.NumRows()) {
|
||||||
KALDI_WARN << "Posterior vector has wrong size "<< (posterior.size()) << " vs. "<< (mat.NumRows());
|
KALDI_WARN << "Posterior vector has wrong size "<< (posterior.size()) << " vs. "<< (mat.NumRows());
|
||||||
|
@ -90,12 +91,13 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
gpost[i].reserve(posterior[i].size());
|
gpost[i].reserve(pdf_posterior[i].size());
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
|
||||||
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
|
const DiagGmm &gmm = am_gmm.GetPdf(pdf_id);
|
||||||
Vector<BaseFloat> this_post_vec;
|
Vector<BaseFloat> this_post_vec;
|
||||||
BaseFloat like =
|
BaseFloat like =
|
||||||
|
@ -106,7 +108,7 @@ int main(int argc, char *argv[]) {
|
||||||
this_post_vec(k) = RandPrune(this_post_vec(k),
|
this_post_vec(k) = RandPrune(this_post_vec(k),
|
||||||
rand_prune);
|
rand_prune);
|
||||||
if (!this_post_vec.IsZero())
|
if (!this_post_vec.IsZero())
|
||||||
gpost[i].push_back(std::make_pair(tid, this_post_vec));
|
gpost[i].push_back(std::make_pair(pdf_id, this_post_vec));
|
||||||
tot_like_this_file += like * weight;
|
tot_like_this_file += like * weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
// 2013 Johns Hopkins University (author: Daniel Povey)
|
// 2013 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -136,13 +137,13 @@ bool PosteriorHolder::Read(std::istream &is) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// static
|
// static
|
||||||
bool GauPostHolder::Write(std::ostream &os, bool binary, const T &t) {
|
bool GaussPostHolder::Write(std::ostream &os, bool binary, const T &t) {
|
||||||
InitKaldiOutputStream(os, binary); // Puts binary header if binary mode.
|
InitKaldiOutputStream(os, binary); // Puts binary header if binary mode.
|
||||||
try {
|
try {
|
||||||
// We don't bother making this a one-line format.
|
// We don't bother making this a one-line format.
|
||||||
int32 sz = t.size();
|
int32 sz = t.size();
|
||||||
WriteBasicType(os, binary, sz);
|
WriteBasicType(os, binary, sz);
|
||||||
for (GauPost::const_iterator iter = t.begin(); iter != t.end(); ++iter) {
|
for (GaussPost::const_iterator iter = t.begin(); iter != t.end(); ++iter) {
|
||||||
int32 sz2 = iter->size();
|
int32 sz2 = iter->size();
|
||||||
WriteBasicType(os, binary, sz2);
|
WriteBasicType(os, binary, sz2);
|
||||||
for (std::vector<std::pair<int32, Vector<BaseFloat> > >::const_iterator iter2=iter->begin();
|
for (std::vector<std::pair<int32, Vector<BaseFloat> > >::const_iterator iter2=iter->begin();
|
||||||
|
@ -161,7 +162,7 @@ bool GauPostHolder::Write(std::ostream &os, bool binary, const T &t) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GauPostHolder::Read(std::istream &is) {
|
bool GaussPostHolder::Read(std::istream &is) {
|
||||||
t_.clear();
|
t_.clear();
|
||||||
|
|
||||||
bool is_binary;
|
bool is_binary;
|
||||||
|
@ -175,7 +176,7 @@ bool GauPostHolder::Read(std::istream &is) {
|
||||||
if (sz < 0)
|
if (sz < 0)
|
||||||
KALDI_ERR << "Reading posteriors: got negative size\n";
|
KALDI_ERR << "Reading posteriors: got negative size\n";
|
||||||
t_.resize(sz);
|
t_.resize(sz);
|
||||||
for (GauPost::iterator iter = t_.begin(); iter != t_.end(); ++iter) {
|
for (GaussPost::iterator iter = t_.begin(); iter != t_.end(); ++iter) {
|
||||||
int32 sz2;
|
int32 sz2;
|
||||||
ReadBasicType(is, is_binary, &sz2);
|
ReadBasicType(is, is_binary, &sz2);
|
||||||
if (sz2 < 0)
|
if (sz2 < 0)
|
||||||
|
@ -270,6 +271,27 @@ void AlignmentToPosterior(const std::vector<int32> &ali,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ComparePosteriorByPdfs {
|
||||||
|
const TransitionModel *tmodel_;
|
||||||
|
ComparePosteriorByPdfs(const TransitionModel &tmodel): tmodel_(&tmodel) {}
|
||||||
|
bool operator() (const std::pair<int32, BaseFloat> &a,
|
||||||
|
const std::pair<int32, BaseFloat> &b) {
|
||||||
|
if (tmodel_->TransitionIdToPdf(a.first)
|
||||||
|
< tmodel_->TransitionIdToPdf(b.first))
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void SortPosteriorByPdfs(const TransitionModel &tmodel,
|
||||||
|
Posterior *post) {
|
||||||
|
ComparePosteriorByPdfs compare(tmodel);
|
||||||
|
for (size_t i = 0; i < post->size(); i++) {
|
||||||
|
sort((*post)[i].begin(), (*post)[i].end(), compare);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
|
void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
|
||||||
const Posterior &post_in,
|
const Posterior &post_in,
|
||||||
Posterior *post_out) {
|
Posterior *post_out) {
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2011 Microsoft Corporation
|
// Copyright 2009-2011 Microsoft Corporation
|
||||||
// 2013-2014 Johns Hopkins University (author: Daniel Povey)
|
// 2013-2014 Johns Hopkins University (author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
|
@ -40,10 +41,14 @@ namespace kaldi {
|
||||||
/// is a probability (typically between zero and one).
|
/// is a probability (typically between zero and one).
|
||||||
typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
|
typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
|
||||||
|
|
||||||
/// GauPost is a typedef for storing Gaussian-level posteriors for an utterance.
|
/// GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
|
||||||
/// the "int32" is a transition-id, and the Vector<BaseFloat> is a vector of
|
/// the "int32" is a transition-id, and the Vector<BaseFloat> is a vector of
|
||||||
/// Gaussian posteriors.
|
/// Gaussian posteriors.
|
||||||
typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GauPost;
|
/// WARNING: We changed "int32" from transition-id to pdf-id, and the change is
|
||||||
|
/// applied for all programs using GaussPost. This is for efficiency purpose. We
|
||||||
|
/// also changed the name slightly from GauPost to GaussPost to reduce the
|
||||||
|
/// chance that the change will go un-noticed in downstream code.
|
||||||
|
typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
|
||||||
|
|
||||||
|
|
||||||
// PosteriorHolder is a holder for Posterior, which is
|
// PosteriorHolder is a holder for Posterior, which is
|
||||||
|
@ -75,19 +80,19 @@ class PosteriorHolder {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// GauPostHolder is a holder for GauPost, which is
|
// GaussPostHolder is a holder for GaussPost, which is
|
||||||
// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >
|
// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >
|
||||||
// This is used for storing posteriors of transition id's for an
|
// This is used for storing posteriors of transition id's for an
|
||||||
// utterance.
|
// utterance.
|
||||||
class GauPostHolder {
|
class GaussPostHolder {
|
||||||
public:
|
public:
|
||||||
typedef GauPost T;
|
typedef GaussPost T;
|
||||||
|
|
||||||
GauPostHolder() { }
|
GaussPostHolder() { }
|
||||||
|
|
||||||
static bool Write(std::ostream &os, bool binary, const T &t);
|
static bool Write(std::ostream &os, bool binary, const T &t);
|
||||||
|
|
||||||
void Clear() { GauPost tmp; std::swap(tmp, t_); }
|
void Clear() { GaussPost tmp; std::swap(tmp, t_); }
|
||||||
|
|
||||||
// Reads into the holder.
|
// Reads into the holder.
|
||||||
bool Read(std::istream &is);
|
bool Read(std::istream &is);
|
||||||
|
@ -99,7 +104,7 @@ class GauPostHolder {
|
||||||
const T &Value() const { return t_; }
|
const T &Value() const { return t_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
KALDI_DISALLOW_COPY_AND_ASSIGN(GauPostHolder);
|
KALDI_DISALLOW_COPY_AND_ASSIGN(GaussPostHolder);
|
||||||
T t_;
|
T t_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -112,10 +117,10 @@ typedef SequentialTableReader<PosteriorHolder> SequentialPosteriorReader;
|
||||||
typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
|
typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
|
||||||
|
|
||||||
|
|
||||||
// typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GauPost;
|
// typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
|
||||||
typedef TableWriter<GauPostHolder> GauPostWriter;
|
typedef TableWriter<GaussPostHolder> GaussPostWriter;
|
||||||
typedef SequentialTableReader<GauPostHolder> SequentialGauPostReader;
|
typedef SequentialTableReader<GaussPostHolder> SequentialGaussPostReader;
|
||||||
typedef RandomAccessTableReader<GauPostHolder> RandomAccessGauPostReader;
|
typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
|
||||||
|
|
||||||
|
|
||||||
/// Scales the BaseFloat (weight) element in the posterior entries.
|
/// Scales the BaseFloat (weight) element in the posterior entries.
|
||||||
|
@ -146,6 +151,11 @@ int32 MergePosteriors(const Posterior &post1,
|
||||||
void AlignmentToPosterior(const std::vector<int32> &ali,
|
void AlignmentToPosterior(const std::vector<int32> &ali,
|
||||||
Posterior *post);
|
Posterior *post);
|
||||||
|
|
||||||
|
/// Sorts posterior entries so that transition-ids with same pdf-id are next to
|
||||||
|
/// each other.
|
||||||
|
void SortPosteriorByPdfs(const TransitionModel &tmodel,
|
||||||
|
Posterior *post);
|
||||||
|
|
||||||
/// Converts a posterior over transition-ids to be a posterior
|
/// Converts a posterior over transition-ids to be a posterior
|
||||||
/// over pdf-ids.
|
/// over pdf-ids.
|
||||||
void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
|
void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
|
||||||
|
|
|
@ -1464,6 +1464,30 @@ bool DeterminizeLatticePhonePruned(
|
||||||
beam, ofst, opts);
|
beam, ofst, opts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool DeterminizeLatticePhonePrunedWrapper(
|
||||||
|
const kaldi::TransitionModel &trans_model,
|
||||||
|
MutableFst<kaldi::LatticeArc> *ifst,
|
||||||
|
double beam,
|
||||||
|
MutableFst<kaldi::CompactLatticeArc> *ofst,
|
||||||
|
DeterminizeLatticePhonePrunedOptions opts) {
|
||||||
|
bool ans = true;
|
||||||
|
Invert(ifst);
|
||||||
|
if (ifst->Properties(fst::kTopSorted, true) == 0) {
|
||||||
|
if (!TopSort(ifst)) {
|
||||||
|
// Cannot topologically sort the lattice -- determinization will fail.
|
||||||
|
KALDI_ERR << "Topological sorting of state-level lattice failed (probably"
|
||||||
|
<< " your lexicon has empty words or your LM has epsilon cycles"
|
||||||
|
<< ").";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ILabelCompare<kaldi::LatticeArc> ilabel_comp;
|
||||||
|
ArcSort(ifst, ilabel_comp);
|
||||||
|
ans = DeterminizeLatticePhonePruned<kaldi::LatticeWeight, kaldi::int32>(
|
||||||
|
trans_model, ifst, beam, ofst, opts);
|
||||||
|
Connect(ofst);
|
||||||
|
return ans;
|
||||||
|
}
|
||||||
|
|
||||||
// Instantiate the templates for the types we might need.
|
// Instantiate the templates for the types we might need.
|
||||||
// Note: there are actually four templates, each of which
|
// Note: there are actually four templates, each of which
|
||||||
// we instantiate for a single type.
|
// we instantiate for a single type.
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include "fstext/lattice-weight.h"
|
#include "fstext/lattice-weight.h"
|
||||||
#include "hmm/transition-model.h"
|
#include "hmm/transition-model.h"
|
||||||
#include "itf/options-itf.h"
|
#include "itf/options-itf.h"
|
||||||
|
#include "lat/kaldi-lattice.h"
|
||||||
|
|
||||||
namespace fst {
|
namespace fst {
|
||||||
|
|
||||||
|
@ -256,6 +257,19 @@ bool DeterminizeLatticePhonePruned(
|
||||||
DeterminizeLatticePhonePrunedOptions opts
|
DeterminizeLatticePhonePrunedOptions opts
|
||||||
= DeterminizeLatticePhonePrunedOptions());
|
= DeterminizeLatticePhonePrunedOptions());
|
||||||
|
|
||||||
|
/** This function is a wrapper of DeterminizeLatticePhonePruned() that works for
|
||||||
|
Lattice type FSTs. Unlike other determinization routines, the function
|
||||||
|
requires "ifst" to have transition-id's on the input side and words on the
|
||||||
|
output side.
|
||||||
|
*/
|
||||||
|
bool DeterminizeLatticePhonePrunedWrapper(
|
||||||
|
const kaldi::TransitionModel &trans_model,
|
||||||
|
MutableFst<kaldi::LatticeArc> *ifst,
|
||||||
|
double prune,
|
||||||
|
MutableFst<kaldi::CompactLatticeArc> *ofst,
|
||||||
|
DeterminizeLatticePhonePrunedOptions opts
|
||||||
|
= DeterminizeLatticePhonePrunedOptions());
|
||||||
|
|
||||||
/// @} end "addtogroup fst_extensions"
|
/// @} end "addtogroup fst_extensions"
|
||||||
|
|
||||||
} // end namespace fst
|
} // end namespace fst
|
||||||
|
|
|
@ -40,60 +40,40 @@ class DeterminizeLatticeTask {
|
||||||
bool minimize,
|
bool minimize,
|
||||||
Lattice *lat,
|
Lattice *lat,
|
||||||
CompactLatticeWriter *clat_writer,
|
CompactLatticeWriter *clat_writer,
|
||||||
int32 *num_warn,
|
int32 *num_warn):
|
||||||
int32 *num_fail):
|
|
||||||
trans_model_(&trans_model), opts_(opts), key_(key),
|
trans_model_(&trans_model), opts_(opts), key_(key),
|
||||||
acoustic_scale_(acoustic_scale), beam_(beam), minimize_(minimize),
|
acoustic_scale_(acoustic_scale), beam_(beam), minimize_(minimize),
|
||||||
lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn),
|
lat_(lat), clat_writer_(clat_writer), num_warn_(num_warn) { }
|
||||||
num_fail_(num_fail), skip_writting_(false) { }
|
|
||||||
|
|
||||||
void operator () () {
|
void operator () () {
|
||||||
// Put word labels on the input side.
|
|
||||||
Invert(lat_);
|
|
||||||
|
|
||||||
// We apply the acoustic scale before determinization and will undo it
|
// We apply the acoustic scale before determinization and will undo it
|
||||||
// afterward, since it can affect the result.
|
// afterward, since it can affect the result.
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale_), lat_);
|
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale_), lat_);
|
||||||
|
|
||||||
if (!TopSort(lat_)) {
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
KALDI_WARN << "Could not topologically sort lattice: this probably means "
|
*trans_model_, lat_, beam_, &det_clat_, opts_)) {
|
||||||
"it has bad properties e.g. epsilon cycles. Your LM or lexicon might "
|
KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
|
||||||
"be broken, e.g. LM with epsilon cycles or lexicon with empty words.";
|
"(partial output will be pruned tighter than the specified beam.)";
|
||||||
(*num_fail_)++;
|
(*num_warn_)++;
|
||||||
skip_writting_ = true;
|
|
||||||
|
|
||||||
delete lat_;
|
|
||||||
lat_ = NULL;
|
|
||||||
} else {
|
|
||||||
fst::ArcSort(lat_, fst::ILabelCompare<LatticeArc>());
|
|
||||||
|
|
||||||
if (!DeterminizeLatticePhonePruned(
|
|
||||||
*trans_model_, lat_, beam_, &det_clat_, opts_)) {
|
|
||||||
KALDI_WARN << "For key " << key_ << ", determinization did not succeed"
|
|
||||||
"(partial output will be pruned tighter than the specified beam.)";
|
|
||||||
(*num_warn_)++;
|
|
||||||
}
|
|
||||||
|
|
||||||
delete lat_;
|
|
||||||
lat_ = NULL;
|
|
||||||
|
|
||||||
if (minimize_) {
|
|
||||||
PushCompactLatticeStrings(&det_clat_);
|
|
||||||
PushCompactLatticeWeights(&det_clat_);
|
|
||||||
MinimizeCompactLattice(&det_clat_);
|
|
||||||
}
|
|
||||||
// Invert the original acoustic scaling
|
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
|
|
||||||
&det_clat_);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete lat_;
|
||||||
|
lat_ = NULL;
|
||||||
|
|
||||||
|
if (minimize_) {
|
||||||
|
PushCompactLatticeStrings(&det_clat_);
|
||||||
|
PushCompactLatticeWeights(&det_clat_);
|
||||||
|
MinimizeCompactLattice(&det_clat_);
|
||||||
|
}
|
||||||
|
// Invert the original acoustic scaling
|
||||||
|
fst::ScaleLattice(fst::AcousticLatticeScale(1.0/acoustic_scale_),
|
||||||
|
&det_clat_);
|
||||||
}
|
}
|
||||||
|
|
||||||
~DeterminizeLatticeTask() {
|
~DeterminizeLatticeTask() {
|
||||||
if (!skip_writting_) {
|
KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates()
|
||||||
KALDI_VLOG(2) << "Wrote lattice with " << det_clat_.NumStates()
|
<< " for key " << key_;
|
||||||
<< " for key " << key_;
|
clat_writer_->Write(key_, det_clat_);
|
||||||
clat_writer_->Write(key_, det_clat_);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
const TransitionModel *trans_model_;
|
const TransitionModel *trans_model_;
|
||||||
|
@ -109,8 +89,6 @@ class DeterminizeLatticeTask {
|
||||||
CompactLattice det_clat_;
|
CompactLattice det_clat_;
|
||||||
CompactLatticeWriter *clat_writer_;
|
CompactLatticeWriter *clat_writer_;
|
||||||
int32 *num_warn_;
|
int32 *num_warn_;
|
||||||
int32 *num_fail_;
|
|
||||||
bool skip_writting_;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -175,7 +153,7 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_opts);
|
TaskSequencer<DeterminizeLatticeTask> sequencer(sequencer_opts);
|
||||||
|
|
||||||
int32 n_done = 0, n_warn = 0, n_fail = 0;
|
int32 n_done = 0, n_warn = 0;
|
||||||
|
|
||||||
if (acoustic_scale == 0.0)
|
if (acoustic_scale == 0.0)
|
||||||
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
|
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
|
||||||
|
@ -190,7 +168,7 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
DeterminizeLatticeTask *task = new DeterminizeLatticeTask(
|
DeterminizeLatticeTask *task = new DeterminizeLatticeTask(
|
||||||
trans_model, determinize_opts, key, acoustic_scale, beam, minimize,
|
trans_model, determinize_opts, key, acoustic_scale, beam, minimize,
|
||||||
lat, &compact_lat_writer, &n_warn, &n_fail);
|
lat, &compact_lat_writer, &n_warn);
|
||||||
sequencer.Run(task);
|
sequencer.Run(task);
|
||||||
|
|
||||||
n_done++;
|
n_done++;
|
||||||
|
@ -198,7 +176,7 @@ int main(int argc, char *argv[]) {
|
||||||
sequencer.Wait();
|
sequencer.Wait();
|
||||||
KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
|
KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
|
||||||
<< "earlier than specified by the beam on " << n_warn << " of "
|
<< "earlier than specified by the beam on " << n_warn << " of "
|
||||||
<< "these, failed for " << n_fail;
|
<< "these.";
|
||||||
return (n_done != 0 ? 0 : 1);
|
return (n_done != 0 ? 0 : 1);
|
||||||
} catch(const std::exception &e) {
|
} catch(const std::exception &e) {
|
||||||
std::cerr << e.what();
|
std::cerr << e.what();
|
||||||
|
|
|
@ -77,7 +77,7 @@ int main(int argc, char *argv[]) {
|
||||||
// Writes as compact lattice.
|
// Writes as compact lattice.
|
||||||
CompactLatticeWriter compact_lat_writer(lats_wspecifier);
|
CompactLatticeWriter compact_lat_writer(lats_wspecifier);
|
||||||
|
|
||||||
int32 n_done = 0, n_warn = 0, n_fail = 0;
|
int32 n_done = 0, n_warn = 0;
|
||||||
|
|
||||||
if (acoustic_scale == 0.0)
|
if (acoustic_scale == 0.0)
|
||||||
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
|
KALDI_ERR << "Do not use a zero acoustic scale (cannot be inverted)";
|
||||||
|
@ -89,23 +89,11 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
KALDI_VLOG(2) << "Processing lattice " << key;
|
KALDI_VLOG(2) << "Processing lattice " << key;
|
||||||
|
|
||||||
Invert(&lat); // so word labels are on the input side.
|
|
||||||
|
|
||||||
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat);
|
fst::ScaleLattice(fst::AcousticLatticeScale(acoustic_scale), &lat);
|
||||||
|
|
||||||
if (!TopSort(&lat)) {
|
|
||||||
KALDI_WARN << "Could not topologically sort lattice: this probably "
|
|
||||||
"means it has bad properties e.g. epsilon cycles. Your LM or "
|
|
||||||
"lexicon might be broken, e.g. LM with epsilon cycles or lexicon "
|
|
||||||
"with empty words.";
|
|
||||||
n_fail++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
fst::ArcSort(&lat, fst::ILabelCompare<LatticeArc>());
|
|
||||||
|
|
||||||
CompactLattice det_clat;
|
CompactLattice det_clat;
|
||||||
if (!DeterminizeLatticePhonePruned(
|
if (!DeterminizeLatticePhonePrunedWrapper(
|
||||||
trans_model, lat, beam, &det_clat, opts)) {
|
trans_model, &lat, beam, &det_clat, opts)) {
|
||||||
KALDI_WARN << "For key " << key << ", determinization did not succeed"
|
KALDI_WARN << "For key " << key << ", determinization did not succeed"
|
||||||
"(partial output will be pruned tighter than the specified beam.)";
|
"(partial output will be pruned tighter than the specified beam.)";
|
||||||
n_warn++;
|
n_warn++;
|
||||||
|
@ -124,7 +112,7 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
|
KALDI_LOG << "Done " << n_done << " lattices, determinization finished "
|
||||||
<< "earlier than specified by the beam on " << n_warn << " of "
|
<< "earlier than specified by the beam on " << n_warn << " of "
|
||||||
<< "these, failed for " << n_fail;
|
<< "these.";
|
||||||
return (n_done != 0 ? 0 : 1);
|
return (n_done != 0 ? 0 : 1);
|
||||||
} catch(const std::exception &e) {
|
} catch(const std::exception &e) {
|
||||||
std::cerr << e.what();
|
std::cerr << e.what();
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal),
|
// Copyright 2009-2012 Saarland University (Author: Arnab Ghoshal),
|
||||||
// Johns Hopkins University (Author: Daniel Povey)
|
// Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -160,21 +161,28 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
|
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
am_sgmm.ComputePerFrameVars(features.Row(i), gselect[i], spk_vars,
|
am_sgmm.ComputePerFrameVars(features.Row(i), gselect[i], spk_vars,
|
||||||
&per_frame_vars);
|
&per_frame_vars);
|
||||||
|
// Accumulates for SGMM.
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
|
||||||
trans_model.Accumulate(weight, tid, &transition_accs);
|
|
||||||
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
|
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
|
||||||
pdf_id, weight, &spk_vars)
|
pdf_id, weight, &spk_vars)
|
||||||
* weight;
|
* weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accumulates for transitions.
|
||||||
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
|
int32 tid = posterior[i][j].first;
|
||||||
|
BaseFloat weight = posterior[i][j].second;
|
||||||
|
trans_model.Accumulate(weight, tid, &transition_accs);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
KALDI_VLOG(2) << "Average like for this file is "
|
KALDI_VLOG(2) << "Average like for this file is "
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// sgmm2bin/sgmm2-est-fmllr.cc
|
// sgmm2bin/sgmm2-est-fmllr.cc
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -42,6 +43,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
FmllrSgmm2Accs *spk_stats) {
|
FmllrSgmm2Accs *spk_stats) {
|
||||||
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
|
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
|
||||||
|
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t t = 0; t < post.size(); t++) {
|
for (size_t t = 0; t < post.size(); t++) {
|
||||||
// per-frame vars only used for computing posteriors... use the
|
// per-frame vars only used for computing posteriors... use the
|
||||||
// transformed feats for this, if available.
|
// transformed feats for this, if available.
|
||||||
|
@ -49,12 +52,12 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
*spk_vars, &per_frame_vars);
|
*spk_vars, &per_frame_vars);
|
||||||
|
|
||||||
|
|
||||||
for (size_t j = 0; j < post[t].size(); j++) {
|
for (size_t j = 0; j < pdf_post[t].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[t][j].first);
|
int32 pdf_id = pdf_post[t][j].first;
|
||||||
Matrix<BaseFloat> posteriors;
|
Matrix<BaseFloat> posteriors;
|
||||||
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
||||||
spk_vars, &posteriors);
|
spk_vars, &posteriors);
|
||||||
posteriors.Scale(post[t][j].second);
|
posteriors.Scale(pdf_post[t][j].second);
|
||||||
spk_stats->AccumulateFromPosteriors(am_sgmm, *spk_vars, feats.Row(t),
|
spk_stats->AccumulateFromPosteriors(am_sgmm, *spk_vars, feats.Row(t),
|
||||||
gselect[t], posteriors, pdf_id);
|
gselect[t], posteriors, pdf_id);
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation
|
// Copyright 2009-2012 Saarland University Microsoft Corporation
|
||||||
// Johns Hopkins University (Author: Daniel Povey)
|
// Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -42,14 +43,16 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
|
kaldi::Sgmm2PerFrameDerivedVars per_frame_vars;
|
||||||
|
|
||||||
KALDI_ASSERT(gselect.size() == feats.NumRows());
|
KALDI_ASSERT(gselect.size() == feats.NumRows());
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t i = 0; i < post.size(); i++) {
|
for (size_t i = 0; i < post.size(); i++) {
|
||||||
am_sgmm.ComputePerFrameVars(feats.Row(i), gselect[i],
|
am_sgmm.ComputePerFrameVars(feats.Row(i), gselect[i],
|
||||||
*spk_vars, &per_frame_vars);
|
*spk_vars, &per_frame_vars);
|
||||||
|
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first);
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id,
|
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id,
|
||||||
post[i][j].second, spk_vars);
|
pdf_post[i][j].second, spk_vars);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation
|
// Copyright 2009-2012 Saarland University Microsoft Corporation
|
||||||
// Johns Hopkins University (Author: Daniel Povey)
|
// Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -94,7 +95,7 @@ int main(int argc, char *argv[]) {
|
||||||
num_err++;
|
num_err++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const Posterior &posterior = posteriors_reader.Value(utt);
|
Posterior posterior = posteriors_reader.Value(utt);
|
||||||
|
|
||||||
if (!gselect_reader.HasKey(utt) ||
|
if (!gselect_reader.HasKey(utt) ||
|
||||||
gselect_reader.Value(utt).size() != mat.NumRows()) {
|
gselect_reader.Value(utt).size() != mat.NumRows()) {
|
||||||
|
@ -123,6 +124,10 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
Sgmm2GauPost gpost(posterior.size()); // posterior.size() == T.
|
Sgmm2GauPost gpost(posterior.size()); // posterior.size() == T.
|
||||||
|
|
||||||
|
SortPosteriorByPdfs(trans_model, &posterior);
|
||||||
|
int32 prev_pdf_id = -1;
|
||||||
|
BaseFloat prev_like = 0;
|
||||||
|
Matrix<BaseFloat> prev_posterior;
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
am_sgmm.ComputePerFrameVars(mat.Row(i), gselect[i],
|
am_sgmm.ComputePerFrameVars(mat.Row(i), gselect[i],
|
||||||
spk_vars, &per_frame_vars);
|
spk_vars, &per_frame_vars);
|
||||||
|
@ -130,18 +135,25 @@ int main(int argc, char *argv[]) {
|
||||||
gpost[i].gselect = gselect[i];
|
gpost[i].gselect = gselect[i];
|
||||||
gpost[i].tids.resize(posterior[i].size());
|
gpost[i].tids.resize(posterior[i].size());
|
||||||
gpost[i].posteriors.resize(posterior[i].size());
|
gpost[i].posteriors.resize(posterior[i].size());
|
||||||
|
|
||||||
|
prev_pdf_id = -1; // Only cache for the same frame.
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 tid = posterior[i][j].first, // transition identifier.
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
pdf_id = trans_model.TransitionIdToPdf(tid);
|
||||||
BaseFloat weight = posterior[i][j].second;
|
BaseFloat weight = posterior[i][j].second;
|
||||||
gpost[i].tids[j] = tid;
|
gpost[i].tids[j] = tid;
|
||||||
|
|
||||||
tot_like_this_file +=
|
if (pdf_id != prev_pdf_id) {
|
||||||
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
// First time see this pdf-id for this frame, update the cached
|
||||||
&spk_vars,
|
// variables.
|
||||||
&(gpost[i].posteriors[j]))
|
prev_pdf_id = pdf_id;
|
||||||
* weight;
|
prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
||||||
|
&spk_vars,
|
||||||
|
&prev_posterior);
|
||||||
|
}
|
||||||
|
|
||||||
|
gpost[i].posteriors[j] = prev_posterior;
|
||||||
|
tot_like_this_file += prev_like * weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
gpost[i].posteriors[j].Scale(weight);
|
gpost[i].posteriors[j].Scale(weight);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// sgmmbin/sgmm-acc-stats.cc
|
// sgmmbin/sgmm-acc-stats.cc
|
||||||
|
|
||||||
// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal),
|
// Copyright 2009-2011 Saarland University (Author: Arnab Ghoshal),
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -138,6 +139,8 @@ int main(int argc, char *argv[]) {
|
||||||
num_done++;
|
num_done++;
|
||||||
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
|
||||||
|
|
||||||
|
Posterior pdf_posterior;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, posterior, &pdf_posterior);
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
if (posterior[i].empty())
|
if (posterior[i].empty())
|
||||||
continue;
|
continue;
|
||||||
|
@ -147,18 +150,25 @@ int main(int argc, char *argv[]) {
|
||||||
am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0,
|
am_sgmm.ComputePerFrameVars(mat.Row(i), this_gselect, spk_vars, 0.0,
|
||||||
&per_frame_vars);
|
&per_frame_vars);
|
||||||
|
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
// Accumulates for SGMM.
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
int32 pdf_id = pdf_posterior[i][j].first;
|
||||||
BaseFloat weight = posterior[i][j].second;
|
BaseFloat weight = pdf_posterior[i][j].second;
|
||||||
if (acc_flags & kaldi::kSgmmTransitions)
|
|
||||||
trans_model.Accumulate(weight, tid, &transition_accs);
|
|
||||||
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
|
tot_like_this_file += sgmm_accs.Accumulate(am_sgmm, per_frame_vars,
|
||||||
spk_vars.v_s, pdf_id,
|
spk_vars.v_s, pdf_id,
|
||||||
weight, acc_flags)
|
weight, acc_flags)
|
||||||
* weight;
|
* weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Accumulates for transitions.
|
||||||
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
|
if (acc_flags & kaldi::kSgmmTransitions) {
|
||||||
|
int32 tid = posterior[i][j].first;
|
||||||
|
BaseFloat weight = posterior[i][j].second;
|
||||||
|
trans_model.Accumulate(weight, tid, &transition_accs);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance.
|
sgmm_accs.CommitStatsForSpk(am_sgmm, spk_vars.v_s); // no harm doing it per utterance.
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// sgmmbin/sgmm-est-fmllr.cc
|
// sgmmbin/sgmm-est-fmllr.cc
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -43,6 +44,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
FmllrSgmmAccs *spk_stats) {
|
FmllrSgmmAccs *spk_stats) {
|
||||||
kaldi::SgmmPerFrameDerivedVars per_frame_vars;
|
kaldi::SgmmPerFrameDerivedVars per_frame_vars;
|
||||||
|
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t t = 0; t < post.size(); t++) {
|
for (size_t t = 0; t < post.size(); t++) {
|
||||||
std::vector<int32> this_gselect;
|
std::vector<int32> this_gselect;
|
||||||
if (!gselect.empty()) {
|
if (!gselect.empty()) {
|
||||||
|
@ -57,12 +60,12 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
0.0 /*fMLLR logdet*/, &per_frame_vars);
|
0.0 /*fMLLR logdet*/, &per_frame_vars);
|
||||||
|
|
||||||
|
|
||||||
for (size_t j = 0; j < post[t].size(); j++) {
|
for (size_t j = 0; j < pdf_post[t].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[t][j].first);
|
int32 pdf_id = pdf_post[t][j].first;
|
||||||
Matrix<BaseFloat> posteriors;
|
Matrix<BaseFloat> posteriors;
|
||||||
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
||||||
&posteriors);
|
&posteriors);
|
||||||
posteriors.Scale(post[t][j].second);
|
posteriors.Scale(pdf_post[t][j].second);
|
||||||
spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(t),
|
spk_stats->AccumulateFromPosteriors(am_sgmm, spk_vars, feats.Row(t),
|
||||||
this_gselect,
|
this_gselect,
|
||||||
posteriors, pdf_id);
|
posteriors, pdf_id);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// sgmmbin/sgmm-est-spkvecs.cc
|
// sgmmbin/sgmm-est-spkvecs.cc
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -41,6 +42,8 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
MleSgmmSpeakerAccs *spk_stats) {
|
MleSgmmSpeakerAccs *spk_stats) {
|
||||||
kaldi::SgmmPerFrameDerivedVars per_frame_vars;
|
kaldi::SgmmPerFrameDerivedVars per_frame_vars;
|
||||||
|
|
||||||
|
Posterior pdf_post;
|
||||||
|
ConvertPosteriorToPdfs(trans_model, post, &pdf_post);
|
||||||
for (size_t i = 0; i < post.size(); i++) {
|
for (size_t i = 0; i < post.size(); i++) {
|
||||||
std::vector<int32> this_gselect;
|
std::vector<int32> this_gselect;
|
||||||
if (!gselect.empty())
|
if (!gselect.empty())
|
||||||
|
@ -49,9 +52,9 @@ void AccumulateForUtterance(const Matrix<BaseFloat> &feats,
|
||||||
am_sgmm.GaussianSelection(gselect_opts, feats.Row(i), &this_gselect);
|
am_sgmm.GaussianSelection(gselect_opts, feats.Row(i), &this_gselect);
|
||||||
am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars);
|
am_sgmm.ComputePerFrameVars(feats.Row(i), this_gselect, spk_vars, 0.0, &per_frame_vars);
|
||||||
|
|
||||||
for (size_t j = 0; j < post[i].size(); j++) {
|
for (size_t j = 0; j < pdf_post[i].size(); j++) {
|
||||||
int32 pdf_id = trans_model.TransitionIdToPdf(post[i][j].first);
|
int32 pdf_id = pdf_post[i][j].first;
|
||||||
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, post[i][j].second);
|
spk_stats->Accumulate(am_sgmm, per_frame_vars, pdf_id, pdf_post[i][j].second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
// sgmmbin/sgmm-post-to-gpost.cc
|
// sgmmbin/sgmm-post-to-gpost.cc
|
||||||
|
|
||||||
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
// Copyright 2009-2012 Saarland University Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
|
||||||
|
// 2014 Guoguo Chen
|
||||||
|
|
||||||
// See ../../COPYING for clarification regarding multiple authors
|
// See ../../COPYING for clarification regarding multiple authors
|
||||||
//
|
//
|
||||||
|
@ -87,7 +88,7 @@ int main(int argc, char *argv[]) {
|
||||||
num_no_posterior++;
|
num_no_posterior++;
|
||||||
} else {
|
} else {
|
||||||
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
const Matrix<BaseFloat> &mat = feature_reader.Value();
|
||||||
const Posterior &posterior = posteriors_reader.Value(utt);
|
Posterior posterior = posteriors_reader.Value(utt);
|
||||||
|
|
||||||
bool have_gselect = !gselect_rspecifier.empty()
|
bool have_gselect = !gselect_rspecifier.empty()
|
||||||
&& gselect_reader.HasKey(utt)
|
&& gselect_reader.HasKey(utt)
|
||||||
|
@ -123,6 +124,10 @@ int main(int argc, char *argv[]) {
|
||||||
|
|
||||||
SgmmGauPost gpost(posterior.size()); // posterior.size() == T.
|
SgmmGauPost gpost(posterior.size()); // posterior.size() == T.
|
||||||
|
|
||||||
|
SortPosteriorByPdfs(trans_model, &posterior);
|
||||||
|
int32 prev_pdf_id = -1;
|
||||||
|
BaseFloat prev_like = 0;
|
||||||
|
Matrix<BaseFloat> prev_posterior;
|
||||||
for (size_t i = 0; i < posterior.size(); i++) {
|
for (size_t i = 0; i < posterior.size(); i++) {
|
||||||
|
|
||||||
std::vector<int32> this_gselect;
|
std::vector<int32> this_gselect;
|
||||||
|
@ -134,16 +139,23 @@ int main(int argc, char *argv[]) {
|
||||||
gpost[i].tids.resize(posterior[i].size());
|
gpost[i].tids.resize(posterior[i].size());
|
||||||
gpost[i].posteriors.resize(posterior[i].size());
|
gpost[i].posteriors.resize(posterior[i].size());
|
||||||
|
|
||||||
|
prev_pdf_id = -1; // Only cache for the same frame.
|
||||||
for (size_t j = 0; j < posterior[i].size(); j++) {
|
for (size_t j = 0; j < posterior[i].size(); j++) {
|
||||||
int32 tid = posterior[i][j].first, // transition identifier.
|
int32 tid = posterior[i][j].first, // transition identifier.
|
||||||
pdf_id = trans_model.TransitionIdToPdf(tid);
|
pdf_id = trans_model.TransitionIdToPdf(tid);
|
||||||
BaseFloat weight = posterior[i][j].second;
|
BaseFloat weight = posterior[i][j].second;
|
||||||
gpost[i].tids[j] = tid;
|
gpost[i].tids[j] = tid;
|
||||||
|
|
||||||
tot_like_this_file +=
|
if (pdf_id != prev_pdf_id) {
|
||||||
am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
// First time see this pdf-id for this frame, update the cached
|
||||||
&(gpost[i].posteriors[j]))
|
// variables.
|
||||||
* weight;
|
prev_pdf_id = pdf_id;
|
||||||
|
prev_like = am_sgmm.ComponentPosteriors(per_frame_vars, pdf_id,
|
||||||
|
&prev_posterior);
|
||||||
|
}
|
||||||
|
|
||||||
|
gpost[i].posteriors[j] = prev_posterior;
|
||||||
|
tot_like_this_file += prev_like * weight;
|
||||||
tot_weight += weight;
|
tot_weight += weight;
|
||||||
gpost[i].posteriors[j].Scale(weight);
|
gpost[i].posteriors[j].Scale(weight);
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче