diff --git a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh index 79a1bbd22..bebe67bee 100755 --- a/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh +++ b/egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh @@ -47,7 +47,7 @@ export PATH=$PATH:/export/babel/sanjeev/kaldi-trunk/tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/babel/s5/local/wsj_train_lms.sh b/egs/babel/s5/local/wsj_train_lms.sh index 060f387f2..34c3b7b99 100755 --- a/egs/babel/s5/local/wsj_train_lms.sh +++ b/egs/babel/s5/local/wsj_train_lms.sh @@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/fisher_english/s5/local/fisher_train_lms.sh b/egs/fisher_english/s5/local/fisher_train_lms.sh index d6f9abce5..881d3ce94 100755 --- a/egs/fisher_english/s5/local/fisher_train_lms.sh +++ b/egs/fisher_english/s5/local/fisher_train_lms.sh @@ -29,7 +29,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! 
-f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/hkust/s5/local/hkust_train_lms.sh b/egs/hkust/s5/local/hkust_train_lms.sh index f319960fc..4362bdd70 100755 --- a/egs/hkust/s5/local/hkust_train_lms.sh +++ b/egs/hkust/s5/local/hkust_train_lms.sh @@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/swbd/s3/local/swbd_p1_train_lms.sh b/egs/swbd/s3/local/swbd_p1_train_lms.sh index 79c776cb1..3807b0afb 100755 --- a/egs/swbd/s3/local/swbd_p1_train_lms.sh +++ b/egs/swbd/s3/local/swbd_p1_train_lms.sh @@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/swbd/s5/local/swbd_p1_train_lms.sh b/egs/swbd/s5/local/swbd_p1_train_lms.sh index f319960fc..4362bdd70 100755 --- a/egs/swbd/s5/local/swbd_p1_train_lms.sh +++ b/egs/swbd/s5/local/swbd_p1_train_lms.sh @@ -28,7 +28,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! 
-f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/timit/s3/local/timit_train_lms.sh b/egs/timit/s3/local/timit_train_lms.sh index 043fc2c17..eb6112244 100755 --- a/egs/timit/s3/local/timit_train_lms.sh +++ b/egs/timit/s3/local/timit_train_lms.sh @@ -53,7 +53,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm @@ -87,7 +87,7 @@ cat $trans_file | awk -v wmap=$dir/word_map 'BEGIN{while((getline0)map[$1 ! merge_ngrams &/dev/null && \ echo merge_ngrams not found in kaldi_lm. You need to have kaldi_lm on your path OR && \ echo You can do the following: && \ - echo 1. Install the latest version from http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz && \ + echo 1. Install the latest version from http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz && \ echo 2. you delete kaldi_lm, and kaldi_lm.tar.gz in the tools folder. This script will automatically install it. && \ exit 1; diff --git a/egs/wsj/s2/local/wsj_train_lms.sh b/egs/wsj/s2/local/wsj_train_lms.sh index 8be4bf0af..430f423fe 100755 --- a/egs/wsj/s2/local/wsj_train_lms.sh +++ b/egs/wsj/s2/local/wsj_train_lms.sh @@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! 
-f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/wsj/s3/local/wsj_train_lms.sh b/egs/wsj/s3/local/wsj_train_lms.sh index 8be4bf0af..430f423fe 100755 --- a/egs/wsj/s3/local/wsj_train_lms.sh +++ b/egs/wsj/s3/local/wsj_train_lms.sh @@ -20,7 +20,7 @@ export PATH=$PATH:`pwd`/../../../tools/kaldi_lm else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/egs/wsj/s5/local/wsj_train_lms.sh b/egs/wsj/s5/local/wsj_train_lms.sh index 060f387f2..34c3b7b99 100755 --- a/egs/wsj/s5/local/wsj_train_lms.sh +++ b/egs/wsj/s5/local/wsj_train_lms.sh @@ -20,7 +20,7 @@ export PATH=$KALDI_ROOT/tools/kaldi_lm:$PATH else echo Downloading and installing the kaldi_lm tools if [ ! -f kaldi_lm.tar.gz ]; then - wget http://merlin.fit.vutbr.cz/kaldi/kaldi_lm.tar.gz || exit 1; + wget http://www.danielpovey.com/files/kaldi/kaldi_lm.tar.gz || exit 1; fi tar -xvzf kaldi_lm.tar.gz || exit 1; cd kaldi_lm diff --git a/src/bin/build-tree.cc b/src/bin/build-tree.cc index 5b2933d4a..69a94a546 100644 --- a/src/bin/build-tree.cc +++ b/src/bin/build-tree.cc @@ -58,7 +58,9 @@ int main(int argc, char *argv[]) { po.Register("thresh", &thresh, "Log-likelihood change threshold for " "tree-building"); po.Register("cluster-thresh", &cluster_thresh, "Log-likelihood change " - "threshold for clustering after tree-building"); + "threshold for clustering after tree-building. 
0 means " + "no clustering; -1 means use as a clustering threshold the " + "likelihood change of the final split."); po.Read(argc, argv); diff --git a/src/gmm/diag-gmm.cc b/src/gmm/diag-gmm.cc index ce934f951..dd64409fe 100644 --- a/src/gmm/diag-gmm.cc +++ b/src/gmm/diag-gmm.cc @@ -528,9 +528,9 @@ void DiagGmm::LogLikelihoods(const VectorBase &data, Vector *loglikes) const { loglikes->Resize(gconsts_.Dim(), kUndefined); loglikes->CopyFromVec(gconsts_); - if (static_cast(data.Dim()) != Dim()) { + if (data.Dim() != Dim()) { KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension " - << "mismatch " << (data.Dim()) << " vs. "<< (Dim()); + << "mismatch " << data.Dim() << " vs. "<< Dim(); } Vector data_sq(data); data_sq.ApplyPow(2.0); @@ -542,6 +542,26 @@ void DiagGmm::LogLikelihoods(const VectorBase &data, } +void DiagGmm::LogLikelihoods(const MatrixBase &data, + Matrix *loglikes) const { + KALDI_ASSERT(data.NumRows() != 0); + loglikes->Resize(data.NumRows(), gconsts_.Dim(), kUndefined); + loglikes->CopyRowsFromVec(gconsts_); + if (data.NumCols() != Dim()) { + KALDI_ERR << "DiagGmm::ComponentLogLikelihood, dimension " + << "mismatch " << data.NumCols() << " vs. "<< Dim(); + } + Matrix data_sq(data); + data_sq.ApplyPow(2.0); + + // loglikes += means * inv(vars) * data. + loglikes->AddMatMat(1.0, data, kNoTrans, means_invvars_, kTrans, 1.0); + // loglikes += -0.5 * inv(vars) * data_sq. 
+ loglikes->AddMatMat(-0.5, data_sq, kNoTrans, inv_vars_, kTrans, 1.0); +} + + + void DiagGmm::LogLikelihoodsPreselect(const VectorBase &data, const std::vector &indices, Vector *loglikes) const { @@ -777,6 +797,79 @@ BaseFloat DiagGmm::GaussianSelection(const VectorBase &data, return tot_loglike; } +BaseFloat DiagGmm::GaussianSelection(const MatrixBase &data, + int32 num_gselect, + std::vector > *output) const { + double ans = 0.0; + int32 num_frames = data.NumRows(), num_gauss = NumGauss(); + + int32 max_mem = 10000000; // Don't devote more than 10Mb to loglikes_mat; + // break up the utterance if needed. + int32 mem_needed = num_frames * num_gauss * sizeof(BaseFloat); + if (mem_needed > max_mem) { + // Break into parts and recurse, we don't want to consume too + // much memory. + int32 num_parts = (mem_needed + max_mem - 1) / max_mem; + int32 part_frames = (data.NumRows() + num_parts - 1) / num_parts; + double tot_ans = 0.0; + std::vector > part_output; + output->clear(); + output->resize(num_frames); + for (int32 p = 0; p < num_parts; p++) { + int32 start_frame = p * part_frames, + this_num_frames = std::min(num_frames - start_frame, part_frames); + SubMatrix data_part(data, start_frame, this_num_frames, + 0, data.NumCols()); + tot_ans += GaussianSelection(data_part, num_gselect, &part_output); + for (int32 t = 0; t < this_num_frames; t++) + (*output)[start_frame + t].swap(part_output[t]); + } + KALDI_ASSERT(!output->back().empty()); + return tot_ans; + } + + KALDI_ASSERT(num_frames != 0); + Matrix loglikes_mat(num_frames, num_gauss, kUndefined); + this->LogLikelihoods(data, &loglikes_mat); + + output->clear(); + output->resize(num_frames); + + for (int32 i = 0; i < num_frames; i++) { + SubVector loglikes(loglikes_mat, i); + + BaseFloat thresh; + if (num_gselect < num_gauss) { + Vector loglikes_copy(loglikes); + BaseFloat *ptr = loglikes_copy.Data(); + std::nth_element(ptr, ptr+num_gauss-num_gselect, ptr+num_gauss); + thresh = ptr[num_gauss-num_gselect]; + } 
else { + thresh = -std::numeric_limits::infinity(); + } + BaseFloat tot_loglike = -std::numeric_limits::infinity(); + std::vector > pairs; + for (int32 p = 0; p < num_gauss; p++) { + if (loglikes(p) >= thresh) { + pairs.push_back(std::make_pair(loglikes(p), p)); + } + } + std::sort(pairs.begin(), pairs.end(), + std::greater >()); + std::vector &this_output = (*output)[i]; + for (int32 j = 0; + j < num_gselect && j < static_cast(pairs.size()); + j++) { + this_output.push_back(pairs[j].second); + tot_loglike = LogAdd(tot_loglike, pairs[j].first); + } + KALDI_ASSERT(!this_output.empty()); + ans += tot_loglike; + } + return ans; +} + + BaseFloat DiagGmm::GaussianSelectionPreselect( const VectorBase &data, diff --git a/src/gmm/diag-gmm.h b/src/gmm/diag-gmm.h index d4d16e952..233e7d2d7 100644 --- a/src/gmm/diag-gmm.h +++ b/src/gmm/diag-gmm.h @@ -81,6 +81,13 @@ class DiagGmm { void LogLikelihoods(const VectorBase &data, Vector *loglikes) const; + /// This version of the LogLikelihoods function operates on + /// a sequence of frames simultaneously; the row index of both "data" and + /// "loglikes" is the frame index. + void LogLikelihoods(const MatrixBase &data, + Matrix *loglikes) const; + + /// Outputs the per-component log-likelihoods of a subset of mixture /// components. Note: at output, loglikes->Dim() will equal indices.size(). /// loglikes[i] will correspond to the log-likelihood of the Gaussian @@ -89,13 +96,20 @@ class DiagGmm { const std::vector &indices, Vector *loglikes) const; - /// Get gaussian selection information for one frame. Returns log-like for + /// Get gaussian selection information for one frame. Returns log-like for /// this frame. Output is the best "num_gselect" indices, sorted from best to /// worst likelihood. If "num_gselect" > NumGauss(), sets it to NumGauss(). 
BaseFloat GaussianSelection(const VectorBase &data, int32 num_gselect, std::vector *output) const; + /// This version of the Gaussian selection function works for a sequence + /// of frames rather than just a single frame. Returns sum of the log-likes + /// over all frames. + BaseFloat GaussianSelection(const MatrixBase &data, + int32 num_gselect, + std::vector > *output) const; + /// Get gaussian selection information for one frame. Returns log-like for /// this frame. Output is the best "num_gselect" indices that were /// preselected, sorted from best to worst likelihood. If "num_gselect" > diff --git a/src/gmmbin/gmm-gselect.cc b/src/gmmbin/gmm-gselect.cc index d26b48575..c0089ed93 100644 --- a/src/gmmbin/gmm-gselect.cc +++ b/src/gmmbin/gmm-gselect.cc @@ -105,9 +105,8 @@ int main(int argc, char *argv[]) { gmm.GaussianSelectionPreselect(mat.Row(i), preselect[i], num_gselect, &(gselect[i])); } else { // No "preselect" [i.e. no existing gselect]: simple case. - for (int32 i = 0; i < mat.NumRows(); i++) - tot_like_this_file += - gmm.GaussianSelection(mat.Row(i), num_gselect, &(gselect[i])); + tot_like_this_file = + gmm.GaussianSelection(mat, num_gselect, &gselect); } gselect_writer.Write(utt, gselect); diff --git a/tools/Makefile b/tools/Makefile index 40298e830..7a9fc9276 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -128,8 +128,8 @@ sph2pipe_v2.5: sph2pipe_v2.5.tar.gz tar xzf sph2pipe_v2.5.tar.gz sph2pipe_v2.5.tar.gz: - wget -T 10 -t 3 http://merlin.fit.vutbr.cz/kaldi/sph2pipe_v2.5.tar.gz || \ - wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz + wget --no-check-certificate -T 10 https://sourceforge.net/projects/kaldi/files/sph2pipe_v2.5.tar.gz || \ + wget -T 10 -t 3 http://www.danielpovey.com/files/kaldi/sph2pipe_v2.5.tar.gz openblas: openblas_compiled