trunk: various small extensions to programs and cosmetic bug fixes; code cleanup in online/ directory.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3240 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Dan Povey 2013-11-30 23:08:52 +00:00
Родитель 0acf744859
Коммит 3bf9adc11e
16 изменённых файлов: 173 добавлений и 63 удалений

Просмотреть файл

@ -60,6 +60,7 @@ int main(int argc, char *argv[]) {
BaseFloatMatrixWriter feat_writer(wspecifier);
int32 num_done = 0, num_err = 0;
int64 frames_in = 0, frames_out = 0;
// process all keys
for (; !feat_reader.Done(); feat_reader.Next()) {
@ -71,6 +72,9 @@ int main(int argc, char *argv[]) {
int32 num_indexes = 0;
for (int32 k = offset; k < feats.NumRows(); k += n)
num_indexes++; // k is the index.
frames_in += feats.NumRows();
frames_out += num_indexes;
if (num_indexes == 0) {
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
@ -88,8 +92,9 @@ int main(int argc, char *argv[]) {
feat_writer.Write(utt, output);
num_done++;
}
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
KALDI_LOG << "Sub-sampled " << num_done << " feature matrices; " << num_err
<< " with errors.";
KALDI_LOG << "Reduced " << frames_in << " frames to " << frames_out;
return (num_done != 0 ? 0 : 1);
} catch(const std::exception &e) {
std::cerr << e.what();

Просмотреть файл

@ -6,7 +6,7 @@ include ../kaldi.mk
BINFILES = fgmm-global-acc-stats fgmm-global-sum-accs fgmm-global-est \
fgmm-global-merge fgmm-global-to-gmm fgmm-gselect fgmm-global-get-frame-likes \
fgmm-global-acc-stats-twofeats fgmm-global-copy fgmm-global-mixdown \
fgmm-global-gselect-to-post
fgmm-global-gselect-to-post fgmm-global-info
OBJFILES =

Просмотреть файл

@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
const char *usage =
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
"of vectors of floats.\n"
"of vectors of floats. If --average=true, prints out the average per-frame\n"
"log-likelihood for each utterance, as a single float.\n"
"Usage: fgmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
"<likes-out-wspecifier>\n"
"e.g.: fgmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
ParseOptions po(usage);
bool average = false;
std::string gselect_rspecifier;
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
"to limit the #Gaussians accessed on each frame.");
po.Register("average", &average, "If true, print out the average per-frame "
"log-likelihood as a single float per utterance.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
BaseFloatVectorWriter likes_writer(likes_wspecifier);
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
int32 num_done = 0, num_err = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
tot_like += likes.Sum();
tot_frames += file_frames;
likes_writer.Write(key, likes);
if (average)
average_likes_writer.Write(key, likes.Sum() / file_frames);
else
likes_writer.Write(key, likes);
num_done++;
}
KALDI_LOG << "Done " << num_done << " files; " << num_err

Просмотреть файл

@ -0,0 +1,60 @@
// fgmmbin/fgmm-global-info.cc
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/full-gmm.h"
#include "hmm/transition-model.h"
// Entry point of fgmm-global-info: reads a single full-covariance GMM from the
// rxfilename given as the only positional argument and prints its number of
// Gaussians and its feature dimension to standard output.
//
// Returns 0 on success and -1 if a std::exception is caught; exits with
// status 1 (after printing the usage message) when the number of positional
// arguments is not exactly one.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    // The program name shown here must match the binary built from this file
    // (fgmm-global-info), not the similarly named gmm-info tool.
    const char *usage =
        "Write to standard output various properties of full-covariance GMM model\n"
        "This is for a single mixture of Gaussians, e.g. as used for a UBM.\n"
        "Usage: fgmm-global-info [options] <gmm>\n"
        "e.g.:\n"
        " fgmm-global-info 1.ubm\n";

    ParseOptions po(usage);
    po.Read(argc, argv);

    // Exactly one positional argument (the model) is expected.
    if (po.NumArgs() != 1) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_rxfilename = po.GetArg(1);

    // Load the model from the given rxfilename.
    FullGmm gmm;
    ReadKaldiObject(model_rxfilename, &gmm);

    std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
    std::cout << "feature dimension " << gmm.Dim() << '\n';
    return 0;
  } catch(const std::exception &e) {
    // Report the error text on stderr and signal failure to the caller.
    std::cerr << e.what() << '\n';
    return -1;
  }
}

Просмотреть файл

@ -26,7 +26,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
gmm-global-info
OBJFILES =

Просмотреть файл

@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
const char *usage =
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
"of vectors of floats.\n"
"of vectors of floats. If --average=true, prints out the average per-frame\n"
"log-likelihood for each utterance, as a single float.\n"
"Usage: gmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
"<likes-out-wspecifier>\n"
"e.g.: gmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
ParseOptions po(usage);
bool average = false;
std::string gselect_rspecifier;
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
"to limit the #Gaussians accessed on each frame.");
po.Register("average", &average, "If true, print out the average per-frame "
"log-likelihood as a single float per utterance.");
po.Read(argc, argv);
if (po.NumArgs() != 3) {
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
BaseFloatVectorWriter likes_writer(likes_wspecifier);
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
int32 num_done = 0, num_err = 0;
for (; !feature_reader.Done(); feature_reader.Next()) {
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
tot_like += likes.Sum();
tot_frames += file_frames;
likes_writer.Write(key, likes);
if (average)
average_likes_writer.Write(key, likes.Sum() / file_frames);
else
likes_writer.Write(key, likes);
num_done++;
}
KALDI_LOG << "Done " << num_done << " files; " << num_err

Просмотреть файл

@ -0,0 +1,60 @@
// gmmbin/gmm-global-info.cc
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "base/kaldi-common.h"
#include "util/common-utils.h"
#include "gmm/diag-gmm.h"
#include "hmm/transition-model.h"
// Entry point of gmm-global-info: reads a single diagonal-covariance GMM from
// the rxfilename given as the only positional argument and prints its number
// of Gaussians and its feature dimension to standard output.
//
// Returns 0 on success and -1 if a std::exception is caught; exits with
// status 1 (after printing the usage message) when the number of positional
// arguments is not exactly one.
int main(int argc, char *argv[]) {
  try {
    using namespace kaldi;
    typedef kaldi::int32 int32;

    // The program name shown here must match the binary built from this file
    // (gmm-global-info); gmm-info is a different tool.
    const char *usage =
        "Write to standard output various properties of GMM model\n"
        "This is for a single diagonal GMM, e.g. as used for a UBM.\n"
        "Usage: gmm-global-info [options] <gmm>\n"
        "e.g.:\n"
        " gmm-global-info 1.dubm\n";

    ParseOptions po(usage);
    po.Read(argc, argv);

    // Exactly one positional argument (the model) is expected.
    if (po.NumArgs() != 1) {
      po.PrintUsage();
      exit(1);
    }

    std::string model_rxfilename = po.GetArg(1);

    // Load the model from the given rxfilename.
    DiagGmm gmm;
    ReadKaldiObject(model_rxfilename, &gmm);

    std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
    std::cout << "feature dimension " << gmm.Dim() << '\n';
    return 0;
  } catch(const std::exception &e) {
    // Report the error text on stderr and signal failure to the caller.
    std::cerr << e.what() << '\n';
    return -1;
  }
}

Просмотреть файл

@ -194,7 +194,7 @@ int main(int argc, char *argv[]) {
TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
if (next_num_gauss > cur_num_gauss) {
if (next_num_gauss > gmm.NumGauss()) {
KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
gmm.Split(next_num_gauss, 0.1);
cur_num_gauss = next_num_gauss;

Просмотреть файл

@ -61,6 +61,7 @@ int main(int argc, char *argv[]) {
<< trans_model.NumTransitionStates() << '\n';
std::cout << "feature dimension " << am_gmm.Dim() << '\n';
std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n';
return 0;
} catch(const std::exception &e) {
std::cerr << e.what() << '\n';
return -1;

Просмотреть файл

@ -60,14 +60,12 @@ int main(int argc, char** argv) {
"e.g.: ./online-audio-client 192.168.50.12 9012 'scp:wav_files.scp'\n\n";
ParseOptions po(usage);
bool htk = false, vtt = false, silent = false;
bool htk = false, vtt = false;
int32 channel = -1;
int32 packet_size = 1024;
po.Register("htk", &htk, "Save the result to an HTK label file");
po.Register("vtt", &vtt, "Save the result to a WebVTT subtitle file");
po.Register("silent", &silent,
"Don't print any output (except for errors)");
po.Register(
"channel", &channel,
"Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
@ -116,10 +114,8 @@ int main(int argc, char** argv) {
return -1;
}
if (!silent) {
std::cout << "Connected to KALDI server at host " << server_addr_str
<< " port " << server_port << std::endl;
}
KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str
<< " port " << server_port << std::endl;
char* pack_buffer = new char[packet_size];
@ -127,8 +123,7 @@ int main(int argc, char** argv) {
for (; !reader.Done(); reader.Next()) {
std::string wav_key = reader.Key();
if (!silent)
std::cout << "File: " << wav_key << std::endl;
KALDI_VLOG(2) << "File: " << wav_key << std::endl;
const WaveData &wav_data = reader.Value();
@ -260,10 +255,10 @@ int main(int argc, char** argv) {
}
}
if (!silent) {
{
float speed = total_input_dur / total_reco_dur;
std::cout << "Recognized (" << speed << "xRT): " << reco_output
<< std::endl;
KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output
<< std::endl;
}
if (htk) {

Просмотреть файл

@ -116,9 +116,6 @@ int32 main(int argc, char *argv[]) {
return 1;
}
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR<< "Invalid left/right context parameters!";
std::string model_rspecifier = po.GetArg(1), fst_rspecifier = po.GetArg(2),
word_syms_filename = po.GetArg(3), silence_phones_str = po.GetArg(4),
word_boundary_filename = po.GetOptArg(6), lda_mat_rspecifier = po
@ -210,10 +207,6 @@ int32 main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -85,10 +85,7 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 4)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string model_rxfilename = po.GetArg(1),
fst_rxfilename = po.GetArg(2),
word_syms_filename = po.GetArg(3),
@ -151,10 +148,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -102,9 +102,6 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 5)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string model_rxfilename = po.GetArg(1),
fst_rxfilename = po.GetArg(2),
@ -163,10 +160,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -83,10 +83,7 @@ int main(int argc, char *argv[]) {
po.PrintUsage();
return 1;
}
if (po.NumArgs() == 7)
if (left_context % kDeltaOrder != 0 || left_context != right_context)
KALDI_ERR << "Invalid left/right context parameters!";
std::string wav_rspecifier = po.GetArg(1),
model_rspecifier = po.GetArg(2),
fst_rspecifier = po.GetArg(3),
@ -181,10 +178,6 @@ int main(int argc, char *argv[]) {
} else {
DeltaFeaturesOptions opts;
opts.order = kDeltaOrder;
// Note from Dan: keeping the next statement for back-compatibility,
// but I don't think this is really the right way to set the window-size
// in the delta computation: it should be a separate config.
opts.window = left_context / 2;
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
}

Просмотреть файл

@ -43,15 +43,15 @@ namespace kaldi {
Here, we will still accept objects of some class C with an operator () that
takes no arguments. C may also have a constructor and a destructor that do
something (typically the constructor just sets variables, and the destructor
does some kind of output). We
have a templated class TaskSequencer<C> which is responsible for running
the jobs in parallel. It has a function Run() that will accept a new object
of class C; this will block until a thread is free, at which time it will
start running the operator () of the class. When classes are finished running,
the object will be deleted. Class TaskSequencer guarantees that the
destructors will be called sequentially (not in parallel) and in the same
order the objects were given to the Run() function, so that it is safe for
the destructor to have side effects such as outputting data.
does some kind of output). We have a templated class TaskSequencer<C> which
is responsible for running the jobs in parallel. It has a function Run()
that will accept a new object of class C; this will block until a thread is
free, at which time it will spawn a thread that starts running the operator
() of the class. When classes are finished running, the objects will be
deleted. Class TaskSequencer guarantees that the destructors will be called
sequentially (not in parallel) and in the same order the objects were given
to the Run() function, so that it is safe for the destructor to have side
effects such as outputting data.
Note: the destructor of TaskSequencer will wait for any remaining jobs that
are still running and will call the destructors.

Просмотреть файл

@ -406,8 +406,8 @@ class PipeInputImpl: public InputImplBase {
return false;
} else {
#ifndef _MSC_VER
fb_ = new PipebufType(f_, // Using his constructor won't lead the
// destructor close the stream.
fb_ = new PipebufType(f_, // Using this constructor won't lead the
// destructor to close the stream.
(binary ? std::ios_base::in|std::ios_base::binary
:std::ios_base::in));
KALDI_ASSERT(fb_ != NULL); // or would be alloc error.