зеркало из https://github.com/mozilla/kaldi.git
trunk: various small extensions to programs and cosmetic bug fixes; code cleanup in online/ directory.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@3240 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
0acf744859
Коммит
3bf9adc11e
|
@ -60,6 +60,7 @@ int main(int argc, char *argv[]) {
|
|||
BaseFloatMatrixWriter feat_writer(wspecifier);
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
int64 frames_in = 0, frames_out = 0;
|
||||
|
||||
// process all keys
|
||||
for (; !feat_reader.Done(); feat_reader.Next()) {
|
||||
|
@ -71,6 +72,9 @@ int main(int argc, char *argv[]) {
|
|||
int32 num_indexes = 0;
|
||||
for (int32 k = offset; k < feats.NumRows(); k += n)
|
||||
num_indexes++; // k is the index.
|
||||
|
||||
frames_in += feats.NumRows();
|
||||
frames_out += num_indexes;
|
||||
|
||||
if (num_indexes == 0) {
|
||||
KALDI_WARN << "For utterance " << utt << ", output would have no rows, "
|
||||
|
@ -88,8 +92,9 @@ int main(int argc, char *argv[]) {
|
|||
feat_writer.Write(utt, output);
|
||||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Sub-sampled " << num_done << " feats; " << num_err
|
||||
KALDI_LOG << "Sub-sampled " << num_done << " feature matrices; " << num_err
|
||||
<< " with errors.";
|
||||
KALDI_LOG << "Reduced " << frames_in << " frames to " << frames_out;
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
|
|
|
@ -6,7 +6,7 @@ include ../kaldi.mk
|
|||
BINFILES = fgmm-global-acc-stats fgmm-global-sum-accs fgmm-global-est \
|
||||
fgmm-global-merge fgmm-global-to-gmm fgmm-gselect fgmm-global-get-frame-likes \
|
||||
fgmm-global-acc-stats-twofeats fgmm-global-copy fgmm-global-mixdown \
|
||||
fgmm-global-gselect-to-post
|
||||
fgmm-global-gselect-to-post fgmm-global-info
|
||||
|
||||
|
||||
OBJFILES =
|
||||
|
|
|
@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
const char *usage =
|
||||
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
|
||||
"of vectors of floats.\n"
|
||||
"of vectors of floats. If --average=true, prints out the average per-frame\n"
|
||||
"log-likelihood for each utterance, as a single float.\n"
|
||||
"Usage: fgmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
|
||||
"<likes-out-wspecifier>\n"
|
||||
"e.g.: fgmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool average = false;
|
||||
std::string gselect_rspecifier;
|
||||
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
|
||||
"to limit the #Gaussians accessed on each frame.");
|
||||
po.Register("average", &average, "If true, print out the average per-frame "
|
||||
"log-likelihood as a single float per utterance.");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
|
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
|
||||
BaseFloatVectorWriter likes_writer(likes_wspecifier);
|
||||
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
|
||||
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
|
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
tot_like += likes.Sum();
|
||||
tot_frames += file_frames;
|
||||
likes_writer.Write(key, likes);
|
||||
if (average)
|
||||
average_likes_writer.Write(key, likes.Sum() / file_frames);
|
||||
else
|
||||
likes_writer.Write(key, likes);
|
||||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Done " << num_done << " files; " << num_err
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
// fgmmbin/fgmm-global-info.cc
|
||||
|
||||
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/full-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"Write to standard output various properties of full-covariance GMM model\n"
|
||||
"This is for a single mixture of Gaussians, e.g. as used for a UBM.\n"
|
||||
"Usage: gmm-info [options] <gmm>\n"
|
||||
"e.g.:\n"
|
||||
" fgmm-info 1.ubm\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 1) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1);
|
||||
|
||||
FullGmm gmm;
|
||||
ReadKaldiObject(model_rxfilename, &gmm);
|
||||
|
||||
std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
|
||||
std::cout << "feature dimension " << gmm.Dim() << '\n';
|
||||
return 0;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -26,7 +26,8 @@ BINFILES = gmm-init-mono gmm-est gmm-acc-stats-ali gmm-align \
|
|||
gmm-diff-accs gmm-basis-fmllr-accs gmm-basis-fmllr-training gmm-est-basis-fmllr \
|
||||
gmm-est-map gmm-adapt-map gmm-latgen-map gmm-basis-fmllr-accs-gpost \
|
||||
gmm-est-basis-fmllr-gpost gmm-latgen-tracking gmm-latgen-faster-parallel \
|
||||
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats
|
||||
gmm-est-fmllr-raw gmm-est-fmllr-raw-gpost gmm-global-init-from-feats \
|
||||
gmm-global-info
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -32,15 +32,19 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
const char *usage =
|
||||
"Print out per-frame log-likelihoods for each utterance, as an archive\n"
|
||||
"of vectors of floats.\n"
|
||||
"of vectors of floats. If --average=true, prints out the average per-frame\n"
|
||||
"log-likelihood for each utterance, as a single float.\n"
|
||||
"Usage: gmm-global-get-frame-likes [options] <model-in> <feature-rspecifier> "
|
||||
"<likes-out-wspecifier>\n"
|
||||
"e.g.: gmm-global-get-frame-likes 1.mdl scp:train.scp ark:1.likes\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
bool average = false;
|
||||
std::string gselect_rspecifier;
|
||||
po.Register("gselect", &gselect_rspecifier, "rspecifier for gselect objects "
|
||||
"to limit the #Gaussians accessed on each frame.");
|
||||
po.Register("average", &average, "If true, print out the average per-frame "
|
||||
"log-likelihood as a single float per utterance.");
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 3) {
|
||||
|
@ -63,7 +67,8 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
|
||||
RandomAccessInt32VectorVectorReader gselect_reader(gselect_rspecifier);
|
||||
BaseFloatVectorWriter likes_writer(likes_wspecifier);
|
||||
BaseFloatVectorWriter likes_writer(average ? "" : likes_wspecifier);
|
||||
BaseFloatWriter average_likes_writer(average ? likes_wspecifier : "");
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
for (; !feature_reader.Done(); feature_reader.Next()) {
|
||||
|
@ -104,7 +109,10 @@ int main(int argc, char *argv[]) {
|
|||
|
||||
tot_like += likes.Sum();
|
||||
tot_frames += file_frames;
|
||||
likes_writer.Write(key, likes);
|
||||
if (average)
|
||||
average_likes_writer.Write(key, likes.Sum() / file_frames);
|
||||
else
|
||||
likes_writer.Write(key, likes);
|
||||
num_done++;
|
||||
}
|
||||
KALDI_LOG << "Done " << num_done << " files; " << num_err
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
// gmmbin/gmm-global-info.cc
|
||||
|
||||
// Copyright 2012-2013 Johns Hopkins University (Author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "gmm/diag-gmm.h"
|
||||
#include "hmm/transition-model.h"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
typedef kaldi::int32 int32;
|
||||
|
||||
const char *usage =
|
||||
"Write to standard output various properties of GMM model\n"
|
||||
"This is for a single diagonal GMM, e.g. as used for a UBM.\n"
|
||||
"Usage: gmm-info [options] <gmm>\n"
|
||||
"e.g.:\n"
|
||||
" gmm-info 1.dubm\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 1) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1);
|
||||
|
||||
DiagGmm gmm;
|
||||
ReadKaldiObject(model_rxfilename, &gmm);
|
||||
|
||||
std::cout << "number of gaussians " << gmm.NumGauss() << '\n';
|
||||
std::cout << "feature dimension " << gmm.Dim() << '\n';
|
||||
return 0;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -194,7 +194,7 @@ int main(int argc, char *argv[]) {
|
|||
TrainOneIter(feats, gmm_opts, iter, num_threads, &gmm);
|
||||
|
||||
int32 next_num_gauss = std::min(num_gauss, cur_num_gauss + gauss_inc);
|
||||
if (next_num_gauss > cur_num_gauss) {
|
||||
if (next_num_gauss > gmm.NumGauss()) {
|
||||
KALDI_LOG << "Splitting to " << next_num_gauss << " Gaussians.";
|
||||
gmm.Split(next_num_gauss, 0.1);
|
||||
cur_num_gauss = next_num_gauss;
|
||||
|
|
|
@ -61,6 +61,7 @@ int main(int argc, char *argv[]) {
|
|||
<< trans_model.NumTransitionStates() << '\n';
|
||||
std::cout << "feature dimension " << am_gmm.Dim() << '\n';
|
||||
std::cout << "number of gaussians " << am_gmm.NumGauss() << '\n';
|
||||
return 0;
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what() << '\n';
|
||||
return -1;
|
||||
|
|
|
@ -60,14 +60,12 @@ int main(int argc, char** argv) {
|
|||
"e.g.: ./online-audio-client 192.168.50.12 9012 'scp:wav_files.scp'\n\n";
|
||||
ParseOptions po(usage);
|
||||
|
||||
bool htk = false, vtt = false, silent = false;
|
||||
bool htk = false, vtt = false;
|
||||
int32 channel = -1;
|
||||
int32 packet_size = 1024;
|
||||
|
||||
po.Register("htk", &htk, "Save the result to an HTK label file");
|
||||
po.Register("vtt", &vtt, "Save the result to a WebVTT subtitle file");
|
||||
po.Register("silent", &silent,
|
||||
"Don't print any output (except for errors)");
|
||||
po.Register(
|
||||
"channel", &channel,
|
||||
"Channel to extract (-1 -> expect mono, 0 -> left, 1 -> right)");
|
||||
|
@ -116,10 +114,8 @@ int main(int argc, char** argv) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (!silent) {
|
||||
std::cout << "Connected to KALDI server at host " << server_addr_str
|
||||
<< " port " << server_port << std::endl;
|
||||
}
|
||||
KALDI_VLOG(2) << "Connected to KALDI server at host " << server_addr_str
|
||||
<< " port " << server_port << std::endl;
|
||||
|
||||
char* pack_buffer = new char[packet_size];
|
||||
|
||||
|
@ -127,8 +123,7 @@ int main(int argc, char** argv) {
|
|||
for (; !reader.Done(); reader.Next()) {
|
||||
std::string wav_key = reader.Key();
|
||||
|
||||
if (!silent)
|
||||
std::cout << "File: " << wav_key << std::endl;
|
||||
KALDI_VLOG(2) << "File: " << wav_key << std::endl;
|
||||
|
||||
const WaveData &wav_data = reader.Value();
|
||||
|
||||
|
@ -260,10 +255,10 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
if (!silent) {
|
||||
{
|
||||
float speed = total_input_dur / total_reco_dur;
|
||||
std::cout << "Recognized (" << speed << "xRT): " << reco_output
|
||||
<< std::endl;
|
||||
KALDI_VLOG(2) << "Recognized (" << speed << "xRT): " << reco_output
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
if (htk) {
|
||||
|
|
|
@ -116,9 +116,6 @@ int32 main(int argc, char *argv[]) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (left_context % kDeltaOrder != 0 || left_context != right_context)
|
||||
KALDI_ERR<< "Invalid left/right context parameters!";
|
||||
|
||||
std::string model_rspecifier = po.GetArg(1), fst_rspecifier = po.GetArg(2),
|
||||
word_syms_filename = po.GetArg(3), silence_phones_str = po.GetArg(4),
|
||||
word_boundary_filename = po.GetOptArg(6), lda_mat_rspecifier = po
|
||||
|
@ -210,10 +207,6 @@ int32 main(int argc, char *argv[]) {
|
|||
} else {
|
||||
DeltaFeaturesOptions opts;
|
||||
opts.order = kDeltaOrder;
|
||||
// Note from Dan: keeping the next statement for back-compatibility,
|
||||
// but I don't think this is really the right way to set the window-size
|
||||
// in the delta computation: it should be a separate config.
|
||||
opts.window = left_context / 2;
|
||||
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
|
||||
}
|
||||
|
||||
|
|
|
@ -85,10 +85,7 @@ int main(int argc, char *argv[]) {
|
|||
po.PrintUsage();
|
||||
return 1;
|
||||
}
|
||||
if (po.NumArgs() == 4)
|
||||
if (left_context % kDeltaOrder != 0 || left_context != right_context)
|
||||
KALDI_ERR << "Invalid left/right context parameters!";
|
||||
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1),
|
||||
fst_rxfilename = po.GetArg(2),
|
||||
word_syms_filename = po.GetArg(3),
|
||||
|
@ -151,10 +148,6 @@ int main(int argc, char *argv[]) {
|
|||
} else {
|
||||
DeltaFeaturesOptions opts;
|
||||
opts.order = kDeltaOrder;
|
||||
// Note from Dan: keeping the next statement for back-compatibility,
|
||||
// but I don't think this is really the right way to set the window-size
|
||||
// in the delta computation: it should be a separate config.
|
||||
opts.window = left_context / 2;
|
||||
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
|
||||
}
|
||||
|
||||
|
|
|
@ -102,9 +102,6 @@ int main(int argc, char *argv[]) {
|
|||
po.PrintUsage();
|
||||
return 1;
|
||||
}
|
||||
if (po.NumArgs() == 5)
|
||||
if (left_context % kDeltaOrder != 0 || left_context != right_context)
|
||||
KALDI_ERR << "Invalid left/right context parameters!";
|
||||
|
||||
std::string model_rxfilename = po.GetArg(1),
|
||||
fst_rxfilename = po.GetArg(2),
|
||||
|
@ -163,10 +160,6 @@ int main(int argc, char *argv[]) {
|
|||
} else {
|
||||
DeltaFeaturesOptions opts;
|
||||
opts.order = kDeltaOrder;
|
||||
// Note from Dan: keeping the next statement for back-compatibility,
|
||||
// but I don't think this is really the right way to set the window-size
|
||||
// in the delta computation: it should be a separate config.
|
||||
opts.window = left_context / 2;
|
||||
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
|
||||
}
|
||||
|
||||
|
|
|
@ -83,10 +83,7 @@ int main(int argc, char *argv[]) {
|
|||
po.PrintUsage();
|
||||
return 1;
|
||||
}
|
||||
if (po.NumArgs() == 7)
|
||||
if (left_context % kDeltaOrder != 0 || left_context != right_context)
|
||||
KALDI_ERR << "Invalid left/right context parameters!";
|
||||
|
||||
|
||||
std::string wav_rspecifier = po.GetArg(1),
|
||||
model_rspecifier = po.GetArg(2),
|
||||
fst_rspecifier = po.GetArg(3),
|
||||
|
@ -181,10 +178,6 @@ int main(int argc, char *argv[]) {
|
|||
} else {
|
||||
DeltaFeaturesOptions opts;
|
||||
opts.order = kDeltaOrder;
|
||||
// Note from Dan: keeping the next statement for back-compatibility,
|
||||
// but I don't think this is really the right way to set the window-size
|
||||
// in the delta computation: it should be a separate config.
|
||||
opts.window = left_context / 2;
|
||||
feat_transform = new OnlineDeltaInput(opts, &cmn_input);
|
||||
}
|
||||
|
||||
|
|
|
@ -43,15 +43,15 @@ namespace kaldi {
|
|||
Here, we will still accept objects of some class C with an operator () that
|
||||
takes no arguments. C may also have a constructor and a destructor that do
|
||||
something (typically the constructor just sets variables, and the destructor
|
||||
does some kind of output). We
|
||||
have a templated class TaskSequencer<C> which is responsible for running
|
||||
the jobs in parallel. It has a function Run() that will accept a new object
|
||||
of class C; this will block until a thread is free, at which time it will
|
||||
start running the operator () of the class. When classes are finished running,
|
||||
the object will be deleted. Class TaskSequencer guarantees that the
|
||||
destructors will be called sequentially (not in parallel) and in the same
|
||||
order the objects were given to the Run() function, so that it is safe for
|
||||
the destructor to have side effects such as outputting data.
|
||||
does some kind of output). We have a templated class TaskSequencer<C> which
|
||||
is responsible for running the jobs in parallel. It has a function Run()
|
||||
that will accept a new object of class C; this will block until a thread is
|
||||
free, at which time it will spawn a thread that starts running the operator
|
||||
() of the class. When classes are finished running, the objects will be
|
||||
deleted. Class TaskSequencer guarantees that the destructors will be called
|
||||
sequentially (not in parallel) and in the same order the objects were given
|
||||
to the Run() function, so that it is safe for the destructor to have side
|
||||
effects such as outputting data.
|
||||
|
||||
Note: the destructor of TaskSequencer will wait for any remaining jobs that
|
||||
are still running and will call the destructors.
|
||||
|
|
|
@ -406,8 +406,8 @@ class PipeInputImpl: public InputImplBase {
|
|||
return false;
|
||||
} else {
|
||||
#ifndef _MSC_VER
|
||||
fb_ = new PipebufType(f_, // Using his constructor won't lead the
|
||||
// destructor close the stream.
|
||||
fb_ = new PipebufType(f_, // Using this constructor won't lead the
|
||||
// destructor to close the stream.
|
||||
(binary ? std::ios_base::in|std::ios_base::binary
|
||||
:std::ios_base::in));
|
||||
KALDI_ASSERT(fb_ != NULL); // or would be alloc error.
|
||||
|
|
Загрузка…
Ссылка в новой задаче