Mostly whitespace-related fixes to the online-nnet3 decoding code (should have no functional effect)

This commit is contained in:
Dan Povey 2016-04-28 11:26:47 -04:00
Родитель 6bd89dff99
Коммит 1475415074
4 изменённых файлов: 24 добавлений и 25 удалений

Просмотреть файл

@@ -25,10 +25,10 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
nnet-diagnostics.o nnet-combine.o nnet-am-decodable-simple.o \
nnet-optimize-utils.o nnet-chain-example.o \
nnet-chain-training.o nnet-chain-diagnostics.o nnet-chain-combine.o \
discriminative-supervision.o nnet-discriminative-example.o \
nnet-discriminative-diagnostics.o \
discriminative-training.o nnet-discriminative-training.o \
online-nnet3-decodable-simple.o
discriminative-supervision.o nnet-discriminative-example.o \
nnet-discriminative-diagnostics.o \
discriminative-training.o nnet-discriminative-training.o \
online-nnet3-decodable-simple.o
LIBNAME = kaldi-nnet3

Просмотреть файл

@@ -83,7 +83,7 @@ int32 DecodableNnet3SimpleOnline::NumSubsampledFrames(int32 num_frames) const {
void DecodableNnet3SimpleOnline::ComputeForFrame(int32 subsampled_frame) {
int32 features_ready = features_->NumFramesReady();
bool input_finished = features_->IsLastFrame(features_ready - 1);
bool input_finished = features_->IsLastFrame(features_ready - 1);
KALDI_ASSERT(subsampled_frame >= 0);
if (subsampled_frame >= begin_frame_ &&
subsampled_frame < begin_frame_ + scaled_loglikes_.NumRows())
@@ -121,13 +121,13 @@ void DecodableNnet3SimpleOnline::ComputeForFrame(int32 subsampled_frame) {
int32 num_subsampled_frames = NumSubsampledFrames(input_frame_end - input_frame_begin -
left_context_ - right_context_);
// I'm not checking if the input feature vector is ok.
// It should be done, but I'm not sure if it is the best place.
// It should be done, but I'm not sure if it is the best place.
// Maybe a new "nnet3 feature pipeline"?
int32 mfcc_dim = am_nnet_.GetNnet().InputDim("input");
int32 ivector_dim = am_nnet_.GetNnet().InputDim("ivector");
// MFCCs in the left chunk
SubMatrix<BaseFloat> mfcc_mat(features.ColRange(0, mfcc_dim));
Vector<BaseFloat> input_ivector;
if(ivector_dim != -1){
// iVectors in the right chunk
@@ -135,9 +135,9 @@ void DecodableNnet3SimpleOnline::ComputeForFrame(int32 subsampled_frame) {
SubMatrix<BaseFloat> ivector_mat(features.ColRange(mfcc_dim, ivector_dim));
// Get last ivector... not sure if GetCurrentIvector is needed in the online context
// I think it should work fine just getting the last row for testing
input_ivector = ivector_mat.Row(ivector_mat.NumRows() - 1);
input_ivector = ivector_mat.Row(ivector_mat.NumRows() - 1);
}
DoNnetComputation(input_frame_begin,
mfcc_mat, input_ivector, subsampled_frame * subsample, num_subsampled_frames);

Просмотреть файл

@@ -43,7 +43,7 @@ struct DecodableNnet3OnlineOptions {
int32 max_nnet_batch_size;
NnetComputeOptions compute_config;
NnetOptimizeOptions optimize_config;
DecodableNnet3OnlineOptions():
frame_subsampling_factor(1),
acoustic_scale(0.1),
@@ -73,7 +73,7 @@ struct DecodableNnet3OnlineOptions {
// register the compute options with the prefix "computation".
ParseOptions compute_opts("computation", opts);
compute_config.Register(&compute_opts);
}
};
@@ -87,21 +87,21 @@ struct DecodableNnet3OnlineOptions {
class DecodableNnet3SimpleOnline: public DecodableInterface {
public:
DecodableNnet3SimpleOnline(const AmNnetSimple &am_nnet,
const TransitionModel &trans_model,
const DecodableNnet3OnlineOptions &opts,
OnlineFeatureInterface *input_feats);
const TransitionModel &trans_model,
const DecodableNnet3OnlineOptions &opts,
OnlineFeatureInterface *input_feats);
/// Returns the scaled log likelihood
virtual BaseFloat LogLikelihood(int32 frame, int32 index);
virtual bool IsLastFrame(int32 frame) const;
virtual int32 NumFramesReady() const;
virtual int32 NumFramesReady() const;
/// Indices are one-based! This is for compatibility with OpenFst.
virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
private:
/// If the neural-network outputs for this frame are not cached, it computes
@@ -109,7 +109,7 @@ class DecodableNnet3SimpleOnline: public DecodableInterface {
void ComputeForFrame(int32 frame);
// corrects number of frames by frame_subsampling_factor;
int32 NumSubsampledFrames(int32) const;
void DoNnetComputation(
int32 input_t_start,
const MatrixBase<BaseFloat> &input_feats,
@@ -129,11 +129,11 @@ class DecodableNnet3SimpleOnline: public DecodableInterface {
int32 right_context_; // Right context of the network (cached here)
int32 num_pdfs_; // Number of pdfs, equals output-dim of the network (cached
// here)
int32 begin_frame_; // First frame for which scaled_loglikes_ is valid
// (i.e. the first frame of the batch of frames for
// which we've computed the output).
// scaled_loglikes_ contains the neural network pseudo-likelihoods: the log of
// (prob divided by the prior), scaled by opts.acoustic_scale). We may
// compute this using the GPU, but we transfer it back to the system memory

Просмотреть файл

@@ -18,12 +18,11 @@ OBJFILES =
TESTFILES =
ADDLIBS = ../online2/kaldi-online2.a ../ivector/kaldi-ivector.a \
../nnet2/kaldi-nnet2.a ../lat/kaldi-lat.a \
../nnet2/kaldi-nnet2.a ../nnet3/kaldi-nnet3.a ../lat/kaldi-lat.a \
../decoder/kaldi-decoder.a ../cudamatrix/kaldi-cudamatrix.a \
../feat/kaldi-feat.a ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
../hmm/kaldi-hmm.a ../tree/kaldi-tree.a \
../matrix/kaldi-matrix.a ../fstext/kaldi-fstext.a \
../util/kaldi-util.a ../thread/kaldi-thread.a ../base/kaldi-base.a \
../nnet3/kaldi-nnet3.a
include ../makefiles/default_rules.mk