зеркало из https://github.com/mozilla/kaldi.git
trunk: committing tools to detect sinusoid tones in signals. Will be used as input for later DTMF and dialtone detection.
git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4995 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
This commit is contained in:
Родитель
6e8a324f88
Коммит
36dffb1ba2
|
@ -796,6 +796,7 @@
|
|||
/src/featbin/compute-and-process-kaldi-pitch-feats
|
||||
/src/featbin/modify-cmvn-stats
|
||||
/src/featbin/wav-copy
|
||||
/src/featbin/detect-sinusoids
|
||||
/src/fstext/push-special-test
|
||||
/src/gmmbin/gmm-acc-mllt-global
|
||||
/src/gmmbin/gmm-est-fmllr-global
|
||||
|
@ -850,6 +851,7 @@
|
|||
/src/transform/fmllr-raw-test
|
||||
/src/util/simple-options-test
|
||||
/src/feat/resample-test
|
||||
/src/feat/sinusoid-detection-test
|
||||
/src/feat/online-feature-test
|
||||
/src/probe/exp-test
|
||||
/src/fstext/prune-special-test
|
||||
|
|
|
@ -6,11 +6,11 @@ include ../kaldi.mk
|
|||
|
||||
TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \
|
||||
feature-functions-test pitch-functions-test feature-sdc-test \
|
||||
resample-test online-feature-test
|
||||
resample-test online-feature-test sinusoid-detection-test
|
||||
|
||||
OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \
|
||||
feature-spectrogram.o mel-computations.o wave-reader.o \
|
||||
pitch-functions.o resample.o online-feature.o
|
||||
pitch-functions.o resample.o online-feature.o sinusoid-detection.o
|
||||
|
||||
LIBNAME = kaldi-feat
|
||||
|
||||
|
|
|
@ -0,0 +1,450 @@
|
|||
// feat/sinusoid-detection-test.cc
|
||||
|
||||
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "base/kaldi-math.h"
|
||||
#include "feat/sinusoid-detection.h"
|
||||
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
// this function is used for testing AddSinusoid.
|
||||
void AddSinusoidSimple(BaseFloat samp_freq,
|
||||
const Sinusoid &sinusoid,
|
||||
VectorBase<BaseFloat> *signal) {
|
||||
for (int32 i = 0; i < signal->Dim(); i++)
|
||||
(*signal)(i) += sinusoid.amplitude *
|
||||
cos(M_2PI * sinusoid.freq / samp_freq * i + sinusoid.phase);
|
||||
}
|
||||
|
||||
void UnitTestAddSinusoid() {
|
||||
BaseFloat samp_freq = 560.1;
|
||||
int32 length = 511;
|
||||
Vector<BaseFloat> orig(length);
|
||||
orig.SetRandn();
|
||||
Vector<BaseFloat> orig2(orig);
|
||||
Sinusoid sinusoid(49.20, 2.111, 1.5);
|
||||
|
||||
AddSinusoid(samp_freq, sinusoid, &orig);
|
||||
AddSinusoidSimple(samp_freq, sinusoid, &orig2);
|
||||
AssertEqual(orig, orig2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void UnitTestQuadraticMaximizeEqualSpaced() {
|
||||
for (int32 n = 0; n < 50; n++) {
|
||||
|
||||
// Let the cubic function be y = a x^2 + b x + c, and let
|
||||
// y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we
|
||||
// want it evaluated at arbitrary x.
|
||||
|
||||
BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform();
|
||||
BaseFloat y[3];
|
||||
for (int32 i = 0; i < 3; i++) {
|
||||
BaseFloat x = i;
|
||||
y[i] = a * x * x + b * x + c;
|
||||
}
|
||||
BaseFloat x_max, y_max;
|
||||
SinusoidDetector::QuadraticMaximizeEqualSpaced(y[0], y[1], y[2], &x_max, &y_max);
|
||||
|
||||
for (int32 m = 0; m <= 10; m++) {
|
||||
BaseFloat x_test = 0.1 * m;
|
||||
BaseFloat y_test = a * x_test * x_test + b * x_test + c;
|
||||
KALDI_ASSERT(y_test <= y_max + 1.0e-05);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void UnitTestQuadraticMaximize() {
|
||||
for (int32 n = 0; n < 50; n++) {
|
||||
|
||||
// Let the cubic function be y = a x^2 + b x + c, and let
|
||||
// y0,y1,y2 be its values evaluated at x = [0, 1, 2]; we
|
||||
// want it evaluated at arbitrary x.
|
||||
|
||||
BaseFloat a = -0.5 + RandUniform(), b = -0.5 + RandUniform(), c = -0.5 + RandUniform(),
|
||||
x = 0.1 + RandUniform() * 0.98;
|
||||
BaseFloat y[3];
|
||||
for (int32 i = 0; i < 3; i++) {
|
||||
BaseFloat this_x;
|
||||
if (i == 0) { this_x = 0.0; }
|
||||
else if (i == 1) { this_x = x; }
|
||||
else { this_x = 1.0; }
|
||||
y[i] = a * this_x * this_x + b * this_x + c;
|
||||
}
|
||||
BaseFloat x_max, y_max;
|
||||
SinusoidDetector::QuadraticMaximize(x, y[0], y[1], y[2], &x_max, &y_max);
|
||||
|
||||
for (int32 m = 0; m <= 10; m++) {
|
||||
BaseFloat x_test = 0.1 * m;
|
||||
BaseFloat y_test = a * x_test * x_test + b * x_test + c;
|
||||
if (n < 100 && m == 5) {
|
||||
KALDI_VLOG(2) << "Checking y_test <= y_max: "
|
||||
<< y_test << " <= " << y_max << " [x_max = "
|
||||
<< x_max << "]";
|
||||
KALDI_ASSERT(y_test <= y_max + 1.0e-05);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void UnitTestSinusoidDetector() {
|
||||
BaseFloat samp_freq = 4000 + (rand() % 2000);
|
||||
int32 num_samp = 128 + rand() % 400;
|
||||
SinusoidDetector detector(samp_freq, num_samp);
|
||||
|
||||
for (int32 i = 0; i < 40; i++) {
|
||||
|
||||
Vector<BaseFloat> signal(num_samp);
|
||||
|
||||
// Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0);
|
||||
// Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5);
|
||||
|
||||
BaseFloat nyquist = samp_freq * 0.5;
|
||||
BaseFloat freq = nyquist * RandUniform();
|
||||
BaseFloat amplitude = RandUniform();
|
||||
BaseFloat phase = M_2PI * RandUniform();
|
||||
|
||||
Sinusoid ref_sinusoid(amplitude, freq, phase);
|
||||
|
||||
AddSinusoid(samp_freq, ref_sinusoid, &signal);
|
||||
|
||||
|
||||
BaseFloat orig_energy = VecVec(signal, signal);
|
||||
KALDI_LOG << "Real frequency is " << freq << ", amplitude "
|
||||
<< amplitude << ", phase " << phase << ", samp-freq "
|
||||
<< samp_freq;
|
||||
KALDI_LOG << "Total energy of signal (with sinusoid) is " << orig_energy;
|
||||
|
||||
Sinusoid sinusoid;
|
||||
BaseFloat min_energy = 0.0;
|
||||
BaseFloat energy = detector.DetectSinusoid(min_energy,
|
||||
signal, &sinusoid);
|
||||
|
||||
Vector<BaseFloat> new_signal(signal);
|
||||
sinusoid.phase += M_PI; // Reverse the phase.
|
||||
AddSinusoid(samp_freq, sinusoid, &new_signal);
|
||||
BaseFloat delta_energy = VecVec(signal, signal) -
|
||||
VecVec(new_signal, new_signal);
|
||||
KALDI_LOG << "Projected delta energy = " << energy
|
||||
<< " and observed was " << delta_energy;
|
||||
|
||||
BaseFloat remaining_energy = VecVec(new_signal, new_signal);
|
||||
if (remaining_energy > 0.01 * orig_energy) {
|
||||
KALDI_WARN << "Energy remaining is " << remaining_energy
|
||||
<< " vs. original " << orig_energy;
|
||||
BaseFloat relative_freq = freq / nyquist;
|
||||
BaseFloat inv_num_samp = 1.0 / num_samp;
|
||||
// We only tolerate this kind of error for very ridiculous frequency,
|
||||
// close to zero or the Nyquist.
|
||||
KALDI_ASSERT(relative_freq < inv_num_samp ||
|
||||
relative_freq > 1.0 - inv_num_samp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// as UnitTestSinusoidDetector(), but doing it in noisy signals.
|
||||
void UnitTestSinusoidDetectorNoisy() {
|
||||
BaseFloat samp_freq = 4000 + (rand() % 2000);
|
||||
int32 num_samp = 128 + rand() % 400;
|
||||
SinusoidDetector detector(samp_freq, num_samp);
|
||||
|
||||
for (int32 i = 0; i < 40; i++) {
|
||||
|
||||
Vector<BaseFloat> signal(num_samp);
|
||||
|
||||
signal.SetRandn();
|
||||
|
||||
BaseFloat rand_energy = VecVec(signal, signal);
|
||||
|
||||
// Sinusoid ref_sinusoid(1.3, 312.5, M_PI * 0.0);
|
||||
// Sinusoid ref_sinusoid(1.3, 324.125, M_PI * 0.5);
|
||||
|
||||
BaseFloat nyquist = samp_freq * 0.5;
|
||||
BaseFloat freq = nyquist * RandUniform();
|
||||
BaseFloat amplitude = 10.0 * RandUniform();
|
||||
BaseFloat phase = M_2PI * RandUniform();
|
||||
|
||||
Sinusoid ref_sinusoid(amplitude, freq, phase);
|
||||
|
||||
AddSinusoid(samp_freq, ref_sinusoid, &signal);
|
||||
|
||||
BaseFloat tot_energy = VecVec(signal, signal);
|
||||
|
||||
KALDI_LOG << "Real frequency is " << freq << ", amplitude "
|
||||
<< amplitude << ", phase " << phase << ", samp-freq "
|
||||
<< samp_freq;
|
||||
KALDI_LOG << "Total energy of signal (with noise + sinusoid) is " << tot_energy;
|
||||
|
||||
Sinusoid sinusoid;
|
||||
BaseFloat min_energy = 0.0;
|
||||
BaseFloat energy = detector.DetectSinusoid(min_energy,
|
||||
signal, &sinusoid);
|
||||
|
||||
Vector<BaseFloat> new_signal(signal);
|
||||
sinusoid.phase += M_PI; // reverse the phase.
|
||||
AddSinusoid(samp_freq, sinusoid, &new_signal);
|
||||
BaseFloat delta_energy = VecVec(signal, signal) -
|
||||
VecVec(new_signal, new_signal);
|
||||
KALDI_LOG << "Projected delta energy = " << energy
|
||||
<< " and observed was " << delta_energy;
|
||||
|
||||
BaseFloat min_energy_diff = 0.99 * (tot_energy - rand_energy);
|
||||
|
||||
if (delta_energy < min_energy_diff) {
|
||||
KALDI_WARN << "Energy reduction is " << delta_energy
|
||||
<< " vs. expected " << (tot_energy - rand_energy);
|
||||
BaseFloat relative_freq = freq / nyquist;
|
||||
BaseFloat inv_num_samp = 1.0 / num_samp;
|
||||
// We only tolerate this kind of error for very ridiculous frequency,
|
||||
// close to zero or the Nyquist.
|
||||
KALDI_ASSERT(relative_freq < inv_num_samp ||
|
||||
relative_freq > 1.0 - inv_num_samp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Adds a sine tone to *signal.  The tone's frequency is "base_freq" perturbed
// by a random relative error drawn from (2 * RandUniform() - 1) * tolerance,
// and its amplitude is "gain".  Sample i receives
//   gain * sin(2 pi * i * freq / samp_freq).
void AddFreqToSignal(BaseFloat base_freq,
                     BaseFloat samp_freq,
                     BaseFloat tolerance,
                     BaseFloat gain,
                     VectorBase<BaseFloat> *signal) {
  BaseFloat error_scale = (2 * RandUniform() - 1) * tolerance;
  BaseFloat freq = base_freq * (1.0 + error_scale);
  KALDI_VLOG(3) << "base-freq = " << base_freq << ", factor = " << error_scale;
  // Use the M_2PI constant (as the rest of this file does) instead of a
  // truncated literal for pi, and compute the per-sample phase increment
  // once, outside the loop.
  const double radians_per_sample = M_2PI * freq / samp_freq;
  const int32 dim = signal->Dim();
  for (int32 i = 0; i < dim; i++)
    (*signal)(i) += gain * sin(i * radians_per_sample);
}
|
||||
|
||||
|
||||
// Generates a random test signal made of an odd number of consecutive
// "events".  Every event is a segment of random noise; odd-numbered events
// additionally contain either a single dial-tone-like frequency or a pair of
// DTMF frequencies.  The segments are concatenated into *signal.
//
// *ref_output is resized to one entry per event: for a dial-tone event freq1
// is set, for a DTMF event freq1 and freq2 are set, and for noise-only events
// the entry is left as resize() created it.
void GenerateDtmfTestCase(
    BaseFloat sampling_rate,
    Vector<BaseFloat> *signal,
    std::vector<MultiSinusoidDetectorOutput> *ref_output) {
  // the "ref_output" should correlate with the first of each run of frames with the same label.

  BaseFloat min_duration_secs = 0.04;  // min duration of dtmf or non-tone segments.
  BaseFloat min_dialtone_duration_secs = 0.1;
  BaseFloat frequency_tolerance = 0.035;
  BaseFloat dialtone_frequency_tolerance = 0.4 * (440.0 - 425.0) / 440.0;

  int32 num_events = 2 * (5 + rand() % 5) + 1;  // odd number.
  int32 tot_signal_dim = 0;

  ref_output->resize(num_events);
  std::vector<Vector<BaseFloat> > all_signals(num_events);
  for (int32 i = 0; i < num_events; i++) {
    MultiSinusoidDetectorOutput &this_output = (*ref_output)[i];
    Vector<BaseFloat> &this_signal = all_signals[i];
    BaseFloat duration_secs = min_duration_secs * (1 + rand() % 3);
    // Truncation towards zero is intended here.
    int32 num_samp = static_cast<int32>(sampling_rate * duration_secs);
    tot_signal_dim += num_samp;

    this_signal.Resize(num_samp);
    this_signal.SetRandn();

    if (i % 2 == 0)
      continue;  // even-numbered events are noise only.

    if (rand() % 2 == 0 && duration_secs >= min_dialtone_duration_secs) {
      // dialtone: a single frequency, chosen from 350, 440 or 425.
      BaseFloat freq;
      if (rand() % 3 == 0) { freq = 350; }
      else if (rand() % 2 == 0) { freq = 440; }
      else { freq = 425; }
      BaseFloat gain = 10.0 * (1.0 + rand() % 2);
      AddFreqToSignal(freq, sampling_rate, dialtone_frequency_tolerance,
                      gain, &(this_signal));
      this_output.freq1 = freq;
    } else {
      // dtmf.  use a subset of tones as examples; the key that each
      // frequency pair corresponds to is given in the comments.
      BaseFloat freq1, freq2;
      if (rand() % 4 == 0) {
        freq1 = 852; freq2 = 1336;  // key '8'
      } else if (rand() % 3 == 0) {
        freq1 = 941; freq2 = 1336;  // key '0'
      } else if (rand() % 2 == 0) {
        freq1 = 941; freq2 = 1477;  // key '#'
      } else {
        freq1 = 697; freq2 = 1209;  // key '1'
      }
      // The second tone's gain differs from the first by a random factor
      // between 0.8 and 1.2.
      BaseFloat base_gain = 10.0 * (1.0 + (rand() % 3)),
          gain_factor = 1.0 + 0.1 * (-2 + rand() % 5),
          gain1 = base_gain, gain2 = gain_factor * base_gain;
      AddFreqToSignal(freq1, sampling_rate, frequency_tolerance, gain1,
                      &(this_signal));
      AddFreqToSignal(freq2, sampling_rate, frequency_tolerance, gain2,
                      &(this_signal));
      this_output.freq1 = freq1;
      this_output.freq2 = freq2;
    }
  }

  // Concatenate the per-event segments into the output signal.
  signal->Resize(tot_signal_dim);
  int32 signal_offset = 0;
  for (int32 i = 0; i < num_events; i++) {
    int32 this_dim = all_signals[i].Dim();
    signal->Range(signal_offset, this_dim).CopyFromVec(all_signals[i]);
    signal_offset += this_dim;
  }
}
|
||||
|
||||
|
||||
/*
|
||||
|
||||
// Just a basic test to check that it produces output.
|
||||
|
||||
void UnitTestToneDetection() {
|
||||
BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000;
|
||||
ToneDetectionConfig config;
|
||||
|
||||
int32 num_frames = 100 + (rand() % 100);
|
||||
int32 frame_length = static_cast<int32>(samp_freq * config.frame_length_secs);
|
||||
|
||||
int32 num_samples = frame_length * num_frames + rand() % frame_length;
|
||||
Vector<BaseFloat> signal(num_samples);
|
||||
signal.SetRandn();
|
||||
|
||||
ToneDetector tone_detector(config, samp_freq);
|
||||
|
||||
int32 signal_offset = 0;
|
||||
|
||||
std::vector<ToneDetectorOutput*> tone_detector_output;
|
||||
|
||||
while (signal_offset < num_samples) {
|
||||
int32 signal_remaining = num_samples - signal_offset,
|
||||
chunk_size = std::min<int32>((rand() % 200) + 100,
|
||||
signal_remaining);
|
||||
SubVector<BaseFloat> signal_part(signal, signal_offset, chunk_size);
|
||||
tone_detector.AcceptWaveform(signal_part);
|
||||
signal_offset += chunk_size;
|
||||
|
||||
if (signal_offset == num_samples)
|
||||
tone_detector.WaveformFinished();
|
||||
while (!tone_detector.Done() &&
|
||||
(rand() % 2 == 0 || signal_offset == num_samples)) {
|
||||
ToneDetectorOutput *output = new ToneDetectorOutput();
|
||||
tone_detector.GetNextFrame(output);
|
||||
tone_detector_output.push_back(output);
|
||||
}
|
||||
}
|
||||
KALDI_ASSERT(signal_offset == num_samples);
|
||||
|
||||
Vector<BaseFloat> signal2(signal.Dim());
|
||||
signal_offset = 0;
|
||||
for (int32 i = 0; i < tone_detector_output.size(); i++) {
|
||||
ToneDetectorOutput *output = tone_detector_output[i];
|
||||
signal2.Range(signal_offset,
|
||||
output->signal.Dim()).CopyFromVec(output->signal);
|
||||
signal_offset += output->signal.Dim();
|
||||
if (output->frame_type != 'n') {
|
||||
KALDI_ERR << "Frame " << i << " badly classified, should be 'n', is: "
|
||||
<< output->frame_type;
|
||||
}
|
||||
delete output;
|
||||
}
|
||||
KALDI_ASSERT(signal_offset == num_samples &&
|
||||
signal.ApproxEqual(signal2, 1.0e-10));
|
||||
|
||||
}
|
||||
|
||||
std::ostringstream & operator << (std::ostringstream &ostr,
|
||||
const ToneDetectorOutput &output) {
|
||||
ostr << output.frame_type;
|
||||
if (output.frame_type == 'd')
|
||||
ostr << output.dialtone_freq;
|
||||
ostr << ' ';
|
||||
return ostr;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
|
||||
// This version of the unit-test generates a signal that has tones in it, and
|
||||
// runs the detection on that signal.
|
||||
void UnitTestToneDetection2() {
|
||||
BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000;
|
||||
Vector<BaseFloat> signal;
|
||||
std::vector<MultiSinusoidDetectorOutput> ref_output;
|
||||
GenerateDtmfTestCase(samp_freq, &signal, &ref_output);
|
||||
|
||||
MultiSinusoidDetectorConfig config;
|
||||
|
||||
int32 num_samples = signal.Dim();
|
||||
KALDI_ASSERT(num_samples > 0);
|
||||
|
||||
MultiSinusoidDetector multi_sinusoid_detector(config, samp_freq);
|
||||
|
||||
int32 signal_offset = 0;
|
||||
|
||||
std::vector<MultiSinusoidDetectorOutput*> multi_sinusoid_detector_output;
|
||||
|
||||
while (signal_offset < num_samples) {
|
||||
int32 signal_remaining = num_samples - signal_offset,
|
||||
chunk_size = std::min<int32>((rand() % 200) + 100,
|
||||
signal_remaining);
|
||||
SubVector<BaseFloat> signal_part(signal, signal_offset, chunk_size);
|
||||
multi_sinusoid_detector.AcceptWaveform(signal_part);
|
||||
signal_offset += chunk_size;
|
||||
|
||||
if (signal_offset == num_samples)
|
||||
multi_sinusoid_detector.WaveformFinished();
|
||||
while (!multi_sinusoid_detector.Done() &&
|
||||
(rand() % 2 == 0 || signal_offset == num_samples)) {
|
||||
MultiSinusoidDetectorOutput *output = new MultiSinusoidDetectorOutput();
|
||||
multi_sinusoid_detector.GetNextFrame(output);
|
||||
multi_sinusoid_detector_output.push_back(output);
|
||||
}
|
||||
}
|
||||
KALDI_ASSERT(signal_offset == num_samples);
|
||||
|
||||
// std::ostringstream str_ref, str_hyp;
|
||||
//for (size_t i = 0; i < ref_output.size(); i++)
|
||||
// str_ref << ref_output[i];
|
||||
|
||||
|
||||
for (size_t i = 0; i < multi_sinusoid_detector_output.size(); i++) {
|
||||
MultiSinusoidDetectorOutput *output = multi_sinusoid_detector_output[i];
|
||||
KALDI_LOG << "tot-energy = " << output->tot_energy
|
||||
<< ", freq1 " << output->freq1 << ", energy1 " << output->energy1
|
||||
<< ", freq2 " << output->freq2 << ", energy2 " << output->energy2;
|
||||
delete output;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
||||
int main() {
|
||||
using namespace kaldi;
|
||||
|
||||
SetVerboseLevel(4);
|
||||
|
||||
UnitTestToneDetection2();
|
||||
UnitTestAddSinusoid();
|
||||
UnitTestQuadraticMaximizeEqualSpaced();
|
||||
UnitTestQuadraticMaximize();
|
||||
for (int32 i = 0; i < 10; i++) {
|
||||
UnitTestSinusoidDetector();
|
||||
UnitTestSinusoidDetectorNoisy();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,945 @@
|
|||
// feat/sinusoid-detection.cc
|
||||
|
||||
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "feat/sinusoid-detection.h"
|
||||
#include "matrix/matrix-functions.h"
|
||||
#include "feat/resample.h"
|
||||
|
||||
namespace kaldi {
|
||||
|
||||
|
||||
|
||||
// This function adds the given sinusoid to the signal, as:
|
||||
// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase).
|
||||
void AddSinusoid(BaseFloat samp_freq,
|
||||
const Sinusoid &sinusoid,
|
||||
VectorBase<BaseFloat> *signal) {
|
||||
// treat "factor" as a complex variable equal to exp(i * 2 pi freq / samp_freq); it's
|
||||
// the factor by which we multiply on each frame.
|
||||
BaseFloat factor_real = cos(M_2PI * sinusoid.freq / samp_freq),
|
||||
factor_im = sin(M_2PI * sinusoid.freq / samp_freq);
|
||||
BaseFloat *signal_data = signal->Data();
|
||||
int32 dim = signal->Dim(), batch_size = 100;
|
||||
// process frames in batches of size "batch_size", after which we recompute
|
||||
// the starting point to prevent loss of accuracy due to drift.
|
||||
for (int32 b = 0; b * batch_size < dim; b++) {
|
||||
int32 t_offset = b * batch_size,
|
||||
t_end = std::min(dim, t_offset + batch_size);
|
||||
double phase = sinusoid.phase + M_2PI * t_offset * sinusoid.freq / samp_freq;
|
||||
// treat x as a complex variable which initially is equal to amplitude * exp(i * phase),
|
||||
// but which gets multiplied by "factor" on each frame.
|
||||
BaseFloat x_real = sinusoid.amplitude * cos(phase),
|
||||
x_im = sinusoid.amplitude * sin(phase);
|
||||
for (int32 t = t_offset; t < t_end; t++) {
|
||||
signal_data[t] += x_real;
|
||||
ComplexMul(factor_real, factor_im, &x_real, &x_im); // x *= factor.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// static
|
||||
void SinusoidDetector::QuadraticMaximizeEqualSpaced(
|
||||
BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat *x_max, BaseFloat *y_max) {
|
||||
// Let the function be y = a x^2 + b x + c, and
|
||||
// suppose we have the values of y(0), y(1) and y(2).
|
||||
// We have y0 = c, y1 = a + b + c, and y2 = 4a + 2b + c,
|
||||
// so c = y0.
|
||||
// Also, y2 - 2 y1 = 2a - c, so
|
||||
// a = (y2 - 2 y1 + c) / 2, and
|
||||
// b = y1 - a - c.
|
||||
BaseFloat c = y0, a = y2 - 2 * y1 + c, b = y1 - a - c;
|
||||
if (a >= 0) {
|
||||
// The maximum of the function will occur at one of the end points.
|
||||
if (y0 > y2) {
|
||||
*x_max = 0;
|
||||
*y_max = y0;
|
||||
} else {
|
||||
*x_max = 2;
|
||||
*y_max = y2;
|
||||
}
|
||||
} else {
|
||||
// derivative y' = 2a x + b. y' = 0 at x = -b / 2 a.
|
||||
BaseFloat x = -b / (2.0 * a);
|
||||
if (x <= 0.0) {
|
||||
*x_max = 0;
|
||||
*y_max = y0;
|
||||
} else if (x >= 2.0) {
|
||||
*x_max = 0;
|
||||
*y_max = y2;
|
||||
} else {
|
||||
*x_max = x;
|
||||
*y_max = a * x * x + b * x + c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
void SinusoidDetector::QuadraticMaximize(
|
||||
BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat *x_max, BaseFloat *y_max) {
|
||||
// Let the function be y = a x^2 + b x + c, and
|
||||
// suppose we have the values of y(0), y(x1) and y(1),
|
||||
// where 0 < x1 < 1.
|
||||
// We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c,
|
||||
// so c = y0.
|
||||
// Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so
|
||||
// a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and
|
||||
// b = y2 - a - c.
|
||||
BaseFloat c = y0,
|
||||
a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1),
|
||||
b = y2 - a - c;
|
||||
|
||||
// TODO: remove these lines.
|
||||
AssertEqual(y1, a * x1 * x1 + b * x1 + c);
|
||||
AssertEqual(y2, a + b + c);
|
||||
|
||||
if (a >= 0) {
|
||||
// The maximum of the function will occur at one of the end points.
|
||||
if (y0 > y2) {
|
||||
*x_max = 0;
|
||||
*y_max = y0;
|
||||
} else {
|
||||
*x_max = 1.0;
|
||||
*y_max = y2;
|
||||
}
|
||||
} else {
|
||||
// derivative y' = 2a x + b. y' = 0 at x = -b / 2 a.
|
||||
BaseFloat x = -b / (2.0 * a);
|
||||
if (x <= 0.0) {
|
||||
*x_max = 0.0;
|
||||
*y_max = y0;
|
||||
} else if (x >= 1.0) {
|
||||
*x_max = 1.0;
|
||||
*y_max = y2;
|
||||
} else {
|
||||
*x_max = x;
|
||||
*y_max = a * x * x + b * x + c;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//static
|
||||
BaseFloat SinusoidDetector::QuadraticInterpolate(
|
||||
BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat x) {
|
||||
// Let the function be y = a x^2 + b x + c, and
|
||||
// suppose we have the values of y(0), y(x1) and y(1),
|
||||
// where 0 < x1 < 1.
|
||||
// We have y0 = c, y1 = x1^2 a + x1 b + c, and y2 = a + b + c,
|
||||
// so c = y0.
|
||||
// Also, x1.y2 - y1 = a (x1 - x1^2) + (x1 - 1) c, so
|
||||
// a = ( (x1 y2 - y1) - (x1 - 1) c) / (x1 - x1^2), and
|
||||
// b = y2 - a - c.
|
||||
KALDI_ASSERT(x1 >= 0.0 && x1 <= 1.0);
|
||||
if (x1 == 0.0) return y0;
|
||||
else if (x1 == 1.0) return y2;
|
||||
|
||||
BaseFloat c = y0,
|
||||
a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1),
|
||||
b = y2 - a - c;
|
||||
return a * x * x + b * x + c;
|
||||
}
|
||||
|
||||
// This function does
|
||||
// (*cos)(t) = cos(2 pi t freq / samp_freq)
|
||||
// (*sin)(t) = sin(2 pi t freq / samp_freq)
|
||||
//static
|
||||
void SinusoidDetector::CreateCosAndSin(BaseFloat samp_freq,
|
||||
BaseFloat freq,
|
||||
VectorBase<BaseFloat> *cos_vec,
|
||||
VectorBase<BaseFloat> *sin_vec) {
|
||||
int32 dim = cos_vec->Dim(), batch_size = 100;
|
||||
KALDI_ASSERT(dim == sin_vec->Dim());
|
||||
BaseFloat *cos_data = cos_vec->Data(), *sin_data = sin_vec->Data();
|
||||
BaseFloat factor_real = cos(M_2PI * freq / samp_freq),
|
||||
factor_im = sin(M_2PI * freq / samp_freq);
|
||||
|
||||
// process frames in batches of size "batch_size", after which we recompute
|
||||
// the starting point to prevent loss of accuracy due to drift.
|
||||
for (int32 b = 0; b * batch_size < dim; b++) {
|
||||
int32 t_offset = b * batch_size,
|
||||
t_end = std::min(dim, t_offset + batch_size);
|
||||
double phase = M_2PI * t_offset * freq / samp_freq;
|
||||
// treat x as a complex variable which initially is equal to amplitude * exp(i * phase),
|
||||
// but which gets multiplied by "factor" on each frame.
|
||||
BaseFloat x_real = cos(phase), x_im = sin(phase);
|
||||
for (int32 t = t_offset; t < t_end; t++) {
|
||||
cos_data[t] = x_real;
|
||||
sin_data[t] = x_im;
|
||||
ComplexMul(factor_real, factor_im, &x_real, &x_im); // x *= factor.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sets up a detector for signals of exactly "num_samp" samples at sampling
// frequency "samp_freq".  The FFT size is num_samp rounded up by
// RoundUpToNearestPowerOfTwo().  factor1_ and factor2_ are the constants that
// SelfTest() uses as multipliers when bounding the final energy.
SinusoidDetector::SinusoidDetector(BaseFloat samp_freq, int32 num_samp)
    : samp_freq_(samp_freq),
      num_samples_(num_samp),
      num_samples_padded_(RoundUpToNearestPowerOfTwo(num_samp)),
      fft_(num_samples_padded_),
      factor1_(3.1),
      factor2_(1.42) {
  ComputeCoefficients();
}
|
||||
|
||||
void SinusoidDetector::SelfTest(
|
||||
const VectorBase<BaseFloat> &signal,
|
||||
const std::vector<InfoForBin> &info,
|
||||
BaseFloat final_freq,
|
||||
BaseFloat final_energy) {
|
||||
int32 num_bins = num_samples_padded_ * 2 + 1;
|
||||
|
||||
|
||||
{
|
||||
BaseFloat cutoff = 0.0;
|
||||
for (int32 k = 0; k <= num_bins; k += 4)
|
||||
cutoff = std::max(cutoff, info[k].energy);
|
||||
BaseFloat energy_upper_bound = factor1_ * cutoff;
|
||||
if (final_energy > energy_upper_bound) {
|
||||
KALDI_WARN << "Self-testing failed [factor1]: "
|
||||
<< final_energy << " > " << energy_upper_bound
|
||||
<< ", num-samples is " << num_samples_
|
||||
<< ", freq/nyquist = "
|
||||
<< (final_freq / (samp_freq_ * 0.5))
|
||||
<< "- would require factor1 >= "
|
||||
<< (final_energy / cutoff);
|
||||
}
|
||||
}
|
||||
{
|
||||
BaseFloat cutoff = 0.0;
|
||||
for (int32 k = 0; k <= num_bins; k += 2)
|
||||
if (info[k].valid)
|
||||
cutoff = std::max(cutoff, info[k].energy);
|
||||
BaseFloat energy_upper_bound = factor2_ * cutoff;
|
||||
if (final_energy > energy_upper_bound) {
|
||||
KALDI_WARN << "Self-testing failed [factor2]: "
|
||||
<< final_energy << " > " << energy_upper_bound
|
||||
<< ", num-samples is " << num_samples_
|
||||
<< ", freq/nyquist = "
|
||||
<< (final_freq / (samp_freq_ * 0.5))
|
||||
<< "- would require factor2 >= "
|
||||
<< (final_energy / cutoff);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Searches all runs of three consecutive bins [bin, bin+1, bin+2] for which
// info is valid, fits a quadratic through their energies, and keeps the
// largest interpolated maximum found.
// Returns that maximum energy (0.0 if none was found).  *bin_out is set to
// the bin at the left of the unit interval containing the maximum (or -1 if
// none was found) and *offset_out to the offset within that interval;
// *offset_out is left untouched if nothing was found.
BaseFloat SinusoidDetector::OptimizeFrequency(
    const std::vector<InfoForBin> &info,
    int32 *bin_out,
    BaseFloat *offset_out) const {
  BaseFloat max_energy = 0.0;
  *bin_out = -1;
  int32 max_freq = num_samples_padded_ * 2;

  for (int32 bin = 0; bin + 2 <= max_freq; bin++) {
    // All three bins of the range must have been computed.
    if (!(info[bin].valid && info[bin+1].valid && info[bin+2].valid))
      continue;

    BaseFloat best_x, best_y;
    QuadraticMaximizeEqualSpaced(info[bin].energy, info[bin+1].energy,
                                 info[bin+2].energy, &best_x, &best_y);
    if (!(best_y > max_energy))
      continue;

    max_energy = best_y;
    // best_x is relative to "bin" and lies in [0, 2]; express it as a bin
    // plus an offset in [0, 1].
    const bool in_left_half = (best_x <= 1.0);
    *bin_out = in_left_half ? bin : bin + 1;
    *offset_out = in_left_half ? best_x : best_x - 1;
  }
  return max_energy;
}
|
||||
|
||||
|
||||
// Looks for the dominant sinusoid in "signal" (which must have exactly
// num_samples_ samples).  If a sinusoid with energy of at least "min_energy"
// is found, its amplitude, frequency and phase are written to *sinusoid and
// its energy is returned.  Otherwise 0.0 is returned and *sinusoid is left
// untouched.
BaseFloat SinusoidDetector::DetectSinusoid(
    BaseFloat min_energy,
    const VectorBase<BaseFloat> &signal,
    Sinusoid *sinusoid) {
  // An all-zero signal contains nothing to detect.
  if (signal(0) == 0.0 && signal.Norm(2.0) == 0.0)
    return 0.0;
  KALDI_ASSERT(signal.Dim() == num_samples_);

  // FFT of the signal, zero-padded to the padded length.
  Vector<BaseFloat> fft(num_samples_padded_);
  fft.Range(0, num_samples_).CopyFromVec(signal);
  bool forward = true;
  fft_.Compute(fft.Data(), forward);

  // Info for the "coarse" bins.
  std::vector<InfoForBin> info;
  ComputeCoarseInfo(fft, &info);

  // First refinement: each candidate b is a multiple of 4, and the best
  // frequency may lie in the range [b, b+4].
  std::vector<int32> bins;
  FindCandidateBins(min_energy, info, &bins);
  if (bins.empty())
    return 0.0;  // not enough energy in signal.
  for (std::vector<int32>::const_iterator it = bins.begin();
       it != bins.end(); ++it)
    ComputeBinInfo(signal, *it, &(info[*it]));

  // Second refinement.
  std::vector<int32> bins2;
  FindCandidateBins2(min_energy, info, &bins2);
  for (std::vector<int32>::const_iterator it = bins2.begin();
       it != bins2.end(); ++it)
    ComputeBinInfo(signal, *it, &(info[*it]));

  // Predict the optimum by interpolation; it will usually lie between bins,
  // at some offset.
  int32 bin;
  BaseFloat offset;
  BaseFloat opt_energy = OptimizeFrequency(info, &bin, &offset);
  if (opt_energy == 0.0)
    return 0.0;

  BaseFloat max_freq = (bin + offset) * samp_freq_ / (num_samples_padded_ * 4);
  KALDI_VLOG(4) << "Best frequency based on interpolation is "
                << max_freq << ", best energy is "
                << opt_energy << ", bin is " << bin;

  OptimizedInfo final_info;
  FineOptimizeFrequency(signal, bin, offset, &info, &final_info);

  // If the optimum ended up on the left edge of the bin, retry in the bin to
  // the left, starting from its right edge.  Rarely needed.
  while (final_info.offset == 0.0 && bin > 0) {
    --bin;
    FineOptimizeFrequency(signal, bin, 1.0, &info, &final_info);
  }

  // Likewise for the right edge: retry in the bin to the right, starting
  // from its left edge.  Rarely needed.
  while (final_info.offset == 1.0 && bin < num_samples_padded_ * 2) {
    ++bin;
    FineOptimizeFrequency(signal, bin, 0.0, &info, &final_info);
  }

  // In the lowest or next-to-lowest bin, or the highest or next-to-highest
  // allowed bin, there tends to be more estimation error than usual, so do
  // another round of optimization.
  if (bin <= 1 || bin >= num_samples_padded_ * 2 - 2)
    FineOptimizeFrequency(signal, bin, final_info.offset, &info, &final_info);

  BaseFloat final_freq = (final_info.bin + final_info.offset) * samp_freq_ / (num_samples_padded_ * 4);
  KALDI_VLOG(4) << "Final optimized info is: freq " << final_freq
                << ", cos coeff " << final_info.cos_coeff << ", sin coeff "
                << final_info.sin_coeff << ", energy " << final_info.energy;

  if (GetVerboseLevel() > 1)
    SelfTest(signal, info, final_freq, final_info.energy);

  if (!(final_info.energy >= min_energy))
    return 0.0;

  // Convert the (cos, sin) coefficients into amplitude and phase.
  sinusoid->amplitude = std::sqrt(final_info.cos_coeff * final_info.cos_coeff
                                  + final_info.sin_coeff * final_info.sin_coeff);
  sinusoid->freq = final_freq;
  sinusoid->phase = -std::atan2(final_info.sin_coeff, final_info.cos_coeff);
  KALDI_VLOG(4) << "Phase is " << sinusoid->phase << ", amplitude is "
                << sinusoid->amplitude << ", freq is " << sinusoid->freq;
  return final_info.energy;
}
|
||||
|
||||
|
||||
/*
|
||||
This function computes, for the original FFT bins, the amount of energy in
|
||||
the signal that can be explained by a sinusoid at the corresponding frequency.
|
||||
|
||||
Let f be the continuous-valued frequency.
|
||||
|
||||
Define the vector C_f as
|
||||
C_f = [ c_0, c_1 ... c_n ] where c_k = cos(2 pi k f / samp_freq). [obviously this notation depends on f].
|
||||
and S_f the same thing with sin in place of cos.
|
||||
|
||||
Let the signal, as a vector, be V.
|
||||
We want to maximize the (positive) energy-difference:
|
||||
||V||^2 - || V - c C_f - s S_f ||^2
|
||||
where c and s are the coefficients of C_f and S_f.
|
||||
This quantity can be expanded as follows, where . means dot product.
|
||||
\delta E = -c^2 C_f.C_f - s^2 S_f.S_f - 2 c s C_f.S_f + 2 c V.C_f + 2 s V.S_f.
|
||||
which can be written as follows, where . means dot-product and ' means transpose:
|
||||
\delta E = 2 [c s] v - [c s] M [c s]'
|
||||
where M = [ C_f.C_f, C_f.S_f, C_f.S_f, S_f.S_f ],
|
||||
and v = [V.C_f, V.S_f].
|
||||
If M is invertible (i.e. for nonzero frequencies), this is maximized by
|
||||
[c s] = M^-1 v
|
||||
giving us the value.
|
||||
\delta E = v' M^{-1} v.
|
||||
We'll compute the inverse of M in advance, inside ComputeCoefficients(), using
|
||||
the formula [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a].  For zero frequency and at the
|
||||
Nyquist, M has the value [ a 0; 0 0 ], and we have the same type of expression
|
||||
limited to the first dim of v, i.e. Minv = [ a^{-1} 0; 0 0 ], a kind of pseudo-inverse.
|
||||
*/
|
||||
|
||||
void SinusoidDetector::ComputeCoarseInfo(
|
||||
const Vector<BaseFloat> &fft,
|
||||
std::vector<InfoForBin> *info) const {
|
||||
info->resize(num_samples_padded_ * 2 + 1); // 4 times resolution of FFT itself.
|
||||
|
||||
const BaseFloat *fft_data = fft.Data();
|
||||
|
||||
int32 num_bins = num_samples_padded_ / 2 + 1;
|
||||
for (int32 k = 0; k < num_bins; k++) {
|
||||
BaseFloat real, im;
|
||||
if (k == 0) {
|
||||
real = fft_data[0];
|
||||
im = 0.0;
|
||||
} else if (k == num_samples_padded_ / 2) {
|
||||
real = fft_data[1];
|
||||
im = 0.0;
|
||||
} else {
|
||||
real = fft_data[k * 2];
|
||||
im = fft_data[k * 2 + 1];
|
||||
}
|
||||
// v1 and v2 are the two components of the vector v in the math above.
|
||||
BaseFloat v1 = real, v2 = -im;
|
||||
// Minv_'s row indexes correspond to frequencies with 4 times more
|
||||
// resolution than the FFT bins.
|
||||
const BaseFloat *Minv_data = Minv_.RowData(k * 4);
|
||||
// The Matrix M^{-1} is of the form [a b; b d]
|
||||
BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2];
|
||||
// compute \delta E = v' M^{-1} v.
|
||||
BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
|
||||
InfoForBin &this_info = (*info)[k * 4];
|
||||
this_info.valid = true;
|
||||
this_info.cos_dot = real;
|
||||
this_info.sin_dot = -im;
|
||||
this_info.energy = delta_e;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SinusoidDetector::ComputeCoefficients() {
|
||||
int32 num_samp = num_samples_;
|
||||
int32 num_freq = num_samples_padded_ * 2 + 1;
|
||||
cos_.Resize(num_freq, num_samp);
|
||||
sin_.Resize(num_freq, num_samp);
|
||||
|
||||
Vector<BaseFloat> cc(num_freq), cs(num_freq);
|
||||
for (int32 k = 0; k < num_freq; k++) {
|
||||
BaseFloat freq = k * samp_freq_ / (num_samples_padded_ * 4);
|
||||
SubVector<BaseFloat> c(cos_, k), s(sin_, k);
|
||||
CreateCosAndSin(samp_freq_, freq, &c, &s);
|
||||
cc(k) = VecVec(c, c);
|
||||
cs(k) = VecVec(c, s);
|
||||
}
|
||||
|
||||
M_.Resize(num_freq, 3, kUndefined);
|
||||
Minv_.Resize(num_freq, 3, kUndefined);
|
||||
|
||||
for (int32 k = 0; k < num_freq; k++) {
|
||||
// Let the matrix M be [ a b; b d ]. [we don't write c because c == b].
|
||||
// We want to compute Minv_.
|
||||
BaseFloat a = cc(k), b = cs(k), d = num_samples_ - a;
|
||||
M_(k, 0) = a;
|
||||
M_(k, 1) = b;
|
||||
M_(k, 2) = d;
|
||||
if (k == 0 || k == num_freq - 1) {
|
||||
// this is a special case; it's not really the inverse of M but it will
|
||||
|
||||
// give us the expression we want; it's like an inverse in just one dimension.
|
||||
Minv_(k, 0) = 1.0 / a;
|
||||
Minv_(k, 1) = 0.0;
|
||||
Minv_(k, 2) = 0.0;
|
||||
} else {
|
||||
BaseFloat inv_det = 1.0 / (a * d - b * b);
|
||||
// check for NaN and inf.
|
||||
KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
|
||||
// use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
|
||||
BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det;
|
||||
Minv_(k, 0) = inv_a;
|
||||
Minv_(k, 1) = inv_b;
|
||||
Minv_(k, 2) = inv_d;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Does fine optimization of the frequency within this bin; returns the
|
||||
// final energy, the optimized frequency, and the cos and sin coefficients.
|
||||
void SinusoidDetector::FineOptimizeFrequency(
|
||||
const VectorBase<BaseFloat> &signal,
|
||||
int32 bin,
|
||||
BaseFloat bin_offset,
|
||||
std::vector<InfoForBin> *info_in,
|
||||
OptimizedInfo *opt_info) const {
|
||||
std::vector<InfoForBin> &info = *info_in;
|
||||
if (!info[bin].valid) ComputeBinInfo(signal, bin, &(info[bin]));
|
||||
if (!info[bin+1].valid) ComputeBinInfo(signal, bin+1, &(info[bin+1]));
|
||||
|
||||
const BaseFloat epsilon = 0.02, delta = 0.001;
|
||||
|
||||
// If the offset is very close to the edges of the bin, move it
|
||||
// closer to the center. Otherwise we may have problems with the
|
||||
// steps below. The initial offset is only used as a starting point
|
||||
// anyway, so this won't affect the final value much.
|
||||
if (bin_offset < epsilon)
|
||||
bin_offset = epsilon;
|
||||
if (bin_offset > 1.0 - epsilon)
|
||||
bin_offset = 1.0 - epsilon;
|
||||
KALDI_VLOG(4) << "Initial bin offset = " << bin_offset << ", bin = " << bin;
|
||||
|
||||
// create cos and sin waves of the specified frequency.
|
||||
BaseFloat freq = (bin + bin_offset) * samp_freq_ / (num_samples_padded_ * 4);
|
||||
Vector<BaseFloat> c(num_samples_, kUndefined), s(num_samples_, kUndefined);
|
||||
CreateCosAndSin(samp_freq_, freq, &c, &s);
|
||||
|
||||
// these a, b and d values are the elements of the M matrix at this frequency
|
||||
// "freq", i.e. the matrix M_f [ a b; b d ]. This will be invertible because
|
||||
// we have ensured that the frequency is not too close to zero or the Nyquist.
|
||||
BaseFloat a = VecVec(c, c), b = VecVec(c, s), d = num_samples_ - a;
|
||||
BaseFloat inv_det = 1.0 / (a * d - b * b);
|
||||
BaseFloat inv_a = d * inv_det, inv_b = -b * inv_det, inv_d = a * inv_det;
|
||||
|
||||
|
||||
BaseFloat v1 = VecVec(c, signal), v2 = VecVec(s, signal);
|
||||
|
||||
BaseFloat delta_e = v1 * v1 * inv_a + v2 * v2 * inv_d + 2 * v1 * v2 * inv_b;
|
||||
|
||||
KALDI_VLOG(4) << "Actual energy-change at frequency " << freq << " is "
|
||||
<< delta_e;
|
||||
// "freq" is frequency somewhere in the middle of the bin.
|
||||
|
||||
BaseFloat final_offset, final_energy;
|
||||
QuadraticMaximize(bin_offset, info[bin].energy, delta_e, info[bin+1].energy,
|
||||
&final_offset, &final_energy);
|
||||
|
||||
KALDI_VLOG(4) << "After further optimizing, offset was " << final_offset
|
||||
<< " giving freq "
|
||||
<< ((bin+final_offset) * samp_freq_ / (num_samples_padded_*4))
|
||||
<< ", with energy " << final_energy;
|
||||
|
||||
// Use interpolation (using a quadratic function) to get the entries of the M matrix
|
||||
// the the final, tuned frequency. Interpolation on M is better than M^{-1}, as its
|
||||
// elements are much better behaved as the frequency varies.
|
||||
const BaseFloat *M_left_data = M_.RowData(bin),
|
||||
*M_right_data = M_.RowData(bin + 1);
|
||||
|
||||
BaseFloat a_interp = QuadraticInterpolate(bin_offset, M_left_data[0], a, M_right_data[0],
|
||||
final_offset);
|
||||
BaseFloat b_interp = QuadraticInterpolate(bin_offset, M_left_data[1], b, M_right_data[1],
|
||||
final_offset);
|
||||
BaseFloat d_interp = QuadraticInterpolate(bin_offset, M_left_data[2], d, M_right_data[2],
|
||||
final_offset);
|
||||
|
||||
// Now get the inverse of the M matrix at the final point.
|
||||
BaseFloat a_inv_interp, b_inv_interp, d_inv_interp;
|
||||
|
||||
if ((bin == 0 && final_offset < delta) ||
|
||||
(bin == num_samples_padded_ * 2 && final_offset > 1.0 - delta)) {
|
||||
// If we're extremely close to zero or the Nyquist, we'll have trouble
|
||||
// inverting M; just invert in the 1st dimension (only have a cos
|
||||
// component).
|
||||
a_inv_interp = 1.0 / a_interp;
|
||||
b_inv_interp = 0.0;
|
||||
d_inv_interp = 0.0;
|
||||
} else {
|
||||
BaseFloat inv_det = 1.0 / (a_interp * d_interp - b_interp * b_interp);
|
||||
// check for NaN and inf.
|
||||
KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
|
||||
// use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
|
||||
a_inv_interp = d_interp * inv_det;
|
||||
b_inv_interp = -b_interp * inv_det;
|
||||
d_inv_interp = a_interp * inv_det;
|
||||
}
|
||||
|
||||
BaseFloat v1_interp = QuadraticInterpolate(bin_offset, info[bin].cos_dot, v1,
|
||||
info[bin+1].cos_dot, final_offset);
|
||||
BaseFloat v2_interp = QuadraticInterpolate(bin_offset, info[bin].sin_dot, v2,
|
||||
info[bin+1].sin_dot, final_offset);
|
||||
|
||||
opt_info->bin = bin;
|
||||
opt_info->offset = final_offset;
|
||||
// Recompute the energy-reduction using the more accurate interpolated values of
|
||||
// v1 and v2 (the dot-products of the cos and sin with the signal), and
|
||||
// of M.
|
||||
opt_info->energy = v1_interp * v1_interp * a_inv_interp +
|
||||
v2_interp * v2_interp * d_inv_interp +
|
||||
2 * v1_interp * v2_interp * b_inv_interp;
|
||||
// Compute the coefficients of the cos and sin in the optimal sinusoid, as
|
||||
// M^{-1} v.
|
||||
opt_info->cos_coeff = a_inv_interp * v1_interp + b_inv_interp * v2_interp;
|
||||
opt_info->sin_coeff = b_inv_interp * v1_interp + d_inv_interp * v2_interp;
|
||||
}
|
||||
|
||||
void SinusoidDetector::FindCandidateBins(
    BaseFloat min_energy,
    const std::vector<InfoForBin> &info,
    std::vector<int32> *bins) const {
  // The FFT bins sit at every 4th index of "info".
  const int32 stride = 4, end_bin = num_samples_padded_ * 2;

  // The threshold is the largest energy seen at any FFT bin, but never less
  // than min_energy.
  BaseFloat threshold = min_energy;
  for (int32 b = 0; b <= end_bin; b += stride) {
    KALDI_ASSERT(info[b].valid);
    if (threshold < info[b].energy)
      threshold = info[b].energy;
  }

  // For each pair of neighbouring FFT bins, assume the energy in between can
  // exceed the larger of the two end-point energies by at most factor1_; if
  // that bound reaches the threshold, request the midpoint of the pair.
  for (int32 left = 0; left < end_bin; left += stride) {
    const int32 right = left + stride;
    const BaseFloat bound =
        factor1_ * std::max(info[left].energy, info[right].energy);
    if (bound >= threshold)
      bins->push_back(left + 2);
  }
}
|
||||
|
||||
|
||||
void SinusoidDetector::FindCandidateBins2(
    BaseFloat min_energy,
    const std::vector<InfoForBin> &info,
    std::vector<int32> *bins2) const {
  // Second refinement pass: works on every 2nd index of "info", using only
  // the entries that have already been computed.
  const int32 stride = 2, end_bin = num_samples_padded_ * 2;

  // The threshold is the largest energy among the valid entries, but never
  // less than min_energy.
  BaseFloat threshold = min_energy;
  for (int32 b = 0; b <= end_bin; b += stride) {
    if (!info[b].valid)
      continue;
    if (threshold < info[b].energy)
      threshold = info[b].energy;
  }

  // Wherever both end-points of a half-FFT-bin range are valid, and the larger
  // of their energies scaled by factor2_ reaches the threshold, request the
  // midpoint of that range.
  for (int32 left = 0; left < end_bin; left += stride) {
    const int32 right = left + stride;
    if (!(info[left].valid && info[right].valid))
      continue;
    const BaseFloat bound =
        factor2_ * std::max(info[left].energy, info[right].energy);
    if (bound >= threshold)
      bins2->push_back(left + 1);
  }
}
|
||||
|
||||
|
||||
void SinusoidDetector::ComputeBinInfo(
|
||||
const VectorBase<BaseFloat> &signal,
|
||||
int32 bin,
|
||||
InfoForBin *info) const {
|
||||
KALDI_ASSERT(!info->valid); // or wasted time.
|
||||
info->valid = true;
|
||||
BaseFloat v1 = info->cos_dot = VecVec(cos_.Row(bin), signal);
|
||||
BaseFloat v2 = info->sin_dot = VecVec(sin_.Row(bin), signal);
|
||||
const BaseFloat *Minv_data = Minv_.RowData(bin);
|
||||
BaseFloat a = Minv_data[0], b = Minv_data[1], d = Minv_data[2];
|
||||
// compute \delta E = v' M^{-1} v.
|
||||
BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
|
||||
info->energy = delta_e;
|
||||
}
|
||||
|
||||
|
||||
// Sets up the resampler (from sampling_freq to config.subsample_freq) and the
// single-frame detector, which works on frames of the subsampled signal.
// Frame length and shift are converted from milliseconds to a number of
// samples at the subsampled rate.
MultiSinusoidDetector::MultiSinusoidDetector(
    const MultiSinusoidDetectorConfig &config,
    int32 sampling_freq)
    : config_(config),
      sample_freq_(sampling_freq),
      samples_per_frame_subsampled_(
          0.001 * config.frame_length_ms *
          static_cast<BaseFloat>(config.subsample_freq)),
      samples_shift_subsampled_(
          0.001 * config.frame_shift_ms *
          static_cast<BaseFloat>(config.subsample_freq)),
      waveform_finished_(false),
      samples_consumed_(0),
      resampler_(sampling_freq, config.subsample_freq,
                 config.subsample_filter_cutoff,
                 config.subsample_filter_zeros),
      detector_(config.subsample_freq, samples_per_frame_subsampled_) {
  config.Check();
}
|
||||
|
||||
|
||||
void MultiSinusoidDetector::Reset() {
|
||||
waveform_finished_ = false;
|
||||
samples_consumed_ = 0;
|
||||
while(!subsampled_signal_.empty()) {
|
||||
delete subsampled_signal_.front();
|
||||
subsampled_signal_.pop_front();
|
||||
}
|
||||
resampler_.Reset();
|
||||
}
|
||||
|
||||
void MultiSinusoidDetector::WaveformFinished() {
|
||||
KALDI_ASSERT(!waveform_finished_ &&
|
||||
"WaveformFinished() called twice.");
|
||||
|
||||
Vector<BaseFloat> empty_waveform;
|
||||
subsampled_signal_.push_back(new Vector<BaseFloat>());
|
||||
bool flush = true;
|
||||
resampler_.Resample(empty_waveform, flush,
|
||||
subsampled_signal_.back());
|
||||
waveform_finished_ = true;
|
||||
if (subsampled_signal_.back()->Dim() == 0) {
|
||||
delete subsampled_signal_.back();
|
||||
subsampled_signal_.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
void MultiSinusoidDetector::AcceptWaveform(
|
||||
const VectorBase<BaseFloat> &waveform) {
|
||||
|
||||
|
||||
subsampled_signal_.push_back(new Vector<BaseFloat>());
|
||||
bool flush = false;
|
||||
resampler_.Resample(waveform, flush,
|
||||
subsampled_signal_.back());
|
||||
if (subsampled_signal_.back()->Dim() == 0) {
|
||||
delete subsampled_signal_.back();
|
||||
subsampled_signal_.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the number of buffered, not-yet-consumed subsampled samples, capped
// at max_samp.  Counting stops as soon as the total exceeds max_samp.
int32 MultiSinusoidDetector::NumSubsampledSamplesReady(int32 max_samp) const {
  // samples_consumed_ counts samples already used from the first queued chunk,
  // so it must be zero for an empty queue and otherwise less than that
  // chunk's length.
  KALDI_ASSERT(samples_consumed_ >= 0 &&
               ((subsampled_signal_.empty() && samples_consumed_ == 0) ||
                (!subsampled_signal_.empty () && samples_consumed_ <
                 subsampled_signal_[0]->Dim())));

  int32 num_ready = -samples_consumed_;
  const size_t num_chunks = subsampled_signal_.size();
  for (size_t c = 0; c < num_chunks; c++) {
    num_ready += subsampled_signal_[c]->Dim();
    if (num_ready > max_samp)
      break;
  }
  KALDI_ASSERT(num_ready >= 0);
  return (max_samp < num_ready ? max_samp : num_ready);
}
|
||||
|
||||
bool MultiSinusoidDetector::Done() const {
|
||||
int32 samp_ready = NumSubsampledSamplesReady(samples_per_frame_subsampled_);
|
||||
if ((samp_ready >= samples_per_frame_subsampled_ && !waveform_finished_) ||
|
||||
(samp_ready > 0 && waveform_finished_))
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
// Copies the next frame of subsampled signal out of the queue into "frame",
// consuming those samples.  If the queue runs out (only allowed once the
// waveform is finished), the rest of the frame is filled with zeros.
void MultiSinusoidDetector::GetNextFrameOfSignal(Vector<BaseFloat> *frame) {
  const int32 frame_len = samples_per_frame_subsampled_;
  frame->Resize(frame_len, kUndefined);

  int32 num_filled = 0;
  while (num_filled < frame_len && !subsampled_signal_.empty()) {
    Vector<BaseFloat> *chunk = subsampled_signal_.front();
    const int32 left_in_chunk = chunk->Dim() - samples_consumed_;
    KALDI_ASSERT(left_in_chunk > 0);

    const int32 still_needed = frame_len - num_filled;
    const int32 num_copy = std::min(left_in_chunk, still_needed);
    frame->Range(num_filled, num_copy).CopyFromVec(
        chunk->Range(samples_consumed_, num_copy));

    num_filled += num_copy;
    samples_consumed_ += num_copy;
    if (samples_consumed_ == chunk->Dim()) {
      // This chunk is used up: free it and start the next one from its
      // beginning.
      samples_consumed_ = 0;
      delete chunk;
      subsampled_signal_.pop_front();
    }
  }

  const int32 num_missing = frame_len - num_filled;
  if (num_missing > 0) {
    KALDI_ASSERT(waveform_finished_ && num_filled > 0);  // or code error.
    frame->Range(num_filled, num_missing).SetZero();
  }
}
|
||||
|
||||
|
||||
// Processes the next frame of signal and writes the result to "output": the
// frame's energy per sample, plus one or two detected frequencies if they
// meet the thresholds in the config (a two-frequency result is tried first).
void MultiSinusoidDetector::GetNextFrame(MultiSinusoidDetectorOutput *output) {
  Vector<BaseFloat> frame;
  GetNextFrameOfSignal(&frame);
  // Mean subtraction.
  frame.Add(-1.0 * frame.Sum() / frame.Dim());
  *output = MultiSinusoidDetectorOutput();  // reset to default.

  const BaseFloat signal_energy = VecVec(frame, frame);
  output->tot_energy = signal_energy / frame.Dim();
  if (signal_energy == 0.0)
    return;

  // The lowest energy we might care about for the first sinusoid.
  const BaseFloat min_energy1 = signal_energy *
      std::min<BaseFloat>(config_.two_freq_min_total_energy * 0.5,
                          config_.one_freq_min_energy);

  Sinusoid sinusoid1;
  const BaseFloat energy1 = detector_.DetectSinusoid(min_energy1,
                                                     frame,
                                                     &sinusoid1);
  if (energy1 == 0.0)
    return;  // Nothing detected.

  // A second sinusoid only matters if
  //   energy1 + energy2 >= signal_energy * two_freq_min_total_energy,
  // and energy2 >= signal_energy * two_freq_min_energy.
  const BaseFloat min_energy2 =
      std::max(signal_energy * config_.two_freq_min_energy,
               signal_energy * config_.two_freq_min_total_energy
               - energy1);

  Sinusoid sinusoid2;
  BaseFloat energy2 = 0.0;

  // Only search for a second sinusoid if enough energy is left in the signal
  // that one of energy at least min_energy2 could possibly be found.
  const bool second_possible = (min_energy2 <= signal_energy - energy1);
  if (second_possible) {
    // Adding the first sinusoid with its phase reversed subtracts it from the
    // frame.
    sinusoid1.phase += M_PI;
    AddSinusoid(config_.subsample_freq, sinusoid1, &frame);

    energy2 = detector_.DetectSinusoid(min_energy2,
                                       frame,
                                       &sinusoid2);

    if (energy2 > energy1) {
      // Purely informational: lets us know when the detector returned the
      // non-optimal sinusoid first.
      BaseFloat factor = energy2 / energy1;
      KALDI_VLOG(2) << "Second sinusoid greater than first by a factor of "
                    << factor << ". (This means sinusoid detection is not "
                    << " working ideally).";
    }

    if (DetectedTwoFrequency(signal_energy,
                             sinusoid1, energy1,
                             sinusoid2, energy2,
                             output))
      return;
  }

  // The return status is not needed here; we return either way.
  DetectedOneFrequency(signal_energy,
                       sinusoid1, energy1,
                       sinusoid2, energy2,
                       output);
}
|
||||
|
||||
// acceptable two-frequency tone.
|
||||
bool MultiSinusoidDetector::DetectedTwoFrequency(
|
||||
BaseFloat signal_energy,
|
||||
const Sinusoid &sinusoid1,
|
||||
BaseFloat energy1,
|
||||
const Sinusoid &sinusoid2,
|
||||
BaseFloat energy2,
|
||||
MultiSinusoidDetectorOutput *output) {
|
||||
|
||||
if (energy1 + energy2 >= signal_energy *
|
||||
config_.two_freq_min_total_energy &&
|
||||
std::min(energy1, energy2) >= signal_energy *
|
||||
config_.two_freq_min_energy &&
|
||||
std::min(sinusoid1.freq, sinusoid2.freq) >= config_.min_freq &&
|
||||
std::max(sinusoid1.freq, sinusoid2.freq) <= config_.max_freq) {
|
||||
output->freq1 = sinusoid1.freq;
|
||||
output->energy1 = energy1 / signal_energy;
|
||||
output->freq2 = sinusoid2.freq;
|
||||
output->energy2 = energy2 / signal_energy;
|
||||
if (output->freq1 > output->freq2) {
|
||||
std::swap(output->freq1, output->freq2);
|
||||
std::swap(output->energy1, output->energy2);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// acceptable two-frequency tone.
|
||||
bool MultiSinusoidDetector::DetectedOneFrequency(
|
||||
BaseFloat signal_energy,
|
||||
const Sinusoid &sinusoid1,
|
||||
BaseFloat energy1,
|
||||
const Sinusoid &sinusoid2,
|
||||
BaseFloat energy2,
|
||||
MultiSinusoidDetectorOutput *output) {
|
||||
// If sinusoid detection were performing exactly to spec, we could assume
|
||||
// energy1 >= energy2, but we don't assume this as it's not guaranteed.
|
||||
if (energy1 > energy2 && energy1 > signal_energy *
|
||||
config_.one_freq_min_energy &&
|
||||
sinusoid1.freq >= config_.min_freq &&
|
||||
sinusoid1.freq <= config_.max_freq) {
|
||||
output->freq1 = sinusoid1.freq;
|
||||
output->energy1 = energy1 / signal_energy;
|
||||
output->freq2 = 0.0;
|
||||
output->energy2 = 0.0;
|
||||
return true;
|
||||
} else if (energy2 > energy1 && energy2 > signal_energy *
|
||||
config_.one_freq_min_energy &&
|
||||
sinusoid2.freq >= config_.min_freq &&
|
||||
sinusoid2.freq <= config_.max_freq) {
|
||||
output->freq1 = sinusoid2.freq;
|
||||
output->energy1 = energy2 / signal_energy;
|
||||
output->freq2 = 0.0;
|
||||
output->energy2 = 0.0;
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void DetectSinusoids(const VectorBase<BaseFloat> &signal,
|
||||
MultiSinusoidDetector *detector,
|
||||
Matrix<BaseFloat> *output) {
|
||||
std::vector<MultiSinusoidDetectorOutput> output_vec;
|
||||
detector->AcceptWaveform(signal);
|
||||
detector->WaveformFinished();
|
||||
|
||||
int32 safety_margin = 10, approx_num_frames = safety_margin +
|
||||
(signal.Dim() / (detector->SamplingFrequency() *
|
||||
detector->FrameShiftSecs()));
|
||||
output_vec.reserve(approx_num_frames);
|
||||
while (!detector->Done()) {
|
||||
output_vec.resize(output_vec.size() + 1);
|
||||
detector->GetNextFrame(&(output_vec.back()));
|
||||
}
|
||||
detector->Reset();
|
||||
if (output_vec.empty()) {
|
||||
output->Resize(0, 0);
|
||||
} else {
|
||||
output->Resize(output_vec.size(), 5, kUndefined);
|
||||
for (int32 i = 0; i < output->NumRows(); i++) {
|
||||
BaseFloat *row_data = output->RowData(i);
|
||||
MultiSinusoidDetectorOutput &this_output = output_vec[i];
|
||||
row_data[0] = this_output.tot_energy;
|
||||
row_data[1] = this_output.freq1;
|
||||
row_data[2] = this_output.energy1;
|
||||
row_data[3] = this_output.freq2;
|
||||
row_data[4] = this_output.energy2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace kaldi
|
||||
|
|
@ -0,0 +1,436 @@
|
|||
// feat/sinusoid-detection.h
|
||||
|
||||
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef KALDI_FEAT_SINUSOID_DETECTION_H_
|
||||
#define KALDI_FEAT_SINUSOID_DETECTION_H_
|
||||
|
||||
|
||||
#include "base/kaldi-error.h"
|
||||
#include "matrix/matrix-lib.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "feat/resample.h"
|
||||
#include <deque>
|
||||
|
||||
namespace kaldi {
|
||||
/// @addtogroup feat FeatureExtraction
|
||||
/// @{
|
||||
|
||||
|
||||
struct Sinusoid {
|
||||
// this structure used to represent a sinusoid of type amplitude cos (2 pi
|
||||
// freq t + phase), in the SinusoidDetector code.
|
||||
BaseFloat amplitude;
|
||||
BaseFloat freq;
|
||||
BaseFloat phase;
|
||||
Sinusoid(BaseFloat a, BaseFloat f, BaseFloat p):
|
||||
amplitude(a), freq(f), phase(p) { }
|
||||
Sinusoid() {}
|
||||
};
|
||||
|
||||
|
||||
// This function adds the given sinusoid to the signal, as:
|
||||
// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase).
|
||||
void AddSinusoid(BaseFloat samp_freq,
|
||||
const Sinusoid &sinusoid,
|
||||
VectorBase<BaseFloat> *signal);
|
||||
|
||||
|
||||
class SinusoidDetector {
|
||||
public:
|
||||
SinusoidDetector(BaseFloat samp_freq,
|
||||
int32 num_samp);
|
||||
|
||||
|
||||
// Detect the dominant sinusoid component in the signal, as long as the
|
||||
// energy-reduction of the signal from subtracting that sinuoid would be >=
|
||||
// "min_energy_change", and return that energy reduction; or zero if no
|
||||
// candidate was found.
|
||||
// non-const because the FFT class has a temporary buffer.
|
||||
BaseFloat DetectSinusoid(BaseFloat min_energy_change,
|
||||
const VectorBase<BaseFloat> &signal,
|
||||
Sinusoid *sinusoid);
|
||||
|
||||
// This function does quadratic interpolation for a function that is known at
|
||||
// three equally spaced points [x0 x1 x2] = [0 1 2], and we want the x-value
|
||||
// and corresponding y-value at the maximum of the function within the range
|
||||
// 0 <= x <= 2. It's public for testing reasons.
|
||||
static void QuadraticMaximizeEqualSpaced(
|
||||
BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat *x, BaseFloat *y);
|
||||
|
||||
|
||||
// This function does quadratic interpolation for a function that is known at
|
||||
// three points x0, x1 and x2 with x0 = 0, 0 < x1 < 1 and x2 = 1, where we
|
||||
// want the x-value and corresponding y-value at the maximum of the function
|
||||
// within the range 0 <= x <= 1. It's public for testing reasons.
|
||||
static void QuadraticMaximize(
|
||||
BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat *x, BaseFloat *y);
|
||||
|
||||
// This function does quadratic interpolation for a function that is known at
|
||||
// three points x0, x1 and x2 with x0 = 0, 0 <= x1 <= 1 and x2 = 1, where
|
||||
// we want the value at a specific value x. The corresponding y-value is returned.
|
||||
static BaseFloat QuadraticInterpolate(
|
||||
BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
|
||||
BaseFloat x);
|
||||
|
||||
|
||||
private:
|
||||
BaseFloat samp_freq_;
|
||||
int32 num_samples_;
|
||||
int32 num_samples_padded_; // Number of samples, after zero-padding to power of 2.
|
||||
SplitRadixRealFft<BaseFloat> fft_; // Object used to compute FFT of padded_signal_.
|
||||
|
||||
BaseFloat factor1_; // When we search the range between two FFT bins, we
|
||||
// assume that the maximum energy-reduction within the
|
||||
// range may be greater than the maximum of the
|
||||
// energy-reductions at either side, by at most
|
||||
// "factor1", with factor1 > 1.0. The analysis is quite
|
||||
// hard so we determine this factor empirically. Making
|
||||
// this as small as possible helps us avoid searching too
|
||||
// many bins.
|
||||
|
||||
BaseFloat factor2_; // As factor1, but for searches within a half-fft-bin
|
||||
// range. Again determined empirically. After that we
|
||||
// use quadratic interpolation to find the maximum energy.
|
||||
|
||||
// This matrix, of dimension (num_samples_padded_ * 2 + 1) by
|
||||
// num_samples_, has in each row, a different frequency of cosine wave.
|
||||
Matrix<BaseFloat> cos_;
|
||||
// This matrix, of dimension (num_samples_padded_ * 2 + 1) by
|
||||
// num_samples_, has in each row, a different frequency of sine wave.
|
||||
Matrix<BaseFloat> sin_;
|
||||
|
||||
// M_ is a precomputed matrix of dimension (num_samples_padded_ * 2 + 1) by 3,
|
||||
// containing the values x y z of a symmetric matrix [ a b; b c ]. There is
|
||||
// one of these matrices for each frequency, sampled at one quarter the
|
||||
// spacing of the FFT bins. There is a long comment next to the definition of
|
||||
// ComputeCoefficients that describes this.
|
||||
Matrix<BaseFloat> M_;
|
||||
|
||||
// Minv_ is the coefficients in the same format as M_, but containing the
|
||||
// corresponding coefficients of the inverse matrix. There is a long comment
|
||||
// next to the definition of ComputeCoefficients that describes this.
|
||||
Matrix<BaseFloat> Minv_;
|
||||
|
||||
|
||||
struct InfoForBin {
|
||||
bool valid;
|
||||
BaseFloat cos_dot; // dot product of signal with cosine on left frequency
|
||||
BaseFloat sin_dot; // dot product of signal with sine on left frequency
|
||||
BaseFloat energy; // energy.
|
||||
InfoForBin(): valid(false) { }
|
||||
};
|
||||
|
||||
// Info after fine optimization within a bin.
|
||||
struct OptimizedInfo {
|
||||
int32 bin;
|
||||
BaseFloat offset;
|
||||
BaseFloat energy;
|
||||
BaseFloat cos_coeff;
|
||||
BaseFloat sin_coeff;
|
||||
};
|
||||
|
||||
// Compute the coefficients and energies at the original FFT bins (every
|
||||
// fourth entry in "info").
|
||||
void ComputeCoarseInfo(const Vector<BaseFloat> &fft,
|
||||
std::vector<InfoForBin> *info) const;
|
||||
|
||||
|
||||
// After the coarse-level info is computed using ComputeCoarseInfo, finds a
|
||||
// set of intermediate bin indexes to compute, that are the midpoints of
|
||||
// coarse-level bins.
|
||||
void FindCandidateBins(BaseFloat min_energy,
|
||||
const std::vector<InfoForBin> &info,
|
||||
std::vector<int32> *bins) const;
|
||||
|
||||
void FindCandidateBins2(BaseFloat min_energy,
|
||||
const std::vector<InfoForBin> &info,
|
||||
std::vector<int32> *bins) const;
|
||||
|
||||
|
||||
void ComputeBinInfo(const VectorBase<BaseFloat> &signal,
|
||||
int32 bin, InfoForBin *info) const;
|
||||
|
||||
|
||||
// For each bin b such that we have valid "info" data for bins b, b+1 and b+2,
|
||||
// does quadratic interpolation to find the maximum predicted energy in the
|
||||
// range [b, b+2]. The location of the maximum predicted energy is output to
|
||||
// "bin_out" and "offset_out", and the corresponding predicted energy is
|
||||
// returned.
|
||||
//
|
||||
// Note: if there are two different frequencies with similar maximum energies
|
||||
// (e.g. within a factor of probably around 1.2 or so), the fact that
|
||||
// OptimizeFrequency only returns one maximum may potentially lead to the
|
||||
// smaller maximum being output. We could have modified this to output
|
||||
// multiple different maxima, which could have been more accurate in terms of
|
||||
// being guaranteed to output the best maximum, but this probably wouldn't
|
||||
// have a measurable impact on our application so we haven't bothered.
|
||||
BaseFloat OptimizeFrequency(
|
||||
const std::vector<InfoForBin> &info,
|
||||
int32 *bin_out,
|
||||
BaseFloat *offset_out) const;
|
||||
|
||||
|
||||
// This function does
|
||||
// (*cos)(t) = cos(2 pi t freq / samp_freq)
|
||||
// (*sin)(t) = sin(2 pi t freq / samp_freq)
|
||||
static void CreateCosAndSin(BaseFloat samp_freq,
|
||||
BaseFloat freq,
|
||||
VectorBase<BaseFloat> *cos,
|
||||
VectorBase<BaseFloat> *sin);
|
||||
|
||||
// Do fine optimization of the frequency within a bin, given a reasonable
|
||||
// approximate position within it based on interpolation (that should be close
|
||||
// to the optimum).
|
||||
void FineOptimizeFrequency(
|
||||
const VectorBase<BaseFloat> &signal,
|
||||
int32 bin,
|
||||
BaseFloat offset,
|
||||
std::vector<InfoForBin> *info,
|
||||
OptimizedInfo *opt_info) const;
|
||||
|
||||
// Sets up the coefficients stored in the members cos_, sin_, and Minv_.
void ComputeCoefficients();
|
||||
|
||||
// Calls some self-testing code that prints warnings if
|
||||
// some of our assumptions were wrong.
|
||||
void SelfTest(const VectorBase<BaseFloat> &signal,
|
||||
const std::vector<InfoForBin> &info,
|
||||
BaseFloat final_freq,
|
||||
BaseFloat final_energy);
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
/**
|
||||
This configuration class is for the frame-by-frame detection of
|
||||
cases where there are one or two sinusoids that can explain
|
||||
a lot of the energy in the signal.
|
||||
*/
|
||||
struct MultiSinusoidDetectorConfig {
|
||||
|
||||
// frame length in milliseconds
|
||||
BaseFloat frame_length_ms;
|
||||
// frame shift in milliseconds
|
||||
BaseFloat frame_shift_ms;
|
||||
|
||||
// Proportion of the total energy of the signal that the quieter of
|
||||
// the two sinusoids must comprise, in order to be counted, if two
|
||||
// sinusoids are detected.
|
||||
BaseFloat two_freq_min_energy;
|
||||
|
||||
// Proportion of the total energy of the signal that both sinusoids (if
|
||||
// two are detected) must comprise, in order to be output.
|
||||
BaseFloat two_freq_min_total_energy;
|
||||
|
||||
// Proportion of the total energy of the signal that a single sinusoid
|
||||
// must comprise, in order to be output, if we are considering
|
||||
// reporting a single sinusoid. Note: detection of two sinusoids
|
||||
// will take precedence over detection of a single sinusoid.
|
||||
BaseFloat one_freq_min_energy;
|
||||
|
||||
// Lower end of frequency range that we consider; frequencies outside
|
||||
// this range are not candidates to appear in the detected output.
|
||||
BaseFloat min_freq;
|
||||
// Upper end of frequency range that we consider, see min_freq.
|
||||
BaseFloat max_freq;
|
||||
|
||||
// Frequency to which we subsample the signal before processing it.
|
||||
// Must be integer because of how LinearResample code works.
|
||||
int32 subsample_freq;
|
||||
|
||||
// Filter cut-off frequency used in sub-sampling.
|
||||
BaseFloat subsample_filter_cutoff;
|
||||
|
||||
// the following is not critical and is not exported to the
|
||||
// command line.
|
||||
int32 subsample_filter_zeros;
|
||||
|
||||
MultiSinusoidDetectorConfig():
|
||||
frame_length_ms(20), frame_shift_ms(10),
|
||||
two_freq_min_energy(0.2), two_freq_min_total_energy(0.6),
|
||||
one_freq_min_energy(0.75), min_freq(300.0),
|
||||
max_freq(1800.0), subsample_freq(4000),
|
||||
subsample_filter_cutoff(1900.0), subsample_filter_zeros(5) {}
|
||||
|
||||
void Register(OptionsItf *po) {
|
||||
po->Register("frame-length", &frame_length_ms,
|
||||
"Frame length in milliseconds");
|
||||
po->Register("frame-shift", &frame_shift_ms,
|
||||
"Frame shift in milliseconds");
|
||||
po->Register("two-freq-min-energy", &two_freq_min_energy,
|
||||
"For detecting two-frequency tones, minimum energy that "
|
||||
"the quieter frequency must have (relative to total "
|
||||
"enegy of frame)");
|
||||
po->Register("two-freq-min-total-energy", &two_freq_min_total_energy,
|
||||
"For detecting two-frequency tones, minimum energy that "
|
||||
"the two frequencies together must have (relative to total "
|
||||
"energy of frame)");
|
||||
po->Register("one-freq-min-energy", &one_freq_min_energy, "For detecting "
|
||||
"single-frequency tones, minimum energy that the frequency "
|
||||
"must have relative to total energy of frame");
|
||||
po->Register("min-freq", &min_freq, "Minimum frequency of sinusoid that "
|
||||
"will be detected");
|
||||
po->Register("max-freq", &min_freq, "Maximum frequency of sinusoid that "
|
||||
"will be detected");
|
||||
po->Register("subsample-freq", &subsample_freq, "Frequency at which "
|
||||
"we subsample the signal");
|
||||
po->Register("subsample-filter-cutoff", &subsample_filter_cutoff, "Filter "
|
||||
"cut-off frequency used in subsampling");
|
||||
}
|
||||
void Check() const {
|
||||
KALDI_ASSERT(frame_length_ms > 0 && frame_length_ms >= frame_shift_ms &&
|
||||
min_freq > 0 && max_freq > min_freq &&
|
||||
subsample_filter_cutoff > max_freq &&
|
||||
subsample_freq/2 > subsample_filter_cutoff &&
|
||||
subsample_filter_zeros > 2 &&
|
||||
subsample_filter_cutoff > 0.25 * subsample_freq &&
|
||||
two_freq_min_total_energy > two_freq_min_energy &&
|
||||
two_freq_min_energy <= 0.5 * two_freq_min_total_energy);
|
||||
BaseFloat samples_per_frame_shift =
|
||||
frame_shift_ms * 0.001 * subsample_freq;
|
||||
// The following assert ensures that the frame-shift is an exact
|
||||
// number of samples, so that the locations of the frames
|
||||
// don't gradually drift out of sync.
|
||||
KALDI_ASSERT(fabs(samples_per_frame_shift -
|
||||
static_cast<int32>(samples_per_frame_shift)) <
|
||||
0.001);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
struct MultiSinusoidDetectorOutput {
|
||||
BaseFloat tot_energy; // Total energy per sample of this frame (sum-square of
|
||||
// signal divided by number of samples... this is after
|
||||
// downsampling and mean subtraction.
|
||||
BaseFloat freq1; // Lower frequency detected, or 0 if none detected.
|
||||
BaseFloat energy1; // Energy of lower frequency divided by total energy, or 0
|
||||
// if none detected.
|
||||
BaseFloat freq2; // Lower frequency detected, or 0 if zero or one
|
||||
// frequencies detected.
|
||||
BaseFloat energy2; // Energy of higher frequency divided by total energy, or 0
|
||||
// if zero or one freqencies detected.
|
||||
MultiSinusoidDetectorOutput(): tot_energy(0.0), freq1(0.0),
|
||||
energy1(0.0), freq2(0.0), energy2(0.0) { }
|
||||
};
|
||||
|
||||
|
||||
class MultiSinusoidDetector {
|
||||
public:
|
||||
|
||||
// Initialize sinusoid detector. Sampling frequency must be integer.
|
||||
MultiSinusoidDetector(const MultiSinusoidDetectorConfig &config,
|
||||
int32 sampling_freq);
|
||||
|
||||
/// This is how the class acccepts its input. You can put the waveform in
|
||||
/// piece by piece, if it's an online application.
|
||||
void AcceptWaveform(const VectorBase<BaseFloat> &waveform);
|
||||
|
||||
/// The user calls this to announce to the class that the waveform has ended;
|
||||
/// this forces any pending data to be flushed.
|
||||
void WaveformFinished();
|
||||
|
||||
/// Resets the state of the class so you can start processing another waveform.
|
||||
void Reset();
|
||||
|
||||
/// This returns true if the class currently has no more data ready to output.
|
||||
bool Done() const;
|
||||
|
||||
/// Outputs the next frame of output to "frame", which must be non-NULL.
|
||||
/// It is an error to call this if Done() has returned true, or has not been
|
||||
/// checked.
|
||||
void GetNextFrame(MultiSinusoidDetectorOutput *output);
|
||||
|
||||
BaseFloat FrameShiftSecs() const { return 0.001 * config_.frame_shift_ms; }
|
||||
|
||||
BaseFloat SamplingFrequency() const { return sample_freq_; }
|
||||
|
||||
private:
|
||||
// Gets the next frame of subsampled signal, and consumes the appropriate
|
||||
// amount of stored data. It is an error to call this if Done() returned
|
||||
// true.
|
||||
void GetNextFrameOfSignal(Vector<BaseFloat> *frame);
|
||||
|
||||
// returns true and sets freq1, freq1, energy1 and energy2 in "output" if we
|
||||
// successfully detected an acceptable two-frequency tone.
|
||||
bool DetectedTwoFrequency(BaseFloat signal_energy,
|
||||
const Sinusoid &sinusoid1,
|
||||
BaseFloat energy1,
|
||||
const Sinusoid &sinusoid2,
|
||||
BaseFloat energy2,
|
||||
MultiSinusoidDetectorOutput *output);
|
||||
|
||||
// returns true and sets freq1, freq1, energy1 and energy2 in "output" if we
|
||||
// successfully detected an acceptable one-frequency tone.
|
||||
bool DetectedOneFrequency(BaseFloat signal_energy,
|
||||
const Sinusoid &sinusoid1,
|
||||
BaseFloat energy1,
|
||||
const Sinusoid &sinusoid2,
|
||||
BaseFloat energy2,
|
||||
MultiSinusoidDetectorOutput *output);
|
||||
|
||||
|
||||
// Returns std::min(max_samp, sum-of-samples-in-subsampled_signal_).
|
||||
// (the std::min is for efficiency so we don't have to visit the
|
||||
// whole list).
|
||||
int32 NumSubsampledSamplesReady(int32 max_samp) const;
|
||||
|
||||
MultiSinusoidDetectorConfig config_;
|
||||
int32 sample_freq_;
|
||||
int32 samples_per_frame_subsampled_; // (samples per frame at subsampled
|
||||
// rate).
|
||||
int32 samples_shift_subsampled_; // (samples per frame-shift at subsampled
|
||||
// rate).
|
||||
|
||||
// True if the user has called WaveformFinished().
|
||||
bool waveform_finished_;
|
||||
|
||||
// Pieces of the subsampled signal that are awaiting processing.
|
||||
// Normally there will be just one element here, but if someone calls
|
||||
// AcceptWaveform multiple times before getting output, there could
|
||||
// be more elements. All of these pieces are nonempty.
|
||||
std::deque<Vector<BaseFloat>* > subsampled_signal_;
|
||||
|
||||
// stores the number of samples consumed from the first member of
|
||||
// subsampled_signal_. We will always have samples_consumed_ >= 0 and either
|
||||
// (subsampled_signal_.empty() && samples_consumed_ == 0) or
|
||||
// samples_consumed_ < subsampled_signal_[0]->Dim().
|
||||
int32 samples_consumed_;
|
||||
|
||||
|
||||
// This object is used to subsample the signal.
|
||||
LinearResample resampler_;
|
||||
|
||||
// This object is used to detect sinusoids in the subsampled
|
||||
// frames.
|
||||
SinusoidDetector detector_;
|
||||
};
|
||||
|
||||
// Detect sinusoids. Signal should be sampled at detector->SamplingFrequency().
|
||||
void DetectSinusoids(const VectorBase<BaseFloat> &signal,
|
||||
MultiSinusoidDetector *detector,
|
||||
Matrix<BaseFloat> *output);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// @} End of "addtogroup feat"
|
||||
} // namespace kaldi
|
||||
#endif // KALDI_FEAT_SINUSOID_DETECTION_H_
|
|
@ -14,7 +14,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
|
|||
apply-cmvn-sliding compute-cmvn-stats-two-channel compute-kaldi-pitch-feats \
|
||||
process-kaldi-pitch-feats compare-feats wav-to-duration add-deltas-sdc \
|
||||
compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \
|
||||
append-vector-to-feats
|
||||
append-vector-to-feats detect-sinusoids
|
||||
|
||||
OBJFILES =
|
||||
|
||||
|
|
|
@ -0,0 +1,113 @@
|
|||
// featbin/detect-sinusoids.cc
|
||||
|
||||
// Copyright 2015 Johns Hopkins University (author: Daniel Povey)
|
||||
|
||||
// See ../../COPYING for clarification regarding multiple authors
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
|
||||
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
|
||||
// MERCHANTABLITY OR NON-INFRINGEMENT.
|
||||
// See the Apache 2 License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "base/kaldi-common.h"
|
||||
#include "util/common-utils.h"
|
||||
#include "feat/sinusoid-detection.h"
|
||||
#include "feat/wave-reader.h"
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
using namespace kaldi;
|
||||
const char *usage =
|
||||
"Detect sinusoids (one or two at a time) in waveform input and output\n"
|
||||
"frame-by-frame information on their frequencies and energies. Useful\n"
|
||||
"as part of DTMF and dialtone detection. Output is an archive of\n"
|
||||
"matrices; for each file, there is a row per frame, containing\n"
|
||||
"<signal-energy-per-sample> <frequency1> <energy1> <frequency2> <energy2>\n"
|
||||
"where the frequencies and energies may be zero if no sufficiently\n"
|
||||
"dominant sinusoid(s) was/were detected. If two frequencies were\n"
|
||||
"detected, frequency1 < frequency2. See options for more detail on\n"
|
||||
"configuration options.\n"
|
||||
"\n"
|
||||
"Usage: detect-sinusoids [options] <wav-rspecifier> <matrix-wspecifier>\n"
|
||||
"e.g.: detect-sinusoids scp:wav.scp ark,t:sinusoids.ark\n";
|
||||
|
||||
ParseOptions po(usage);
|
||||
MultiSinusoidDetectorConfig config;
|
||||
|
||||
config.Register(&po);
|
||||
|
||||
po.Read(argc, argv);
|
||||
|
||||
if (po.NumArgs() != 2) {
|
||||
po.PrintUsage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::string wav_rspecifier = po.GetArg(1),
|
||||
matrix_wspecifier = po.GetArg(2);
|
||||
|
||||
int32 num_done = 0, num_err = 0;
|
||||
|
||||
SequentialTableReader<WaveHolder> wav_reader(wav_rspecifier);
|
||||
BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
|
||||
|
||||
MultiSinusoidDetector *detector = NULL;
|
||||
|
||||
for (; !wav_reader.Done(); wav_reader.Next()) {
|
||||
const WaveData &wav_data = wav_reader.Value();
|
||||
const Matrix<BaseFloat> &data = wav_data.Data();
|
||||
BaseFloat samp_freq = wav_data.SampFreq();
|
||||
int32 num_channels = data.NumRows();
|
||||
if (num_channels != 1) {
|
||||
KALDI_WARN << "detect-sinusoids requires data with one "
|
||||
<< "channel. Recording " << wav_reader.Key() << " has "
|
||||
<< num_channels << ". First select one channel of your "
|
||||
<< "data (e.g. using sox)";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
if (samp_freq < config.subsample_freq) {
|
||||
KALDI_WARN << "Sampling frequency of data " << wav_reader.Key()
|
||||
<< " is too low " << samp_freq << " < "
|
||||
<< config.subsample_freq << ". Reduce --subsample-freq "
|
||||
<< "if you want to run on this data.";
|
||||
num_err++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (detector == NULL ||
|
||||
samp_freq != detector->SamplingFrequency()) {
|
||||
delete detector;
|
||||
detector = new MultiSinusoidDetector(config, samp_freq);
|
||||
}
|
||||
|
||||
Matrix<BaseFloat> output;
|
||||
DetectSinusoids(data.Row(0), detector, &output);
|
||||
|
||||
if (output.NumRows() == 0) {
|
||||
KALDI_WARN << "No output for " << wav_reader.Key();
|
||||
num_err++;
|
||||
} else {
|
||||
matrix_writer.Write(wav_reader.Key(), output);
|
||||
num_done++;
|
||||
}
|
||||
}
|
||||
delete detector;
|
||||
KALDI_LOG << "Detected sinusoids in " << num_done << " wave files,"
|
||||
<< num_err << " with errors.";
|
||||
return (num_done != 0 ? 0 : 1);
|
||||
} catch(const std::exception &e) {
|
||||
std::cerr << e.what();
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче