trunk: committing tools to detect sinusoid tones in signals. Will be used as input for later DTMF and dialtone detection.

git-svn-id: https://svn.code.sf.net/p/kaldi/code/trunk@4995 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
2015-04-09 23:52:31 +00:00 · 2015-04-09 23:52:31 +00:00 · 36dffb1ba2
--- a/.gitignore
+++ b/.gitignore
@ -796,6 +796,7 @@
 /src/featbin/compute-and-process-kaldi-pitch-feats
 /src/featbin/modify-cmvn-stats
 /src/featbin/wav-copy
+/src/featbin/detect-sinusoids
 /src/fstext/push-special-test
 /src/gmmbin/gmm-acc-mllt-global
 /src/gmmbin/gmm-est-fmllr-global
@ -850,6 +851,7 @@
 /src/transform/fmllr-raw-test
 /src/util/simple-options-test
 /src/feat/resample-test
+/src/feat/sinusoid-detection-test
 /src/feat/online-feature-test
 /src/probe/exp-test
 /src/fstext/prune-special-test
--- a/src/feat/Makefile
+++ b/src/feat/Makefile
@ -6,11 +6,11 @@ include ../kaldi.mk

 TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \
         feature-functions-test pitch-functions-test feature-sdc-test \
-         resample-test online-feature-test
+         resample-test online-feature-test sinusoid-detection-test

 OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \
           feature-spectrogram.o mel-computations.o wave-reader.o \
-           pitch-functions.o resample.o online-feature.o
+           pitch-functions.o resample.o online-feature.o sinusoid-detection.o

 LIBNAME = kaldi-feat

--- a/src/feat/sinusoid-detection-test.cc
+++ b/src/feat/sinusoid-detection-test.cc
@ -0,0 +1,450 @@
+// feat/sinusoid-detection-test.cc
+
+// Copyright    2015  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABILITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include <iostream>
+
+#include "base/kaldi-math.h"
+#include "feat/sinusoid-detection.h"
+
+
+namespace kaldi {
+
+// Reference implementation used to cross-check AddSinusoid().  It evaluates
+// the cosine directly at every sample index, i.e.
+//   (*signal)(t) += amplitude * cos(2 pi freq / samp_freq * t + phase).
+void AddSinusoidSimple(BaseFloat samp_freq,
+                       const Sinusoid &sinusoid,
+                       VectorBase<BaseFloat> *signal) {
+  const int32 num_samples = signal->Dim();
+  for (int32 t = 0; t < num_samples; t++) {
+    // Angle (in radians) reached at sample t, including the initial phase.
+    double angle = M_2PI * sinusoid.freq / samp_freq * t + sinusoid.phase;
+    (*signal)(t) += sinusoid.amplitude * cos(angle);
+  }
+}
+
+// Checks that AddSinusoid() agrees with the direct per-sample evaluation in
+// AddSinusoidSimple() when both are applied to identical copies of a random
+// signal.
+void UnitTestAddSinusoid() {
+  const BaseFloat samp_freq = 560.1;
+  const int32 num_samples = 511;
+
+  Vector<BaseFloat> fast_result(num_samples);
+  fast_result.SetRandn();
+  // Second copy of the same noise, to be processed by the simple version.
+  Vector<BaseFloat> simple_result(fast_result);
+
+  // Arguments are given in the order (amplitude, freq, phase).
+  Sinusoid tone(49.20, 2.111, 1.5);
+
+  AddSinusoid(samp_freq, tone, &fast_result);
+  AddSinusoidSimple(samp_freq, tone, &simple_result);
+  AssertEqual(fast_result, simple_result);
+}
+      
+
+
+// Checks SinusoidDetector::QuadraticMaximizeEqualSpaced() on 50 random
+// quadratics: the maximum it reports must not be exceeded (beyond a small
+// tolerance) by the quadratic itself at any of the probed points.
+void UnitTestQuadraticMaximizeEqualSpaced() {
+  for (int32 trial = 0; trial < 50; trial++) {
+    // The quadratic is y = a x^2 + b x + c.  The function under test is only
+    // given its values at x = 0, 1 and 2.
+    BaseFloat a = -0.5 + RandUniform();
+    BaseFloat b = -0.5 + RandUniform();
+    BaseFloat c = -0.5 + RandUniform();
+
+    BaseFloat samples[3];
+    for (int32 i = 0; i < 3; i++) {
+      BaseFloat x = i;
+      samples[i] = a * x * x + b * x + c;
+    }
+
+    BaseFloat x_max, y_max;
+    SinusoidDetector::QuadraticMaximizeEqualSpaced(samples[0], samples[1],
+                                                   samples[2], &x_max, &y_max);
+
+    // Probe the quadratic at x = 0.0, 0.1, ..., 1.0.  Note that this covers
+    // only the first half of the fitted interval [0, 2].
+    for (int32 step = 0; step <= 10; step++) {
+      BaseFloat x_test = 0.1 * step;
+      BaseFloat y_test = a * x_test * x_test + b * x_test + c;
+      KALDI_ASSERT(y_test <= y_max + 1.0e-05);
+    }
+  }
+}
+
+// Checks SinusoidDetector::QuadraticMaximize() on 50 random quadratics
+// y = a x^2 + b x + c.  The function under test is given y at x = 0, at a
+// random interior point x, and at x = 1; the maximum it reports must not be
+// exceeded (beyond a small tolerance) by the quadratic at any of the probed
+// points x = 0.0, 0.1, ..., 1.0.
+void UnitTestQuadraticMaximize() {
+  for (int32 n = 0; n < 50; n++) {
+    BaseFloat a = -0.5 + RandUniform();
+    BaseFloat b = -0.5 + RandUniform();
+    BaseFloat c = -0.5 + RandUniform();
+    // QuadraticMaximize() requires 0 < x < 1 and divides by (x - x^2), so the
+    // interior point is drawn from [0.1, 0.9] to stay strictly inside the
+    // interval and away from the singular end points.
+    BaseFloat x = 0.1 + RandUniform() * 0.8;
+
+    // Evaluate the quadratic at the three points handed to the function.
+    const BaseFloat x_known[3] = { 0.0, x, 1.0 };
+    BaseFloat y[3];
+    for (int32 i = 0; i < 3; i++) {
+      BaseFloat this_x = x_known[i];
+      y[i] = a * this_x * this_x + b * this_x + c;
+    }
+
+    BaseFloat x_max, y_max;
+    SinusoidDetector::QuadraticMaximize(x, y[0], y[1], y[2], &x_max, &y_max);
+
+    for (int32 m = 0; m <= 10; m++) {
+      BaseFloat x_test = 0.1 * m;
+      BaseFloat y_test = a * x_test * x_test + b * x_test + c;
+      if (m == 5) {
+        // Log only the mid-point to keep the output short.
+        KALDI_VLOG(2) << "Checking y_test <= y_max: "
+                      << y_test << " <= " << y_max << " [x_max = "
+                      << x_max << "]";
+      }
+      // The bound has to hold at every probed point, not just the logged one.
+      KALDI_ASSERT(y_test <= y_max + 1.0e-05);
+    }
+  }
+}
+
+
+// Exercises SinusoidDetector on signals that consist of a single random
+// sinusoid.  For each of 40 trials the detected sinusoid is cancelled from the
+// signal (by adding it with its phase shifted by pi); more than 1% residual
+// energy is only tolerated when the true frequency lies within 1/num_samp,
+// relative to the Nyquist, of zero or of the Nyquist itself.
+void UnitTestSinusoidDetector() {
+  BaseFloat samp_freq = 4000 + (rand() % 2000);
+  int32 num_samp = 128 + rand() % 400;
+  SinusoidDetector detector(samp_freq, num_samp);
+
+  for (int32 trial = 0; trial < 40; trial++) {
+    // Draw a random reference sinusoid.  For debugging, a fixed one can be
+    // substituted, e.g. Sinusoid(1.3, 312.5, M_PI * 0.0) or
+    // Sinusoid(1.3, 324.125, M_PI * 0.5).
+    BaseFloat nyquist = samp_freq * 0.5;
+    BaseFloat freq = nyquist * RandUniform();
+    BaseFloat amplitude = RandUniform();
+    BaseFloat phase = M_2PI * RandUniform();
+    Sinusoid ref_sinusoid(amplitude, freq, phase);
+
+    Vector<BaseFloat> signal(num_samp);
+    AddSinusoid(samp_freq, ref_sinusoid, &signal);
+    BaseFloat orig_energy = VecVec(signal, signal);
+
+    KALDI_LOG << "Real frequency is " << freq << ", amplitude "
+              << amplitude << ", phase " << phase << ", samp-freq "
+              << samp_freq;
+    KALDI_LOG << "Total energy of signal (with sinusoid) is " << orig_energy;
+
+    Sinusoid detected;
+    BaseFloat min_energy = 0.0;
+    BaseFloat projected_energy = detector.DetectSinusoid(min_energy,
+                                                         signal, &detected);
+
+    // Shifting the phase by pi negates the sinusoid, so adding it subtracts
+    // the detected component from a copy of the signal.
+    detected.phase += M_PI;
+    Vector<BaseFloat> new_signal(signal);
+    AddSinusoid(samp_freq, detected, &new_signal);
+
+    BaseFloat remaining_energy = VecVec(new_signal, new_signal);
+    BaseFloat delta_energy = orig_energy - remaining_energy;
+    KALDI_LOG << "Projected delta energy = " << projected_energy
+              << " and observed was " << delta_energy;
+
+    if (remaining_energy > 0.01 * orig_energy) {
+      KALDI_WARN << "Energy remaining is " << remaining_energy
+                 << " vs. original " << orig_energy;
+      // Such a large residual is accepted only for extreme frequencies,
+      // i.e. very close to zero or to the Nyquist.
+      BaseFloat relative_freq = freq / nyquist;
+      BaseFloat inv_num_samp = 1.0 / num_samp;
+      KALDI_ASSERT(relative_freq < inv_num_samp ||
+                   relative_freq > 1.0 - inv_num_samp);
+    }
+  }
+}
+
+// Same idea as UnitTestSinusoidDetector(), but the sinusoid is added on top of
+// random noise.  Cancelling the detected sinusoid must reduce the energy by
+// at least 99% of (total energy - noise energy), unless the true frequency
+// lies within 1/num_samp, relative to the Nyquist, of zero or of the Nyquist.
+void UnitTestSinusoidDetectorNoisy() {
+  BaseFloat samp_freq = 4000 + (rand() % 2000);
+  int32 num_samp = 128 + rand() % 400;
+  SinusoidDetector detector(samp_freq, num_samp);
+
+  for (int32 trial = 0; trial < 40; trial++) {
+    // Start from random noise and remember how much energy it carries.
+    Vector<BaseFloat> signal(num_samp);
+    signal.SetRandn();
+    BaseFloat rand_energy = VecVec(signal, signal);
+
+    // Draw a random reference sinusoid.  For debugging, a fixed one can be
+    // substituted, e.g. Sinusoid(1.3, 312.5, M_PI * 0.0) or
+    // Sinusoid(1.3, 324.125, M_PI * 0.5).
+    BaseFloat nyquist = samp_freq * 0.5;
+    BaseFloat freq = nyquist * RandUniform();
+    BaseFloat amplitude = 10.0 * RandUniform();
+    BaseFloat phase = M_2PI * RandUniform();
+    Sinusoid ref_sinusoid(amplitude, freq, phase);
+
+    AddSinusoid(samp_freq, ref_sinusoid, &signal);
+    BaseFloat tot_energy = VecVec(signal, signal);
+
+    KALDI_LOG << "Real frequency is " << freq << ", amplitude "
+              << amplitude << ", phase " << phase << ", samp-freq "
+              << samp_freq;
+    KALDI_LOG << "Total energy of signal (with noise + sinusoid) is " << tot_energy;
+
+    Sinusoid detected;
+    BaseFloat min_energy = 0.0;
+    BaseFloat projected_energy = detector.DetectSinusoid(min_energy,
+                                                         signal, &detected);
+
+    // Shifting the phase by pi negates the sinusoid, so adding it subtracts
+    // the detected component from a copy of the signal.
+    detected.phase += M_PI;
+    Vector<BaseFloat> new_signal(signal);
+    AddSinusoid(samp_freq, detected, &new_signal);
+
+    BaseFloat delta_energy = tot_energy - VecVec(new_signal, new_signal);
+    KALDI_LOG << "Projected delta energy = " << projected_energy
+              << " and observed was " << delta_energy;
+
+    // Energy the sinusoid added on top of the noise, with 1% slack.
+    BaseFloat min_energy_diff = 0.99 * (tot_energy - rand_energy);
+
+    if (delta_energy < min_energy_diff) {
+      KALDI_WARN << "Energy reduction is " << delta_energy
+                 << " vs. expected " << (tot_energy - rand_energy);
+      // Such a shortfall is accepted only for extreme frequencies,
+      // i.e. very close to zero or to the Nyquist.
+      BaseFloat relative_freq = freq / nyquist;
+      BaseFloat inv_num_samp = 1.0 / num_samp;
+      KALDI_ASSERT(relative_freq < inv_num_samp ||
+                   relative_freq > 1.0 - inv_num_samp);
+    }
+  }
+}
+
+
+// Adds gain * sin(2 pi freq t / samp_freq) to (*signal)(t) for every sample t,
+// where freq is base_freq scaled by (1 + e) and e is drawn uniformly from
+// [-tolerance, tolerance].
+//
+//  base_freq  nominal tone frequency.
+//  samp_freq  sampling frequency of the signal (same units as base_freq).
+//  tolerance  maximum relative frequency perturbation.
+//  gain       amplitude of the added sine.
+//  signal     signal to modify in place.
+void AddFreqToSignal(BaseFloat base_freq,
+                     BaseFloat samp_freq,
+                     BaseFloat tolerance,
+                     BaseFloat gain,
+                     VectorBase<BaseFloat> *signal) {
+  BaseFloat error_scale = (2 * RandUniform() - 1) * tolerance;
+  BaseFloat freq = base_freq * (1.0 + error_scale);
+  KALDI_VLOG(3) << "base-freq = " << base_freq << ", factor = " << error_scale;
+  // Use M_2PI, like the rest of this file, rather than a truncated literal for
+  // pi: the truncation would add a small systematic error to the frequency.
+  const double radians_per_sample = M_2PI * freq / samp_freq;
+  for (int32 i = 0; i < signal->Dim(); i++)
+    (*signal)(i) += gain * sin(radians_per_sample * i);
+}
+
+
+// Generates a random test signal made of an odd number of consecutive
+// segments ("events").  Every segment is first filled with random noise
+// (SetRandn).  Even-indexed segments are left as noise only; odd-indexed
+// segments additionally get either a single dial-tone frequency or a pair of
+// DTMF frequencies added via AddFreqToSignal().
+//
+//  sampling_rate  sampling frequency used to size the segments and to
+//                 generate the tones.
+//  signal         output; resized and set to the concatenation of all segments.
+//  ref_output     output; resized to one entry per segment.  For tone segments
+//                 freq1 (and freq2 for DTMF) is set to the nominal, unperturbed
+//                 frequency.  Entries for noise-only segments are not written
+//                 here -- presumably they keep default values meaning "no
+//                 tone"; confirm against MultiSinusoidDetectorOutput.
+void GenerateDtmfTestCase(
+    BaseFloat sampling_rate,
+    Vector<BaseFloat> *signal,
+    std::vector<MultiSinusoidDetectorOutput> *ref_output) {
+  // the "ref_output" should correlate with the first of each run of frames with the same label.
+  
+  BaseFloat min_duration_secs = 0.04;  // min duration of dtmf or non-tone segments.
+  BaseFloat min_dialtone_duration_secs = 0.1;
+  // Maximum relative frequency perturbation passed to AddFreqToSignal() for
+  // DTMF tones and for dial tones respectively.
+  BaseFloat frequency_tolerance = 0.035;
+  BaseFloat dialtone_frequency_tolerance = 0.4 * (440.0  - 425.0) / 440.0;
+
+  int32 num_events = 2 * (5 + rand() % 5) + 1; // odd number.
+  int32 tot_signal_dim = 0;
+
+  ref_output->resize(num_events);
+  std::vector<Vector<BaseFloat> > all_signals(num_events);
+  for (int32 i = 0; i < num_events; i++) {
+    MultiSinusoidDetectorOutput &this_output = (*ref_output)[i];
+    Vector<BaseFloat> &this_signal = all_signals[i];
+    // Duration is 1, 2 or 3 times the minimum duration.
+    BaseFloat duration_secs = min_duration_secs * (1 + rand() % 3);
+    int32 num_samp = sampling_rate * duration_secs;
+    tot_signal_dim += num_samp;
+
+    this_signal.Resize(num_samp);
+    this_signal.SetRandn();
+    
+    // Note the deliberate empty statement: even-indexed events stay noise-only.
+    if (i % 2 == 0); // do nothing;
+    else if (rand() % 2 == 0 && duration_secs >= min_dialtone_duration_secs) {
+      // dialtone.
+      // Each condition in the chain below draws a fresh random number.
+      BaseFloat freq;
+      if (rand() % 3 == 0) { freq = 350; }
+      else if (rand() % 2 == 0) { freq = 440; }
+      else { freq = 425; }
+      BaseFloat gain = 10.0 * (1.0 + rand() % 2);
+      AddFreqToSignal(freq, sampling_rate, dialtone_frequency_tolerance,
+                      gain, &(this_signal));
+      this_output.freq1 = freq;
+    } else {
+      // dtmf.  use a subset of tones as examples.
+      BaseFloat freq1, freq2;
+      // NOTE(review): c is assigned below but never read in this function.
+      char c;
+      if (rand() % 4 == 0) {
+        c = '8'; freq1 = 852; freq2 = 1336;
+      } else if (rand() % 3 == 0) {
+        c = '0'; freq1 = 941; freq2 = 1336;
+      } else if (rand() % 2 == 0) {
+        c = '#'; freq1 = 941; freq2 = 1477;
+      } else {
+        c = '1'; freq1 = 697; freq2 = 1209;
+      }
+      // The second tone's gain is 0.8 to 1.2 times that of the first.
+      BaseFloat base_gain = 10.0 * (1.0 +  (rand() % 3)),
+          gain_factor = 1.0 + 0.1 * (-2 + rand() % 5),
+          gain1 = base_gain, gain2 = gain_factor * base_gain;
+      AddFreqToSignal(freq1, sampling_rate, frequency_tolerance, gain1,
+                      &(this_signal));
+      AddFreqToSignal(freq2, sampling_rate, frequency_tolerance, gain2,
+                      &(this_signal));
+      this_output.freq1 = freq1;
+      this_output.freq2 = freq2;
+    }
+  }
+  // Concatenate the per-event segments into the output signal.
+  signal->Resize(tot_signal_dim);
+  int32 signal_offset = 0;
+  for (int32 i = 0; i < num_events; i++) {
+    int32 this_dim = all_signals[i].Dim();
+    signal->Range(signal_offset, this_dim).CopyFromVec(all_signals[i]);
+    signal_offset += this_dim;
+  }
+}
+
+
+/*
+
+// Just a basic test to check that it produces output.
+
+void UnitTestToneDetection() {
+  BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000;
+  ToneDetectionConfig config;
+  
+  int32 num_frames = 100 + (rand() % 100);
+  int32 frame_length = static_cast<int32>(samp_freq * config.frame_length_secs);
+
+  int32 num_samples = frame_length * num_frames + rand() % frame_length;
+  Vector<BaseFloat> signal(num_samples);
+  signal.SetRandn();
+
+  ToneDetector tone_detector(config, samp_freq);
+
+  int32 signal_offset = 0;
+
+  std::vector<ToneDetectorOutput*> tone_detector_output;
+  
+  while (signal_offset < num_samples) {
+    int32 signal_remaining = num_samples - signal_offset,
+        chunk_size = std::min<int32>((rand() % 200) + 100,
+                                     signal_remaining);
+    SubVector<BaseFloat> signal_part(signal, signal_offset, chunk_size);
+    tone_detector.AcceptWaveform(signal_part);
+    signal_offset += chunk_size;
+
+    if (signal_offset == num_samples)
+      tone_detector.WaveformFinished();
+    while (!tone_detector.Done() &&
+           (rand() % 2 == 0 || signal_offset == num_samples)) {
+      ToneDetectorOutput *output = new ToneDetectorOutput();
+      tone_detector.GetNextFrame(output);
+      tone_detector_output.push_back(output);
+    }
+  }
+  KALDI_ASSERT(signal_offset == num_samples);  
+  
+  Vector<BaseFloat> signal2(signal.Dim());
+  signal_offset = 0;
+  for (int32 i = 0; i < tone_detector_output.size(); i++) {
+    ToneDetectorOutput *output = tone_detector_output[i];
+    signal2.Range(signal_offset,
+                  output->signal.Dim()).CopyFromVec(output->signal);
+    signal_offset += output->signal.Dim();
+    if (output->frame_type != 'n') {
+      KALDI_ERR << "Frame " << i << " badly classified, should be 'n', is: "
+                << output->frame_type;
+    }
+    delete output;
+  }
+  KALDI_ASSERT(signal_offset == num_samples &&
+               signal.ApproxEqual(signal2, 1.0e-10));
+
+}
+
+std::ostringstream & operator << (std::ostringstream &ostr,
+             const ToneDetectorOutput &output) {
+  ostr << output.frame_type;
+  if (output.frame_type == 'd')
+    ostr << output.dialtone_freq;
+  ostr << ' ';
+  return ostr;
+}
+
+*/
+
+
+// Generates a signal that contains dial-tone and DTMF segments, feeds it to
+// MultiSinusoidDetector in randomly sized chunks, and logs every output frame.
+// The reference output from GenerateDtmfTestCase() is produced but is not
+// compared against the detector output here.
+void UnitTestToneDetection2() {
+  BaseFloat samp_freq = (rand() % 2) == 0 ? 8000 : 16000;
+  Vector<BaseFloat> signal;
+  std::vector<MultiSinusoidDetectorOutput> ref_output;
+  GenerateDtmfTestCase(samp_freq, &signal, &ref_output);
+
+  MultiSinusoidDetectorConfig config;
+
+  int32 num_samples = signal.Dim();
+  KALDI_ASSERT(num_samples > 0);
+
+  MultiSinusoidDetector multi_sinusoid_detector(config, samp_freq);
+
+  // Frames collected from the detector; each one is deleted after logging.
+  std::vector<MultiSinusoidDetectorOutput*> frames;
+
+  int32 signal_offset = 0;
+  while (signal_offset < num_samples) {
+    // Chunk of 100..299 samples, truncated to what is left of the signal.
+    int32 chunk_size = (rand() % 200) + 100;
+    if (chunk_size > num_samples - signal_offset)
+      chunk_size = num_samples - signal_offset;
+
+    SubVector<BaseFloat> chunk(signal, signal_offset, chunk_size);
+    multi_sinusoid_detector.AcceptWaveform(chunk);
+    signal_offset += chunk_size;
+
+    const bool at_end = (signal_offset == num_samples);
+    if (at_end)
+      multi_sinusoid_detector.WaveformFinished();
+
+    // Pull out frames: a random number of them while more input is still to
+    // come, and all remaining ones once the whole signal has been supplied.
+    while (!multi_sinusoid_detector.Done() &&
+           (rand() % 2 == 0 || at_end)) {
+      MultiSinusoidDetectorOutput *frame = new MultiSinusoidDetectorOutput();
+      multi_sinusoid_detector.GetNextFrame(frame);
+      frames.push_back(frame);
+    }
+  }
+  KALDI_ASSERT(signal_offset == num_samples);
+
+  for (size_t i = 0; i < frames.size(); i++) {
+    MultiSinusoidDetectorOutput *frame = frames[i];
+    KALDI_LOG << "tot-energy = " << frame->tot_energy
+              << ", freq1 " << frame->freq1 << ", energy1 " << frame->energy1
+              << ", freq2 " << frame->freq2 << ", energy2 " << frame->energy2;
+    delete frame;
+  }
+}
+
+
+
+}  // namespace kaldi
+
+// Runs all the sinusoid-detection unit tests with the verbose level set to 4.
+int main() {
+  using namespace kaldi;
+
+  SetVerboseLevel(4);
+
+  UnitTestToneDetection2();
+  UnitTestAddSinusoid();
+  UnitTestQuadraticMaximizeEqualSpaced();
+  UnitTestQuadraticMaximize();
+
+  // The detector tests draw random parameters, so run them several times.
+  int32 rounds_left = 10;
+  while (rounds_left-- > 0) {
+    UnitTestSinusoidDetector();
+    UnitTestSinusoidDetectorNoisy();
+  }
+
+  return 0;
+}
--- a/src/feat/sinusoid-detection.cc
+++ b/src/feat/sinusoid-detection.cc
@ -0,0 +1,945 @@
+// feat/sinusoid-detection.cc
+
+// Copyright    2015  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABILITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#include "feat/sinusoid-detection.h"
+#include "matrix/matrix-functions.h"
+#include "feat/resample.h"
+
+namespace kaldi {
+
+
+
+// Adds the given sinusoid to the signal in place:
+//   (*signal)(t) += amplitude * cos(2 pi freq / samp_freq * t + phase).
+// The samples are generated by repeatedly multiplying a complex number by a
+// fixed rotation, with the starting value recomputed from cos/sin at the
+// beginning of every block of 100 samples.
+void AddSinusoid(BaseFloat samp_freq,
+                 const Sinusoid &sinusoid,
+                 VectorBase<BaseFloat> *signal) {
+  // (rot_real, rot_im) is the complex number exp(i * 2 pi freq / samp_freq),
+  // i.e. the rotation applied when advancing by one sample.
+  BaseFloat rot_real = cos(M_2PI * sinusoid.freq / samp_freq),
+      rot_im = sin(M_2PI * sinusoid.freq / samp_freq);
+  BaseFloat *out = signal->Data();
+  const int32 num_samples = signal->Dim(), block_len = 100;
+
+  // Work block by block; restarting from an exactly computed phase at each
+  // block boundary keeps rounding drift from accumulating.
+  for (int32 start = 0; start < num_samples; start += block_len) {
+    const int32 stop = std::min(num_samples, start + block_len);
+    double start_phase =
+        sinusoid.phase + M_2PI * start * sinusoid.freq / samp_freq;
+    // (cur_real, cur_im) starts as amplitude * exp(i * start_phase) and is
+    // rotated once per sample; its real part is the sample to add.
+    BaseFloat cur_real = sinusoid.amplitude * cos(start_phase),
+        cur_im = sinusoid.amplitude * sin(start_phase);
+    for (int32 t = start; t < stop; t++) {
+      out[t] += cur_real;
+      ComplexMul(rot_real, rot_im, &cur_real, &cur_im);  // cur *= rot.
+    }
+  }
+}
+
+
+// static
+// Fits the quadratic y = a x^2 + b x + c through the points (0, y0), (1, y1)
+// and (2, y2), and finds its maximum over the interval [0, 2].
+//
+//  y0, y1, y2  values of the function at x = 0, 1 and 2.
+//  x_max       output; location of the maximum, always within [0, 2].
+//  y_max       output; value of the quadratic at *x_max.
+void SinusoidDetector::QuadraticMaximizeEqualSpaced(
+    BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat *x_max, BaseFloat *y_max) {
+  // Let the function be y = a x^2 + b x + c, and
+  // suppose we have the values of y(0), y(1) and y(2).
+  // We have y0 = c, y1 = a + b + c, and y2 = 4a + 2b + c,
+  // so c = y0.
+  // Also, y2 - 2 y1 = 2a - c, so
+  // a = (y2 - 2 y1 + c) / 2, and
+  // b = y1 - a - c.
+  BaseFloat c = y0, a = 0.5 * (y2 - 2 * y1 + c), b = y1 - a - c;
+  if (a >= 0) {
+    // The function has no interior maximum, so the maximum over [0, 2] is at
+    // one of the end points.
+    if (y0 > y2) {
+      *x_max = 0;
+      *y_max = y0;
+    } else {
+      *x_max = 2;
+      *y_max = y2;
+    }
+  } else {
+    // derivative y' = 2a x + b.  y' = 0 at x = -b / 2 a.
+    BaseFloat x = -b / (2.0 * a);
+    if (x <= 0.0) {
+      // Stationary point is left of the interval: clamp to x = 0.
+      *x_max = 0;
+      *y_max = y0;
+    } else if (x >= 2.0) {
+      // Stationary point is right of the interval: clamp to x = 2, so that
+      // *x_max is the location at which *y_max = y2 is actually attained.
+      *x_max = 2;
+      *y_max = y2;
+    } else {
+      *x_max = x;
+      *y_max = a * x * x + b * x + c;
+    }
+  }
+}
+
+// static
+// Fits the quadratic y = a x^2 + b x + c through the points (0, y0),
+// (x1, y1) and (1, y2), where 0 < x1 < 1, and finds its maximum over the
+// interval [0, 1].  The location is written to *x_max and the value to *y_max.
+void SinusoidDetector::QuadraticMaximize(
+    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat *x_max, BaseFloat *y_max) {
+  // From y0 = c, y1 = x1^2 a + x1 b + c and y2 = a + b + c we get
+  //   x1 y2 - y1 = a (x1 - x1^2) + (x1 - 1) c,
+  // which gives a; then b = y2 - a - c.
+  const BaseFloat c = y0;
+  const BaseFloat a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1);
+  const BaseFloat b = y2 - a - c;
+
+  // TODO: remove these lines.
+  AssertEqual(y1, a * x1 * x1 + b * x1 + c);
+  AssertEqual(y2, a + b + c);
+
+  if (a >= 0) {
+    // No interior maximum: pick the larger of the two end points.
+    if (y0 > y2) {
+      *x_max = 0;
+      *y_max = y0;
+    } else {
+      *x_max = 1.0;
+      *y_max = y2;
+    }
+    return;
+  }
+
+  // The derivative 2a x + b vanishes at x = -b / (2a).
+  const BaseFloat stationary = -b / (2.0 * a);
+  if (stationary <= 0.0) {
+    // Left of the interval: clamp to x = 0.
+    *x_max = 0.0;
+    *y_max = y0;
+    return;
+  }
+  if (stationary >= 1.0) {
+    // Right of the interval: clamp to x = 1.
+    *x_max = 1.0;
+    *y_max = y2;
+    return;
+  }
+  *x_max = stationary;
+  *y_max = a * stationary * stationary + b * stationary + c;
+}
+
+//static
+// Fits the quadratic y = a x^2 + b x + c through the points (0, y0),
+// (x1, y1) and (1, y2) and returns its value at x.  x1 must lie in [0, 1].
+// In the degenerate cases x1 == 0 and x1 == 1 no quadratic is fitted and
+// y0, respectively y2, is returned regardless of x.
+BaseFloat SinusoidDetector::QuadraticInterpolate(
+    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat x) {
+  KALDI_ASSERT(x1 >= 0.0 && x1 <= 1.0);
+  if (x1 == 0.0)
+    return y0;
+  if (x1 == 1.0)
+    return y2;
+
+  // From y0 = c, y1 = x1^2 a + x1 b + c and y2 = a + b + c we get
+  //   x1 y2 - y1 = a (x1 - x1^2) + (x1 - 1) c,
+  // which gives a; then b = y2 - a - c.
+  const BaseFloat c = y0;
+  const BaseFloat a = (x1 * y2 - y1 - (x1 - 1.0) * c) / (x1 - x1*x1);
+  const BaseFloat b = y2 - a - c;
+  return a * x * x + b * x + c;
+}
+
+// Fills the two vectors, which must have the same dimension, with
+//   (*cos_vec)(t) = cos(2 pi t freq / samp_freq)
+//   (*sin_vec)(t) = sin(2 pi t freq / samp_freq).
+// The values are generated by repeatedly multiplying a complex number by a
+// fixed rotation, with the starting value recomputed from cos/sin at the
+// beginning of every block of 100 samples.
+//static
+void SinusoidDetector::CreateCosAndSin(BaseFloat samp_freq,
+                                        BaseFloat freq,
+                                        VectorBase<BaseFloat> *cos_vec,
+                                        VectorBase<BaseFloat> *sin_vec) {
+  int32 dim = cos_vec->Dim();
+  KALDI_ASSERT(dim == sin_vec->Dim());
+  const int32 block_len = 100;
+  BaseFloat *cos_out = cos_vec->Data(), *sin_out = sin_vec->Data();
+
+  // (rot_real, rot_im) is exp(i * 2 pi freq / samp_freq), the rotation applied
+  // when advancing by one sample.
+  BaseFloat rot_real = cos(M_2PI * freq / samp_freq),
+      rot_im = sin(M_2PI * freq / samp_freq);
+
+  // Work block by block; restarting from an exactly computed phase at each
+  // block boundary keeps rounding drift from accumulating.
+  for (int32 start = 0; start < dim; start += block_len) {
+    const int32 stop = std::min(dim, start + block_len);
+    double start_phase = M_2PI * start * freq / samp_freq;
+    // (cur_real, cur_im) starts as exp(i * start_phase) and is rotated once
+    // per sample.
+    BaseFloat cur_real = cos(start_phase), cur_im = sin(start_phase);
+    for (int32 t = start; t < stop; t++) {
+      cos_out[t] = cur_real;
+      sin_out[t] = cur_im;
+      ComplexMul(rot_real, rot_im, &cur_real, &cur_im);  // cur *= rot.
+    }
+  }
+}
+
+// Constructs a detector for signals sampled at samp_freq that contain exactly
+// num_samp samples (DetectSinusoid() asserts that the signal dimension equals
+// num_samples_), and precomputes its coefficients via ComputeCoefficients().
+SinusoidDetector::SinusoidDetector(BaseFloat samp_freq,
+                                     int32 num_samp): 
+    samp_freq_(samp_freq),
+    num_samples_(num_samp),
+    // The FFT is taken over the signal padded to a power-of-two length.
+    num_samples_padded_(RoundUpToNearestPowerOfTwo(num_samp)),
+    fft_(num_samples_padded_),
+    // factor1_ and factor2_ are the multipliers that SelfTest() applies to the
+    // best coarse-bin energies to obtain upper bounds on the final energy.
+    factor1_(3.1),
+    factor2_(1.42) {
+  ComputeCoefficients();
+}
+
+// Diagnostic check of the bounds implied by factor1_ and factor2_.  It warns
+// if the final optimized energy exceeds factor1_ times the largest energy
+// found at every 4th bin, or factor2_ times the largest energy found at the
+// valid even-numbered bins.  Nothing is modified; the only effect is logging.
+//
+//  signal        not used by this function.
+//  info          per-bin information, indexed by bin.
+//  final_freq    final optimized frequency (only used in the messages).
+//  final_energy  energy at the final optimized frequency.
+void SinusoidDetector::SelfTest(
+    const VectorBase<BaseFloat> &signal,
+    const std::vector<InfoForBin> &info,
+    BaseFloat final_freq,
+    BaseFloat final_energy) {
+  // Largest bin index visited below; all visited indices are even.
+  const int32 last_bin = num_samples_padded_ * 2;
+
+  // Check against factor1_, using every 4th bin.
+  BaseFloat coarse_cutoff = 0.0;
+  for (int32 k = 0; k <= last_bin; k += 4)
+    coarse_cutoff = std::max(coarse_cutoff, info[k].energy);
+  const BaseFloat coarse_bound = factor1_ * coarse_cutoff;
+  if (final_energy > coarse_bound) {
+    KALDI_WARN << "Self-testing failed [factor1]: " 
+               << final_energy << " > " << coarse_bound
+               << ", num-samples is " << num_samples_
+               << ", freq/nyquist = "
+               << (final_freq / (samp_freq_ * 0.5))
+               << "- would require factor1 >= "
+               << (final_energy / coarse_cutoff);
+  }
+
+  // Check against factor2_, using every 2nd bin that has been computed.
+  BaseFloat fine_cutoff = 0.0;
+  for (int32 k = 0; k <= last_bin; k += 2) {
+    if (!info[k].valid)
+      continue;
+    fine_cutoff = std::max(fine_cutoff, info[k].energy);
+  }
+  const BaseFloat fine_bound = factor2_ * fine_cutoff;
+  if (final_energy > fine_bound) {
+    KALDI_WARN << "Self-testing failed [factor2]: " 
+               << final_energy << " > " << fine_bound
+               << ", num-samples is " << num_samples_
+               << ", freq/nyquist = "
+               << (final_freq / (samp_freq_ * 0.5))
+               << "- would require factor2 >= "
+               << (final_energy / fine_cutoff);
+  }
+}
+
+
+// Finds the frequency with the highest interpolated energy.  For every run of
+// three consecutive bins [b, b+1, b+2] whose info is valid, a quadratic is
+// fitted to the three energies and maximized over that range.
+//
+//  info        per-bin information, indexed by bin.
+//  bin_out     output; bin to the left of the best point, or -1 if no
+//              positive maximum was found.
+//  offset_out  output; offset of the best point from *bin_out.  Only written
+//              when a maximum was found.
+//  Returns the best interpolated energy, or 0.0 if none was found.
+BaseFloat SinusoidDetector::OptimizeFrequency(
+    const std::vector<InfoForBin> &info,
+    int32 *bin_out,
+    BaseFloat *offset_out) const {
+  *bin_out = -1;
+  BaseFloat best_energy = 0.0;
+  const int32 last_bin = num_samples_padded_ * 2;
+
+  for (int32 left = 0; left + 2 <= last_bin; left++) {
+    const InfoForBin &p0 = info[left];
+    const InfoForBin &p1 = info[left + 1];
+    const InfoForBin &p2 = info[left + 2];
+    if (!(p0.valid && p1.valid && p2.valid))
+      continue;
+
+    // peak_x is measured in bins relative to "left", and lies in [0, 2].
+    BaseFloat peak_x, peak_y;
+    QuadraticMaximizeEqualSpaced(p0.energy, p1.energy, p2.energy,
+                                 &peak_x, &peak_y);
+    if (!(peak_y > best_energy))
+      continue;
+
+    best_energy = peak_y;
+    // Express the location as (bin, offset).
+    if (peak_x <= 1.0) {
+      *bin_out = left;
+      *offset_out = peak_x;
+    } else {
+      *bin_out = left + 1;
+      *offset_out = peak_x - 1;
+    }
+  }
+  return best_energy;
+}
+
+
+// Detects the dominant sinusoid in "signal".
+//
+//  min_energy  minimum energy the detected sinusoid must account for; it is
+//              also passed on to the candidate-bin searches.
+//  signal      input signal; its dimension must equal num_samples_.
+//  sinusoid    output; written only when a sinusoid with energy of at least
+//              min_energy is found.
+//  Returns the energy accounted for by the detected sinusoid, or 0.0 if none
+//  was detected (in which case *sinusoid is left untouched).
+BaseFloat SinusoidDetector::DetectSinusoid(
+    BaseFloat min_energy,
+    const VectorBase<BaseFloat> &signal,
+    Sinusoid *sinusoid) {
+  // An all-zero signal contains no sinusoid.  The check on signal(0) avoids
+  // computing the norm in the common case of a nonzero first sample.
+  if (signal(0) == 0.0 && signal.Norm(2.0) == 0.0)
+    return 0.0;
+  KALDI_ASSERT(signal.Dim() == num_samples_);
+  // Copy the signal into a buffer of the padded length and transform it in
+  // place.
+  Vector<BaseFloat> fft(num_samples_padded_);
+  fft.Range(0, num_samples_).CopyFromVec(signal);
+  bool forward = true;
+  fft_.Compute(fft.Data(), forward);
+
+  std::vector<InfoForBin> info;
+  ComputeCoarseInfo(fft, &info);
+  // we now have info for the "coarse" bins.
+
+  // each element b of "bins" will be a multiple of 4: it's possible
+  // that the best frequency is in the range [b, b+4]
+  std::vector<int32> bins;
+  FindCandidateBins(min_energy, info, &bins);
+
+  if (bins.empty())
+    return 0.0;  // not enough energy in signal.
+
+  // NOTE(review): the comment above says the elements of "bins" are multiples
+  // of 4, which are the indices ComputeCoarseInfo() has already filled in,
+  // yet info is (re)computed at info[bin] itself.  Confirm against
+  // FindCandidateBins() which indices it actually returns.
+  for (size_t i = 0; i < bins.size(); i++) {
+    int32 bin = bins[i];
+    ComputeBinInfo(signal, bin, &(info[bin]));
+  }
+
+  // Second round of candidate selection, using the info computed so far.
+  std::vector<int32> bins2;
+  FindCandidateBins2(min_energy, info, &bins2);
+
+  for (size_t i = 0; i < bins2.size(); i++) {
+    int32 bin = bins2[i];
+    ComputeBinInfo(signal, bin, &(info[bin]));
+  }
+
+  // compute energy for the predicted-optimum point, which will usually be
+  // between bins, with an offset.
+  int32 bin;
+  BaseFloat offset;
+  
+  BaseFloat opt_energy = OptimizeFrequency(info,  &bin, &offset);
+
+  // OptimizeFrequency() returns 0.0 when it found no maximum; "bin" is then -1
+  // and "offset" has not been set.
+  if (opt_energy == 0.0)
+    return 0.0;
+
+  // One bin corresponds to samp_freq_ / (4 * num_samples_padded_) in frequency.
+  BaseFloat max_freq = (bin + offset) * samp_freq_ / (num_samples_padded_ * 4);
+  
+  KALDI_VLOG(4) << "Best frequency based on interpolation is "
+                << max_freq << ", best energy is "
+                << opt_energy << ", bin is " << bin;
+
+  OptimizedInfo final_info;
+  
+  FineOptimizeFrequency(signal, bin, offset, &info, &final_info);
+
+  // If the optimum ended up on the left edge of the bin, retry starting from
+  // the right edge of the bin below.
+  // the following while loop will rarely be accessed.
+  while (final_info.offset == 0.0 && bin > 0) {
+    bin--;
+    FineOptimizeFrequency(signal, bin, 1.0, &info, &final_info);
+  }
+
+  // Likewise, if it ended up on the right edge, retry starting from the left
+  // edge of the bin above.
+  // the following while loop will rarely be accessed.  
+  while (final_info.offset == 1.0 && bin < num_samples_padded_ * 2) {
+    bin++;
+    FineOptimizeFrequency(signal, bin, 0.0, &info, &final_info);
+  }
+
+  if (bin <= 1 || bin >= num_samples_padded_ * 2 - 2) {
+    // If we're in the lowest or next-to-lowest bin, or the highest or
+    // next-to-highest allowed bin (note, "bin" here is a range, and it can
+    // never have the value num_samples_padded_ * 2), we tend to get more
+    // estimation error than usual, so do another round of optimization.
+    FineOptimizeFrequency(signal, bin, final_info.offset, &info, &final_info);    
+  }
+  
+  BaseFloat final_freq = (final_info.bin + final_info.offset) * samp_freq_ / (num_samples_padded_ * 4);
+  KALDI_VLOG(4) << "Final optimized info is: freq " << final_freq
+                << ", cos coeff " << final_info.cos_coeff << ", sin coeff "
+                << final_info.sin_coeff << ", energy " << final_info.energy;
+
+  // The self-test only logs warnings, so it is run only at verbose level 2
+  // and above.
+  if (GetVerboseLevel() > 1)
+    SelfTest(signal, info, final_freq, final_info.energy);
+
+  if (final_info.energy >= min_energy) {
+    // Convert c cos(w t) + s sin(w t) into amplitude * cos(w t + phase):
+    // amplitude = sqrt(c^2 + s^2) and phase = -atan2(s, c).
+    sinusoid->amplitude = std::sqrt(final_info.cos_coeff * final_info.cos_coeff
+                                    + final_info.sin_coeff * final_info.sin_coeff);
+    sinusoid->freq = final_freq;
+    sinusoid->phase = -std::atan2(final_info.sin_coeff, final_info.cos_coeff);
+    KALDI_VLOG(4) << "Phase is " << sinusoid->phase << ", amplitude is "
+                  << sinusoid->amplitude << ", freq is " << sinusoid->freq;
+    return final_info.energy;
+  } else {
+    return 0.0;
+  }
+}
+
+
+/*
+  This function computes, for each of the original FFT bins, the amount of
+  energy in the signal that can be explained by a sinusoid at the corresponding
+  frequency.
+
+  Let f be the continuous-valued frequency.
+
+  Define the vector C_f as
+    C_f = [ c_0, c_1 ... c_n ]  where  c_k = cos(2 pi k f / samp_freq).   [obviously this notation depends on f].
+  and S_f the same thing with sin in place of cos.
+
+  Let the signal, as a vector, be V.
+  We want to maximize the (positive) energy-difference:
+       ||V||^2  - || V - c C_f - s S_f ||^2 
+  where c and s are the coefficients of C_f and S_f.
+  This quantity can be expanded as follows, where . means dot product.
+   \delta E =    -c^2 C_f.C_f - s^2 S_f.S_f - 2 c s C_f.S_f  + 2 c V.C_f + 2 s V.S_f.
+  which can be written as follows, where . means dot-product and ' means transpose:
+    \delta E   =   2 [c s] v  -  [c s] M [c s]' 
+  where M = [ C_f.C_f, C_f.S_f, C_f.S_f,  S_f.S_f ],
+    and v = [V.C_f,  V.S_f].
+  If M is invertible (i.e. for nonzero frequencies), this is maximized by
+   [c s] = M^-1 v
+  giving us the value.
+    \delta E = v' M^{-1} v.
+  We'll compute the inverse of M in advance, inside ComputeCoefficients(), using
+  the formula [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a].  For zero frequency and at
+  the Nyquist, M has the value [ a 0; 0 0 ], and we have the same type of expression
+  limited to the first dim of v, i.e. Minv = [ a^{-1} 0; 0 0 ], a kind of pseudo-inverse.
+ */
+
+// Fills in the entries of "info" that coincide with the FFT bins (every fourth
+// entry), using the FFT coefficients in "fft" as the dot products of the signal
+// with the cosine and sine at each bin frequency.
+void SinusoidDetector::ComputeCoarseInfo(
+    const Vector<BaseFloat> &fft,
+    std::vector<InfoForBin> *info) const {
+  // "info" is indexed at 4 times the resolution of the FFT itself.
+  info->resize(num_samples_padded_ * 2 + 1);
+
+  const BaseFloat *fft_ptr = fft.Data();
+  const int32 nyquist_bin = num_samples_padded_ / 2;
+
+  for (int32 fft_bin = 0; fft_bin <= nyquist_bin; fft_bin++) {
+    // Unpack the coefficient for this bin.  Element 0 is read as the (purely
+    // real) zero-frequency term and element 1 as the (purely real) Nyquist
+    // term; all other bins are read as (real, imaginary) pairs.
+    BaseFloat re, imag = 0.0;
+    if (fft_bin == 0) {
+      re = fft_ptr[0];
+    } else if (fft_bin == nyquist_bin) {
+      re = fft_ptr[1];
+    } else {
+      re = fft_ptr[fft_bin * 2];
+      imag = fft_ptr[fft_bin * 2 + 1];
+    }
+    // The vector v of the math above: dot products with cosine and sine.
+    BaseFloat v1 = re, v2 = -imag;
+
+    // Rows of Minv_ are at 4 times the FFT resolution, hence the factor of 4.
+    // Each row holds [a b d] of the symmetric matrix M^{-1} = [a b; b d].
+    const BaseFloat *inv_row = Minv_.RowData(fft_bin * 4);
+    BaseFloat a = inv_row[0], b = inv_row[1], d = inv_row[2];
+
+    // \delta E = v' M^{-1} v.
+    BaseFloat delta_e = v1 * v1 * a + v2 * v2 * d + 2 * v1 * v2 * b;
+
+    InfoForBin &entry = (*info)[fft_bin * 4];
+    entry.valid = true;
+    entry.cos_dot = re;
+    entry.sin_dot = -imag;
+    entry.energy = delta_e;
+  }
+}
+
+
+// Precomputes cos_, sin_, M_ and Minv_ for every frequency index in
+// [0, num_samples_padded_ * 2], i.e. at 4 times the resolution of the FFT bins.
+void SinusoidDetector::ComputeCoefficients() {
+  const int32 num_freq = num_samples_padded_ * 2 + 1;
+  cos_.Resize(num_freq, num_samples_);
+  sin_.Resize(num_freq, num_samples_);
+
+  // First pass: create the cosine and sine rows and record the dot products
+  // C_f.C_f and C_f.S_f for each frequency.
+  Vector<BaseFloat> cos_cos(num_freq), cos_sin(num_freq);
+  for (int32 k = 0; k < num_freq; k++) {
+    SubVector<BaseFloat> cos_row(cos_, k), sin_row(sin_, k);
+    BaseFloat freq = k * samp_freq_ / (num_samples_padded_ * 4);
+    CreateCosAndSin(samp_freq_, freq, &cos_row, &sin_row);
+    cos_cos(k) = VecVec(cos_row, cos_row);
+    cos_sin(k) = VecVec(cos_row, sin_row);
+  }
+
+  M_.Resize(num_freq, 3, kUndefined);
+  Minv_.Resize(num_freq, 3, kUndefined);
+
+  // Second pass: store M = [ a b; b d ] and its inverse, one row per frequency.
+  const int32 nyquist_index = num_freq - 1;
+  for (int32 k = 0; k < num_freq; k++) {
+    // d is S_f.S_f, obtained as num_samples_ - C_f.C_f since cos^2 + sin^2 = 1.
+    BaseFloat a = cos_cos(k), b = cos_sin(k), d = num_samples_ - a;
+    BaseFloat *M_row = M_.RowData(k);
+    BaseFloat *Minv_row = Minv_.RowData(k);
+    M_row[0] = a;
+    M_row[1] = b;
+    M_row[2] = d;
+    if (k != 0 && k != nyquist_index) {
+      BaseFloat inv_det = 1.0 / (a * d - b * b);
+      // check for NaN and inf.
+      KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
+      // use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
+      Minv_row[0] = d * inv_det;
+      Minv_row[1] = -b * inv_det;
+      Minv_row[2] = a * inv_det;
+    } else {
+      // Zero frequency and Nyquist: this is not really the inverse of M, but
+      // it gives the expression we want; it is like an inverse in just the
+      // first (cosine) dimension.
+      Minv_row[0] = 1.0 / a;
+      Minv_row[1] = 0.0;
+      Minv_row[2] = 0.0;
+    }
+  }
+}
+
+
+// Does fine optimization of the frequency within the range between the
+// quarter-bin indexes "bin" and "bin + 1" (these index "info", M_ and Minv_).
+//
+//   signal      The signal being analyzed.
+//   bin         Index of the left edge of the range.  Since bin + 1 is also
+//               used as an index, bin is at most num_samples_padded_ * 2 - 1.
+//   bin_offset  Starting guess for the location of the maximum inside the
+//               range, as a fraction of the range; it is clamped to
+//               [epsilon, 1 - epsilon] before use.
+//   info_in     Per-bin cache; entries bin and bin + 1 are computed here if
+//               they are not yet valid.
+//   opt_info    Output: the bin, the optimized offset, the energy reduction at
+//               that offset, and the cos and sin coefficients of the sinusoid.
+void SinusoidDetector::FineOptimizeFrequency(
+    const VectorBase<BaseFloat> &signal,
+    int32 bin,
+    BaseFloat bin_offset,
+    std::vector<InfoForBin> *info_in,
+    OptimizedInfo *opt_info) const {
+  std::vector<InfoForBin> &info = *info_in;
+  if (!info[bin].valid) ComputeBinInfo(signal, bin, &(info[bin]));
+  if (!info[bin+1].valid) ComputeBinInfo(signal, bin+1, &(info[bin+1]));
+
+  const BaseFloat epsilon = 0.02, delta = 0.001;
+
+  // If the offset is very close to the edges of the bin, move it
+  // closer to the center.  Otherwise we may have problems with the
+  // steps below.  The initial offset is only used as a starting point
+  // anyway, so this won't affect the final value much.
+  if (bin_offset < epsilon)
+    bin_offset = epsilon;
+  if (bin_offset > 1.0 - epsilon)
+    bin_offset = 1.0 - epsilon;
+  KALDI_VLOG(4) << "Initial bin offset = " << bin_offset << ", bin = " << bin;
+
+  // create cos and sin waves of the specified frequency.
+  BaseFloat freq = (bin + bin_offset) * samp_freq_ / (num_samples_padded_ * 4);
+  Vector<BaseFloat> c(num_samples_, kUndefined), s(num_samples_, kUndefined);
+  CreateCosAndSin(samp_freq_, freq, &c, &s);
+
+  // these a, b and d values are the elements of the M matrix at this frequency
+  // "freq", i.e. the matrix M_f [ a b; b d ].  This will be invertible because
+  // we have ensured that the frequency is not too close to zero or the Nyquist.
+  BaseFloat a = VecVec(c, c), b = VecVec(c, s), d = num_samples_ - a;
+  BaseFloat mid_inv_det = 1.0 / (a * d - b * b);
+  BaseFloat inv_a = d * mid_inv_det, inv_b = -b * mid_inv_det,
+      inv_d = a * mid_inv_det;
+
+  // v1 and v2 are the dot products of the signal with the cos and sin waves.
+  BaseFloat v1 = VecVec(c, signal), v2 = VecVec(s, signal);
+
+  // \delta E = v' M^{-1} v at the starting frequency.
+  BaseFloat delta_e = v1 * v1 * inv_a + v2 * v2 * inv_d + 2 * v1 * v2 * inv_b;
+
+  KALDI_VLOG(4) << "Actual energy-change at frequency " << freq << " is "
+                << delta_e;
+  // "freq" is frequency somewhere in the middle of the bin.
+
+  // Fit a quadratic through the energies at the two bin edges and at "freq",
+  // and take its maximum within the bin as the refined offset.
+  BaseFloat final_offset, final_energy;
+  QuadraticMaximize(bin_offset, info[bin].energy, delta_e, info[bin+1].energy,
+                    &final_offset, &final_energy);
+
+  KALDI_VLOG(4) << "After further optimizing, offset was " << final_offset
+                << " giving freq "
+                << ((bin+final_offset) * samp_freq_ / (num_samples_padded_*4))
+                << ", with energy " << final_energy;
+
+  // Use interpolation (using a quadratic function) to get the entries of the M
+  // matrix at the final, tuned frequency.  Interpolation on M is better than
+  // M^{-1}, as its elements are much better behaved as the frequency varies.
+  const BaseFloat *M_left_data = M_.RowData(bin),
+      *M_right_data = M_.RowData(bin + 1);
+
+  BaseFloat a_interp = QuadraticInterpolate(bin_offset, M_left_data[0], a, M_right_data[0],
+                                            final_offset);
+  BaseFloat b_interp = QuadraticInterpolate(bin_offset, M_left_data[1], b, M_right_data[1],
+                                            final_offset);
+  BaseFloat d_interp = QuadraticInterpolate(bin_offset, M_left_data[2], d, M_right_data[2],
+                                            final_offset);
+
+  // Now get the inverse of the M matrix at the final point.
+  BaseFloat a_inv_interp, b_inv_interp, d_inv_interp;
+
+  // The last range is [last_bin, last_bin + 1], whose right edge (index
+  // num_samples_padded_ * 2) is the Nyquist frequency.
+  const int32 last_bin = num_samples_padded_ * 2 - 1;
+  if ((bin == 0 && final_offset < delta) ||
+      (bin == last_bin && final_offset > 1.0 - delta)) {
+    // If we're extremely close to zero or the Nyquist, we'll have trouble
+    // inverting M; just invert in the 1st dimension (only have a cos
+    // component).
+    a_inv_interp = 1.0 / a_interp;
+    b_inv_interp = 0.0;
+    d_inv_interp = 0.0;
+  } else {
+    BaseFloat inv_det = 1.0 / (a_interp * d_interp - b_interp * b_interp);
+    // check for NaN and inf.
+    KALDI_ASSERT(inv_det == inv_det && inv_det - inv_det == 0.0);
+    // use: [a b;c d]^-1 = 1/(ad - bc) [d -b; -c a], special case where c = b.
+    a_inv_interp = d_interp * inv_det;
+    b_inv_interp = -b_interp * inv_det;
+    d_inv_interp = a_interp * inv_det;
+  }
+
+  // Interpolate the dot products with the signal in the same way as M.
+  BaseFloat v1_interp = QuadraticInterpolate(bin_offset, info[bin].cos_dot, v1,
+                                             info[bin+1].cos_dot, final_offset);
+  BaseFloat v2_interp = QuadraticInterpolate(bin_offset, info[bin].sin_dot, v2,
+                                             info[bin+1].sin_dot, final_offset);
+
+  opt_info->bin = bin;
+  opt_info->offset = final_offset;
+  // Recompute the energy-reduction using the more accurate interpolated values of
+  // v1 and v2 (the dot-products of the cos and sin with the signal), and
+  // of M.
+  opt_info->energy = v1_interp * v1_interp * a_inv_interp +
+      v2_interp * v2_interp * d_inv_interp +
+      2 * v1_interp * v2_interp * b_inv_interp;
+  // Compute the coefficients of the cos and sin in the optimal sinusoid, as
+  // M^{-1} v.
+  opt_info->cos_coeff = a_inv_interp * v1_interp + b_inv_interp * v2_interp;
+  opt_info->sin_coeff = b_inv_interp * v1_interp + d_inv_interp * v2_interp;
+}
+
+// Given valid "info" at the FFT bins (every fourth entry), appends to "bins"
+// the midpoint index of each pair of adjacent FFT bins whose energy, scaled up
+// by factor1_, could reach the cutoff.  The cutoff is the larger of
+// "min_energy" and the largest energy seen at any FFT bin.
+void SinusoidDetector::FindCandidateBins(
+    BaseFloat min_energy,
+    const std::vector<InfoForBin> &info,
+    std::vector<int32> *bins) const {
+  const int32 last_index = num_samples_padded_ * 2;
+
+  // Find the cutoff.
+  BaseFloat threshold = min_energy;
+  for (int32 k = 0; k <= last_index; k += 4) {
+    KALDI_ASSERT(info[k].valid);
+    if (threshold < info[k].energy)
+      threshold = info[k].energy;
+  }
+
+  // Keep the midpoint of every range [left, left + 4] that might beat it.
+  for (int32 left = 0; left < last_index; left += 4) {
+    const BaseFloat left_energy = info[left].energy,
+        right_energy = info[left + 4].energy;
+    const BaseFloat larger =
+        (left_energy < right_energy) ? right_energy : left_energy;
+    BaseFloat energy_upper_bound = factor1_ * larger;
+    if (energy_upper_bound >= threshold)
+      bins->push_back(left + 2);
+  }
+}
+
+
+// As FindCandidateBins, but at half-FFT-bin spacing (every second entry of
+// "info") and using factor2_.  Entries that are not valid are ignored, and a
+// midpoint is only appended to "bins2" if both of its neighbors are valid.
+void SinusoidDetector::FindCandidateBins2(
+    BaseFloat min_energy,
+    const std::vector<InfoForBin> &info,
+    std::vector<int32> *bins2) const {
+  const int32 last_index = num_samples_padded_ * 2;
+
+  // The cutoff is the larger of min_energy and the best valid energy so far.
+  BaseFloat threshold = min_energy;
+  for (int32 k = 0; k <= last_index; k += 2) {
+    if (!info[k].valid) continue;
+    if (threshold < info[k].energy)
+      threshold = info[k].energy;
+  }
+
+  for (int32 left = 0; left < last_index; left += 2) {
+    if (!(info[left].valid && info[left + 2].valid)) continue;
+    const BaseFloat left_energy = info[left].energy,
+        right_energy = info[left + 2].energy;
+    const BaseFloat larger =
+        (left_energy < right_energy) ? right_energy : left_energy;
+    BaseFloat energy_upper_bound = factor2_ * larger;
+    if (energy_upper_bound >= threshold)
+      bins2->push_back(left + 1);
+  }
+}
+      
+
+// Fills in "info" for the frequency with index "bin", by taking dot products
+// of the signal with row "bin" of cos_ and sin_ and combining them with row
+// "bin" of Minv_.  "info" must not already be valid.
+void SinusoidDetector::ComputeBinInfo(
+    const VectorBase<BaseFloat> &signal,
+    int32 bin,
+    InfoForBin *info) const {
+  KALDI_ASSERT(!info->valid);  // or wasted time.
+  info->valid = true;
+
+  // The two components of the vector v: dot products with cosine and sine.
+  const BaseFloat cos_proj = VecVec(cos_.Row(bin), signal);
+  info->cos_dot = cos_proj;
+  const BaseFloat sin_proj = VecVec(sin_.Row(bin), signal);
+  info->sin_dot = sin_proj;
+
+  // The row holds [a b d] of the symmetric matrix M^{-1} = [a b; b d].
+  const BaseFloat *inv_row = Minv_.RowData(bin);
+  const BaseFloat a = inv_row[0], b = inv_row[1], d = inv_row[2];
+
+  // \delta E = v' M^{-1} v.
+  const BaseFloat delta_e = cos_proj * cos_proj * a + sin_proj * sin_proj * d +
+      2 * cos_proj * sin_proj * b;
+  info->energy = delta_e;
+}
+
+
+// Sets up a detector for input sampled at "sampling_freq".  The frame length
+// and frame shift are converted from milliseconds to sample counts at the
+// subsampled rate (config.subsample_freq); the resampler converts from
+// sampling_freq to that rate, and the single-sinusoid detector is built for
+// frames of samples_per_frame_subsampled_ samples.  The config is validated
+// with Check() in the constructor body.
+MultiSinusoidDetector::MultiSinusoidDetector(
+    const MultiSinusoidDetectorConfig &config,
+    int32 sampling_freq)
+    : config_(config),
+      sample_freq_(sampling_freq),
+      samples_per_frame_subsampled_(
+          0.001 * config.frame_length_ms *
+          static_cast<BaseFloat>(config.subsample_freq)),
+      samples_shift_subsampled_(
+          0.001 * config.frame_shift_ms *
+          static_cast<BaseFloat>(config.subsample_freq)),
+      waveform_finished_(false),
+      samples_consumed_(0),
+      resampler_(sampling_freq,
+                 config.subsample_freq,
+                 config.subsample_filter_cutoff,
+                 config.subsample_filter_zeros),
+      detector_(config.subsample_freq, samples_per_frame_subsampled_) {
+  config.Check();
+}
+
+
+// Returns the object to its initial state: clears the "finished" flag and the
+// consumed-sample count, frees all buffered subsampled signal, and resets the
+// resampler.
+void MultiSinusoidDetector::Reset() {
+  waveform_finished_ = false;
+  samples_consumed_ = 0;
+  // The buffered chunks are owned by this object, so free each one.
+  for (size_t i = 0; i < subsampled_signal_.size(); i++)
+    delete subsampled_signal_[i];
+  subsampled_signal_.clear();
+  resampler_.Reset();
+}
+
+// Signals that no more input will arrive.  Flushes the resampler (by feeding
+// it an empty waveform with flush == true) and buffers whatever it outputs.
+// Must be called at most once between calls to Reset().
+void MultiSinusoidDetector::WaveformFinished() {
+  KALDI_ASSERT(!waveform_finished_ &&
+               "WaveformFinished() called twice.");
+
+  Vector<BaseFloat> no_input;
+  Vector<BaseFloat> *flushed = new Vector<BaseFloat>();
+  subsampled_signal_.push_back(flushed);
+  resampler_.Resample(no_input, true, flushed);
+  waveform_finished_ = true;
+
+  // Don't keep an empty chunk in the queue.
+  if (flushed->Dim() == 0) {
+    subsampled_signal_.pop_back();
+    delete flushed;
+  }
+}
+
+// Accepts a new chunk of input waveform: resamples it (without flushing the
+// resampler) and appends the result to the queue of buffered subsampled
+// signal, unless the resampler produced no output for this chunk.
+void MultiSinusoidDetector::AcceptWaveform(
+    const VectorBase<BaseFloat> &waveform) {
+  Vector<BaseFloat> *resampled = new Vector<BaseFloat>();
+  subsampled_signal_.push_back(resampled);
+  resampler_.Resample(waveform, false, resampled);
+
+  // Don't keep an empty chunk in the queue.
+  if (resampled->Dim() == 0) {
+    subsampled_signal_.pop_back();
+    delete resampled;
+  }
+}
+
+// Returns the number of buffered subsampled samples that have not yet been
+// consumed, capped at "max_samp".  Counting stops as soon as the cap is
+// exceeded, so not all chunks need to be visited.
+int32 MultiSinusoidDetector::NumSubsampledSamplesReady(int32 max_samp) const {
+  // samples_consumed_ counts samples already used from the first chunk, so it
+  // must be zero if there are no chunks and less than that chunk's size
+  // otherwise.
+  KALDI_ASSERT(samples_consumed_ >= 0 &&
+               ((subsampled_signal_.empty() && samples_consumed_ == 0) ||
+                (!subsampled_signal_.empty () && samples_consumed_ <
+                 subsampled_signal_[0]->Dim())));
+
+  int32 ans = -samples_consumed_;
+  const size_t num_chunks = subsampled_signal_.size();
+  for (size_t chunk = 0; chunk != num_chunks; ++chunk) {
+    ans += subsampled_signal_[chunk]->Dim();
+    if (ans > max_samp) break;
+  }
+  KALDI_ASSERT(ans >= 0);
+  return (max_samp < ans) ? max_samp : ans;
+}
+
+// Returns true if no further frame can be produced right now.  Before
+// WaveformFinished() has been called a full frame of samples is required;
+// afterwards any positive number of leftover samples is enough.
+bool MultiSinusoidDetector::Done() const {
+  const int32 num_ready =
+      NumSubsampledSamplesReady(samples_per_frame_subsampled_);
+  const bool frame_available = waveform_finished_ ?
+      (num_ready > 0) : (num_ready >= samples_per_frame_subsampled_);
+  return !frame_available;
+}
+
+// Copies the next samples_per_frame_subsampled_ samples of buffered subsampled
+// signal into "frame", consuming them (and freeing any chunk that is used up).
+// If the buffer runs out, which is only allowed after WaveformFinished() and
+// when at least one sample was copied, the rest of the frame is set to zero.
+void MultiSinusoidDetector::GetNextFrameOfSignal(Vector<BaseFloat> *frame) {
+  frame->Resize(samples_per_frame_subsampled_, kUndefined);
+
+  int32 sample_offset = 0;  // number of samples written to "frame" so far.
+  int32 remaining = samples_per_frame_subsampled_;
+
+  while (remaining > 0 && !subsampled_signal_.empty()) {
+    Vector<BaseFloat> *chunk = subsampled_signal_.front();
+    int32 num_samples_avail = chunk->Dim() - samples_consumed_;
+    KALDI_ASSERT(num_samples_avail > 0);
+    const int32 num_to_copy =
+        (remaining < num_samples_avail) ? remaining : num_samples_avail;
+
+    frame->Range(sample_offset, num_to_copy).CopyFromVec(
+        chunk->Range(samples_consumed_, num_to_copy));
+
+    sample_offset += num_to_copy;
+    remaining -= num_to_copy;
+    samples_consumed_ += num_to_copy;
+
+    // Drop the front chunk once every sample in it has been used.
+    if (samples_consumed_ == chunk->Dim()) {
+      subsampled_signal_.pop_front();
+      delete chunk;
+      samples_consumed_ = 0;
+    }
+  }
+
+  if (remaining > 0) {
+    KALDI_ASSERT(waveform_finished_ && sample_offset > 0);  // or code error.
+    frame->Range(sample_offset, remaining).SetZero();
+  }
+}
+
+
+// Processes the next frame of subsampled signal and writes the result to
+// "output": the per-sample energy of the frame, plus either a two-frequency
+// detection, a one-frequency detection, or the default (nothing detected).
+void MultiSinusoidDetector::GetNextFrame(MultiSinusoidDetectorOutput *output) {
+  Vector<BaseFloat> frame;
+  GetNextFrameOfSignal(&frame);
+  // Mean subtraction
+  frame.Add(-1.0 * frame.Sum() / frame.Dim());
+  *output = MultiSinusoidDetectorOutput();  // reset to default.
+
+  const BaseFloat signal_energy = VecVec(frame, frame);
+  output->tot_energy = signal_energy / frame.Dim();
+  if (signal_energy == 0.0) return;  // a zero-energy frame has no sinusoids.
+
+  // min_energy1 is the lowest energy of a first sinusoid we might care about.
+  const BaseFloat min_energy1 = signal_energy *
+      std::min<BaseFloat>(config_.two_freq_min_total_energy * 0.5,
+                          config_.one_freq_min_energy);
+
+  Sinusoid sinusoid1;
+  const BaseFloat energy1 =
+      detector_.DetectSinusoid(min_energy1, frame, &sinusoid1);
+  if (energy1 == 0.0) return;  // Nothing detected.
+
+  // We only care about a 2nd sinusoid if
+  // energy1 + energy2 >= signal_energy * two_freq_min_total_energy,
+  // and energy2 >= signal_energy * two_freq_min_energy.
+  const BaseFloat min_energy2 =
+      std::max(signal_energy * config_.two_freq_min_energy,
+               signal_energy * config_.two_freq_min_total_energy
+               - energy1);
+
+  Sinusoid sinusoid2;
+  BaseFloat energy2 = 0.0;  // stays zero if we don't look for a 2nd sinusoid.
+
+  // Only search if there is enough energy left in the signal that we could
+  // possibly detect a sinusoid of energy at least min_energy2.
+  if (min_energy2 <= signal_energy - energy1) {
+    // Adding the first sinusoid with its phase reversed subtracts it from the
+    // frame, so that the second search sees the residual.
+    sinusoid1.phase += M_PI;
+    AddSinusoid(config_.subsample_freq, sinusoid1, &frame);
+
+    energy2 = detector_.DetectSinusoid(min_energy2, frame, &sinusoid2);
+
+    if (energy2 > energy1) {
+      // This is just for our information, so we are aware when the sinusoid
+      // detection gives us the non-optimal sinusoid first.
+      BaseFloat factor = energy2 / energy1;
+      KALDI_VLOG(2) << "Second sinusoid greater than first by a factor of "
+                    << factor << ".  (This means sinusoid detection is not "
+                    << " working ideally).";
+    }
+
+    // A two-frequency detection takes precedence over a one-frequency one.
+    if (DetectedTwoFrequency(signal_energy,
+                             sinusoid1, energy1,
+                             sinusoid2, energy2,
+                             output))
+      return;
+  }
+
+  // We don't need the return status of the following; we just return anyway.
+  DetectedOneFrequency(signal_energy,
+                       sinusoid1, energy1,
+                       sinusoid2, energy2,
+                       output);
+}
+
+// Checks whether the two detected sinusoids form an acceptable two-frequency
+// tone: together they must hold at least two_freq_min_total_energy of the
+// signal energy, the weaker one at least two_freq_min_energy of it, and both
+// frequencies must lie in [min_freq, max_freq].  On success, writes the
+// frequencies (lower one first) and the energies relative to signal_energy to
+// "output" and returns true; otherwise returns false and leaves "output" alone.
+bool MultiSinusoidDetector::DetectedTwoFrequency(
+    BaseFloat signal_energy,
+    const Sinusoid &sinusoid1,
+    BaseFloat energy1,
+    const Sinusoid &sinusoid2,
+    BaseFloat energy2,
+    MultiSinusoidDetectorOutput *output) {
+  // The checks are made in this order, and stop at the first one that fails.
+  if (!(energy1 + energy2 >= signal_energy *
+        config_.two_freq_min_total_energy))
+    return false;
+  if (!(std::min(energy1, energy2) >= signal_energy *
+        config_.two_freq_min_energy))
+    return false;
+  if (!(std::min(sinusoid1.freq, sinusoid2.freq) >= config_.min_freq))
+    return false;
+  if (!(std::max(sinusoid1.freq, sinusoid2.freq) <= config_.max_freq))
+    return false;
+
+  output->freq1 = sinusoid1.freq;
+  output->energy1 = energy1 / signal_energy;
+  output->freq2 = sinusoid2.freq;
+  output->energy2 = energy2 / signal_energy;
+  // Report the lower frequency first.
+  if (output->freq2 < output->freq1) {
+    std::swap(output->freq1, output->freq2);
+    std::swap(output->energy1, output->energy2);
+  }
+  return true;
+}
+
+
+// Checks whether the stronger of the two detected sinusoids is an acceptable
+// one-frequency tone: its energy must exceed one_freq_min_energy of the signal
+// energy and its frequency must lie in [min_freq, max_freq].  On success,
+// writes it to "output" as freq1/energy1 (energy relative to signal_energy),
+// zeroes freq2/energy2, and returns true; otherwise returns false.
+bool MultiSinusoidDetector::DetectedOneFrequency(
+    BaseFloat signal_energy,
+    const Sinusoid &sinusoid1,
+    BaseFloat energy1,
+    const Sinusoid &sinusoid2,
+    BaseFloat energy2,
+    MultiSinusoidDetectorOutput *output) {
+  // If sinusoid detection were performing exactly to spec, we could assume
+  // energy1 >= energy2, but we don't assume this as it's not guaranteed.
+  // Only the strictly stronger sinusoid is a candidate; with equal energies
+  // there is no candidate.
+  const Sinusoid *winner;
+  BaseFloat winner_energy;
+  if (energy1 > energy2) {
+    winner = &sinusoid1;
+    winner_energy = energy1;
+  } else if (energy2 > energy1) {
+    winner = &sinusoid2;
+    winner_energy = energy2;
+  } else {
+    return false;
+  }
+
+  if (winner_energy > signal_energy * config_.one_freq_min_energy &&
+      winner->freq >= config_.min_freq &&
+      winner->freq <= config_.max_freq) {
+    output->freq1 = winner->freq;
+    output->energy1 = winner_energy / signal_energy;
+    output->freq2 = 0.0;
+    output->energy2 = 0.0;
+    return true;
+  }
+  return false;
+}
+
+
+// Runs "detector" over the whole of "signal" and writes one row per frame to
+// "output", with the columns [tot_energy, freq1, energy1, freq2, energy2].
+// The detector is Reset() before returning.  If no frames were produced,
+// "output" is resized to 0 by 0.
+void DetectSinusoids(const VectorBase<BaseFloat> &signal,
+                     MultiSinusoidDetector *detector,
+                     Matrix<BaseFloat> *output) {
+  detector->AcceptWaveform(signal);
+  detector->WaveformFinished();
+
+  // Rough estimate of the number of frames; only used to pre-size the vector.
+  int32 safety_margin = 10, approx_num_frames = safety_margin +
+      (signal.Dim() / (detector->SamplingFrequency() *
+                       detector->FrameShiftSecs()));
+  std::vector<MultiSinusoidDetectorOutput> frames;
+  frames.reserve(approx_num_frames);
+
+  while (!detector->Done()) {
+    frames.push_back(MultiSinusoidDetectorOutput());
+    detector->GetNextFrame(&(frames.back()));
+  }
+  detector->Reset();
+
+  if (frames.empty()) {
+    output->Resize(0, 0);
+    return;
+  }
+
+  output->Resize(frames.size(), 5, kUndefined);
+  for (int32 r = 0; r < output->NumRows(); r++) {
+    const MultiSinusoidDetectorOutput &result = frames[r];
+    BaseFloat *row = output->RowData(r);
+    row[0] = result.tot_energy;
+    row[1] = result.freq1;
+    row[2] = result.energy1;
+    row[3] = result.freq2;
+    row[4] = result.energy2;
+  }
+}
+
+
+}  // namespace kaldi
+
--- a/src/feat/sinusoid-detection.h
+++ b/src/feat/sinusoid-detection.h
@ -0,0 +1,436 @@
+// feat/sinusoid-detection.h
+
+// Copyright     2015  Johns Hopkins University (author: Daniel Povey)
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_FEAT_SINUSOID_DETECTION_H_
+#define KALDI_FEAT_SINUSOID_DETECTION_H_
+
+
+#include "base/kaldi-error.h"
+#include "matrix/matrix-lib.h"
+#include "util/common-utils.h"
+#include "feat/resample.h"
+#include <deque>
+
+namespace kaldi {
+/// @addtogroup  feat FeatureExtraction
+/// @{
+
+
+// This structure is used to represent a sinusoid of type
+// amplitude cos (2 pi freq t + phase), in the SinusoidDetector code.
+struct Sinusoid {
+  BaseFloat amplitude;
+  BaseFloat freq;
+  BaseFloat phase;
+  Sinusoid(BaseFloat a, BaseFloat f, BaseFloat p):
+      amplitude(a), freq(f), phase(p) { }
+  // The default constructor zero-initializes all members, so that an object
+  // that is declared as an output argument but never written to (e.g. because
+  // nothing was detected) can still be read safely.
+  Sinusoid(): amplitude(0.0), freq(0.0), phase(0.0) { }
+};
+
+
+// Adds the given sinusoid to the signal in place; for each sample index t,
+// (*signal)(t) += amplitude * cos(2 pi freq/samp_freq t + phase).
+void AddSinusoid(BaseFloat samp_freq,
+                 const Sinusoid &sinusoid,
+                 VectorBase<BaseFloat> *signal);
+
+
+class SinusoidDetector {
+ public:
+  SinusoidDetector(BaseFloat samp_freq,
+                    int32 num_samp);
+  
+
+  // Detect the dominant sinusoid component in the signal, as long as the
+  // energy-reduction of the signal from subtracting that sinusoid would be >=
+  // "min_energy_change", and return that energy reduction; or zero if no
+  // candidate was found.
+  // This is non-const because the FFT class has a temporary buffer.
+  BaseFloat DetectSinusoid(BaseFloat min_energy_change,
+                           const VectorBase<BaseFloat> &signal,
+                           Sinusoid *sinusoid);
+  
+  // This function does quadratic interpolation for a function that is known at
+  // three equally spaced points [x0 x1 x2] = [0 1 2], and we want the x-value
+  // and corresponding y-value at the maximum of the function within the range
+  // 0 <= x <= 2.  It's public for testing reasons.
+  static void QuadraticMaximizeEqualSpaced(
+    BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat *x, BaseFloat *y);
+
+
+  // This function does quadratic interpolation for a function that is known at
+  // three points x0, x1 and x2 with x0 = 0, 0 < x1 < 1 and x2 = 1, where we
+  // want the x-value and corresponding y-value at the maximum of the function
+  // within the range 0 <= x <= 1.  It's public for testing reasons.
+  static void QuadraticMaximize(
+    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat *x, BaseFloat *y);
+
+  // This function does quadratic interpolation for a function that is known at
+  // three points x0, x1 and x2 with x0 = 0, 0 <= x1 <= 1 and x2 = 1, where
+  // we want the value at a specific value x.  The corresponding y-value is returned.
+  static BaseFloat QuadraticInterpolate(
+    BaseFloat x1, BaseFloat y0, BaseFloat y1, BaseFloat y2,
+    BaseFloat x);
+  
+
+ private:
+  BaseFloat samp_freq_;
+  int32 num_samples_;
+  int32 num_samples_padded_;  // Number of samples, after zero-padding to power of 2.
+  SplitRadixRealFft<BaseFloat> fft_;  // Object used to compute FFT of padded_signal_.
+
+  BaseFloat factor1_;  // When we search the range between two FFT bins, we
+                       // assume that the maximum energy-reduction within the
+                       // range may be greater than the maximum of the
+                       // energy-reductions at either side, by at most
+                       // "factor1", with factor1 > 1.0.  The analysis is quite
+                       // hard so we determine this factor empirically.  Making
+                       // this as small as possible helps us avoid searching too
+                       // many bins.
+
+  BaseFloat factor2_;  // As factor1, but for searches within a half-fft-bin
+                       // range.  Again determined empirically.  After that we
+                       // use quadratic interpolation to find the maximum energy.
+
+  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
+  // num_samples_, has in each row, a different frequency of cosine wave.
+  Matrix<BaseFloat> cos_;
+  // This matrix, of dimension (num_samples_padded_ * 2 + 1) by
+  // num_samples_, has in each row, a different frequency of sine wave.
+  Matrix<BaseFloat> sin_;
+
+  // M_ is a precomputed matrix of dimension (num_samples_padded_ * 2 + 1) by 3,
+  // containing the values a b d of a symmetric matrix [ a b; b d ].  There is
+  // one of these matrices for each frequency, sampled at one quarter the
+  // spacing of the FFT bins.  There is a long comment next to the definition of
+  // ComputeCoarseInfo that describes this.
+  Matrix<BaseFloat> M_;
+
+  // Minv_ is the coefficients in the same format as M_, but containing the
+  // corresponding coefficients of the inverse matrix.  There is a long comment
+  // next to the definition of ComputeCoarseInfo that describes this.
+  Matrix<BaseFloat> Minv_;
+  
+  // Per-frequency quantities; "valid" is false until they have been computed.
+  struct InfoForBin {
+    bool valid;
+    BaseFloat cos_dot;  // dot product of signal with cosine at this frequency
+    BaseFloat sin_dot;  // dot product of signal with sine at this frequency
+    BaseFloat energy;  // energy explained by a sinusoid at this frequency.
+    InfoForBin(): valid(false) { }
+  };
+
+  // Info after fine optimization within a bin.
+  struct OptimizedInfo {
+    int32 bin;
+    BaseFloat offset;
+    BaseFloat energy;
+    BaseFloat cos_coeff;
+    BaseFloat sin_coeff;
+  };
+  
+  // Compute the coefficients and energies at the original FFT bins (every
+  // fourth entry in "info").
+  void ComputeCoarseInfo(const Vector<BaseFloat> &fft,
+                         std::vector<InfoForBin> *info) const;
+
+
+  // After the coarse-level info is computed using ComputeCoarseInfo, finds a
+  // set of intermediate bin indexes to compute, that are the midpoints of
+  // coarse-level bins.
+  void FindCandidateBins(BaseFloat min_energy,
+                         const std::vector<InfoForBin> &info,
+                         std::vector<int32> *bins) const;
+  // As FindCandidateBins, but at half the spacing, skipping non-valid entries.
+  void FindCandidateBins2(BaseFloat min_energy,
+                          const std::vector<InfoForBin> &info,
+                          std::vector<int32> *bins) const;
+
+  // Computes "info" for index "bin" from dot products with the signal.
+  void ComputeBinInfo(const VectorBase<BaseFloat> &signal,
+                      int32 bin, InfoForBin *info) const;
+
+  
+  // For each bin b such that we have valid "info" data for bins b, b+1 and b+2,
+  // does quadratic interpolation to find the maximum predicted energy in the
+  // range [b, b+2].  The location of the maximum predicted energy is output to
+  // "bin_out" and "offset_out", and the corresponding predicted energy is
+  // returned.
+  //
+  // Note: if there are two different frequencies with similar maximum energies
+  // (e.g. within a factor of probably around 1.2 or so), the fact that
+  // OptimizeFrequency only returns one maximum may potentially lead to the
+  // smaller maximum being output.  We could have modified this to output
+  // multiple different maxima, which could have been more accurate in terms of
+  // being guaranteed to output the best maximum, but this probably wouldn't
+  // have a measurable impact on our application so we haven't bothered.
+  BaseFloat OptimizeFrequency(
+      const std::vector<InfoForBin> &info,
+      int32 *bin_out,
+      BaseFloat *offset_out) const;
+  
+
+  // This function does
+  // (*cos)(t) = cos(2 pi t freq / samp_freq)
+  // (*sin)(t) = sin(2 pi t freq / samp_freq)
+  static void CreateCosAndSin(BaseFloat samp_freq,
+                              BaseFloat freq,
+                              VectorBase<BaseFloat> *cos,
+                              VectorBase<BaseFloat> *sin);
+  
+  // Do fine optimization of the frequency within a bin, given a reasonable
+  // approximate position within it based on interpolation (that should be close
+  // to the optimum).
+  void FineOptimizeFrequency(
+      const VectorBase<BaseFloat> &signal,
+      int32 bin,
+      BaseFloat offset,
+      std::vector<InfoForBin> *info,
+      OptimizedInfo *opt_info) const;
+  
+  // Computes the coefficients cos_, sin_, M_ and Minv_.
+  void ComputeCoefficients();
+
+  // Calls some self-testing code that prints warnings if
+  // some of our assumptions were wrong.
+  void SelfTest(const VectorBase<BaseFloat> &signal,
+                const std::vector<InfoForBin> &info,
+                BaseFloat final_freq,
+                BaseFloat final_energy);
+
+};
+
+
+
+/**
+   This configuration class is for the frame-by-frame detection of
+   cases where there are one or two sinusoids that can explain
+   a lot of the energy in the signal.
+*/
+struct MultiSinusoidDetectorConfig {
+
+  // frame length in milliseconds
+  BaseFloat frame_length_ms;
+  // frame shift in milliseconds
+  BaseFloat frame_shift_ms;
+
+  // Proportion of the total energy of the signal that the quieter of
+  // the two sinusoids must comprise, in order to be counted, if two
+  // sinusoids are detected.
+  BaseFloat two_freq_min_energy;
+
+  // Proportion of the total energy of the signal that both sinusoids (if
+  // two are detected) must comprise, in order to be output.
+  BaseFloat two_freq_min_total_energy;
+
+  // Proportion of the total energy of the signal that a single sinusoid
+  // must comprise, in order to be output, if we are considering
+  // reporting a single sinusoid.  Note: detection of two sinusoids
+  // will take precedence over detection of a single sinusoid.
+  BaseFloat one_freq_min_energy;
+
+  // Lower end of frequency range that we consider; frequencies outside
+  // this range are not candidates to appear in the detected output.
+  BaseFloat min_freq;
+  // Upper end of frequency range that we consider, see min_freq.
+  BaseFloat max_freq;
+
+  // Frequency to which we subsample the signal before processing it.
+  // Must be integer because of how LinearResample code works.
+  int32 subsample_freq;
+
+  // Filter cut-off frequency used in sub-sampling.
+  BaseFloat subsample_filter_cutoff;
+
+  // the following is not critical and is not exported to the
+  // command line.
+  int32 subsample_filter_zeros;
+  
+  MultiSinusoidDetectorConfig():
+      frame_length_ms(20), frame_shift_ms(10),
+      two_freq_min_energy(0.2), two_freq_min_total_energy(0.6),
+      one_freq_min_energy(0.75), min_freq(300.0),
+      max_freq(1800.0), subsample_freq(4000),
+      subsample_filter_cutoff(1900.0), subsample_filter_zeros(5) {}
+
+  // Registers the command-line options of this config with "po", binding each
+  // option name to the corresponding member.  subsample_filter_zeros is
+  // intentionally not registered (it is not exported to the command line).
+  void Register(OptionsItf *po) {
+    po->Register("frame-length", &frame_length_ms,
+                 "Frame length in milliseconds");
+    po->Register("frame-shift", &frame_shift_ms,
+                 "Frame shift in milliseconds");
+    po->Register("two-freq-min-energy", &two_freq_min_energy,
+                 "For detecting two-frequency tones, minimum energy that "
+                 "the quieter frequency must have (relative to total "
+                 "energy of frame)");
+    po->Register("two-freq-min-total-energy", &two_freq_min_total_energy,
+                 "For detecting two-frequency tones, minimum energy that "
+                 "the two frequencies together must have (relative to total "
+                 "energy of frame)");
+    po->Register("one-freq-min-energy", &one_freq_min_energy, "For detecting "
+                 "single-frequency tones, minimum energy that the frequency "
+                 "must have relative to total energy of frame");
+    po->Register("min-freq", &min_freq, "Minimum frequency of sinusoid that "
+                 "will be detected");
+    // Must bind to max_freq: binding to min_freq would make --max-freq
+    // overwrite the minimum frequency and leave max_freq unsettable.
+    po->Register("max-freq", &max_freq, "Maximum frequency of sinusoid that "
+                 "will be detected");
+    po->Register("subsample-freq", &subsample_freq, "Frequency at which "
+                 "we subsample the signal");
+    po->Register("subsample-filter-cutoff", &subsample_filter_cutoff, "Filter "
+                 "cut-off frequency used in subsampling");
+  }
+  // Sanity-checks the configuration; a KALDI_ASSERT fails if the values are
+  // inconsistent with each other.
+  void Check() const {
+    // The frame shift must be strictly positive as well as no longer than the
+    // frame length; the remaining conditions relate the frequency range, the
+    // subsampling filter cutoff and the subsampling frequency, and the energy
+    // thresholds, to each other.
+    KALDI_ASSERT(frame_length_ms > 0 && frame_shift_ms > 0 &&
+                 frame_length_ms >= frame_shift_ms &&
+                 min_freq > 0 && max_freq > min_freq &&
+                 subsample_filter_cutoff > max_freq &&
+                 subsample_freq/2 > subsample_filter_cutoff &&
+                 subsample_filter_zeros > 2 &&
+                 subsample_filter_cutoff > 0.25 * subsample_freq &&
+                 two_freq_min_total_energy > two_freq_min_energy &&
+                 two_freq_min_energy <= 0.5 * two_freq_min_total_energy);
+    BaseFloat samples_per_frame_shift =
+        frame_shift_ms * 0.001 * subsample_freq;
+    // The following assert ensures that the frame-shift is an exact
+    // number of samples, so that the locations of the frames
+    // don't gradually drift out of sync.  We compare against the nearest
+    // integer (not the truncated value), so that a result that is a tiny
+    // amount below a whole number due to roundoff is still accepted.
+    // samples_per_frame_shift is positive here, so adding 0.5 and
+    // truncating rounds to nearest.
+    KALDI_ASSERT(fabs(samples_per_frame_shift -
+                      static_cast<int32>(samples_per_frame_shift + 0.5)) <
+                 0.001);
+  }
+};
+
+// Per-frame detection result.  All fields are initialized to zero by the
+// constructor; for the frequency and energy fields zero means "not detected".
+struct MultiSinusoidDetectorOutput {
+  // Total energy per sample of this frame (sum-square of signal divided by
+  // number of samples); this is after downsampling and mean subtraction.
+  BaseFloat tot_energy;
+  // Lower frequency detected, or 0 if none detected.
+  BaseFloat freq1;
+  // Energy of lower frequency divided by total energy, or 0 if none detected.
+  BaseFloat energy1;
+  // Higher frequency detected, or 0 if zero or one frequencies detected.
+  BaseFloat freq2;
+  // Energy of higher frequency divided by total energy, or 0 if zero or one
+  // frequencies detected.
+  BaseFloat energy2;
+
+  MultiSinusoidDetectorOutput()
+      : tot_energy(0.0),
+        freq1(0.0),
+        energy1(0.0),
+        freq2(0.0),
+        energy2(0.0) { }
+};
+
+
+// Detects one or two sinusoids per frame in a waveform.  The waveform may be
+// supplied piece by piece; it is subsampled (see resampler_) and the
+// subsampled frames are passed to a SinusoidDetector (see detector_).
+// Results are retrieved one frame at a time with Done() / GetNextFrame().
+class MultiSinusoidDetector {
+ public:
+
+  // Initialize sinusoid detector.  Sampling frequency must be integer.
+  MultiSinusoidDetector(const MultiSinusoidDetectorConfig &config,
+                        int32 sampling_freq);    
+
+  /// This is how the class accepts its input.  You can put the waveform in
+  /// piece by piece, if it's an online application.
+  void AcceptWaveform(const VectorBase<BaseFloat> &waveform);
+  
+  /// The user calls this to announce to the class that the waveform has ended;
+  /// this forces any pending data to be flushed.
+  void WaveformFinished();
+
+  /// Resets the state of the class so you can start processing another waveform.
+  void Reset(); 
+  
+  /// This returns true if the class currently has no more data ready to output.
+  bool Done() const;
+
+  /// Outputs the next frame of output to "output", which must be non-NULL.
+  /// It is an error to call this if Done() has returned true, or has not been
+  /// checked.
+  void GetNextFrame(MultiSinusoidDetectorOutput *output);
+
+  /// Returns the frame shift in seconds (config_.frame_shift_ms is in
+  /// milliseconds).
+  BaseFloat FrameShiftSecs() const { return 0.001 * config_.frame_shift_ms; }
+
+  /// Returns sample_freq_.  NOTE(review): presumably this is the sampling
+  /// frequency passed to the constructor, i.e. the rate expected of the input
+  /// waveform -- confirm against the implementation.
+  BaseFloat SamplingFrequency() const { return sample_freq_; }
+  
+ private:
+  // Gets the next frame of subsampled signal, and consumes the appropriate
+  // amount of stored data.  It is an error to call this if Done() returned
+  // true.
+  void GetNextFrameOfSignal(Vector<BaseFloat> *frame);
+
+  // returns true and sets freq1, freq2, energy1 and energy2 in "output" if we
+  // successfully detected an acceptable two-frequency tone.
+  bool DetectedTwoFrequency(BaseFloat signal_energy,
+                            const Sinusoid &sinusoid1,
+                            BaseFloat energy1,
+                            const Sinusoid &sinusoid2,
+                            BaseFloat energy2,
+                            MultiSinusoidDetectorOutput *output);
+
+  // returns true and fills in "output" if we successfully detected an
+  // acceptable one-frequency tone.
+  // NOTE(review): which fields of "output" get set is not visible from this
+  // declaration (presumably freq1 and energy1) -- confirm against the
+  // implementation.
+  bool DetectedOneFrequency(BaseFloat signal_energy,
+                            const Sinusoid &sinusoid1,
+                            BaseFloat energy1,
+                            const Sinusoid &sinusoid2,
+                            BaseFloat energy2,
+                            MultiSinusoidDetectorOutput *output);
+  
+  
+  // Returns std::min(max_samp, sum-of-samples-in-subsampled_signal_).
+  // (the std::min is for efficiency so we don't have to visit the
+  //  whole list).
+  int32 NumSubsampledSamplesReady(int32 max_samp) const;
+  
+  MultiSinusoidDetectorConfig config_;
+  int32 sample_freq_;
+  int32 samples_per_frame_subsampled_;  // (samples per frame at subsampled
+                                        // rate).
+  int32 samples_shift_subsampled_;  // (samples per frame-shift at subsampled
+                                    // rate).
+
+  // True if the user has called WaveformFinished().
+  bool waveform_finished_;
+  
+  // Pieces of the subsampled signal that are awaiting processing.
+  // Normally there will be just one element here, but if someone calls
+  // AcceptWaveform multiple times before getting output, there could
+  // be more elements.  All of these pieces are nonempty.
+  std::deque<Vector<BaseFloat>* > subsampled_signal_;
+
+  // stores the number of samples consumed from the first member of
+  // subsampled_signal_.  We will always have samples_consumed_ >= 0 and either
+  // (subsampled_signal_.empty() && samples_consumed_ == 0) or
+  // samples_consumed_ < subsampled_signal_[0]->Dim().
+  int32 samples_consumed_;
+  
+  
+  // This object is used to subsample the signal.
+  LinearResample resampler_;
+
+  // This object is used to detect sinusoids in the subsampled 
+  // frames.
+  SinusoidDetector detector_;
+};
+
+// Detect sinusoids.  Signal should be sampled at detector->SamplingFrequency().
+// The result is written to "output".
+// NOTE(review): the detect-sinusoids binary documents its per-file output as
+// one row per frame with columns <signal-energy-per-sample> <frequency1>
+// <energy1> <frequency2> <energy2>; presumably that is the layout produced
+// here -- confirm against the implementation.
+void DetectSinusoids(const VectorBase<BaseFloat> &signal,
+                     MultiSinusoidDetector *detector,
+                     Matrix<BaseFloat> *output);
+
+
+
+
+
+/// @} End of "addtogroup feat"
+}  // namespace kaldi
+#endif  // KALDI_FEAT_SINUSOID_DETECTION_H_
--- a/src/featbin/Makefile
+++ b/src/featbin/Makefile
@ -14,7 +14,7 @@ BINFILES = compute-mfcc-feats compute-plp-feats compute-fbank-feats \
    apply-cmvn-sliding compute-cmvn-stats-two-channel compute-kaldi-pitch-feats \
    process-kaldi-pitch-feats compare-feats wav-to-duration add-deltas-sdc \
    compute-and-process-kaldi-pitch-feats modify-cmvn-stats wav-copy \
-    append-vector-to-feats
+    append-vector-to-feats detect-sinusoids

 OBJFILES = 

--- a/src/featbin/detect-sinusoids.cc
+++ b/src/featbin/detect-sinusoids.cc
@ -0,0 +1,113 @@
+// featbin/detect-sinusoids.cc
+
+// Copyright 2015  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "feat/sinusoid-detection.h"
+#include "feat/wave-reader.h"
+
+
+// Command-line entry point: reads single-channel wave files from a table,
+// runs DetectSinusoids() on each and writes one matrix per file.
+// Returns 0 if at least one file was processed successfully, 1 if none was,
+// and -1 if an exception was caught.
+int main(int argc, char *argv[]) {
+  try {
+    using namespace kaldi;
+    const char *usage =
+        "Detect sinusoids (one or two at a time) in waveform input and output\n"
+        "frame-by-frame information on their frequencies and energies.  Useful\n"
+        "as part of DTMF and dialtone detection.  Output is an archive of\n"
+        "matrices; for each file, there is a row per frame, containing\n"
+        "<signal-energy-per-sample> <frequency1> <energy1> <frequency2> <energy2>\n"
+        "where the frequencies and energies may be zero if no sufficiently\n"
+        "dominant sinusoid(s) was/were detected.  If two frequencies were\n"
+        "detected, frequency1 < frequency2.  See options for more detail on\n"
+        "configuration options.\n"
+        "\n"
+        "Usage: detect-sinusoids [options] <wav-rspecifier> <matrix-wspecifier>\n"
+        "e.g.: detect-sinusoids scp:wav.scp ark,t:sinusoids.ark\n";
+
+    ParseOptions po(usage);
+    MultiSinusoidDetectorConfig config;
+
+    config.Register(&po);
+
+    po.Read(argc, argv);
+
+    if (po.NumArgs() != 2) {
+      po.PrintUsage();
+      exit(1);
+    }
+
+    std::string wav_rspecifier = po.GetArg(1),
+        matrix_wspecifier = po.GetArg(2);
+
+    int32 num_done = 0, num_err = 0;
+
+    SequentialTableReader<WaveHolder> wav_reader(wav_rspecifier);
+    BaseFloatMatrixWriter matrix_writer(matrix_wspecifier);
+
+    // Created lazily, and re-created whenever the sampling frequency of the
+    // input differs from the one the current detector was built with.
+    MultiSinusoidDetector *detector = NULL;
+
+    for (; !wav_reader.Done(); wav_reader.Next()) {
+      const WaveData &wav_data = wav_reader.Value();
+      const Matrix<BaseFloat> &data = wav_data.Data();
+      BaseFloat samp_freq = wav_data.SampFreq();
+      int32 num_channels = data.NumRows();
+      // Only single-channel input is supported; skip anything else.
+      if (num_channels != 1) {
+        KALDI_WARN << "detect-sinusoids requires data with one "
+                   << "channel. Recording " << wav_reader.Key() << " has "
+                   << num_channels << ".  First select one channel of your "
+                   << "data (e.g. using sox)";
+        num_err++;
+        continue;
+      }
+      // The input is skipped if it is sampled below the configured
+      // subsampling frequency.
+      if (samp_freq < config.subsample_freq) {
+        KALDI_WARN << "Sampling frequency of data " << wav_reader.Key()
+                   << " is too low " << samp_freq << " < "
+                   << config.subsample_freq << ".  Reduce --subsample-freq "
+                   << "if you want to run on this data.";
+        num_err++;
+        continue;
+      }
+
+      if (detector == NULL ||
+          samp_freq != detector->SamplingFrequency()) {
+        delete detector;
+        detector = new MultiSinusoidDetector(config, samp_freq);
+      }
+
+      Matrix<BaseFloat> output;
+      DetectSinusoids(data.Row(0), detector, &output);
+
+      // An empty result is counted as an error and not written out.
+      if (output.NumRows() == 0) {
+        KALDI_WARN << "No output for " << wav_reader.Key();
+        num_err++;
+      } else {
+        matrix_writer.Write(wav_reader.Key(), output);
+        num_done++;
+      }
+    }
+    delete detector;
+    // Note the space after the comma, so that the two counts are not run
+    // together in the log message.
+    KALDI_LOG << "Detected sinusoids in " << num_done << " wave files, "
+              << num_err << " with errors.";
+    return (num_done != 0 ? 0 : 1);
+  } catch(const std::exception &e) {
+    std::cerr << e.what();
+    return -1;
+  }
+}
+