Bug 886886: replace fixed-ratio capture resampler in webrtc with speex resampler r=derf,jmspeex

2013-07-21 03:47:24 -04:00 · 2013-07-21 03:47:24 -04:00 · 03b686d2df
--- a/4
+++ b/4
@ -17,5 +17,5 @@
 #
 # Modifying this file will now automatically clobber the buildbot machines \o/
 #
-Bug 895670 - Remove SetupScreen and associated assets
-Android-only resource clobber (just rm objdir/mobile/android/base/res)
+Bug 886886 - replace fixed-ratio capture resampler in webrtc with speex resampler
+Requires a clobber due to modification of a .gypi file without a .gyp or configure.in change
--- a/media/webrtc/trunk/webrtc/common_audio/resampler/include/resampler.h
+++ b/media/webrtc/trunk/webrtc/common_audio/resampler/include/resampler.h
@ -17,98 +17,48 @@
 #define WEBRTC_RESAMPLER_RESAMPLER_H_

 #include "typedefs.h"
+#include "speex/speex_resampler.h"

 namespace webrtc
 {

-// TODO(andrew): the implementation depends on the exact values of this enum.
-// It should be rewritten in a less fragile way.
+#define FIXED_RATE_RESAMPLER 0x10
 enum ResamplerType
 {
-    // 4 MSB = Number of channels
-    // 4 LSB = Synchronous or asynchronous
-
-    kResamplerSynchronous = 0x10,
-    kResamplerAsynchronous = 0x11,
-    kResamplerSynchronousStereo = 0x20,
-    kResamplerAsynchronousStereo = 0x21,
-    kResamplerInvalid = 0xff
-};
-
-// TODO(andrew): doesn't need to be part of the interface.
-enum ResamplerMode
-{
-    kResamplerMode1To1,
-    kResamplerMode1To2,
-    kResamplerMode1To3,
-    kResamplerMode1To4,
-    kResamplerMode1To6,
-    kResamplerMode1To12,
-    kResamplerMode2To3,
-    kResamplerMode2To11,
-    kResamplerMode4To11,
-    kResamplerMode8To11,
-    kResamplerMode11To16,
-    kResamplerMode11To32,
-    kResamplerMode2To1,
-    kResamplerMode3To1,
-    kResamplerMode4To1,
-    kResamplerMode6To1,
-    kResamplerMode12To1,
-    kResamplerMode3To2,
-    kResamplerMode11To2,
-    kResamplerMode11To4,
-    kResamplerMode11To8
+    kResamplerSynchronous            = 0x00,
+    kResamplerSynchronousStereo      = 0x01,
+    kResamplerFixedSynchronous       = 0x00 | FIXED_RATE_RESAMPLER,
+    kResamplerFixedSynchronousStereo = 0x01 | FIXED_RATE_RESAMPLER,
 };

 class Resampler
 {
-
 public:
    Resampler();
    // TODO(andrew): use an init function instead.
-    Resampler(int inFreq, int outFreq, ResamplerType type);
+    Resampler(int in_freq, int out_freq, ResamplerType type);
    ~Resampler();

    // Reset all states
-    int Reset(int inFreq, int outFreq, ResamplerType type);
+    int Reset(int in_freq, int out_freq, ResamplerType type);

    // Reset all states if any parameter has changed
-    int ResetIfNeeded(int inFreq, int outFreq, ResamplerType type);
+    int ResetIfNeeded(int in_freq, int out_freq, ResamplerType type);

    // Synchronous resampling, all output samples are written to samplesOut
-    int Push(const int16_t* samplesIn, int lengthIn, int16_t* samplesOut,
-             int maxLen, int &outLen);
-
-    // Asynchronous resampling, input
-    int Insert(int16_t* samplesIn, int lengthIn);
-
-    // Asynchronous resampling output, remaining samples are buffered
-    int Pull(int16_t* samplesOut, int desiredLen, int &outLen);
+    int Push(const int16_t* samples_in, int length_in,
+             int16_t* samples_out, int max_len, int &out_len);

 private:
-    // Generic pointers since we don't know what states we'll need
-    void* state1_;
-    void* state2_;
-    void* state3_;
+    bool IsFixedRate() { return !!(type_ & FIXED_RATE_RESAMPLER); }

-    // Storage if needed
-    int16_t* in_buffer_;
-    int16_t* out_buffer_;
-    int in_buffer_size_;
-    int out_buffer_size_;
-    int in_buffer_size_max_;
-    int out_buffer_size_max_;
+    SpeexResamplerState* state_;

    // State
-    int my_in_frequency_khz_;
-    int my_out_frequency_khz_;
-    ResamplerMode my_mode_;
-    ResamplerType my_type_;
-
-    // Extra instance for stereo
-    Resampler* slave_left_;
-    Resampler* slave_right_;
+    int in_freq_;
+    int out_freq_;
+    int channels_;
+    ResamplerType type_;
 };

 } // namespace webrtc
--- a/media/webrtc/trunk/webrtc/common_audio/resampler/resampler.cc
+++ b/media/webrtc/trunk/webrtc/common_audio/resampler/resampler.cc
--- a/media/webrtc/trunk/webrtc/common_audio/resampler/resampler.gypi
+++ b/media/webrtc/trunk/webrtc/common_audio/resampler/resampler.gypi
@ -17,10 +17,24 @@
      'include_dirs': [
        'include',
      ],
+      'conditions': [
+        ['build_with_mozilla==1', {
+          'include_dirs': [
+            '$(DEPTH)/dist/include',
+          ],
+        }],
+      ],
      'direct_dependent_settings': {
        'include_dirs': [
          'include',
        ],
+        'conditions': [
+          ['build_with_mozilla==1', {
+            'include_dirs': [
+              '$(DEPTH)/dist/include',
+            ],
+          }],
+        ],
      },
      'sources': [
        'include/resampler.h',
--- a/media/webrtc/trunk/webrtc/common_audio/resampler/resampler_unittest.cc
+++ b/media/webrtc/trunk/webrtc/common_audio/resampler/resampler_unittest.cc
@ -8,6 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <math.h>
+
 #include "gtest/gtest.h"

 #include "common_audio/resampler/include/resampler.h"
@ -18,10 +20,7 @@ namespace webrtc {
 namespace {
 const ResamplerType kTypes[] = {
  kResamplerSynchronous,
-  kResamplerAsynchronous,
  kResamplerSynchronousStereo,
-  kResamplerAsynchronousStereo
-  // kResamplerInvalid excluded
 };
 const size_t kTypesSize = sizeof(kTypes) / sizeof(*kTypes);

@ -31,7 +30,7 @@ const int kRates[] = {
  8000,
  16000,
  32000,
-  44000,
+  44100,
  48000,
  kMaxRate
 };
@ -39,26 +38,19 @@ const size_t kRatesSize = sizeof(kRates) / sizeof(*kRates);
 const int kMaxChannels = 2;
 const size_t kDataSize = static_cast<size_t> (kMaxChannels * kMaxRate / 100);

-// TODO(andrew): should we be supporting these combinations?
-bool ValidRates(int in_rate, int out_rate) {
-  // Not the most compact notation, for clarity.
-  if ((in_rate == 44000 && (out_rate == 48000 || out_rate == 96000)) ||
-      (out_rate == 44000 && (in_rate == 48000 || in_rate == 96000))) {
-    return false;
-  }
-
-  return true;
-}
-
 class ResamplerTest : public testing::Test {
 protected:
  ResamplerTest();
  virtual void SetUp();
  virtual void TearDown();
+  void RunResampleTest(int channels,
+                       int src_sample_rate_hz,
+                       int dst_sample_rate_hz);

  Resampler rs_;
  int16_t data_in_[kDataSize];
  int16_t data_out_[kDataSize];
+  int16_t data_reference_[kDataSize];
 };

 ResamplerTest::ResamplerTest() {}
@ -83,34 +75,119 @@ TEST_F(ResamplerTest, Reset) {
        ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j]
            << ", type: " << kTypes[k];
        SCOPED_TRACE(ss.str());
-        if (ValidRates(kRates[i], kRates[j]))
-          EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
-        else
-          EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
+        EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kTypes[k]));
      }
    }
  }
 }

-// TODO(tlegrand): Replace code inside the two tests below with a function
-// with number of channels and ResamplerType as input.
-TEST_F(ResamplerTest, Synchronous) {
-  for (size_t i = 0; i < kRatesSize; ++i) {
-    for (size_t j = 0; j < kRatesSize; ++j) {
-      std::ostringstream ss;
-      ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j];
-      SCOPED_TRACE(ss.str());
+// Sets the signal value to increase by |data| with every sample. Floats are
+// used so non-integer values result in rounding error, but not an accumulating
+// error.
+void SetMonoFrame(int16_t* buffer, float data, int sample_rate_hz) {
+  for (int i = 0; i < sample_rate_hz / 100; i++) {
+    buffer[i] = data * i;
+  }
+}

-      if (ValidRates(kRates[i], kRates[j])) {
-        int in_length = kRates[i] / 100;
-        int out_length = 0;
-        EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j], kResamplerSynchronous));
-        EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
-                              out_length));
-        EXPECT_EQ(kRates[j] / 100, out_length);
-      } else {
-        EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j], kResamplerSynchronous));
-      }
+// Sets the signal value to increase by |left| and |right| with every sample in
+// each channel respectively.
+void SetStereoFrame(int16_t* buffer, float left, float right,
+                    int sample_rate_hz) {
+  for (int i = 0; i < sample_rate_hz / 100; i++) {
+    buffer[i * 2] = left * i;
+    buffer[i * 2 + 1] = right * i;
+  }
+}
+
+// Computes the best SNR based on the error between |reference| and
+// |test|. It allows for a sample delay between the signals to
+// compensate for the resampling delay.
+float ComputeSNR(const int16_t* reference, const int16_t* test,
+                 int sample_rate_hz, int channels, int max_delay) {
+  float best_snr = 0;
+  int best_delay = 0;
+  int samples_per_channel = sample_rate_hz/100;
+  for (int delay = 0; delay < max_delay; delay++) {
+    float mse = 0;
+    float variance = 0;
+    for (int i = 0; i < samples_per_channel * channels - delay; i++) {
+      int error = reference[i] - test[i + delay];
+      mse += error * error;
+      variance += reference[i] * reference[i];
+    }
+    float snr = 100;  // We assign 100 dB to the zero-error case.
+    if (mse > 0)
+      snr = 10 * log10(variance / mse);
+    if (snr > best_snr) {
+      best_snr = snr;
+      best_delay = delay;
+    }
+  }
+  printf("SNR=%.1f dB at delay=%d\n", best_snr, best_delay);
+  return best_snr;
+}
+
+void ResamplerTest::RunResampleTest(int channels,
+                                    int src_sample_rate_hz,
+                                    int dst_sample_rate_hz) {
+  Resampler resampler;  // Create a new one with every test.
+  const int16_t kSrcLeft = 60;  // Shouldn't overflow for any used sample rate.
+  const int16_t kSrcRight = 30;
+  const float kResamplingFactor = (1.0 * src_sample_rate_hz) /
+      dst_sample_rate_hz;
+  const float kDstLeft = kResamplingFactor * kSrcLeft;
+  const float kDstRight = kResamplingFactor * kSrcRight;
+  if (channels == 1)
+    SetMonoFrame(data_in_, kSrcLeft, src_sample_rate_hz);
+  else
+    SetStereoFrame(data_in_, kSrcLeft, kSrcRight, src_sample_rate_hz);
+
+  if (channels == 1) {
+    SetMonoFrame(data_out_, 0, dst_sample_rate_hz);
+    SetMonoFrame(data_reference_, kDstLeft, dst_sample_rate_hz);
+  } else {
+    SetStereoFrame(data_out_, 0, 0, dst_sample_rate_hz);
+    SetStereoFrame(data_reference_, kDstLeft, kDstRight, dst_sample_rate_hz);
+  }
+
+  // The speex resampler has a known delay dependent on quality and rates,
+  // which we approximate here. Multiplying by two gives us a crude maximum
+  // for any resampling, as the old resampler typically (but not always)
+  // has lower delay.  The actual delay is calculated internally based on the
+  // filter length in the QualityMap.
+  static const int kInputKernelDelaySamples = 16*3;
+  const int max_delay = std::min(1.0f, 1/kResamplingFactor) *
+                        kInputKernelDelaySamples * channels * 2;
+  printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
+      channels, src_sample_rate_hz, channels, dst_sample_rate_hz);
+
+  int in_length = channels * src_sample_rate_hz / 100;
+  int out_length = 0;
+  EXPECT_EQ(0, rs_.Reset(src_sample_rate_hz, dst_sample_rate_hz,
+                         (channels == 1 ?
+                          kResamplerSynchronous :
+                          kResamplerSynchronousStereo)));
+  EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
+                        out_length));
+  EXPECT_EQ(channels * dst_sample_rate_hz / 100, out_length);
+
+  //  EXPECT_EQ(0, Resample(src_frame_, &resampler, &dst_frame_));
+  EXPECT_GT(ComputeSNR(data_reference_, data_out_, dst_sample_rate_hz,
+                       channels, max_delay), 40.0f);
+}
+
+TEST_F(ResamplerTest, Synchronous) {
+  // Number of channels is 1, mono mode.
+  const int kChannels = 1;
+  // We don't attempt to be exhaustive here, but just get good coverage. Some
+  // combinations of rates will not be resampled, and some give an odd
+  // resampling factor which makes it more difficult to evaluate.
+  const int kSampleRates[] = {16000, 32000, 44100, 48000};
+  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
+  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
+    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
+      RunResampleTest(kChannels, kSampleRates[src_rate], kSampleRates[dst_rate]);
    }
  }
 }
@ -118,24 +195,14 @@ TEST_F(ResamplerTest, Synchronous) {
 TEST_F(ResamplerTest, SynchronousStereo) {
  // Number of channels is 2, stereo mode.
  const int kChannels = 2;
-  for (size_t i = 0; i < kRatesSize; ++i) {
-    for (size_t j = 0; j < kRatesSize; ++j) {
-      std::ostringstream ss;
-      ss << "Input rate: " << kRates[i] << ", output rate: " << kRates[j];
-      SCOPED_TRACE(ss.str());
-
-      if (ValidRates(kRates[i], kRates[j])) {
-        int in_length = kChannels * kRates[i] / 100;
-        int out_length = 0;
-        EXPECT_EQ(0, rs_.Reset(kRates[i], kRates[j],
-                               kResamplerSynchronousStereo));
-        EXPECT_EQ(0, rs_.Push(data_in_, in_length, data_out_, kDataSize,
-                              out_length));
-        EXPECT_EQ(kChannels * kRates[j] / 100, out_length);
-      } else {
-        EXPECT_EQ(-1, rs_.Reset(kRates[i], kRates[j],
-                                kResamplerSynchronousStereo));
-      }
+  // We don't attempt to be exhaustive here, but just get good coverage. Some
+  // combinations of rates will not be resampled, and some give an odd
+  // resampling factor which makes it more difficult to evaluate.
+  const int kSampleRates[] = {16000, 32000, 44100, 48000};
+  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
+  for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
+    for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
+      RunResampleTest(kChannels, kSampleRates[src_rate], kSampleRates[dst_rate]);
    }
  }
 }
--- a/media/webrtc/trunk/webrtc/modules/audio_coding/main/source/acm_resampler.cc
+++ b/media/webrtc/trunk/webrtc/modules/audio_coding/main/source/acm_resampler.cc
@ -47,8 +47,8 @@ int16_t ACMResampler::Resample10Msec(const int16_t* in_audio,

  int32_t ret;
  ResamplerType type;
-  type = (num_audio_channels == 1) ? kResamplerSynchronous :
-      kResamplerSynchronousStereo;
+  type = (num_audio_channels == 1) ? kResamplerFixedSynchronous :
+      kResamplerFixedSynchronousStereo;

  ret = resampler_.ResetIfNeeded(in_freq_hz, out_freq_hz, type);
  if (ret < 0) {
--- a/media/webrtc/trunk/webrtc/voice_engine/output_mixer_internal.cc
+++ b/media/webrtc/trunk/webrtc/voice_engine/output_mixer_internal.cc
@ -35,7 +35,7 @@ int RemixAndResample(const AudioFrame& src_frame,
  }

  const ResamplerType resampler_type = audio_ptr_num_channels == 1 ?
-      kResamplerSynchronous : kResamplerSynchronousStereo;
+      kResamplerFixedSynchronous : kResamplerFixedSynchronousStereo;
  if (resampler->ResetIfNeeded(src_frame.sample_rate_hz_,
                               dst_frame->sample_rate_hz_,
                               resampler_type) == -1) {
--- a/media/webrtc/trunk/webrtc/voice_engine/output_mixer_unittest.cc
+++ b/media/webrtc/trunk/webrtc/voice_engine/output_mixer_unittest.cc
@ -80,13 +80,14 @@ void VerifyParams(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
 }

 // Computes the best SNR based on the error between |ref_frame| and
-// |test_frame|. It allows for up to a 30 sample delay between the signals to
+// |test_frame|. It allows for a sample delay between the signals to
 // compensate for the resampling delay.
-float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame) {
+float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame,
+                 int max_delay) {
  VerifyParams(ref_frame, test_frame);
  float best_snr = 0;
  int best_delay = 0;
-  for (int delay = 0; delay < 30; delay++) {
+  for (int delay = 0; delay < max_delay; delay++) {
    float mse = 0;
    float variance = 0;
    for (int i = 0; i < ref_frame.samples_per_channel_ *
@ -147,18 +148,23 @@ void OutputMixerTest::RunResampleTest(int src_channels,
      SetStereoFrame(&golden_frame_, kDstLeft, kDstRight, dst_sample_rate_hz);
  }

+  // The speex resampler has a known delay dependent on quality and rates,
+  // which we approximate here. Multiplying by two gives us a crude maximum
+  // for any resampling, as the old resampler typically (but not always)
+  // has lower delay.  The actual delay is calculated internally based on the
+  // filter length in the QualityMap.
+  static const int kInputKernelDelaySamples = 16*3;
+  const int max_delay = std::min(1.0f, 1/kResamplingFactor) *
+                        kInputKernelDelaySamples * dst_channels * 2;
  printf("(%d, %d Hz) -> (%d, %d Hz) ",  // SNR reported on the same line later.
      src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
  EXPECT_EQ(0, RemixAndResample(src_frame_, &resampler, &dst_frame_));
-  EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_), 40.0f);
-}
-
-TEST_F(OutputMixerTest, RemixAndResampleFailsWithBadSampleRate) {
-  SetMonoFrame(&dst_frame_, 10, 44100);
-  EXPECT_EQ(-1, RemixAndResample(src_frame_, &resampler_, &dst_frame_));
-  VerifyFramesAreEqual(src_frame_, dst_frame_);
+  EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_, max_delay), 40.0f);
 }

+// These two tests assume memcpy() (no delay and no filtering) for input
+// freq == output freq && same channels.  RemixAndResample uses 'Fixed'
+// resamplers to enable this behavior.
 TEST_F(OutputMixerTest, RemixAndResampleCopyFrameSucceeds) {
  // Stereo -> stereo.
  SetStereoFrame(&src_frame_, 10, 10);
@ -193,7 +199,7 @@ TEST_F(OutputMixerTest, RemixAndResampleSucceeds) {
  // We don't attempt to be exhaustive here, but just get good coverage. Some
  // combinations of rates will not be resampled, and some give an odd
  // resampling factor which makes it more difficult to evaluate.
-  const int kSampleRates[] = {16000, 32000, 48000};
+  const int kSampleRates[] = {16000, 32000, 44100, 48000};
  const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
  const int kChannels[] = {1, 2};
  const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
--- a/media/webrtc/trunk/webrtc/voice_engine/transmit_mixer.cc
+++ b/media/webrtc/trunk/webrtc/voice_engine/transmit_mixer.cc
@ -1152,6 +1152,8 @@ bool TransmitMixer::IsRecordingMic()
 }

 // TODO(andrew): use RemixAndResample for this.
+// Note that if drift compensation is done here, a buffering stage will be
+// needed and this will need to switch to non-fixed resamplers.
 int TransmitMixer::GenerateAudioFrame(const int16_t audio[],
                                      int samples_per_channel,
                                      int num_channels,
@ -1179,7 +1181,7 @@ int TransmitMixer::GenerateAudioFrame(const int16_t audio[],
    }

    ResamplerType resampler_type = (num_channels == 1) ?
-            kResamplerSynchronous : kResamplerSynchronousStereo;
+            kResamplerFixedSynchronous : kResamplerFixedSynchronousStereo;

    if (_audioResampler.ResetIfNeeded(sample_rate_hz,
                                      destination_rate,