Bug 830707. Part 1: Add code for upmixing and downmixing following Web Audio's spec. r=jesup

--HG-- extra : rebase_source : 8cdcbc313fcd371b47ad194f2f0da1102e5e6240
2013-01-16 02:09:38 +13:00 · 2013-01-16 02:09:38 +13:00 · df228b8f85
--- a/content/media/AudioChannelFormat.cpp
+++ b/content/media/AudioChannelFormat.cpp
@ -0,0 +1,227 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioChannelFormat.h"
+
+#include <algorithm>
+
+namespace mozilla {
+
+// Positions of the individual channels inside the 6-channel (5.1) layout
+// { L, R, C, LFE, SL, SR }. AudioChannelsUpMix uses these to index its
+// temporary 5.1 channel table, and AudioChannelsDownMix uses SURROUND_C to
+// find the center channel of its input.
+enum {
+  SURROUND_L,   // left
+  SURROUND_R,   // right
+  SURROUND_C,   // center; a mono input is placed here when up-mixing
+  SURROUND_LFE, // LFE; only present in the 6-channel layout
+  SURROUND_SL,  // surround left
+  SURROUND_SR   // surround right
+};
+
+// Channel counts 1 through CUSTOM_CHANNEL_LAYOUTS each have their own layout.
+// Channels beyond this count are treated as unknown: up-mixing fills them
+// with the zero channel and down-mixing drops them.
+static const int CUSTOM_CHANNEL_LAYOUTS = 6;
+
+/**
+ * Returns a channel count whose layout contains every channel of the layouts
+ * that have aChannels1 and aChannels2 channels.
+ *
+ * Normally this is just the larger of the two counts. The one exception is
+ * combining the 3-channel layout { L, R, C } with quad { L, R, SL, SR }:
+ * quad has no center channel, so the smallest layout that covers both is the
+ * 5-channel layout { L, R, C, SL, SR }. The result does not depend on the
+ * order of the arguments.
+ */
+uint32_t
+GetAudioChannelsSuperset(uint32_t aChannels1, uint32_t aChannels2)
+{
+  const uint32_t smaller = std::min(aChannels1, aChannels2);
+  const uint32_t larger = std::max(aChannels1, aChannels2);
+  if (smaller == 3 && larger == 4) {
+    // One side has a center channel as well as L and R, the other is quad,
+    // so we actually need a 5-channel layout here.
+    return 5;
+  }
+  return larger;
+}
+
+/**
+ * Up-mixes the channel pointers in *aChannelArray in place.
+ *
+ * The array is resized to the superset of its current length and
+ * aOutputChannelCount, the existing channel pointers are moved to the
+ * positions they occupy in the wider layout, and every position that has no
+ * source is set to aZeroChannel. No sample data is read or written; only the
+ * pointers are rearranged.
+ *
+ * The resulting length can exceed aOutputChannelCount: a 3-channel input
+ * with a 4-channel target produces 5 channels, because quad has no center
+ * channel.
+ *
+ * If the superset is not larger than the current length (a violation of the
+ * caller contract, which is also asserted), the array is left untouched.
+ */
+void
+AudioChannelsUpMix(nsTArray<const void*>* aChannelArray,
+                   uint32_t aOutputChannelCount,
+                   const void* aZeroChannel)
+{
+  uint32_t inputChannelCount = aChannelArray->Length();
+  // A 3-channel input with a 4-channel (quad) target must come back as 5
+  // here. Passing the input count first matches the (3, 4) case that
+  // GetAudioChannelsSuperset checks for.
+  uint32_t outputChannelCount =
+    GetAudioChannelsSuperset(inputChannelCount, aOutputChannelCount);
+  NS_ASSERTION(outputChannelCount > inputChannelCount,
+               "No up-mix needed");
+  NS_ASSERTION(inputChannelCount > 0, "Bad number of channels");
+  NS_ASSERTION(outputChannelCount > 0, "Bad number of channels");
+  if (outputChannelCount <= inputChannelCount) {
+    // Nothing to widen. Bail out rather than run the remapping below, which
+    // assumes a real up-mix (its 2-channel output case would otherwise
+    // replace both channels with the empty center channel).
+    return;
+  }
+
+  aChannelArray->SetLength(outputChannelCount);
+
+  if (inputChannelCount < CUSTOM_CHANNEL_LAYOUTS) {
+    const void* surroundChannels[CUSTOM_CHANNEL_LAYOUTS] =
+      { aZeroChannel, aZeroChannel, aZeroChannel,
+        aZeroChannel, aZeroChannel, aZeroChannel
+      };
+    // First just map everything up to 5.1
+    switch (inputChannelCount) {
+    case 1:
+      surroundChannels[SURROUND_C] = aChannelArray->ElementAt(0);
+      break;
+    case 2:
+      surroundChannels[SURROUND_L] = aChannelArray->ElementAt(0);
+      surroundChannels[SURROUND_R] = aChannelArray->ElementAt(1);
+      break;
+    case 3:
+      surroundChannels[SURROUND_L] = aChannelArray->ElementAt(0);
+      surroundChannels[SURROUND_R] = aChannelArray->ElementAt(1);
+      surroundChannels[SURROUND_C] = aChannelArray->ElementAt(2);
+      break;
+    case 4:
+      surroundChannels[SURROUND_L] = aChannelArray->ElementAt(0);
+      surroundChannels[SURROUND_R] = aChannelArray->ElementAt(1);
+      surroundChannels[SURROUND_SL] = aChannelArray->ElementAt(2);
+      surroundChannels[SURROUND_SR] = aChannelArray->ElementAt(3);
+      break;
+    case 5:
+      surroundChannels[SURROUND_L] = aChannelArray->ElementAt(0);
+      surroundChannels[SURROUND_R] = aChannelArray->ElementAt(1);
+      surroundChannels[SURROUND_C] = aChannelArray->ElementAt(2);
+      surroundChannels[SURROUND_SL] = aChannelArray->ElementAt(3);
+      surroundChannels[SURROUND_SR] = aChannelArray->ElementAt(4);
+      break;
+    default:
+      // Zero input channels (asserted against above): everything stays
+      // mapped to aZeroChannel.
+      break;
+    }
+
+    if (outputChannelCount < CUSTOM_CHANNEL_LAYOUTS) {
+      // Map back from 5.1 to the layout with outputChannelCount channels.
+      switch (outputChannelCount) {
+      case 2:
+        // Upmix from mono, so use the center channel.
+        aChannelArray->ElementAt(0) = surroundChannels[SURROUND_C];
+        aChannelArray->ElementAt(1) = surroundChannels[SURROUND_C];
+        break;
+      case 3:
+        aChannelArray->ElementAt(0) = surroundChannels[SURROUND_L];
+        aChannelArray->ElementAt(1) = surroundChannels[SURROUND_R];
+        aChannelArray->ElementAt(2) = surroundChannels[SURROUND_C];
+        break;
+      case 4:
+        // We avoided this case up above.
+        NS_ASSERTION(inputChannelCount != 3,
+                     "3->4 upmix not supported directly");
+        if (inputChannelCount == 1) {
+          // Output has no center channel, so map the mono to
+          // L+R channels per Web Audio
+          aChannelArray->ElementAt(0) = surroundChannels[SURROUND_C];
+          aChannelArray->ElementAt(1) = surroundChannels[SURROUND_C];
+        } else {
+          aChannelArray->ElementAt(0) = surroundChannels[SURROUND_L];
+          aChannelArray->ElementAt(1) = surroundChannels[SURROUND_R];
+        }
+        aChannelArray->ElementAt(2) = surroundChannels[SURROUND_SL];
+        aChannelArray->ElementAt(3) = surroundChannels[SURROUND_SR];
+        break;
+      case 5:
+        aChannelArray->ElementAt(0) = surroundChannels[SURROUND_L];
+        aChannelArray->ElementAt(1) = surroundChannels[SURROUND_R];
+        aChannelArray->ElementAt(2) = surroundChannels[SURROUND_C];
+        aChannelArray->ElementAt(3) = surroundChannels[SURROUND_SL];
+        aChannelArray->ElementAt(4) = surroundChannels[SURROUND_SR];
+        break;
+      default:
+        break;
+      }
+      return;
+    }
+
+    // The output has at least 6 channels: its first six are exactly the 5.1
+    // table built above.
+    for (uint32_t i = 0; i < CUSTOM_CHANNEL_LAYOUTS; ++i) {
+      aChannelArray->ElementAt(i) = surroundChannels[i];
+    }
+    inputChannelCount = CUSTOM_CHANNEL_LAYOUTS;
+  }
+
+  // Channels beyond those supplied by the input have no source; make them
+  // silent.
+  for (uint32_t i = inputChannelCount; i < outputChannelCount; ++i) {
+    aChannelArray->ElementAt(i) = aZeroChannel;
+  }
+}
+
+/**
+ * DownMixMatrix represents a conversion matrix efficiently by exploiting the
+ * fact that each input channel contributes to at most one output channel,
+ * except possibly for the C input channel in layouts that have one. Also,
+ * every input channel is multiplied by the same coefficient for every output
+ * channel it contributes to.
+ *
+ * Both arrays are indexed by input channel. When mixing N input channels
+ * only the first N entries of each array are read, so the tables below list
+ * just N initializers per row.
+ */
+struct DownMixMatrix {
+  // Every input channel c is copied to output channel mInputDestination[c]
+  // after multiplying by mInputCoefficient[c].
+  // An entry may be IGNORE, in which case the input channel ends up in no
+  // real output channel.
+  uint8_t mInputDestination[CUSTOM_CHANNEL_LAYOUTS];
+  // If not IGNORE, then the C channel is copied to this output channel after
+  // multiplying by its coefficient.
+  // This is in addition to the destination given by
+  // mInputDestination[SURROUND_C].
+  uint8_t mCExtraDestination;
+  // Gain applied to each input channel before it is added to its
+  // destination(s).
+  float mInputCoefficient[CUSTOM_CHANNEL_LAYOUTS];
+};
+
+// Destination value meaning "this input channel feeds no real output
+// channel". AudioChannelsDownMix still uses it as an index into its
+// per-sample scratch array, so that array needs at least IGNORE + 1 entries.
+static const int IGNORE = CUSTOM_CHANNEL_LAYOUTS;
+// Placeholder coefficient for input channels whose destination is IGNORE.
+static const float IGNORE_F = 0.0f;
+
+// One matrix for every (input count, output count) pair with
+// 1 <= output < input <= CUSTOM_CHANNEL_LAYOUTS.
+// Rows are grouped by output channel count; within a group they are ordered
+// by increasing input channel count, starting one above the output count.
+// AudioChannelsDownMix finds the start of a group through
+// gDownMixMatrixIndexByOutputChannels, so the row order must not change.
+// Each row is { destinations, extra destination for C, coefficients }.
+static const DownMixMatrix
+gDownMixMatrices[CUSTOM_CHANNEL_LAYOUTS*(CUSTOM_CHANNEL_LAYOUTS - 1)/2] =
+{
+  // Downmixes to mono (from 2, 3, 4, 5 and 6 input channels)
+  { { 0, 0 }, IGNORE, { 0.5f, 0.5f } },
+  { { 0, 0, 0 }, IGNORE, { 0.3333f, 0.3333f, 0.3333f } },
+  { { 0, 0, 0, 0 }, IGNORE, { 0.25f, 0.25f, 0.25f, 0.25f } },
+  { { 0, 0, 0, 0, 0 }, IGNORE, { 0.7071f, 0.7071f, 1.0f, 0.5f, 0.5f } },
+  { { 0, 0, 0, IGNORE, 0, 0 }, IGNORE, { 0.7071f, 0.7071f, 1.0f, IGNORE_F, 0.5f, 0.5f } },
+  // Downmixes to stereo (from 3, 4, 5 and 6 input channels)
+  { { 0, 1, 0 }, 1, { 1.0f, 1.0f, 0.7071f } },
+  { { 0, 1, 0, 1 }, IGNORE, { 0.5f, 0.5f, 0.5f, 0.5f } },
+  { { 0, 1, 0, 0, 1 }, 1, { 1.0f, 1.0f, 0.7071f, 0.7071f, 0.7071f } },
+  { { 0, 1, 0, IGNORE, 0, 1 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 0.7071f, 0.7071f } },
+  // Downmixes to 3-channel (from 4, 5 and 6 input channels)
+  { { 0, 1, 0, 1 }, IGNORE, { 0.25f, 0.25f, 0.25f, 0.25f } },
+  { { 0, 1, 2, 0, 1 }, IGNORE, { 0.5f, 0.5f, 1.0f, 0.5f, 0.5f } },
+  { { 0, 1, 2, IGNORE, 0, 1 }, IGNORE, { 0.5f, 0.5f, 1.0f, IGNORE_F, 0.5f, 0.5f } },
+  // Downmixes to quad (from 5 and 6 input channels)
+  { { 0, 1, 0, 2, 3 }, 1, { 1.0f, 1.0f, 0.7071f, 1.0f, 1.0f } },
+  { { 0, 1, 0, IGNORE, 2, 3 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 1.0f, 1.0f } },
+  // Downmixes to 5-channel (from 6 input channels)
+  { { 0, 1, 2, IGNORE, 3, 4 }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F, 1.0f, 1.0f } }
+};
+
+// Entry (N - 1) is the index in gDownMixMatrices of the first matrix that
+// down-mixes to N output channels, i.e. the one for N + 1 input channels.
+static const int gDownMixMatrixIndexByOutputChannels[CUSTOM_CHANNEL_LAYOUTS - 1] =
+  { 0, 5, 9, 12, 14 };
+
+/**
+ * Down-mixes the float channels in aChannelArray to aOutputChannelCount
+ * channels and writes aDuration samples into each of
+ * aOutputChannels[0 .. aOutputChannelCount - 1].
+ *
+ * With 6 or more output channels the first aOutputChannelCount input
+ * channels are copied unchanged and the rest are dropped. Otherwise input
+ * channels beyond the sixth are dropped and the remaining ones are combined
+ * using the matching entry of gDownMixMatrices.
+ *
+ * The input must have more channels than the output and the output must have
+ * at least one channel. If that does not hold the function asserts and
+ * returns without writing anything, because no conversion matrix exists for
+ * such a combination.
+ */
+void
+AudioChannelsDownMix(const nsTArray<const void*>& aChannelArray,
+                     float** aOutputChannels,
+                     uint32_t aOutputChannelCount,
+                     uint32_t aDuration)
+{
+  uint32_t inputChannelCount = aChannelArray.Length();
+  const void* const* inputChannels = aChannelArray.Elements();
+  NS_ASSERTION(inputChannelCount > aOutputChannelCount, "Nothing to do");
+  if (aOutputChannelCount == 0 || inputChannelCount <= aOutputChannelCount) {
+    // The table lookup below would use an out-of-range or unrelated index.
+    return;
+  }
+
+  if (aOutputChannelCount >= CUSTOM_CHANNEL_LAYOUTS) {
+    // Just drop the unknown channels.
+    for (uint32_t o = 0; o < aOutputChannelCount; ++o) {
+      memcpy(aOutputChannels[o], inputChannels[o], aDuration*sizeof(float));
+    }
+    return;
+  }
+
+  // Ignore unknown channels, they're just dropped.
+  inputChannelCount =
+    std::min<uint32_t>(CUSTOM_CHANNEL_LAYOUTS, inputChannelCount);
+
+  const DownMixMatrix& m = gDownMixMatrices[
+    gDownMixMatrixIndexByOutputChannels[aOutputChannelCount - 1] +
+    inputChannelCount - aOutputChannelCount - 1];
+
+  // This is slow, but general. We can define custom code for special
+  // cases later.
+  for (uint32_t s = 0; s < aDuration; ++s) {
+    // Accumulators for one output sample per channel. Destinations range
+    // from 0 up to and including IGNORE (== CUSTOM_CHANNEL_LAYOUTS), so the
+    // array needs CUSTOM_CHANNEL_LAYOUTS + 1 entries; the last one is a junk
+    // slot for input channels that contribute to nothing. All entries start
+    // at zero.
+    float mixed[CUSTOM_CHANNEL_LAYOUTS + 1] = { 0.0f };
+    for (uint32_t c = 0; c < inputChannelCount; ++c) {
+      const float* input = static_cast<const float*>(inputChannels[c]);
+      mixed[m.mInputDestination[c]] += m.mInputCoefficient[c]*input[s];
+    }
+    // Utilize the fact that in every layout, C is the third channel.
+    if (m.mCExtraDestination != IGNORE) {
+      const float* center =
+        static_cast<const float*>(inputChannels[SURROUND_C]);
+      mixed[m.mCExtraDestination] +=
+        m.mInputCoefficient[SURROUND_C]*center[s];
+    }
+
+    for (uint32_t c = 0; c < aOutputChannelCount; ++c) {
+      aOutputChannels[c][s] = mixed[c];
+    }
+  }
+}
+
+}
--- a/content/media/AudioChannelFormat.h
+++ b/content/media/AudioChannelFormat.h
@ -0,0 +1,70 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim:set ts=2 sw=2 sts=2 et cindent: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef MOZILLA_AUDIOCHANNELFORMAT_H_
+#define MOZILLA_AUDIOCHANNELFORMAT_H_
+
+#include "nsTArray.h"
+
+namespace mozilla {
+
+/*
+ * This file provides utilities for upmixing and downmixing channels.
+ *
+ * The channel layouts, upmixing and downmixing are consistent with the
+ * Web Audio spec.
+ *
+ * Channel layouts for up to 6 channels:
+ *   mono   { M }
+ *   stereo { L, R }
+ *          { L, R, C }
+ *   quad   { L, R, SL, SR }
+ *          { L, R, C, SL, SR }
+ *   5.1    { L, R, C, LFE, SL, SR }
+ *
+ * Only 1, 2, 4 and 6 are currently defined in Web Audio.
+ */
+
+/**
+ * Return a channel count whose channel layout includes all the channels from
+ * aChannels1 and aChannels2.
+ * This is usually the larger of the two counts, but not always: for example
+ * aChannels1 = 3 and aChannels2 = 4 gives 5, because the quad layout has no
+ * C channel.
+ */
+uint32_t
+GetAudioChannelsSuperset(uint32_t aChannels1, uint32_t aChannels2);
+
+/**
+ * Given an array of input channel data, and an output channel count,
+ * replaces the array with an array of upmixed channels.
+ * This shuffles the array and may set some channel buffers to aZeroChannel.
+ * Don't call this with input count >= output count.
+ * This may return *more* channels than requested. In that case, downmixing
+ * is required to get to aOutputChannelCount. (This is how we handle
+ * odd cases like 3 -> 4 upmixing.)
+ * If aChannelArray.Length() was the input to one of a series of
+ * GetAudioChannelsSuperset calls resulting in aOutputChannelCount,
+ * no downmixing will be required.
+ *
+ * @param aChannelArray the channel buffers to upmix; modified in place. Its
+ *                      length on entry is the input channel count.
+ * @param aOutputChannelCount the requested number of channels.
+ * @param aZeroChannel the buffer stored in every output channel that has no
+ *                     corresponding input channel.
+ */
+void
+AudioChannelsUpMix(nsTArray<const void*>* aChannelArray,
+                   uint32_t aOutputChannelCount,
+                   const void* aZeroChannel);
+
+/**
+ * Given an array of input channels (which must be float format!),
+ * downmix to aOutputChannelCount, and copy the results to the
+ * channel buffers in aOutputChannels.
+ * Don't call this with input count <= output count.
+ *
+ * @param aChannelArray the input channel buffers; each is read as an array
+ *                      of aDuration floats.
+ * @param aOutputChannels an array of aOutputChannelCount buffers, each of
+ *                        which receives aDuration floats.
+ * @param aOutputChannelCount the number of channels to produce.
+ * @param aDuration the number of samples to process per channel.
+ */
+void
+AudioChannelsDownMix(const nsTArray<const void*>& aChannelArray,
+                     float** aOutputChannels,
+                     uint32_t aOutputChannelCount,
+                     uint32_t aDuration);
+
+// A version of AudioChannelsDownMix that downmixes int16_ts may be required.
+
+} // namespace mozilla
+
+#endif /* MOZILLA_AUDIOCHANNELFORMAT_H_ */
--- a/content/media/Makefile.in
+++ b/content/media/Makefile.in
@ -18,6 +18,7 @@ endif # !_MSC_VER

 EXPORTS = \
  AbstractMediaDecoder.h \
+  AudioChannelFormat.h \
  AudioSampleFormat.h \
  AudioSegment.h \
  BufferMediaResource.h \
@ -44,6 +45,7 @@ EXPORTS = \
  $(NULL)

 CPPSRCS = \
+  AudioChannelFormat.cpp \
  AudioSegment.cpp \
  DecoderTraits.cpp \
  FileBlockCache.cpp \