Bug 1156472 - Part 13 - Make necessary adjustments for integer audio. r=jesup

2015-07-24 14:28:17 +02:00 · 2015-07-24 14:28:17 +02:00 · 90a222f71a
--- a/dom/media/AudioCaptureStream.cpp
+++ b/dom/media/AudioCaptureStream.cpp
@ -0,0 +1,133 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "MediaStreamGraphImpl.h"
+#include "mozilla/MathAlgorithms.h"
+#include "mozilla/unused.h"
+
+#include "AudioSegment.h"
+#include "mozilla/Logging.h"
+#include "mozilla/Attributes.h"
+#include "AudioCaptureStream.h"
+#include "ImageContainer.h"
+#include "AudioNodeEngine.h"
+#include "AudioNodeStream.h"
+#include "AudioNodeExternalInputStream.h"
+#include "webaudio/MediaStreamAudioDestinationNode.h"
+#include <algorithm>
+#include "DOMMediaStream.h"
+
+using namespace mozilla::layers;
+using namespace mozilla::dom;
+using namespace mozilla::gfx;
+
+namespace mozilla
+{
+
+// We are mixing to mono until PeerConnection can accept stereo
+static const uint32_t MONO = 1;
+
+// Constructs the stream on the main thread (asserted below). The output track
+// has not been announced to listeners yet, hence mTrackCreated starts false.
+AudioCaptureStream::AudioCaptureStream(DOMMediaStream* aWrapper)
+  : ProcessedMediaStream(aWrapper), mTrackCreated(false)
+{
+  MOZ_ASSERT(NS_IsMainThread());
+  MOZ_COUNT_CTOR(AudioCaptureStream);
+  // Register for mixed data; the mixer hands it back through MixerCallback().
+  mMixer.AddCallback(this);
+}
+
+// Unregisters this stream from the mixer it subscribed to in the constructor.
+AudioCaptureStream::~AudioCaptureStream()
+{
+  MOZ_COUNT_DTOR(AudioCaptureStream);
+  mMixer.RemoveCallback(this);
+}
+
+// Produces the output audio track for the [aFrom, aTo] graph interval: silence
+// when this stream is finished, blocked, in a muted cycle or has no input;
+// otherwise the mix of every audio track of every input stream, down to MONO
+// channels. aFlags is not used by this implementation.
+void
+AudioCaptureStream::ProcessInput(GraphTime aFrom, GraphTime aTo,
+                                 uint32_t aFlags)
+{
+  uint32_t inputCount = mInputs.Length();
+  StreamBuffer::Track* track = EnsureTrack(AUDIO_TRACK);
+  // Notify the DOM everything is in order. This is done only once, the first
+  // time input is processed.
+  if (!mTrackCreated) {
+    for (uint32_t i = 0; i < mListeners.Length(); i++) {
+      MediaStreamListener* l = mListeners[i];
+      AudioSegment tmp;
+      l->NotifyQueuedTrackChanges(
+        Graph(), AUDIO_TRACK, 0, MediaStreamListener::TRACK_EVENT_CREATED, tmp);
+      l->NotifyFinishedTrackCreation(Graph());
+    }
+    mTrackCreated = true;
+  }
+
+  // If the captured stream is connected back to an object on the page (be it
+  // an HTMLMediaElement with a stream as source, or an AudioContext), a cycle
+  // situation occurs. This can work if it's an AudioContext with at least one
+  // DelayNode, but the MSG will mute the whole cycle otherwise.
+  bool blocked = mFinished || mBlocked.GetAt(aFrom);
+  if (blocked || InMutedCycle() || inputCount == 0) {
+    track->Get<AudioSegment>()->AppendNullData(aTo - aFrom);
+  } else {
+    // We mix all the audio tracks of all inputs into a single track with MONO
+    // channels: every input is mixed to that channel count.
+    mMixer.StartMixing();
+    for (uint32_t i = 0; i < inputCount; i++) {
+      MediaStream* s = mInputs[i]->GetSource();
+      StreamBuffer::TrackIter tracks(s->GetStreamBuffer(), MediaSegment::AUDIO);
+      while (!tracks.IsEnded()) {
+        AudioSegment* inputSegment = tracks->Get<AudioSegment>();
+        StreamTime inputStart = s->GraphTimeToStreamTime(aFrom);
+        StreamTime inputEnd = s->GraphTimeToStreamTime(aTo);
+        AudioSegment toMix;
+        toMix.AppendSlice(*inputSegment, inputStart, inputEnd);
+        // Care for streams blocked in the [aFrom, aTo] range: pad with silence
+        // so that every input contributes the same duration to the mix.
+        if (inputEnd - inputStart < aTo - aFrom) {
+          toMix.AppendNullData((aTo - aFrom) - (inputEnd - inputStart));
+        }
+        toMix.Mix(mMixer, MONO, Graph()->GraphRate());
+        tracks.Next();
+      }
+    }
+    // This calls MixerCallback below
+    mMixer.FinishMixing();
+  }
+
+  // Regardless of the status of the input tracks, we go forward.
+  mBuffer.AdvanceKnownTracksTime(GraphTimeToStreamTime((aTo)));
+}
+
+// Mixer callback, reached through mMixer.FinishMixing() in ProcessInput().
+// Copies the planar samples of aMixedBuffer (aChannels channels of aFrames
+// frames each) into storage owned by a new chunk, and appends that chunk to
+// the output track. aSampleRate is not used.
+void
+AudioCaptureStream::MixerCallback(AudioDataValue* aMixedBuffer,
+                                  AudioSampleFormat aFormat, uint32_t aChannels,
+                                  uint32_t aFrames, uint32_t aSampleRate)
+{
+  // MONO is only the inline capacity here. The lengths come from aChannels, the
+  // bound of every per-channel loop below, so that no loop can index past the
+  // end of an array and no channel pointer is left uninitialized.
+  // NOTE(review): presumably aChannels is MONO whenever something was mixed;
+  // confirm against the mixer.
+  nsAutoTArray<nsTArray<AudioDataValue>, MONO> output;
+  nsAutoTArray<const AudioDataValue*, MONO> bufferPtrs;
+  output.SetLength(aChannels);
+  bufferPtrs.SetLength(aChannels);
+
+  uint32_t written = 0;
+  // We need to copy here, because the mixer will reuse the storage, we should
+  // not hold onto it. Buffers are in planar format.
+  for (uint32_t channel = 0; channel < aChannels; channel++) {
+    AudioDataValue* out = output[channel].AppendElements(aFrames);
+    PodCopy(out, aMixedBuffer + written, aFrames);
+    bufferPtrs[channel] = out;
+    written += aFrames;
+  }
+  AudioChunk chunk;
+  chunk.mBuffer = new mozilla::SharedChannelArrayBuffer<AudioDataValue>(&output);
+  chunk.mDuration = aFrames;
+  chunk.mBufferFormat = aFormat;
+  chunk.mVolume = 1.0f;
+  chunk.mChannelData.SetLength(aChannels);
+  for (uint32_t channel = 0; channel < aChannels; channel++) {
+    chunk.mChannelData[channel] = bufferPtrs[channel];
+  }
+
+  // Now we have mixed data, simply append it to our track.
+  EnsureTrack(AUDIO_TRACK)->Get<AudioSegment>()->AppendAndConsumeChunk(&chunk);
+}
+}
--- a/dom/media/AudioChannelFormat.cpp
+++ b/dom/media/AudioChannelFormat.cpp
@ -4,26 +4,11 @@
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

 #include "AudioChannelFormat.h"
-#include "nsTArray.h"

 #include <algorithm>

 namespace mozilla {

-enum {
-  SURROUND_L,
-  SURROUND_R,
-  SURROUND_C,
-  SURROUND_LFE,
-  SURROUND_SL,
-  SURROUND_SR
-};
-
-static const uint32_t CUSTOM_CHANNEL_LAYOUTS = 6;
-
-static const int IGNORE = CUSTOM_CHANNEL_LAYOUTS;
-static const float IGNORE_F = 0.0f;
-
 uint32_t
 GetAudioChannelsSuperset(uint32_t aChannels1, uint32_t aChannels2)
 {
@ -63,9 +48,6 @@ gUpMixMatrices[CUSTOM_CHANNEL_LAYOUTS*(CUSTOM_CHANNEL_LAYOUTS - 1)/2] =
  { { 0, 1, 2, 3, 4, IGNORE } }
 };

-static const int gMixingMatrixIndexByChannels[CUSTOM_CHANNEL_LAYOUTS - 1] =
-  { 0, 5, 9, 12, 14 };
-
 void
 AudioChannelsUpMix(nsTArray<const void*>* aChannelArray,
                   uint32_t aOutputChannelCount,
@ -108,94 +90,4 @@ AudioChannelsUpMix(nsTArray<const void*>* aChannelArray,
  }
 }

-/**
- * DownMixMatrix represents a conversion matrix efficiently by exploiting the
- * fact that each input channel contributes to at most one output channel,
- * except possibly for the C input channel in layouts that have one. Also,
- * every input channel is multiplied by the same coefficient for every output
- * channel it contributes to.
- */
-struct DownMixMatrix {
-  // Every input channel c is copied to output channel mInputDestination[c]
-  // after multiplying by mInputCoefficient[c].
-  uint8_t mInputDestination[CUSTOM_CHANNEL_LAYOUTS];
-  // If not IGNORE, then the C channel is copied to this output channel after
-  // multiplying by its coefficient.
-  uint8_t mCExtraDestination;
-  float mInputCoefficient[CUSTOM_CHANNEL_LAYOUTS];
-};
-
-static const DownMixMatrix
-gDownMixMatrices[CUSTOM_CHANNEL_LAYOUTS*(CUSTOM_CHANNEL_LAYOUTS - 1)/2] =
-{
-  // Downmixes to mono
-  { { 0, 0 }, IGNORE, { 0.5f, 0.5f } },
-  { { 0, IGNORE, IGNORE }, IGNORE, { 1.0f, IGNORE_F, IGNORE_F } },
-  { { 0, 0, 0, 0 }, IGNORE, { 0.25f, 0.25f, 0.25f, 0.25f } },
-  { { 0, IGNORE, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, IGNORE_F, IGNORE_F, IGNORE_F, IGNORE_F } },
-  { { 0, 0, 0, IGNORE, 0, 0 }, IGNORE, { 0.7071f, 0.7071f, 1.0f, IGNORE_F, 0.5f, 0.5f } },
-  // Downmixes to stereo
-  { { 0, 1, IGNORE }, IGNORE, { 1.0f, 1.0f, IGNORE_F } },
-  { { 0, 1, 0, 1 }, IGNORE, { 0.5f, 0.5f, 0.5f, 0.5f } },
-  { { 0, 1, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, IGNORE_F, IGNORE_F, IGNORE_F } },
-  { { 0, 1, 0, IGNORE, 0, 1 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 0.7071f, 0.7071f } },
-  // Downmixes to 3-channel
-  { { 0, 1, 2, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F } },
-  { { 0, 1, 2, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F, IGNORE_F } },
-  { { 0, 1, 2, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F, IGNORE_F, IGNORE_F } },
-  // Downmixes to quad
-  { { 0, 1, 2, 3, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, 1.0f, IGNORE_F } },
-  { { 0, 1, 0, IGNORE, 2, 3 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 1.0f, 1.0f } },
-  // Downmixes to 5-channel
-  { { 0, 1, 2, 3, 4, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, IGNORE_F } }
-};
-
-void
-AudioChannelsDownMix(const nsTArray<const void*>& aChannelArray,
-                     float** aOutputChannels,
-                     uint32_t aOutputChannelCount,
-                     uint32_t aDuration)
-{
-  uint32_t inputChannelCount = aChannelArray.Length();
-  const void* const* inputChannels = aChannelArray.Elements();
-  NS_ASSERTION(inputChannelCount > aOutputChannelCount, "Nothing to do");
-
-  if (inputChannelCount > 6) {
-    // Just drop the unknown channels.
-    for (uint32_t o = 0; o < aOutputChannelCount; ++o) {
-      memcpy(aOutputChannels[o], inputChannels[o], aDuration*sizeof(float));
-    }
-    return;
-  }
-
-  // Ignore unknown channels, they're just dropped.
-  inputChannelCount = std::min<uint32_t>(6, inputChannelCount);
-
-  const DownMixMatrix& m = gDownMixMatrices[
-    gMixingMatrixIndexByChannels[aOutputChannelCount - 1] +
-    inputChannelCount - aOutputChannelCount - 1];
-
-  // This is slow, but general. We can define custom code for special
-  // cases later.
-  for (uint32_t s = 0; s < aDuration; ++s) {
-    // Reserve an extra junk channel at the end for the cases where we
-    // want an input channel to contribute to nothing
-    float outputChannels[CUSTOM_CHANNEL_LAYOUTS + 1];
-    memset(outputChannels, 0, sizeof(float)*(CUSTOM_CHANNEL_LAYOUTS));
-    for (uint32_t c = 0; c < inputChannelCount; ++c) {
-      outputChannels[m.mInputDestination[c]] +=
-        m.mInputCoefficient[c]*(static_cast<const float*>(inputChannels[c]))[s];
-    }
-    // Utilize the fact that in every layout, C is the third channel.
-    if (m.mCExtraDestination != IGNORE) {
-      outputChannels[m.mCExtraDestination] +=
-        m.mInputCoefficient[SURROUND_C]*(static_cast<const float*>(inputChannels[SURROUND_C]))[s];
-    }
-
-    for (uint32_t c = 0; c < aOutputChannelCount; ++c) {
-      aOutputChannels[c][s] = outputChannels[c];
-    }
-  }
-}
-
 } // namespace mozilla
--- a/dom/media/AudioChannelFormat.h
+++ b/dom/media/AudioChannelFormat.h
@ -9,6 +9,8 @@
 #include <stdint.h>

 #include "nsTArrayForwardDeclare.h"
+#include "AudioSampleFormat.h"
+#include "nsTArray.h"

 namespace mozilla {

@ -29,6 +31,26 @@ namespace mozilla {
 * Only 1, 2, 4 and 6 are currently defined in Web Audio.
 */

+// Index of each channel inside a channel array. AudioChannelsDownMix relies
+// on SURROUND_C to locate the C channel when a matrix routes it to a second
+// output channel.
+enum {
+  SURROUND_L,
+  SURROUND_R,
+  SURROUND_C,
+  SURROUND_LFE,
+  SURROUND_SL,
+  SURROUND_SR
+};
+
+// Number of entries in the per-channel arrays of DownMixMatrix, and the basis
+// for the sizes of the mixing tables below.
+const uint32_t CUSTOM_CHANNEL_LAYOUTS = 6;
+
+// This is defined by some Windows SDK header.
+#undef IGNORE
+
+// Destination index for an input channel that must not reach any output
+// channel: one past the last valid channel index.
+const int IGNORE = CUSTOM_CHANNEL_LAYOUTS;
+// Coefficient used in the matrices for channels whose destination is IGNORE.
+const float IGNORE_F = 0.0f;
+
+// Indexed by (output channel count - 1): position in gDownMixMatrices of the
+// first matrix that produces that many output channels.
+const int gMixingMatrixIndexByChannels[CUSTOM_CHANNEL_LAYOUTS - 1] =
+  { 0, 5, 9, 12, 14 };
+
 /**
 * Return a channel count whose channel layout includes all the channels from
 * aChannels1 and aChannels2.
@ -53,19 +75,102 @@ AudioChannelsUpMix(nsTArray<const void*>* aChannelArray,
                   uint32_t aOutputChannelCount,
                   const void* aZeroChannel);

-/**
- * Given an array of input channels (which must be float format!),
- * downmix to aOutputChannelCount, and copy the results to the
- * channel buffers in aOutputChannels.
- * Don't call this with input count <= output count.
- */
-void
-AudioChannelsDownMix(const nsTArray<const void*>& aChannelArray,
-                     float** aOutputChannels,
-                     uint32_t aOutputChannelCount,
-                     uint32_t aDuration);

-// A version of AudioChannelsDownMix that downmixes int16_ts may be required.
+/**
+ * DownMixMatrix represents a conversion matrix efficiently by exploiting the
+ * fact that each input channel contributes to at most one output channel,
+ * except possibly for the C input channel in layouts that have one. Also,
+ * every input channel is multiplied by the same coefficient for every output
+ * channel it contributes to.
+ */
+struct DownMixMatrix {
+  // Every input channel c is copied to output channel mInputDestination[c]
+  // after multiplying by mInputCoefficient[c]. A destination of IGNORE sends
+  // the channel to a scratch slot that is never copied to the output.
+  uint8_t mInputDestination[CUSTOM_CHANNEL_LAYOUTS];
+  // If not IGNORE, then the C channel is also copied to this output channel
+  // after multiplying by its coefficient, mInputCoefficient[SURROUND_C].
+  uint8_t mCExtraDestination;
+  // Gain applied to input channel c before it is added to its destination(s).
+  float mInputCoefficient[CUSTOM_CHANNEL_LAYOUTS];
+};
+
+// All the down-mix matrices, grouped by output channel count (1 to 5). Within
+// a group, rows are ordered by increasing input channel count, from
+// (output count + 1) up to CUSTOM_CHANNEL_LAYOUTS. AudioChannelsDownMix finds
+// a row by adding (input count - output count - 1) to the group's first index,
+// taken from gMixingMatrixIndexByChannels, so the row order must not change.
+static const DownMixMatrix
+gDownMixMatrices[CUSTOM_CHANNEL_LAYOUTS*(CUSTOM_CHANNEL_LAYOUTS - 1)/2] =
+{
+  // Downmixes to mono
+  { { 0, 0 }, IGNORE, { 0.5f, 0.5f } },
+  { { 0, IGNORE, IGNORE }, IGNORE, { 1.0f, IGNORE_F, IGNORE_F } },
+  { { 0, 0, 0, 0 }, IGNORE, { 0.25f, 0.25f, 0.25f, 0.25f } },
+  { { 0, IGNORE, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, IGNORE_F, IGNORE_F, IGNORE_F, IGNORE_F } },
+  { { 0, 0, 0, IGNORE, 0, 0 }, IGNORE, { 0.7071f, 0.7071f, 1.0f, IGNORE_F, 0.5f, 0.5f } },
+  // Downmixes to stereo
+  { { 0, 1, IGNORE }, IGNORE, { 1.0f, 1.0f, IGNORE_F } },
+  { { 0, 1, 0, 1 }, IGNORE, { 0.5f, 0.5f, 0.5f, 0.5f } },
+  { { 0, 1, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, IGNORE_F, IGNORE_F, IGNORE_F } },
+  { { 0, 1, 0, IGNORE, 0, 1 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 0.7071f, 0.7071f } },
+  // Downmixes to 3-channel
+  { { 0, 1, 2, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F } },
+  { { 0, 1, 2, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F, IGNORE_F } },
+  { { 0, 1, 2, IGNORE, IGNORE, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, IGNORE_F, IGNORE_F, IGNORE_F } },
+  // Downmixes to quad
+  { { 0, 1, 2, 3, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, 1.0f, IGNORE_F } },
+  { { 0, 1, 0, IGNORE, 2, 3 }, 1, { 1.0f, 1.0f, 0.7071f, IGNORE_F, 1.0f, 1.0f } },
+  // Downmixes to 5-channel
+  { { 0, 1, 2, 3, 4, IGNORE }, IGNORE, { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, IGNORE_F } }
+};
+
+/**
+ * Given an array of input channels, downmix to aOutputChannelCount, and copy
+ * the results to the channel buffers in aOutputChannels.  Don't call this with
+ * input count <= output count.
+ *
+ * @param aChannelArray       one pointer per input channel; each is read as
+ *                            aDuration samples of type T.
+ * @param aOutputChannels     aOutputChannelCount buffers; aDuration samples of
+ *                            type T are written to each.
+ * @param aOutputChannelCount number of channels to produce.
+ * @param aDuration           number of samples processed per channel.
+ *
+ * Inputs with more than CUSTOM_CHANNEL_LAYOUTS channels have no mixing matrix:
+ * their first aOutputChannelCount channels are copied and the rest dropped.
+ *
+ * NOTE(review): sums are accumulated in T, so with an integer sample type a
+ * matrix whose coefficients add up to more than 1 can presumably overflow;
+ * confirm whether callers need saturation.
+ */
+template<typename T>
+void AudioChannelsDownMix(const nsTArray<const void*>& aChannelArray,
+                          T** aOutputChannels,
+                          uint32_t aOutputChannelCount,
+                          uint32_t aDuration)
+{
+  uint32_t inputChannelCount = aChannelArray.Length();
+  const void* const* inputChannels = aChannelArray.Elements();
+  NS_ASSERTION(inputChannelCount > aOutputChannelCount, "Nothing to do");
+
+  if (aOutputChannelCount == 0 || inputChannelCount <= aOutputChannelCount) {
+    // Either there is nothing to write, or the precondition was violated. In
+    // both cases the matrix index computed below would wrap around and read
+    // outside of the tables, so bail out instead.
+    return;
+  }
+
+  if (inputChannelCount > CUSTOM_CHANNEL_LAYOUTS) {
+    // Just drop the unknown channels.
+    for (uint32_t o = 0; o < aOutputChannelCount; ++o) {
+      memcpy(aOutputChannels[o], inputChannels[o], aDuration*sizeof(T));
+    }
+    return;
+  }
+
+  // From here on, aOutputChannelCount < inputChannelCount <=
+  // CUSTOM_CHANNEL_LAYOUTS, so both table lookups below are in range.
+  const DownMixMatrix& m = gDownMixMatrices[
+    gMixingMatrixIndexByChannels[aOutputChannelCount - 1] +
+    inputChannelCount - aOutputChannelCount - 1];
+
+  // This is slow, but general. We can define custom code for special
+  // cases later.
+  for (uint32_t s = 0; s < aDuration; ++s) {
+    // Reserve an extra junk channel at the end for the cases where we
+    // want an input channel to contribute to nothing. The whole array is
+    // zeroed, junk channel included, because the junk channel is read by the
+    // "+=" below just like the real ones.
+    T outputChannels[CUSTOM_CHANNEL_LAYOUTS + 1] = {};
+    for (uint32_t c = 0; c < inputChannelCount; ++c) {
+      outputChannels[m.mInputDestination[c]] +=
+        m.mInputCoefficient[c]*(static_cast<const T*>(inputChannels[c]))[s];
+    }
+    // Utilize the fact that in every layout, C is the third channel.
+    if (m.mCExtraDestination != IGNORE) {
+      outputChannels[m.mCExtraDestination] +=
+        m.mInputCoefficient[SURROUND_C]*(static_cast<const T*>(inputChannels[SURROUND_C]))[s];
+    }
+
+    for (uint32_t c = 0; c < aOutputChannelCount; ++c) {
+      aOutputChannels[c][s] = outputChannels[c];
+    }
+  }
+}
+

 } // namespace mozilla

--- a/dom/media/AudioSegment.cpp
+++ b/dom/media/AudioSegment.cpp
@ -206,13 +206,13 @@ AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
          AudioDataValue* ptr =
            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                      aOutputChannels, channel, offsetSamples);
-          PodCopy(ptr, reinterpret_cast<const float*>(channelData[channel]),
+          PodCopy(ptr, reinterpret_cast<const AudioDataValue*>(channelData[channel]),
                  frames);
        }
        MOZ_ASSERT(channelData.Length() == aOutputChannels);
      } else if (channelData.Length() > aOutputChannels) {
        // Down mix.
-        nsAutoTArray<float*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
+        nsAutoTArray<AudioDataValue*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
        outChannelPtrs.SetLength(aOutputChannels);
        uint32_t offsetSamples = 0;
        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
@ -228,7 +228,7 @@ AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
          AudioDataValue* ptr =
            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                      aOutputChannels, channel, offsetSamples);
-          PodCopy(ptr, reinterpret_cast<const float*>(channelData[channel]),
+          PodCopy(ptr, reinterpret_cast<const AudioDataValue*>(channelData[channel]),
                  frames);
        }
      }