Bug 1702646 - Share AudioSegments among AudioInputTrack r=padenot

Each AudioInputTrack has its own AudioSegment storing the input audio
data. When the AudioInputTrack is in pass-through mode, the AudioSegment
is just the data de-interleaved from the raw input, without any audio
processing magic applied to it. If multiple AudioInputTracks in
pass-through mode exist in the same graph, then all of their
AudioSegments are the same.

Before this patch, each of these AudioInputTracks allocated its own
space to store its own AudioSegment, even though the data are identical.
This patch makes it possible for these AudioInputTracks to share the
same AudioSegment data. The AudioSegment is now created in the
NativeInputTrack, which is mapped to one specific device and is
connected to the AudioInputTracks. Each AudioInputTrack can fetch the
AudioSegment data when it needs it, then append shared references to
the AudioChunks inside the fetched AudioSegment into its own
AudioSegment. As a result, the AudioChunks created by the
NativeInputTrack are shared among the AudioInputTracks in pass-through
mode.
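
In other words, appending the shared segment copies only chunk headers;
the sample buffers themselves stay refcounted. A minimal sketch of that
sharing model, using std::shared_ptr as a stand-in for Gecko's
refcounted SharedBuffer (the Chunk/Segment types below are simplified
illustrations, not the actual AudioChunk/AudioSegment):

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

// Simplified stand-ins for AudioChunk and AudioSegment.
struct Chunk {
  std::shared_ptr<std::vector<float>> mBuffer;  // refcounted sample storage
  size_t mFrames = 0;
};

struct Segment {
  std::vector<Chunk> mChunks;
  // Mirrors AudioSegment::AppendSegment(): copy chunk headers only; the
  // payload is shared by bumping the buffer's refcount.
  void AppendSegment(const Segment& aSource) {
    for (const Chunk& c : aSource.mChunks) {
      mChunks.push_back(c);  // shared_ptr copy, no sample copy
    }
  }
};

int main() {
  Segment source;  // what the NativeInputTrack would produce
  source.mChunks.push_back(
      {std::make_shared<std::vector<float>>(128, 0.0f), 128});

  Segment trackA, trackB;  // two pass-through AudioInputTracks
  trackA.AppendSegment(source);
  trackB.AppendSegment(source);

  // All three segments reference the very same sample buffer.
  assert(trackA.mChunks[0].mBuffer == trackB.mChunks[0].mBuffer);
  assert(source.mChunks[0].mBuffer.use_count() == 3);
  return 0;
}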

Differential Revision: https://phabricator.services.mozilla.com/D114801
Chun-Min Chang 2021-06-08 00:48:20 +00:00
Parent 1f72372ece
Commit c95e8979b6
7 changed files: 149 additions and 114 deletions

View file

@@ -377,6 +377,18 @@ class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
chunk->mBufferFormat = AUDIO_FORMAT_S16;
chunk->mPrincipalHandle = aPrincipalHandle;
}
void AppendSegment(const AudioSegment* aSegment,
const PrincipalHandle& aPrincipalHandle) {
MOZ_ASSERT(aSegment);
for (const AudioChunk& c : aSegment->mChunks) {
AudioChunk* chunk = AppendChunk(c.GetDuration());
chunk->mBuffer = c.mBuffer;
chunk->mChannelData = c.mChannelData;
chunk->mBufferFormat = c.mBufferFormat;
chunk->mPrincipalHandle = aPrincipalHandle;
}
}
// Consumes aChunk, and returns a pointer to the persistent copy of aChunk
// in the segment.
AudioChunk* AppendAndConsumeChunk(AudioChunk* aChunk) {

View file

@@ -62,23 +62,27 @@ LazyLogModule gMediaTrackGraphLog("MediaTrackGraph");
*/
static nsTHashMap<nsUint32HashKey, MediaTrackGraphImpl*> gGraphs;
void NativeInputTrack::AudioDataBuffers::SetOutputData(
const AudioDataValue* aBuffer, size_t aFrames, uint32_t aChannels) {
mOutputData.Set(aBuffer, aFrames, aChannels);
void NativeInputTrack::AudioDataBuffers::SetOutputData(AudioDataValue* aBuffer,
size_t aFrames,
uint32_t aChannels,
TrackRate aRate) {
mOutputData = Some(BufferInfo{aBuffer, aFrames, aChannels, aRate});
}
void NativeInputTrack::AudioDataBuffers::SetInputData(
const AudioDataValue* aBuffer, size_t aFrames, uint32_t aChannels) {
mInputData.Set(aBuffer, aFrames, aChannels);
void NativeInputTrack::AudioDataBuffers::SetInputData(AudioDataValue* aBuffer,
size_t aFrames,
uint32_t aChannels,
TrackRate aRate) {
mInputData = Some(BufferInfo{aBuffer, aFrames, aChannels, aRate});
}
void NativeInputTrack::AudioDataBuffers::Clear(Scope aScope) {
if (aScope & Scope::Input) {
mInputData.Clear();
mInputData.take();
}
if (aScope & Scope::Output) {
mOutputData.Clear();
mOutputData.take();
}
}
@@ -116,12 +120,60 @@ void NativeInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
uint32_t aFlags) {
MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
TRACE_COMMENT("NativeInputTrack %p", this);
// TODO: Put input data to mSegment
if (!mDataHolder || !mDataHolder->mInputData) {
return;
}
// One NotifyInputData might have multiple following ProcessInput calls, but
// we only process one input per NotifyInputData call.
NativeInputTrack::AudioDataBuffers::BufferInfo inputInfo =
mDataHolder->mInputData.extract();
MOZ_ASSERT(mInputChannels == inputInfo.mChannels);
MOZ_ASSERT(inputInfo.mChannels >= 1 && inputInfo.mChannels <= 8,
"Support up to 8 channels");
CheckedInt<size_t> bufferSize(sizeof(AudioDataValue));
bufferSize *= inputInfo.mFrames;
bufferSize *= inputInfo.mChannels;
RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
AutoTArray<const AudioDataValue*, 8> channels;
if (inputInfo.mChannels == 1) {
PodCopy(static_cast<AudioDataValue*>(buffer->Data()), inputInfo.mBuffer,
inputInfo.mFrames);
channels.AppendElement(static_cast<AudioDataValue*>(buffer->Data()));
} else {
channels.SetLength(inputInfo.mChannels);
AutoTArray<AudioDataValue*, 8> writeChannels;
writeChannels.SetLength(inputInfo.mChannels);
AudioDataValue* samples = static_cast<AudioDataValue*>(buffer->Data());
size_t offset = 0;
for (uint32_t i = 0; i < inputInfo.mChannels; ++i) {
channels[i] = writeChannels[i] = samples + offset;
offset += inputInfo.mFrames;
}
DeinterleaveAndConvertBuffer(inputInfo.mBuffer, inputInfo.mFrames,
inputInfo.mChannels,
writeChannels.Elements());
}
LOG(LogLevel::Verbose,
("NativeInputTrack %p Appending %zu frames of raw audio", this,
inputInfo.mFrames));
MOZ_ASSERT(inputInfo.mChannels == channels.Length());
GetData<AudioSegment>()->Clear();
GetData<AudioSegment>()->AppendFrames(buffer.forget(), channels,
static_cast<int32_t>(inputInfo.mFrames),
PRINCIPAL_HANDLE_NONE);
}
uint32_t NativeInputTrack::NumberOfChannels() const {
MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
return mDataHolder ? mDataHolder->mInputData.mChannels : 0;
return mInputChannels;
}
void NativeInputTrack::InitDataHolderIfNeeded() {
@@ -131,27 +183,15 @@ void NativeInputTrack::InitDataHolderIfNeeded() {
}
}
Maybe<NativeInputTrack::BufferInfo> NativeInputTrack::GetInputBufferData() {
MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
if (!mDataHolder) {
return Nothing();
}
return Some(BufferInfo{mDataHolder->mInputData.mBuffer,
mDataHolder->mInputData.mFrames,
mDataHolder->mInputData.mChannels});
}
void NativeInputTrack::NotifyOutputData(MediaTrackGraphImpl* aGraph,
AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) {
MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
MOZ_ASSERT(aGraph == mGraph, "Receive output data from another graph");
MOZ_ASSERT(mDataHolder);
mDataHolder->SetOutputData(aBuffer, aFrames, aChannels);
mDataHolder->SetOutputData(aBuffer, aFrames, aChannels, aRate);
for (auto& listener : mDataUsers) {
listener->NotifyOutputData(aGraph, mDataHolder->mOutputData.mBuffer,
mDataHolder->mOutputData.mFrames, aRate,
mDataHolder->mOutputData.mChannels);
listener->NotifyOutputData(aGraph, mDataHolder->mOutputData.value());
}
}
@@ -160,6 +200,7 @@ void NativeInputTrack::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph == mGraph,
"Receive input stopped signal from another graph");
MOZ_ASSERT(mDataHolder);
mInputChannels = 0;
mDataHolder->Clear(AudioDataBuffers::Scope::Input);
for (auto& listener : mDataUsers) {
listener->NotifyInputStopped(aGraph);
@@ -175,11 +216,14 @@ void NativeInputTrack::NotifyInputData(MediaTrackGraphImpl* aGraph,
MOZ_ASSERT(aGraph == mGraph, "Receive input data from another graph");
MOZ_ASSERT(mDataHolder);
mDataHolder->SetInputData(aBuffer, aFrames, aChannels);
MOZ_ASSERT(aChannels);
if (!mInputChannels) {
mInputChannels = aChannels;
}
mDataHolder->SetInputData(const_cast<AudioDataValue*>(aBuffer), aFrames,
aChannels, aRate);
for (auto& listener : mDataUsers) {
listener->NotifyInputData(aGraph, mDataHolder->mInputData.mBuffer,
mDataHolder->mInputData.mFrames, aRate,
mDataHolder->mInputData.mChannels,
listener->NotifyInputData(aGraph, mDataHolder->mInputData.value(),
aAlreadyBuffered);
}
}
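
The ProcessInput hunk above converts the interleaved callback buffer
into planar channel data inside one allocation: mono is a straight copy,
and multi-channel input goes through DeinterleaveAndConvertBuffer. A
self-contained sketch of that layout change, with a plain std::vector
standing in for SharedBuffer:

#include <cstddef>
#include <vector>

// Deinterleave [L0 R0 L1 R1 ...] into a single planar allocation
// [L0 L1 ... | R0 R1 ...]; channel i then starts at offset i * aFrames,
// matching the channel pointers NativeInputTrack hands to AppendFrames.
std::vector<float> Deinterleave(const float* aInterleaved, size_t aFrames,
                                size_t aChannels) {
  std::vector<float> planar(aFrames * aChannels);
  for (size_t ch = 0; ch < aChannels; ++ch) {
    float* dest = planar.data() + ch * aFrames;  // this channel's slice
    for (size_t frame = 0; frame < aFrames; ++frame) {
      dest[frame] = aInterleaved[frame * aChannels + ch];
    }
  }
  return planar;
}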

View file

@@ -107,6 +107,14 @@ class AudioDataListenerInterface {
virtual ~AudioDataListenerInterface() = default;
public:
// Information for the interleaved buffer coming from the audio callbacks
struct BufferInfo {
AudioDataValue* mBuffer = nullptr;
size_t mFrames = 0;
uint32_t mChannels = 0;
TrackRate mRate = 0;
};
/* These are for cubeb audio input & output streams: */
/**
* Output data to speakers, for use as the "far-end" data for echo
@@ -114,8 +122,7 @@
* chunks.
*/
virtual void NotifyOutputData(MediaTrackGraphImpl* aGraph,
AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) = 0;
BufferInfo aInfo) = 0;
/**
* An AudioCallbackDriver with an input stream signaling that it has stopped
* for any reason and the AudioDataListener will not be notified of input data
@@ -127,8 +134,7 @@
* guaranteed to be in any particular size chunks.
*/
virtual void NotifyInputData(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels,
const BufferInfo aInfo,
uint32_t aAlreadyBuffered) = 0;
/**

View file

@@ -67,27 +67,6 @@ class NativeInputTrack : public ProcessedMediaTrack {
// Other Graph Thread APIs
void InitDataHolderIfNeeded();
struct BufferInfo {
AudioDataValue* mBuffer = nullptr;
size_t mFrames = 0;
uint32_t mChannels = 0;
void Set(const AudioDataValue* aBuffer, size_t aFrames,
uint32_t aChannels) {
mBuffer = const_cast<AudioDataValue*>(aBuffer);
mFrames = aFrames;
mChannels = aChannels;
}
void Clear() {
mBuffer = nullptr;
mFrames = 0;
mChannels = 0;
}
};
// TODO: Return data from GetData<AudioSegment>() instead
Maybe<BufferInfo> GetInputBufferData();
// Any thread
NativeInputTrack* AsNativeInputTrack() override { return this; }
@@ -99,10 +78,10 @@ class NativeInputTrack : public ProcessedMediaTrack {
class AudioDataBuffers {
public:
AudioDataBuffers() = default;
void SetOutputData(const AudioDataValue* aBuffer, size_t aFrames,
uint32_t aChannels);
void SetInputData(const AudioDataValue* aBuffer, size_t aFrames,
uint32_t aChannels);
void SetOutputData(AudioDataValue* aBuffer, size_t aFrames,
uint32_t aChannels, TrackRate aRate);
void SetInputData(AudioDataValue* aBuffer, size_t aFrames,
uint32_t aChannels, TrackRate aRate);
enum Scope : unsigned char {
Input = 0x01,
@@ -110,16 +89,20 @@ class NativeInputTrack : public ProcessedMediaTrack {
};
void Clear(Scope aScope);
typedef AudioDataListenerInterface::BufferInfo BufferInfo;
// Storing the audio output data coming from NotifyOutputData
BufferInfo mOutputData;
Maybe<BufferInfo> mOutputData;
// Storing the audio input data coming from NotifyInputData
BufferInfo mInputData;
Maybe<BufferInfo> mInputData;
};
// Only accessed on the graph thread.
// Storing the audio data coming from GraphDriver directly.
Maybe<AudioDataBuffers> mDataHolder;
// Only accessed on the graph thread.
uint32_t mInputChannels = 0;
// Only accessed on the main thread.
// When this becomes zero, this NativeInputTrack is no longer needed.
int32_t mUserCount = 0;

View file

@@ -70,7 +70,9 @@ TEST(TestAudioInputProcessing, UnaccountedPacketizerBuffering)
processedTime = 0;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(nrFrames);
generator.GenerateInterleaved(buffer.Elements(), nrFrames);
aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
aip->NotifyInputData(graph,
AudioInputProcessing::BufferInfo{
buffer.Elements(), nrFrames, channels, rate},
nextTime - nrFrames);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
@@ -87,7 +89,9 @@ TEST(TestAudioInputProcessing, UnaccountedPacketizerBuffering)
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * nrFrames);
generator.GenerateInterleaved(buffer.Elements(), nrFrames);
aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
aip->NotifyInputData(graph,
AudioInputProcessing::BufferInfo{
buffer.Elements(), nrFrames, channels, rate},
nextTime - (2 * nrFrames));
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);

View file

@@ -625,8 +625,7 @@ AudioInputProcessing::AudioInputProcessing(
mLiveBufferingAppended(0),
mPrincipal(aPrincipalHandle),
mEnabled(false),
mEnded(false),
mInputProcessed(true) {}
mEnded(false) {}
void AudioInputProcessing::Disconnect(MediaTrackGraphImpl* aGraph) {
// This method is just for asserts.
@@ -868,21 +867,20 @@ void AudioInputProcessing::Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom,
}
void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
uint32_t aChannels) {
BufferInfo aInfo) {
MOZ_ASSERT(aGraph->OnGraphThread());
MOZ_ASSERT(mEnabled);
if (!mPacketizerOutput || mPacketizerOutput->mPacketSize != aRate / 100u ||
mPacketizerOutput->mChannels != aChannels) {
if (!mPacketizerOutput ||
mPacketizerOutput->mPacketSize != aInfo.mRate / 100u ||
mPacketizerOutput->mChannels != aInfo.mChannels) {
// It's ok to drop the audio still in the packetizer here: if this changes,
// we changed devices or something.
mPacketizerOutput = MakeUnique<AudioPacketizer<AudioDataValue, float>>(
aRate / 100, aChannels);
aInfo.mRate / 100, aInfo.mChannels);
}
mPacketizerOutput->Input(aBuffer, aFrames);
mPacketizerOutput->Input(aInfo.mBuffer, aInfo.mFrames);
while (mPacketizerOutput->PacketsAvailable()) {
uint32_t samplesPerPacket =
@@ -901,11 +899,11 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
uint32_t channelCountFarend = 0;
uint32_t framesPerPacketFarend = 0;
// Downmix from aChannels to MAX_CHANNELS if needed. We always have floats
// here, the packetizer performed the conversion.
if (aChannels > MAX_CHANNELS) {
// Downmix from aInfo.mChannels to MAX_CHANNELS if needed. We always have
// floats here, the packetizer performed the conversion.
if (aInfo.mChannels > MAX_CHANNELS) {
AudioConverter converter(
AudioConfig(aChannels, 0, AudioConfig::FORMAT_FLT),
AudioConfig(aInfo.mChannels, 0, AudioConfig::FORMAT_FLT),
AudioConfig(MAX_CHANNELS, 0, AudioConfig::FORMAT_FLT));
framesPerPacketFarend = mPacketizerOutput->mPacketSize;
framesPerPacketFarend =
@@ -915,9 +913,9 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
deinterleavedPacketDataChannelPointers.SetLength(MAX_CHANNELS);
} else {
interleavedFarend = packet;
channelCountFarend = aChannels;
channelCountFarend = aInfo.mChannels;
framesPerPacketFarend = mPacketizerOutput->mPacketSize;
deinterleavedPacketDataChannelPointers.SetLength(aChannels);
deinterleavedPacketDataChannelPointers.SetLength(aInfo.mChannels);
}
MOZ_ASSERT(interleavedFarend &&
@@ -943,7 +941,7 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
// Having the same config for input and output means we potentially save
// some CPU.
StreamConfig inputConfig(aRate, channelCountFarend, false);
StreamConfig inputConfig(aInfo.mRate, channelCountFarend, false);
StreamConfig outputConfig = inputConfig;
// Passing the same pointers here saves a copy inside this function.
@@ -1081,29 +1079,28 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
}
void AudioInputProcessing::ProcessInput(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
uint32_t aChannels) {
const AudioSegment* aSegment) {
MOZ_ASSERT(aGraph);
MOZ_ASSERT(aGraph->OnGraphThread());
if (mEnded || !mEnabled || !mLiveFramesAppended || mInputProcessed ||
!aBuffer) {
if (mEnded || !mEnabled || !mLiveFramesAppended || !mInputData || !aSegment) {
return;
}
// One NotifyInputData might have multiple following ProcessInput calls, but
// we only process one input per NotifyInputData call.
BufferInfo inputInfo = mInputData.extract();
// If some processing is necessary, packetize and insert in the WebRTC.org
// code. Otherwise, directly insert the mic data in the MTG, bypassing all
// processing.
if (PassThrough(aGraph)) {
InsertInGraph<AudioDataValue>(aGraph, aBuffer, aFrames, aChannels);
mSegment.AppendSegment(aSegment, mPrincipal);
} else {
PacketizeAndProcess(aGraph, aBuffer, aFrames, aRate, aChannels);
MOZ_ASSERT(aGraph->GraphRate() == inputInfo.mRate);
PacketizeAndProcess(aGraph, inputInfo.mBuffer, inputInfo.mFrames,
inputInfo.mRate, inputInfo.mChannels);
}
// One NotifyInputData might have multiple following ProcessInput calls, but
// we only process one input per NotifyInputData call.
mInputProcessed = true;
}
template <typename T>
@@ -1158,16 +1155,13 @@ void AudioInputProcessing::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
if (mPacketizerInput) {
mPacketizerInput->Clear();
}
// Stop processing input
mInputProcessed = true;
mInputData.take();
}
// Called back on GraphDriver thread!
// Note this can be called back after ::Stop()
void AudioInputProcessing::NotifyInputData(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
uint32_t aChannels,
const BufferInfo aInfo,
uint32_t aAlreadyBuffered) {
MOZ_ASSERT(aGraph->OnGraphThread());
TRACE();
@@ -1181,8 +1175,7 @@ void AudioInputProcessing::NotifyInputData(MediaTrackGraphImpl* aGraph,
mLiveBufferingAppended = aAlreadyBuffered;
}
// Ask to process input data
mInputProcessed = false;
mInputData = Some(aInfo);
}
#define ResetProcessingIfNeeded(_processing) \
@@ -1215,8 +1208,8 @@ void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) {
void AudioInputProcessing::End() {
mEnded = true;
mInputProcessed = true; // Stop processing input data
mSegment.Clear();
mInputData.take();
}
TrackTime AudioInputProcessing::NumBufferedFrames(
@@ -1288,14 +1281,11 @@ void AudioInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
// Push the input data from the connected NativeInputTrack to mInputProcessing
if (source) {
Maybe<NativeInputTrack::BufferInfo> inputInfo =
source->GetInputBufferData();
if (inputInfo) {
MOZ_ASSERT(GraphImpl()->GraphRate() == mSampleRate);
mInputProcessing->ProcessInput(GraphImpl(), inputInfo->mBuffer,
inputInfo->mFrames, mSampleRate,
inputInfo->mChannels);
}
MOZ_ASSERT(source->GraphImpl() == GraphImpl());
MOZ_ASSERT(source->mSampleRate == mSampleRate);
MOZ_ASSERT(GraphImpl()->GraphRate() == mSampleRate);
mInputProcessing->ProcessInput(GraphImpl(),
source->GetData<AudioSegment>());
}
bool ended = false;
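
Across these hunks the mInputProcessed flag becomes Maybe<BufferInfo>:
NotifyInputData stores the latest buffer, and ProcessInput consumes it
at most once, since one NotifyInputData may be followed by several
ProcessInput calls. A sketch of that single-consumption handoff, with
std::optional standing in for mozilla::Maybe (the class below is
illustrative, not the real AudioInputProcessing):

#include <cstdio>
#include <optional>

struct BufferInfo {
  int mFrames = 0;  // buffer pointer, channels, and rate omitted for brevity
};

class InputProcessing {
 public:
  // Called from the audio callback: remember the newest input buffer.
  void NotifyInputData(BufferInfo aInfo) { mInputData = aInfo; }

  // Called per graph iteration: process pending input at most once.
  void ProcessInput() {
    if (!mInputData) {
      return;  // nothing new since the last call
    }
    BufferInfo info = *mInputData;  // like Maybe<T>::extract()
    mInputData.reset();
    std::printf("processing %d frames\n", info.mFrames);
  }

 private:
  std::optional<BufferInfo> mInputData;  // stands in for Maybe<BufferInfo>
};

int main() {
  InputProcessing p;
  p.NotifyInputData({128});
  p.ProcessInput();  // processes 128 frames
  p.ProcessInput();  // no-op: the pending input was already consumed
  return 0;
}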

View file

@@ -141,13 +141,9 @@ class AudioInputProcessing : public AudioDataListener {
GraphTime aTrackEnd, AudioSegment* aSegment,
bool aLastPullThisIteration, bool* aEnded);
void NotifyOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
uint32_t aChannels) override;
void NotifyOutputData(MediaTrackGraphImpl* aGraph, BufferInfo aInfo) override;
void NotifyInputStopped(MediaTrackGraphImpl* aGraph) override;
void NotifyInputData(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels,
void NotifyInputData(MediaTrackGraphImpl* aGraph, const BufferInfo aInfo,
uint32_t aAlreadyBuffered) override;
bool IsVoiceInput(MediaTrackGraphImpl* aGraph) const override {
// If we're passing data directly without AEC or any other process, this
@@ -167,8 +163,8 @@ class AudioInputProcessing : public AudioDataListener {
void Disconnect(MediaTrackGraphImpl* aGraph) override;
void ProcessInput(MediaTrackGraphImpl* aGraph, const AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate, uint32_t aChannels);
// aSegment stores the unprocessed non-interleaved audio input data from the mic
void ProcessInput(MediaTrackGraphImpl* aGraph, const AudioSegment* aSegment);
template <typename T>
void InsertInGraph(MediaTrackGraphImpl* aGraph, const T* aBuffer,
@@ -250,8 +246,8 @@ class AudioInputProcessing : public AudioDataListener {
bool mEnabled;
// Whether or not we've ended and removed the AudioInputTrack.
bool mEnded;
// Whether or not the new input data arrives.
bool mInputProcessed;
// Store the unprocessed interleaved audio input data
Maybe<BufferInfo> mInputData;
};
// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.