Bug 1264199: P1. Perform audio conversion in the reader's taskqueue and ahead of use. r=kinetik

This will allow audio configuration changes to be easily detected prior to immediate playback.

MozReview-Commit-ID: IwDUGOd17ND
Jean-Yves Avenard 2016-04-13 17:17:54 +10:00
Parent d84e6264da
Commit b130178b94
3 changed files with 172 additions and 82 deletions
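
At a high level, the patch moves packet validation, gap detection, and format conversion off the cubeb callback path: NotifyAudioNeeded() runs on the sink's owner thread and keeps a small queue of already-converted packets ahead of playback, so PopFrames() only hands out ready-to-play data. A minimal standalone sketch of that produce-ahead pattern (simplified stand-in types, not the Gecko implementation):

    // Produce-ahead sketch: convert packets on the owner thread into a small
    // processed queue so the real-time callback only pops ready-to-play data.
    #include <cstdint>
    #include <deque>
    #include <utility>
    #include <vector>

    struct Packet {
      uint32_t rate = 0;
      uint32_t channels = 0;
      std::vector<float> samples;
    };

    class SinkSketch {
    public:
      // Owner thread: called when input arrives or processed data is consumed.
      void NotifyAudioNeeded() {
        // Keep up to two converted packets buffered to hide conversion latency.
        while (!mInput.empty() && mProcessed.size() < 2) {
          Packet p = std::move(mInput.front());
          mInput.pop_front();
          if (p.samples.empty()) {
            continue;  // skip empty packets, as the patch does for 0 frames
          }
          mProcessed.push_back(Convert(std::move(p)));
        }
      }

      // Audio callback: never converts, only pops pre-processed packets.
      bool PopFrames(Packet& aOut) {
        if (mProcessed.empty()) {
          return false;  // underrun; the caller plays silence
        }
        aOut = std::move(mProcessed.front());
        mProcessed.pop_front();
        return true;
      }

    private:
      static Packet Convert(Packet&& p) {
        return std::move(p);  // placeholder for AudioConverter's resample/downmix
      }

      std::deque<Packet> mInput;
      std::deque<Packet> mProcessed;
    };

Because conversion now happens when packets are queued rather than when they are played, a change in rate or channel count can be noticed as soon as a packet is inspected, which is what the commit message refers to.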

View file

@@ -362,7 +362,7 @@ MediaDecoderStateMachine::CreateAudioSink()
   auto audioSinkCreator = [self] () {
     MOZ_ASSERT(self->OnTaskQueue());
     return new DecodedAudioDataSink(
-      self->mAudioQueue, self->GetMediaTime(),
+      self->mTaskQueue, self->mAudioQueue, self->GetMediaTime(),
       self->mInfo.mAudio, self->mAudioChannel);
   };
   return new AudioSinkWrapper(mTaskQueue, audioSinkCreator);
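
The only change here is threading the state machine's task queue through to the sink: the creator lambda now also captures self->mTaskQueue so DecodedAudioDataSink can process queue events on it. A minimal sketch of this lazy-factory pattern (stand-in types, not Gecko code):

    // Factory sketch: capture the owner thread in the creator; the wrapper
    // builds the sink only when playback actually starts.
    #include <functional>
    #include <memory>

    struct Thread {};  // stand-in for AbstractThread / TaskQueue

    struct Sink {
      explicit Sink(Thread* aOwner) : mOwner(aOwner) {}
      Thread* mOwner;  // all queue processing happens on this thread
    };

    struct SinkWrapper {
      explicit SinkWrapper(std::function<std::unique_ptr<Sink>()> aCreator)
        : mCreator(std::move(aCreator)) {}
      void Start() { mSink = mCreator(); }  // deferred creation
      std::function<std::unique_ptr<Sink>()> mCreator;
      std::unique_ptr<Sink> mSink;
    };

    // Usage, mirroring the captured self->mTaskQueue above:
    //   Thread taskQueue;
    //   SinkWrapper w([&] { return std::make_unique<Sink>(&taskQueue); });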

View file

@@ -29,7 +29,8 @@ namespace media {
 // The number of audio frames used to fuzz rounding errors.
 static const int64_t AUDIO_FUZZ_FRAMES = 1;
 
-DecodedAudioDataSink::DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
+DecodedAudioDataSink::DecodedAudioDataSink(AbstractThread* aThread,
+                                           MediaQueue<MediaData>& aAudioQueue,
                                            int64_t aStartTime,
                                            const AudioInfo& aInfo,
                                            dom::AudioChannel aChannel)
@@ -40,16 +41,21 @@ DecodedAudioDataSink::DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
   , mInfo(aInfo)
   , mChannel(aChannel)
   , mPlaying(true)
+  , mErrored(false)
   , mPlaybackComplete(false)
+  , mOwnerThread(aThread)
+  , mFramesParsed(0)
+  , mLastEndTime(0)
 {
   bool resampling = gfxPrefs::AudioSinkResampling();
   uint32_t resamplingRate = gfxPrefs::AudioSinkResampleRate();
+  mOutputRate = resampling ? resamplingRate : mInfo.mRate;
+  mOutputChannels = mInfo.mChannels > 2 && gfxPrefs::AudioSinkForceStereo()
+                    ? 2 : mInfo.mChannels;
   mConverter =
     MakeUnique<AudioConverter>(
       AudioConfig(mInfo.mChannels, mInfo.mRate),
-      AudioConfig(mInfo.mChannels > 2 && gfxPrefs::AudioSinkForceStereo()
-                  ? 2 : mInfo.mChannels,
-                  resampling ? resamplingRate : mInfo.mRate));
+      AudioConfig(mOutputChannels, mOutputRate));
 }
 
 DecodedAudioDataSink::~DecodedAudioDataSink()
@@ -59,6 +65,16 @@ DecodedAudioDataSink::~DecodedAudioDataSink()
 RefPtr<GenericPromise>
 DecodedAudioDataSink::Init(const PlaybackParams& aParams)
 {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
+    mOwnerThread, this, &DecodedAudioDataSink::OnAudioPushed);
+  mProcessedQueueListener = mProcessedQueue.PopEvent().Connect(
+    mOwnerThread, this, &DecodedAudioDataSink::OnAudioPopped);
+
+  // Ensure that at least one audio packet will be popped from AudioQueue and
+  // be ready to be played.
+  NotifyAudioNeeded();
   RefPtr<GenericPromise> p = mEndPromise.Ensure(__func__);
   nsresult rv = InitializeAudioStream(aParams);
   if (NS_FAILED(rv)) {
@@ -95,10 +111,16 @@ DecodedAudioDataSink::HasUnplayedFrames()
 }
 
 void
 DecodedAudioDataSink::Shutdown()
 {
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
+
+  mAudioQueueListener.Disconnect();
+  mProcessedQueueListener.Disconnect();
+
   if (mAudioStream) {
     mAudioStream->Shutdown();
     mAudioStream = nullptr;
   }
+  mProcessedQueue.Reset();
+  mEndPromise.ResolveIfExists(true, __func__);
 }
@@ -146,9 +168,7 @@ nsresult
 DecodedAudioDataSink::InitializeAudioStream(const PlaybackParams& aParams)
 {
   mAudioStream = new AudioStream(*this);
-  nsresult rv = mAudioStream->Init(mConverter->OutputConfig().Channels(),
-                                   mConverter->OutputConfig().Rate(),
-                                   mChannel);
+  nsresult rv = mAudioStream->Init(mOutputChannels, mOutputRate, mChannel);
   if (NS_FAILED(rv)) {
     mAudioStream->Shutdown();
     mAudioStream = nullptr;
@@ -168,13 +188,14 @@ DecodedAudioDataSink::InitializeAudioStream(const PlaybackParams& aParams)
 int64_t
 DecodedAudioDataSink::GetEndTime() const
 {
-  CheckedInt64 playedUsecs =
-    FramesToUsecs(mWritten, mConverter->OutputConfig().Rate()) + mStartTime;
+  CheckedInt64 playedUsecs = FramesToUsecs(mWritten, mOutputRate) + mStartTime;
   if (!playedUsecs.isValid()) {
     NS_WARNING("Int overflow calculating audio end time");
     return -1;
   }
-  return playedUsecs.value();
+  // As we may be resampling, rounding errors may occur. Ensure we never get
+  // past the original end time.
+  return std::min<int64_t>(mLastEndTime, playedUsecs.value());
 }
 
 UniquePtr<AudioStream::Chunk>
@@ -217,78 +238,13 @@ DecodedAudioDataSink::PopFrames(uint32_t aFrames)
     UniquePtr<AudioDataValue[]> mData;
   };
 
-  while (!mCurrentData) {
+  if (!mCurrentData) {
     // No data in the queue. Return an empty chunk.
-    if (AudioQueue().GetSize() == 0) {
+    if (!mProcessedQueue.GetSize()) {
       return MakeUnique<Chunk>();
     }
-
-    AudioData* a = AudioQueue().PeekFront()->As<AudioData>();
-
-    // Ignore the element with 0 frames and try next.
-    if (a->mFrames == 0) {
-      RefPtr<MediaData> releaseMe = AudioQueue().PopFront();
-      continue;
-    }
-
-    // Ignore invalid samples.
-    if (a->mRate != mInfo.mRate || a->mChannels != mInfo.mChannels) {
-      NS_WARNING(nsPrintfCString(
-        "mismatched sample format, data=%p rate=%u channels=%u frames=%u",
-        a->mAudioData.get(), a->mRate, a->mChannels, a->mFrames).get());
-      RefPtr<MediaData> releaseMe = AudioQueue().PopFront();
-      continue;
-    }
-
-    // See if there's a gap in the audio. If there is, push silence into the
-    // audio hardware, so we can play across the gap.
-    // Calculate the timestamp of the next chunk of audio in numbers of
-    // samples.
-    CheckedInt64 sampleTime = UsecsToFrames(AudioQueue().PeekFront()->mTime,
-                                            mConverter->OutputConfig().Rate());
-    // Calculate the number of frames that have been pushed onto the audio hardware.
-    CheckedInt64 playedFrames = UsecsToFrames(mStartTime,
-                                              mConverter->OutputConfig().Rate()) +
-                                static_cast<int64_t>(mWritten);
-    CheckedInt64 missingFrames = sampleTime - playedFrames;
-
-    if (!missingFrames.isValid() || !sampleTime.isValid()) {
-      NS_WARNING("Int overflow in DecodedAudioDataSink");
-      mErrored = true;
-      return MakeUnique<Chunk>();
-    }
-
-    const uint32_t rate = mConverter->OutputConfig().Rate();
-    const uint32_t channels = mConverter->OutputConfig().Channels();
-
-    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
-      // The next audio chunk begins some time after the end of the last chunk
-      // we pushed to the audio hardware. We must push silence into the audio
-      // hardware so that the next audio chunk begins playback at the correct
-      // time.
-      missingFrames = std::min<int64_t>(UINT32_MAX, missingFrames.value());
-      auto framesToPop = std::min<uint32_t>(missingFrames.value(), aFrames);
-      mWritten += framesToPop;
-      return MakeUnique<SilentChunk>(framesToPop, channels, rate);
-    }
-
-    RefPtr<AudioData> data =
-      dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
-    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
-      AlignedAudioBuffer convertedData =
-        mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
-      mCurrentData =
-        new AudioData(data->mOffset,
-                      data->mTime,
-                      data->mDuration,
-                      convertedData.Length() / channels,
-                      Move(convertedData),
-                      channels,
-                      rate);
-    } else {
-      mCurrentData = Move(data);
-    }
-
+
+    mCurrentData = dont_AddRef(mProcessedQueue.PopFront().take());
     mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->mAudioData.get(),
                                             mCurrentData->mChannels,
                                             mCurrentData->mFrames);
@@ -308,7 +264,7 @@ DecodedAudioDataSink::PopFrames(uint32_t aFrames)
 
   // All frames are popped. Reset mCurrentData so we can pop new elements from
   // the audio queue in next calls to PopFrames().
-  if (mCursor->Available() == 0) {
+  if (!mCursor->Available()) {
     mCurrentData = nullptr;
   }
 
@@ -330,5 +286,119 @@ DecodedAudioDataSink::Drained()
   mEndPromise.ResolveIfExists(true, __func__);
 }
 
+void
+DecodedAudioDataSink::OnAudioPopped(const RefPtr<MediaData>& aSample)
+{
+  SINK_LOG_V("AudioStream has used an audio packet.");
+  NotifyAudioNeeded();
+}
+
+void
+DecodedAudioDataSink::OnAudioPushed(const RefPtr<MediaData>& aSample)
+{
+  SINK_LOG_V("One new audio packet available.");
+  NotifyAudioNeeded();
+}
+
+void
+DecodedAudioDataSink::NotifyAudioNeeded()
+{
+  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
+             "Not called from the owner's thread");
+
+  // Always ensure we have two processed frames pending to allow for processing
+  // latency.
+  while (AudioQueue().GetSize() && mProcessedQueue.GetSize() < 2) {
+    RefPtr<AudioData> data =
+      dont_AddRef(AudioQueue().PopFront().take()->As<AudioData>());
+
+    // Ignore the element with 0 frames and try next.
+    if (!data->mFrames) {
+      continue;
+    }
+
+    // Ignore invalid samples.
+    if (data->mRate != mConverter->InputConfig().Rate() ||
+        data->mChannels != mConverter->InputConfig().Channels()) {
+      NS_WARNING(nsPrintfCString(
+        "mismatched sample format, data=%p rate=%u channels=%u frames=%u",
+        data->mAudioData.get(), data->mRate, data->mChannels, data->mFrames).get());
+      continue;
+    }
+
+    // See if there's a gap in the audio. If there is, push silence into the
+    // audio hardware, so we can play across the gap.
+    // Calculate the timestamp of the next chunk of audio in numbers of
+    // samples.
+    CheckedInt64 sampleTime = UsecsToFrames(data->mTime - mStartTime,
+                                            data->mRate);
+    // Calculate the number of frames that have been pushed onto the audio hardware.
+    CheckedInt64 missingFrames = sampleTime - mFramesParsed;
+    if (!missingFrames.isValid()) {
+      NS_WARNING("Int overflow in DecodedAudioDataSink");
+      mErrored = true;
+      return;
+    }
+
+    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
+      // The next audio packet begins some time after the end of the last packet
+      // we pushed to the audio hardware. We must push silence into the audio
+      // hardware so that the next audio packet begins playback at the correct
+      // time.
+      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
+      mFramesParsed += missingFrames.value();
+      AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels);
+      if (!silenceData) {
+        NS_WARNING("OOM in DecodedAudioDataSink");
+        mErrored = true;
+        return;
+      }
+      RefPtr<AudioData> silence = CreateAudioFromBuffer(Move(silenceData), data);
+      if (silence) {
+        mProcessedQueue.Push(silence);
+      }
+    }
+
+    mLastEndTime = data->GetEndTime();
+    mFramesParsed += data->mFrames;
+
+    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
+      AlignedAudioBuffer convertedData =
+        mConverter->Process(AudioSampleBuffer(Move(data->mAudioData))).Forget();
+      data = CreateAudioFromBuffer(Move(convertedData), data);
+      if (!data) {
+        continue;
+      }
+    }
+    mProcessedQueue.Push(data);
+  }
+}
+
+already_AddRefed<AudioData>
+DecodedAudioDataSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
+                                            AudioData* aReference)
+{
+  uint32_t frames = aBuffer.Length() / mOutputChannels;
+  if (!frames) {
+    return nullptr;
+  }
+  CheckedInt64 duration = FramesToUsecs(frames, mOutputRate);
+  if (!duration.isValid()) {
+    NS_WARNING("Int overflow in DecodedAudioDataSink");
+    mErrored = true;
+    return nullptr;
+  }
+
+  RefPtr<AudioData> data =
+    new AudioData(aReference->mOffset,
+                  aReference->mTime,
+                  duration.value(),
+                  frames,
+                  Move(aBuffer),
+                  mOutputChannels,
+                  mOutputRate);
+  return data.forget();
+}
+
 } // namespace media
 } // namespace mozilla
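
The trickiest arithmetic above is the gap detection in NotifyAudioNeeded(): the incoming packet's timestamp is converted to a frame count at the input rate and compared against mFramesParsed, and any shortfall beyond AUDIO_FUZZ_FRAMES is filled with a silent packet. A standalone worked example of that calculation (illustrative values; the real code guards the conversions with CheckedInt64):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // frames = usecs * rate / 1e6; overflow checking omitted for brevity.
    int64_t UsecsToFramesApprox(int64_t aUsecs, uint32_t aRate) {
      return aUsecs * aRate / 1000000;
    }

    int main() {
      const uint32_t rate = 44100;   // input sample rate
      const int64_t startTime = 0;   // playback start, in usecs
      int64_t framesParsed = 44100;  // one second already processed

      // Next packet claims to start at 1.5 s: there is a 0.5 s hole.
      const int64_t nextPacketTime = 1500000;
      int64_t sampleTime = UsecsToFramesApprox(nextPacketTime - startTime, rate);
      int64_t missing = sampleTime - framesParsed;  // 66150 - 44100 = 22050

      const int64_t AUDIO_FUZZ_FRAMES = 1;
      if (missing > AUDIO_FUZZ_FRAMES) {
        // The patch clamps to INT32_MAX, advances mFramesParsed, and pushes
        // a silent AudioData of this many frames into mProcessedQueue.
        missing = std::min<int64_t>(INT32_MAX, missing);
        std::printf("insert %lld frames of silence\n", (long long)missing);
      }
      return 0;
    }

Note that the comparison is now done in input frames (data->mRate), whereas the old PopFrames() code worked in output frames; mLastEndTime and the std::min() in GetEndTime() compensate for rounding introduced by resampling.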

View file

@@ -28,7 +28,8 @@ namespace media {
 class DecodedAudioDataSink : public AudioSink,
                              private AudioStream::DataSource {
 public:
-  DecodedAudioDataSink(MediaQueue<MediaData>& aAudioQueue,
+  DecodedAudioDataSink(AbstractThread* aThread,
+                       MediaQueue<MediaData>& aAudioQueue,
                        int64_t aStartTime,
                        const AudioInfo& aInfo,
                        dom::AudioChannel aChannel);
@@ -102,12 +103,31 @@ private:
   // Keep track of the read position of mCurrentData.
   UniquePtr<AudioBufferCursor> mCursor;
 
   // True if there is any error in processing audio data like overflow.
-  bool mErrored = false;
+  Atomic<bool> mErrored;
 
   // Set on the callback thread of cubeb once the stream has drained.
   Atomic<bool> mPlaybackComplete;
+  const RefPtr<AbstractThread> mOwnerThread;
+
+  // Audio processing objects and methods.
+  void OnAudioPopped(const RefPtr<MediaData>& aSample);
+  void OnAudioPushed(const RefPtr<MediaData>& aSample);
+  void NotifyAudioNeeded();
+  already_AddRefed<AudioData> CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer,
+                                                    AudioData* aReference);
+  UniquePtr<AudioConverter> mConverter;
+  MediaQueue<AudioData> mProcessedQueue;
+  MediaEventListener mAudioQueueListener;
+  MediaEventListener mProcessedQueueListener;
+
+  // Number of frames processed from AudioQueue(). Used to determine gaps in
+  // the input stream. It indicates the time in frames since playback started
+  // at the current input framerate.
+  int64_t mFramesParsed;
+  int64_t mLastEndTime;
+  // Never modified after construction.
+  uint32_t mOutputRate;
+  uint32_t mOutputChannels;
 };
 
 } // namespace media
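
One subtlety in the header: mErrored changes from a plain bool to Atomic<bool>, presumably because it is now written by NotifyAudioNeeded() on the owner thread while other threads may read it, just as mPlaybackComplete is already set from cubeb's callback thread. An illustrative std::atomic analogue of that flag (not Gecko's mozilla::Atomic):

    #include <atomic>
    #include <cstdio>
    #include <thread>

    int main() {
      std::atomic<bool> errored{false};

      // "Owner thread": records an overflow while preparing audio packets.
      std::thread owner([&] { errored.store(true); });
      owner.join();

      // "Callback thread": checks the flag before pulling more frames.
      if (errored.load()) {
        std::puts("stop pulling audio frames");
      }
      return 0;
    }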