Bug 1262276 - part12 : store looping offset in the media queue in order to keep timestamp consistently increasing across different states. r=padenot

When leaving the looping state for another state, media data stored in the
media queue have already been adjusted. If the new state requests new data
but doesn't adjust its timestamp, then the data in the media queue will
be out of order.

If that happens on video data, it would cause a/v unsync and the video
frame would be discarded because it doesn't catch up with the clock
time, which might have grown a lot via looping multiple times.

Example transitions from the looping state that can encounter this
situation are the buffering state (decoding too slow), the decoding state
(cancel looping) and video-only seek (background video resume).

On the premise of letting the clock time keep growing, we would need to
put the offset somewhere independent of states. Therefore, we choose
to let the media queue do the task of the timestamp adjustment.

So even if we leave the looping state, newly arriving data will still have
their timestamps adjusted correctly and match the clock time. If we enter
the looping state again, we can also smoothly keep adding more offset to
all future data.

Differential Revision: https://phabricator.services.mozilla.com/D160576
This commit is contained in:
alwu 2022-11-05 02:23:33 +00:00
Родитель 4884a072f3
Коммит ca960ebd52
3 изменённых файлов: 214 добавлений и 75 удалений

Просмотреть файл

@ -871,6 +871,20 @@ class MediaDecoderStateMachine::LoopingDecodingState
mVideoEndedBeforeEnteringStateWithoutDuration = true;
}
}
// If we've looped at least once before, the master's media queues have
// already stored some adjusted data. If a track has reached EOS, we need to
// update queue offset correctly. Otherwise, it would cause a/v unsync.
if (mMaster->mOriginalDecodedDuration != media::TimeUnit::Zero()) {
if (mIsReachingAudioEOS && mMaster->HasAudio()) {
AudioQueue().SetOffset(AudioQueue().GetOffset() +
mMaster->mOriginalDecodedDuration);
}
if (mIsReachingVideoEOS && mMaster->HasVideo()) {
VideoQueue().SetOffset(VideoQueue().GetOffset() +
mMaster->mOriginalDecodedDuration);
}
}
}
void Enter() {
@ -890,8 +904,8 @@ class MediaDecoderStateMachine::LoopingDecodingState
SLOG("Leaving looping state, offset [a=%" PRId64 ",v=%" PRId64
"], endtime [a=%" PRId64 ",v=%" PRId64 "], track duration [a=%" PRId64
",v=%" PRId64 "], waiting=%s",
mAudioLoopingOffset.ToMicroseconds(),
mVideoLoopingOffset.ToMicroseconds(),
AudioQueue().GetOffset().ToMicroseconds(),
VideoQueue().GetOffset().ToMicroseconds(),
mMaster->mDecodedAudioEndTime.ToMicroseconds(),
mMaster->mDecodedVideoEndTime.ToMicroseconds(),
mMaster->mAudioTrackDecodedDuration
@ -935,30 +949,24 @@ class MediaDecoderStateMachine::LoopingDecodingState
void HandleAudioDecoded(AudioData* aAudio) override {
// TODO : check if we need to update mOriginalDecodedDuration
MediaResult rv = LoopingAudioTimeAdjustment(aAudio);
if (NS_WARN_IF(NS_FAILED(rv))) {
mMaster->DecodeError(rv);
return;
}
// After pushing data to the queue, timestamp might be adjusted.
DecodingState::HandleAudioDecoded(aAudio);
mMaster->mDecodedAudioEndTime =
std::max(aAudio->GetEndTime(), mMaster->mDecodedAudioEndTime);
SLOG("audio sample after time-adjustment [%" PRId64 ",%" PRId64 "]",
aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds());
DecodingState::HandleAudioDecoded(aAudio);
}
void HandleVideoDecoded(VideoData* aVideo) override {
// TODO : check if we need to update mOriginalDecodedDuration
MediaResult rv = LoopingVideoTimeAdjustment(aVideo);
if (NS_WARN_IF(NS_FAILED(rv))) {
mMaster->DecodeError(rv);
return;
}
// After pushing data to the queue, timestamp might be adjusted.
DecodingState::HandleVideoDecoded(aVideo);
mMaster->mDecodedVideoEndTime =
std::max(aVideo->GetEndTime(), mMaster->mDecodedVideoEndTime);
SLOG("video sample after time-adjustment [%" PRId64 ",%" PRId64 "]",
aVideo->mTime.ToMicroseconds(), aVideo->GetEndTime().ToMicroseconds());
DecodingState::HandleVideoDecoded(aVideo);
}
void HandleEndOfAudio() override {
@ -969,14 +977,15 @@ class MediaDecoderStateMachine::LoopingDecodingState
mMaster->mDecodedAudioEndTime);
}
if (DetermineOriginalDecodedDurationIfNeeded()) {
mAudioLoopingOffset += mMaster->mOriginalDecodedDuration;
AudioQueue().SetOffset(AudioQueue().GetOffset() +
mMaster->mOriginalDecodedDuration);
}
SLOG(
"received audio EOS when seamless looping, starts seeking, "
"mAudioLoopingOffset=[%" PRId64
"], mAudioTrackDecodedDuration=[%" PRId64 "]",
mAudioLoopingOffset.ToMicroseconds(),
"audioLoopingOffset=[%" PRId64 "], mAudioTrackDecodedDuration=[%" PRId64
"]",
AudioQueue().GetOffset().ToMicroseconds(),
mMaster->mAudioTrackDecodedDuration->ToMicroseconds());
RequestDataFromStartPosition(TrackInfo::TrackType::kAudioTrack);
ProcessSamplesWaitingAdjustmentIfAny();
@ -990,14 +999,15 @@ class MediaDecoderStateMachine::LoopingDecodingState
mMaster->mDecodedVideoEndTime);
}
if (DetermineOriginalDecodedDurationIfNeeded()) {
mVideoLoopingOffset += mMaster->mOriginalDecodedDuration;
VideoQueue().SetOffset(VideoQueue().GetOffset() +
mMaster->mOriginalDecodedDuration);
}
SLOG(
"received video EOS when seamless looping, starts seeking, "
"mVideoLoopingOffset=[%" PRId64
"], mVideoTrackDecodedDuration=[%" PRId64 "]",
mVideoLoopingOffset.ToMicroseconds(),
"videoLoopingOffset=[%" PRId64 "], mVideoTrackDecodedDuration=[%" PRId64
"]",
VideoQueue().GetOffset().ToMicroseconds(),
mMaster->mVideoTrackDecodedDuration->ToMicroseconds());
RequestDataFromStartPosition(TrackInfo::TrackType::kVideoTrack);
ProcessSamplesWaitingAdjustmentIfAny();
@ -1168,10 +1178,6 @@ class MediaDecoderStateMachine::LoopingDecodingState
return;
}
MOZ_ASSERT_IF(mIsReachingAudioEOS,
mAudioLoopingOffset == media::TimeUnit::Zero());
MOZ_ASSERT_IF(mIsReachingVideoEOS,
mVideoLoopingOffset == media::TimeUnit::Zero());
// If we have already reached EOS before starting media sink, the sink
// has not started yet and the current position is larger than last decoded
// end time, that means we directly seeked to EOS and playback would start
@ -1282,9 +1288,19 @@ class MediaDecoderStateMachine::LoopingDecodingState
SLOG("process %s sample waiting for timestamp adjustment",
isAudio ? "audio" : "video");
if (isAudio) {
// Waiting sample is for next round of looping, so the queue offset
// shouldn't be zero. This happens when the track has reached EOS before
// entering the state (and looping never happens before). Same for below
// video case.
if (AudioQueue().GetOffset() == media::TimeUnit::Zero()) {
AudioQueue().SetOffset(mMaster->mOriginalDecodedDuration);
}
HandleAudioDecoded(data->As<AudioData>());
} else {
MOZ_DIAGNOSTIC_ASSERT(data->mType == MediaData::Type::VIDEO_DATA);
if (VideoQueue().GetOffset() == media::TimeUnit::Zero()) {
VideoQueue().SetOffset(mMaster->mOriginalDecodedDuration);
}
HandleVideoDecoded(data->As<VideoData>());
}
}
@ -1324,42 +1340,6 @@ class MediaDecoderStateMachine::LoopingDecodingState
MaybeStopPrerolling();
}
MediaResult LoopingAudioTimeAdjustment(AudioData* aAudio) {
// `mOriginalDecodedDuration` can only be determined after we know both
// tracks' original duration. This case happens when audio track reaches EOS
// before entering the state, so we haven't set its offset yet.
if (mAudioLoopingOffset == media::TimeUnit::Zero() &&
mMaster->mOriginalDecodedDuration != media::TimeUnit::Zero()) {
mAudioLoopingOffset += mMaster->mOriginalDecodedDuration;
}
if (mAudioLoopingOffset != media::TimeUnit::Zero()) {
aAudio->mTime += mAudioLoopingOffset;
}
return aAudio->mTime.IsValid()
? MediaResult(NS_OK)
: MediaResult(
NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
"Audio sample overflow during looping time adjustment");
}
MediaResult LoopingVideoTimeAdjustment(VideoData* aVideo) {
// `mOriginalDecodedDuration` can only be determined after we know both
// tracks' original duration. This case happens when audio track reaches EOS
// before entering the state, so we haven't set its offset yet.
if (mVideoLoopingOffset == media::TimeUnit::Zero() &&
mMaster->mOriginalDecodedDuration != media::TimeUnit::Zero()) {
mVideoLoopingOffset += mMaster->mOriginalDecodedDuration;
}
if (mVideoLoopingOffset != media::TimeUnit::Zero()) {
aVideo->mTime += mVideoLoopingOffset;
}
return aVideo->mTime.IsValid()
? MediaResult(NS_OK)
: MediaResult(
NS_ERROR_DOM_MEDIA_OVERFLOW_ERR,
"Video sample overflow during looping time adjustment");
}
bool ShouldDiscardLoopedData(MediaData::Type aType) const {
if (!mMaster->mMediaSink->IsStarted()) {
return false;
@ -1387,7 +1367,8 @@ class MediaDecoderStateMachine::LoopingDecodingState
* ClockTime offset mDecodedXXXEndTime
*
*/
const auto offset = isAudio ? mAudioLoopingOffset : mVideoLoopingOffset;
const auto offset =
isAudio ? AudioQueue().GetOffset() : VideoQueue().GetOffset();
const auto endTime =
isAudio ? mMaster->mDecodedAudioEndTime : mMaster->mDecodedVideoEndTime;
const auto clockTime = mMaster->GetClock();
@ -1399,7 +1380,8 @@ class MediaDecoderStateMachine::LoopingDecodingState
MOZ_DIAGNOSTIC_ASSERT(aType == MediaData::Type::AUDIO_DATA ||
aType == MediaData::Type::VIDEO_DATA);
const bool isAudio = aType == MediaData::Type::AUDIO_DATA;
const auto offset = isAudio ? mAudioLoopingOffset : mVideoLoopingOffset;
const auto offset =
isAudio ? AudioQueue().GetOffset() : VideoQueue().GetOffset();
if (offset == media::TimeUnit::Zero()) {
return;
}
@ -1494,15 +1476,6 @@ class MediaDecoderStateMachine::LoopingDecodingState
*/
RefPtr<MediaData> mDataWaitingTimestampAdjustment;
// The accumuated offset after looping to the start position for tracks.
// Eg. Media duration 10, and we've looped 5 times, offset will be 50.
// Note, most of time they will be the same when we have both tracks, but we
// separate them in order to handle the case where both tracks reach EOS at
// different time. Eg. media duration 10, if audio track reaches to EOS
// already, but video hasn't. Then audio offset is 10, but video is 0.
media::TimeUnit mAudioLoopingOffset = media::TimeUnit::Zero();
media::TimeUnit mVideoLoopingOffset = media::TimeUnit::Zero();
MozPromiseRequestHolder<MediaFormatReader::SeekPromise> mAudioSeekRequest;
MozPromiseRequestHolder<MediaFormatReader::SeekPromise> mVideoSeekRequest;
MozPromiseRequestHolder<AudioDataPromise> mAudioDataRequest;

Просмотреть файл

@ -17,7 +17,34 @@
namespace mozilla {
extern LazyLogModule gMediaDecoderLog;
# define QLOG(msg, ...) \
MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
("MediaQueue=%p " msg, this, ##__VA_ARGS__))
class AudioData;
class VideoData;
template <typename T>
struct TimestampAdjustmentTrait {
static const bool mValue = false;
};
template <>
struct TimestampAdjustmentTrait<AudioData> {
static const bool mValue = true;
};
template <>
struct TimestampAdjustmentTrait<VideoData> {
static const bool mValue = true;
};
template <typename T>
struct NonTimestampAdjustmentTrait {
static const bool mValue = !TimestampAdjustmentTrait<T>::mValue;
};
template <class T>
class MediaQueue : private nsRefPtrDeque<T> {
@ -34,8 +61,34 @@ class MediaQueue : private nsRefPtrDeque<T> {
return nsRefPtrDeque<T>::GetSize();
}
template <typename U,
std::enable_if_t<TimestampAdjustmentTrait<U>::mValue, bool> = true>
inline void AdjustTimeStampIfNeeded(U* aItem) {
static_assert(std::is_same_v<U, AudioData> || std::is_same_v<U, VideoData>);
if (mOffset != media::TimeUnit::Zero()) {
const auto prev = aItem->mTime, prevEndTime = aItem->GetEndTime();
aItem->mTime += mOffset;
if (!aItem->mTime.IsValid()) {
NS_WARNING("Reverting timestamp adjustment due to sample overflow!");
aItem->mTime = prev;
} else {
QLOG("adjusted %s sample [%" PRId64 ",%" PRId64 "] -> [%" PRId64
",%" PRId64 "]",
std::is_same_v<U, AudioData> ? "audio" : "video",
prev.ToMicroseconds(), prevEndTime.ToMicroseconds(),
aItem->mTime.ToMicroseconds(),
aItem->GetEndTime().ToMicroseconds());
}
}
}
template <typename U, std::enable_if_t<NonTimestampAdjustmentTrait<U>::mValue,
bool> = true>
inline void AdjustTimeStampIfNeeded(U* aItem) {}
inline void PushFront(T* aItem) {
RecursiveMutexAutoLock lock(mRecursiveMutex);
AdjustTimeStampIfNeeded(aItem);
nsRefPtrDeque<T>::PushFront(aItem);
}
@ -50,6 +103,7 @@ class MediaQueue : private nsRefPtrDeque<T> {
MOZ_DIAGNOSTIC_ASSERT(item);
MOZ_DIAGNOSTIC_ASSERT(item->GetEndTime() >= item->mTime);
AdjustTimeStampIfNeeded(item);
nsRefPtrDeque<T>::Push(dont_AddRef(item));
mPushEvent.Notify(RefPtr<T>(item));
@ -88,6 +142,7 @@ class MediaQueue : private nsRefPtrDeque<T> {
void Reset() {
RecursiveMutexAutoLock lock(mRecursiveMutex);
nsRefPtrDeque<T>::Erase();
SetOffset(media::TimeUnit::Zero());
mEndOfStream = false;
}
@ -154,6 +209,22 @@ class MediaQueue : private nsRefPtrDeque<T> {
return frames;
}
bool SetOffset(const media::TimeUnit& aOffset) {
if (!aOffset.IsValid()) {
QLOG("Invalid offset!");
return false;
}
RecursiveMutexAutoLock lock(mRecursiveMutex);
mOffset = aOffset;
QLOG("Set media queue offset %" PRId64, mOffset.ToMicroseconds());
return true;
}
media::TimeUnit GetOffset() const {
RecursiveMutexAutoLock lock(mRecursiveMutex);
return mOffset;
}
MediaEventSource<RefPtr<T>>& PopFrontEvent() { return mPopFrontEvent; }
MediaEventSource<RefPtr<T>>& PushEvent() { return mPushEvent; }
@ -186,8 +257,14 @@ class MediaQueue : private nsRefPtrDeque<T> {
// True when we've decoded the last frame of data in the
// bitstream for which we're queueing frame data.
bool mEndOfStream;
// This offset will be added to any data pushed into the queue. We use it when
// the media queue starts receiving looped data, which timestamp needs to be
// modified.
media::TimeUnit mOffset;
};
} // namespace mozilla
# undef QLOG
#endif

Просмотреть файл

@ -10,10 +10,23 @@
using namespace mozilla;
using mozilla::media::TimeUnit;
MediaData* CreateDataRawPtr(int64_t aStartTime, int64_t aEndTime) {
MediaData* CreateDataRawPtr(
int64_t aStartTime, int64_t aEndTime,
MediaData::Type aType = MediaData::Type::NULL_DATA) {
const TimeUnit startTime = TimeUnit::FromMicroseconds(aStartTime);
const TimeUnit endTime = TimeUnit::FromMicroseconds(aEndTime);
return new NullData(0, startTime, endTime - startTime);
MediaData* data;
if (aType == MediaData::Type::AUDIO_DATA) {
AlignedAudioBuffer samples;
data = new AudioData(0, startTime, std::move(samples), 2, 44100);
data->mDuration = endTime - startTime;
} else if (aType == MediaData::Type::VIDEO_DATA) {
data = new VideoData(0, startTime, endTime - startTime, true, startTime,
gfx::IntSize(), 0);
} else {
data = new NullData(0, startTime, endTime - startTime);
}
return data;
}
already_AddRefed<MediaData> CreateData(int64_t aStartTime, int64_t aEndTime) {
@ -196,4 +209,80 @@ TEST(MediaQueue, CallGetElementAfterOnMultipleElements)
EXPECT_TRUE(emptyResult.IsEmpty());
}
TEST(MediaQueue, TimestampAdjustmentForSupportDataType)
{
const size_t kOffSet = 30;
{
MediaQueue<AudioData> audioQueue;
audioQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::AUDIO_DATA)->As<AudioData>());
audioQueue.SetOffset(TimeUnit::FromMicroseconds(kOffSet));
audioQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::AUDIO_DATA)->As<AudioData>());
// Data stored before setting the offset shouldn't be changed
RefPtr<AudioData> data = audioQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
// Data stored after setting the offset should be changed
data = audioQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0 + kOffSet));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10 + kOffSet));
// Reset will clean the offset.
audioQueue.Reset();
audioQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::AUDIO_DATA)->As<AudioData>());
data = audioQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
}
// Check another supported type
MediaQueue<VideoData> videoQueue;
videoQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::VIDEO_DATA)->As<VideoData>());
videoQueue.SetOffset(TimeUnit::FromMicroseconds(kOffSet));
videoQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::VIDEO_DATA)->As<VideoData>());
// Data stored before setting the offset shouldn't be changed
RefPtr<VideoData> data = videoQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
// Data stored after setting the offset should be changed
data = videoQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0 + kOffSet));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10 + kOffSet));
// Reset will clean the offset.
videoQueue.Reset();
videoQueue.Push(
CreateDataRawPtr(0, 10, MediaData::Type::VIDEO_DATA)->As<VideoData>());
data = videoQueue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
}
TEST(MediaQueue, TimestampAdjustmentForNotSupportDataType)
{
const size_t kOffSet = 30;
MediaQueue<MediaData> queue;
queue.Push(CreateDataRawPtr(0, 10));
queue.SetOffset(TimeUnit::FromMicroseconds(kOffSet));
queue.Push(CreateDataRawPtr(0, 10));
// Offset won't affect any data at all.
RefPtr<MediaData> data = queue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
data = queue.PopFront();
EXPECT_EQ(data->mTime, TimeUnit::FromMicroseconds(0));
EXPECT_EQ(data->GetEndTime(), TimeUnit::FromMicroseconds(10));
}
#undef EXPECT_EQUAL_SIZE_T