Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1536766 - Better handle overlapping video frames in DecodedStream::SendVideo. r=jya

A case where this wasn't working was bipbop-lateaudio.mp4, where the last
frame has duration 0 and starts and ends before the previous frame has ended.
The VideoSink still renders this frame at the end of playback, so this patch
brings DecodedStream closer to that behavior by rendering all frames whose
start time is after the previous frame's start time. The track's duration is
still based on absolute times, so things don't blow up.

Differential Revision: https://phabricator.services.mozilla.com/D27267

--HG--
extra : moz-landing-system : lando
This commit is contained in:
Parent: e63ff93e4d
Commit: cdbaaceba1
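As a reading aid, the rule the commit message describes can be sketched in a
few lines of self-contained C++. This is an illustrative model only, not the
patch's code: Frame, lastStart, and lastEnd stand in for VideoData,
mLastVideoStartTime, and mLastVideoEndTime, and the timestamps are invented.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Stand-in for VideoData: a presentation interval in microseconds.
struct Frame {
  int64_t startUs;
  int64_t endUs;  // may equal startUs, i.e. a duration-0 frame
};

int main() {
  // State after the previous frame was written, mirroring
  // mLastVideoStartTime / mLastVideoEndTime.
  const int64_t lastStart = 0;
  const int64_t lastEnd = 33366;

  // A bipbop-lateaudio.mp4-style tail frame: duration 0, starting and
  // ending before the previous frame has ended.
  const Frame f = {20000, 20000};

  // Old rule: compare against the previous frame's end (mNextVideoTime);
  // this frame would be dropped.
  std::cout << "old rule renders: " << (lastEnd < f.startUs) << "\n";    // 0

  // New rule: compare against the previous frame's start; the frame is
  // rendered even though it overlaps the previous one.
  std::cout << "new rule renders: " << (lastStart < f.startUs) << "\n";  // 1

  // The chunk is written from lastEnd with a padded, strictly positive
  // duration, so the track's duration stays based on absolute times.
  const int64_t chunkEnd = std::max(f.endUs, lastEnd + 1);
  std::cout << "chunk: [" << lastEnd << ", " << chunkEnd << ")\n";
  return 0;
}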
dom/media/mediasink/DecodedStream.cpp

@@ -49,7 +49,7 @@ class DecodedStreamTrackListener : public MediaStreamTrackListener {
  private:
   const RefPtr<DecodedStreamGraphListener> mGraphListener;
   const RefPtr<SourceMediaStream> mStream;
-  const mozilla::TrackID mTrackID;
+  const TrackID mTrackID;
 };

 class DecodedStreamGraphListener {
@@ -111,7 +111,7 @@ class DecodedStreamGraphListener {
       MOZ_CRASH("Unexpected TrackID");
     }
     mStream->Graph()->DispatchToMainThreadStableState(
-        NewRunnableMethod<mozilla::TrackID>(
+        NewRunnableMethod<TrackID>(
            "DecodedStreamGraphListener::DoNotifyTrackEnded", this,
            &DecodedStreamGraphListener::DoNotifyTrackEnded, aTrackID));
   }
@@ -178,7 +178,7 @@ class DecodedStreamGraphListener {

 DecodedStreamTrackListener::DecodedStreamTrackListener(
     DecodedStreamGraphListener* aGraphListener, SourceMediaStream* aStream,
-    mozilla::TrackID aTrackID)
+    TrackID aTrackID)
     : mGraphListener(aGraphListener), mStream(aStream), mTrackID(aTrackID) {}

 void DecodedStreamTrackListener::NotifyOutput(MediaStreamGraph* aGraph,
@@ -210,6 +210,12 @@ class DecodedStreamData {
   void Forget();
   nsCString GetDebugInfo();

+  void WriteVideoToSegment(layers::Image* aImage, const TimeUnit& aStart,
+                           const TimeUnit& aEnd,
+                           const gfx::IntSize& aIntrinsicSize,
+                           const TimeStamp& aTimeStamp, VideoSegment* aOutput,
+                           const PrincipalHandle& aPrincipalHandle);
+
   /* The following group of fields are protected by the decoder's monitor
    * and can be read or written on any thread.
    */
@@ -219,11 +225,21 @@ class DecodedStreamData {
   StreamTime mStreamVideoWritten;
   // Count of audio frames written to the stream in the stream's rate
   StreamTime mStreamAudioWritten;
-  // mNextVideoTime is the end timestamp for the last packet sent to the stream.
-  // Therefore video packets starting at or after this time need to be copied
+  // mNextAudioTime is the end timestamp for the last packet sent to the stream.
+  // Therefore audio packets starting at or after this time need to be copied
   // to the output stream.
-  TimeUnit mNextVideoTime;
   TimeUnit mNextAudioTime;
+  // mLastVideoStartTime is the start timestamp for the last packet sent to the
+  // stream. Therefore video packets starting after this time need to be copied
+  // to the output stream.
+  Maybe<TimeUnit> mLastVideoStartTime;
+  // mLastVideoEndTime is the end timestamp for the last packet sent to the
+  // stream. It is used to adjust durations of chunks sent to the output stream
+  // when there are overlaps in VideoData.
+  Maybe<TimeUnit> mLastVideoEndTime;
+  // The timestamp of the last frame, so we can ensure time never goes
+  // backwards.
+  TimeStamp mLastVideoTimeStamp;
   // The last video image sent to the stream. Useful if we need to replicate
   // the image.
   RefPtr<layers::Image> mLastVideoImage;
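A note on the two Maybe<TimeUnit> members introduced above: they stay
Nothing() until the first chunk is written, and consumers fall back to the
stream's start time through valueOr. The pattern in miniature, using
std::optional in place of mozilla::Maybe (which offers the same valueOr-style
accessor); the struct and names here are invented for illustration:

#include <cstdint>
#include <optional>

using TimeUs = int64_t;  // microsecond stand-in for media::TimeUnit

struct VideoWriteState {
  std::optional<TimeUs> lastVideoStart;  // like mLastVideoStartTime
  std::optional<TimeUs> lastVideoEnd;    // like mLastVideoEndTime
  TimeUs startTime = 0;                  // like mStartTime

  // Before anything has been written, both queries degrade to the stream's
  // start time, giving the comparisons in SendVideo a well-defined origin.
  TimeUs LastStart() const { return lastVideoStart.value_or(startTime); }
  TimeUs LastEnd() const { return lastVideoEnd.value_or(startTime); }
};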
@@ -247,7 +263,6 @@ DecodedStreamData::DecodedStreamData(
     : mAudioFramesWritten(0),
       mStreamVideoWritten(0),
       mStreamAudioWritten(0),
-      mNextVideoTime(aInit.mStartTime),
       mNextAudioTime(aInit.mStartTime),
       mHaveSentFinishAudio(false),
       mHaveSentFinishVideo(false),
@@ -301,10 +316,15 @@ nsCString DecodedStreamData::GetDebugInfo() {
   return nsPrintfCString(
       "DecodedStreamData=%p mAudioFramesWritten=%" PRId64
       " mStreamAudioWritten=%" PRId64 " mStreamVideoWritten=%" PRId64
-      " mNextAudioTime=%" PRId64 " mNextVideoTime=%" PRId64
-      "mHaveSentFinishAudio=%d mHaveSentFinishVideo=%d",
+      " mNextAudioTime=%" PRId64 " mLastVideoStartTime=%" PRId64
+      " mLastVideoEndTime=%" PRId64
+      " mHaveSentFinishAudio=%d mHaveSentFinishVideo=%d",
       this, mAudioFramesWritten, mStreamAudioWritten, mStreamVideoWritten,
-      mNextAudioTime.ToMicroseconds(), mNextVideoTime.ToMicroseconds(),
+      mNextAudioTime.ToMicroseconds(),
+      mLastVideoStartTime.valueOr(TimeUnit::FromMicroseconds(-1))
+          .ToMicroseconds(),
+      mLastVideoEndTime.valueOr(TimeUnit::FromMicroseconds(-1))
+          .ToMicroseconds(),
       mHaveSentFinishAudio, mHaveSentFinishVideo);
 }

@@ -425,7 +445,7 @@ nsresult DecodedStream::Start(const TimeUnit& aStartTime,
                            std::move(videoEndedHolder),
                            mOutputStreamManager, mAbstractMainThread);
   SyncRunnable::DispatchToThread(
-      SystemGroup::EventTargetFor(mozilla::TaskCategory::Other), r);
+      SystemGroup::EventTargetFor(TaskCategory::Other), r);
   mData = static_cast<R*>(r.get())->ReleaseData();

   if (mData) {
@@ -590,8 +610,7 @@ void DecodedStream::SendAudio(double aVolume, bool aIsSameOrigin,
   }

   // |mNextAudioTime| is updated as we process each audio sample in
-  // SendStreamAudio(). This is consistent with how |mNextVideoTime|
-  // is updated for video samples.
+  // SendStreamAudio().
   if (output.GetDuration() > 0) {
     mData->mStreamAudioWritten +=
         sourceStream->AppendToTrack(audioTrackId, &output);
@@ -603,23 +622,24 @@ void DecodedStream::SendAudio(double aVolume, bool aIsSameOrigin,
   }
 }

-static void WriteVideoToMediaStream(MediaStream* aStream, layers::Image* aImage,
-                                    const TimeUnit& aStart,
-                                    const TimeUnit& aEnd,
-                                    const mozilla::gfx::IntSize& aIntrinsicSize,
-                                    const TimeStamp& aTimeStamp,
-                                    VideoSegment* aOutput,
-                                    const PrincipalHandle& aPrincipalHandle) {
+void DecodedStreamData::WriteVideoToSegment(
+    layers::Image* aImage, const TimeUnit& aStart, const TimeUnit& aEnd,
+    const gfx::IntSize& aIntrinsicSize, const TimeStamp& aTimeStamp,
+    VideoSegment* aOutput, const PrincipalHandle& aPrincipalHandle) {
   RefPtr<layers::Image> image = aImage;
-  auto end = aStream->MicrosecondsToStreamTimeRoundDown(aEnd.ToMicroseconds());
+  auto end = mStream->MicrosecondsToStreamTimeRoundDown(aEnd.ToMicroseconds());
   auto start =
-      aStream->MicrosecondsToStreamTimeRoundDown(aStart.ToMicroseconds());
+      mStream->MicrosecondsToStreamTimeRoundDown(aStart.ToMicroseconds());
   aOutput->AppendFrame(image.forget(), aIntrinsicSize, aPrincipalHandle, false,
                        aTimeStamp);
   // Extend this so we get accurate durations for all frames.
   // Because this track is pushed, we need durations so the graph can track
   // when playout of the track has finished.
   aOutput->ExtendLastFrameBy(end - start);
+
+  mLastVideoStartTime = Some(aStart);
+  mLastVideoEndTime = Some(aEnd);
+  mLastVideoTimeStamp = aTimeStamp;
 }

 static bool ZeroDurationAtLastChunk(VideoSegment& aInput) {
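One detail worth pausing on in WriteVideoToSegment: MicrosecondsToStreamTimeRoundDown
truncates, so two nearby microsecond times can land on the same stream tick and
yield a zero-length chunk. That is why callers later in this patch pad end
times by StreamTimeToMicroseconds(1) + 1. A rough model of the arithmetic,
assuming a hypothetical 48 kHz graph rate (the real rate comes from the
MediaStreamGraph):

#include <cstdint>
#include <iostream>

constexpr int64_t kRateHz = 48000;  // assumed rate, for illustration only

constexpr int64_t MicrosecondsToStreamTimeRoundDown(int64_t aUs) {
  return aUs * kRateHz / 1000000;  // integer division truncates
}

constexpr int64_t StreamTimeToMicroseconds(int64_t aTicks) {
  return aTicks * 1000000 / kRateHz;
}

int main() {
  // 100 us and 103 us round down to the same tick, so a chunk spanning
  // them would have duration 0...
  std::cout << MicrosecondsToStreamTimeRoundDown(100) << " "
            << MicrosecondsToStreamTimeRoundDown(103) << "\n";  // 4 4
  // ...while padding by one tick's worth of microseconds plus one puts the
  // rounded-down end on a strictly later tick than the start.
  const int64_t deviationUs = StreamTimeToMicroseconds(1) + 1;  // 21 us here
  std::cout << MicrosecondsToStreamTimeRoundDown(100 + deviationUs)
            << "\n";  // 5
  return 0;
}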
@@ -657,15 +677,20 @@ void DecodedStream::ResetVideo(const PrincipalHandle& aPrincipalHandle) {
                        aPrincipalHandle, false, currentTime);
   mData->mStream->AppendToTrack(mInfo.mVideo.mTrackId, &resetter);

-  // Consumer buffers have been reset. We now set mNextVideoTime to the start
+  // Consumer buffers have been reset. We now set the next time to the start
   // time of the current frame, so that it can be displayed again on resuming.
   if (RefPtr<VideoData> v = mVideoQueue.PeekFront()) {
-    mData->mNextVideoTime = v->mTime;
+    mData->mLastVideoStartTime = Some(v->mTime - TimeUnit::FromMicroseconds(1));
+    mData->mLastVideoEndTime = Some(v->mTime);
   } else {
-    // There was no current frame in the queue. We set the next time to push to
-    // the current time, so we at least don't resume starting in the future.
-    mData->mNextVideoTime = currentPosition;
+    // There was no current frame in the queue. We set the next time to the
+    // current time, so we at least don't resume starting in the future.
+    mData->mLastVideoStartTime =
+        Some(currentPosition - TimeUnit::FromMicroseconds(1));
+    mData->mLastVideoEndTime = Some(currentPosition);
   }
+
+  mData->mLastVideoTimeStamp = currentTime;
 }

 void DecodedStream::SendVideo(bool aIsSameOrigin,
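The one-microsecond backdating in ResetVideo pairs with the strict less-than
comparisons used when picking frames to send: recording the last start exactly
at the current frame's start would filter that very frame out, while nudging
the recorded start one microsecond earlier lets the frame be re-sent after the
reset. In miniature (timestamps in microseconds, values invented):

#include <cstdint>
#include <iostream>

int main() {
  const int64_t frameStartUs = 500000;  // the frame we want to show again

  // Recording lastStart == frameStartUs fails the strict comparison that
  // frame selection uses, so the frame would never be re-sent:
  int64_t lastStart = frameStartUs;
  std::cout << (lastStart < frameStartUs) << "\n";  // 0: filtered out

  // Backdating by one microsecond, as ResetVideo does, re-qualifies it:
  lastStart = frameStartUs - 1;
  std::cout << (lastStart < frameStartUs) << "\n";  // 1: re-sent
  return 0;
}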
@@ -687,15 +712,22 @@ void DecodedStream::SendVideo(bool aIsSameOrigin,

   // It's OK to hold references to the VideoData because VideoData
   // is ref-counted.
-  mVideoQueue.GetElementsAfter(mData->mNextVideoTime, &video);
+  mVideoQueue.GetElementsAfter(
+      mData->mLastVideoStartTime.valueOr(mStartTime.ref()), &video);

   TimeStamp currentTime;
   TimeUnit currentPosition = GetPosition(&currentTime);

+  if (mData->mLastVideoTimeStamp.IsNull()) {
+    mData->mLastVideoTimeStamp = currentTime;
+  }
+
   for (uint32_t i = 0; i < video.Length(); ++i) {
     VideoData* v = video[i];
+    TimeUnit lastStart = mData->mLastVideoStartTime.valueOr(mStartTime.ref());
+    TimeUnit lastEnd = mData->mLastVideoEndTime.valueOr(mStartTime.ref());

-    if (mData->mNextVideoTime < v->mTime) {
+    if (lastEnd < v->mTime) {
       // Write last video frame to catch up. mLastVideoImage can be null here
       // which is fine, it just means there's no video.

@@ -705,25 +737,28 @@ void DecodedStream::SendVideo(bool aIsSameOrigin,
       // video frame). E.g. if we have a video frame that is 30 sec long
       // and capture happens at 15 sec, we'll have to append a black frame
       // that is 15 sec long.
-      WriteVideoToMediaStream(
-          sourceStream, mData->mLastVideoImage, mData->mNextVideoTime, v->mTime,
-          mData->mLastVideoImageDisplaySize,
-          currentTime +
-              (mData->mNextVideoTime - currentPosition).ToTimeDuration(),
-          &output, aPrincipalHandle);
-      mData->mNextVideoTime = v->mTime;
-    }
-
-    if (mData->mNextVideoTime < v->GetEndTime()) {
-      WriteVideoToMediaStream(
-          sourceStream, v->mImage, mData->mNextVideoTime, v->GetEndTime(),
-          v->mDisplay,
-          currentTime +
-              (mData->mNextVideoTime - currentPosition).ToTimeDuration(),
-          &output, aPrincipalHandle);
-      mData->mNextVideoTime = v->GetEndTime();
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      mData->WriteVideoToSegment(mData->mLastVideoImage, lastEnd, v->mTime,
+                                 mData->mLastVideoImageDisplaySize, t, &output,
+                                 aPrincipalHandle);
+    } else if (lastStart < v->mTime) {
+      // This frame starts after the last frame's start. Note that this could be
+      // before the last frame's end time for some videos. This only matters for
+      // the track's lifetime in the MSG, as rendering is based on timestamps,
+      // aka frame start times.
+      TimeStamp t =
+          std::max(mData->mLastVideoTimeStamp,
+                   currentTime + (lastEnd - currentPosition).ToTimeDuration());
+      TimeUnit end = std::max(
+          v->GetEndTime(),
+          lastEnd + TimeUnit::FromMicroseconds(
+                        sourceStream->StreamTimeToMicroseconds(1) + 1));
+      mData->mLastVideoImage = v->mImage;
+      mData->mLastVideoImageDisplaySize = v->mDisplay;
+      mData->WriteVideoToSegment(v->mImage, lastEnd, end, v->mDisplay, t,
+                                 &output, aPrincipalHandle);
     }
   }

@@ -758,17 +793,16 @@ void DecodedStream::SendVideo(bool aIsSameOrigin,
   if (compensateEOS) {
     VideoSegment endSegment;
     // Calculate the deviation clock time from DecodedStream.
-    // We round the nr of microseconds up, because WriteVideoToMediaStream
+    // We round the nr of microseconds up, because WriteVideoToSegment
     // will round the conversion from microseconds to StreamTime down.
-    auto deviation =
-        FromMicroseconds(sourceStream->StreamTimeToMicroseconds(1) + 1);
-    WriteVideoToMediaStream(
-        sourceStream, mData->mLastVideoImage, mData->mNextVideoTime,
-        mData->mNextVideoTime + deviation, mData->mLastVideoImageDisplaySize,
-        currentTime + (mData->mNextVideoTime + deviation - currentPosition)
-            .ToTimeDuration(),
+    auto deviation = TimeUnit::FromMicroseconds(
+        sourceStream->StreamTimeToMicroseconds(1) + 1);
+    auto start = mData->mLastVideoEndTime.valueOr(mStartTime.ref());
+    mData->WriteVideoToSegment(
+        mData->mLastVideoImage, start, start + deviation,
+        mData->mLastVideoImageDisplaySize,
+        currentTime + (start + deviation - currentPosition).ToTimeDuration(),
         &endSegment, aPrincipalHandle);
-    mData->mNextVideoTime += deviation;
     MOZ_ASSERT(endSegment.GetDuration() > 0);
     if (!aIsSameOrigin) {
       endSegment.ReplaceWithDisabled();
@@ -817,7 +851,7 @@ TimeUnit DecodedStream::GetEndTime(TrackType aType) const {
       return t;
     }
   } else if (aType == TrackInfo::kVideoTrack && mData) {
-    return mData->mNextVideoTime;
+    return mData->mLastVideoEndTime.valueOr(mStartTime.ref());
   }
   return TimeUnit::Zero();
 }
@@ -835,8 +869,12 @@ TimeUnit DecodedStream::GetPosition(TimeStamp* aTimeStamp) const {

 void DecodedStream::NotifyOutput(int64_t aTime) {
   AssertOwnerThread();
-  MOZ_ASSERT(mLastOutputTime <= FromMicroseconds(aTime));
-  mLastOutputTime = FromMicroseconds(aTime);
+  TimeUnit time = TimeUnit::FromMicroseconds(aTime);
+  if (time == mLastOutputTime) {
+    return;
+  }
+  MOZ_ASSERT(mLastOutputTime < time);
+  mLastOutputTime = time;
   auto currentTime = GetPosition();

   // Remove audio samples that have been played by MSG from the queue.
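The NotifyOutput rework above also turns a repeated notification for the same
time into an explicit no-op, where the old code only tolerated it through a
non-strict assertion. A toy model of the new guard, with none of the real
class around it:

#include <cassert>
#include <cstdint>

struct OutputClock {
  int64_t lastUs = 0;

  void Notify(int64_t aUs) {
    if (aUs == lastUs) {
      return;  // duplicate callback: nothing to do
    }
    assert(lastUs < aUs);  // otherwise time must move strictly forward
    lastUs = aUs;
  }
};

int main() {
  OutputClock clock;
  clock.Notify(10);
  clock.Notify(10);  // ignored rather than re-asserted
  clock.Notify(20);
  return 0;
}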
dom/media/mediasink/DecodedStream.h
@@ -73,9 +73,6 @@ class DecodedStream : public MediaSink {
   virtual ~DecodedStream();

  private:
-  media::TimeUnit FromMicroseconds(int64_t aTime) {
-    return media::TimeUnit::FromMicroseconds(aTime);
-  }
   void DestroyData(UniquePtr<DecodedStreamData>&& aData);
   void SendAudio(double aVolume, bool aIsSameOrigin,
                  const PrincipalHandle& aPrincipalHandle);