From d9860a80b4bc654e425687322494b3ed02f8fcb7 Mon Sep 17 00:00:00 2001 From: Bobby Holley Date: Sat, 6 Jun 2015 14:42:40 -0700 Subject: [PATCH] Bug 1163223 - Introduce StartTimeRendezvous and route samples through it. r=jww No sample adjusting is done yet - this just makes the data available. --- dom/media/MediaData.cpp | 7 +- dom/media/MediaData.h | 8 +- dom/media/MediaDecoderReader.h | 3 + dom/media/MediaDecoderStateMachine.cpp | 36 ++++-- dom/media/MediaDecoderStateMachine.h | 132 ++++++++++++++++++++++ dom/media/MediaFormatReader.cpp | 6 + dom/media/MediaFormatReader.h | 2 + dom/media/mediasource/MediaSourceReader.h | 1 + 8 files changed, 184 insertions(+), 11 deletions(-) diff --git a/dom/media/MediaData.cpp b/dom/media/MediaData.cpp index 260f869a2524..4213911fd072 100644 --- a/dom/media/MediaData.cpp +++ b/dom/media/MediaData.cpp @@ -25,6 +25,9 @@ using layers::ImageContainer; using layers::PlanarYCbCrImage; using layers::PlanarYCbCrData; +const char* AudioData::sTypeName = "audio"; +const char* VideoData::sTypeName = "video"; + void AudioData::EnsureAudioBuffer() { @@ -109,7 +112,7 @@ VideoData::VideoData(int64_t aOffset, int64_t aTime, int64_t aDuration, int64_t aTimecode) - : MediaData(VIDEO_DATA, aOffset, aTime, aDuration) + : MediaData(sType, aOffset, aTime, aDuration) , mDuplicate(true) { NS_ASSERTION(mDuration >= 0, "Frame must have non-negative duration."); @@ -122,7 +125,7 @@ VideoData::VideoData(int64_t aOffset, bool aKeyframe, int64_t aTimecode, IntSize aDisplay) - : MediaData(VIDEO_DATA, aOffset, aTime, aDuration) + : MediaData(sType, aOffset, aTime, aDuration) , mDisplay(aDisplay) , mDuplicate(false) { diff --git a/dom/media/MediaData.h b/dom/media/MediaData.h index 93e0526fd084..08fe8bfcdad2 100644 --- a/dom/media/MediaData.h +++ b/dom/media/MediaData.h @@ -99,12 +99,15 @@ public: AudioDataValue* aData, uint32_t aChannels, uint32_t aRate) - : MediaData(AUDIO_DATA, aOffset, aTime, aDuration) + : MediaData(sType, aOffset, aTime, aDuration) , 
mFrames(aFrames) , mChannels(aChannels) , mRate(aRate) , mAudioData(aData) {} + static const Type sType = AUDIO_DATA; + static const char* sTypeName; + // Creates a new VideoData identical to aOther, but with a different // specified timestamp and duration. All data from aOther is copied // into the new AudioData but the audio data which is transferred. @@ -148,6 +151,9 @@ public: typedef layers::Image Image; typedef layers::PlanarYCbCrImage PlanarYCbCrImage; + static const Type sType = VIDEO_DATA; + static const char* sTypeName; + // YCbCr data obtained from decoding the video. The index's are: // 0 = Y // 1 = Cb diff --git a/dom/media/MediaDecoderReader.h b/dom/media/MediaDecoderReader.h index bc5e689dafba..185bdae7cded 100644 --- a/dom/media/MediaDecoderReader.h +++ b/dom/media/MediaDecoderReader.h @@ -213,6 +213,9 @@ public: // called. virtual media::TimeIntervals GetBuffered(); + // MediaSourceReader opts out of the start-time-guessing mechanism. + virtual bool ForceZeroStartTime() const { return false; } + virtual int64_t ComputeStartTime(const VideoData* aVideo, const AudioData* aAudio); // The MediaDecoderStateMachine uses various heuristics that assume that diff --git a/dom/media/MediaDecoderStateMachine.cpp b/dom/media/MediaDecoderStateMachine.cpp index 38fa3b34ca98..8c60c7ba337f 100644 --- a/dom/media/MediaDecoderStateMachine.cpp +++ b/dom/media/MediaDecoderStateMachine.cpp @@ -56,8 +56,6 @@ using namespace mozilla::media; #undef DECODER_LOG #undef VERBOSE_LOG -extern PRLogModuleInfo* gMediaDecoderLog; -extern PRLogModuleInfo* gMediaSampleLog; #define LOG(m, l, x, ...) \ MOZ_LOG(m, l, ("Decoder=%p " x, mDecoder.get(), ##__VA_ARGS__)) #define DECODER_LOG(x, ...) \ @@ -1568,6 +1566,11 @@ void MediaDecoderStateMachine::Shutdown() Reset(); + // Shut down our start time rendezvous. + if (mStartTimeRendezvous) { + mStartTimeRendezvous->Destroy(); + } + // Put a task in the decode queue to shutdown the reader. // the queue to spin down. 
ProxyMediaCall(DecodeTaskQueue(), mReader.get(), __func__, &MediaDecoderReader::Shutdown) @@ -2170,6 +2173,13 @@ MediaDecoderStateMachine::OnMetadataRead(MetadataHolder* aMetadata) mInfo = aMetadata->mInfo; mMetadataTags = aMetadata->mTags.forget(); + // Set up the start time rendezvous if it doesn't already exist (which is + // generally the case, unless we're coming out of dormant mode). + if (!mStartTimeRendezvous) { + mStartTimeRendezvous = new StartTimeRendezvous(TaskQueue(), HasAudio(), HasVideo(), + mReader->ForceZeroStartTime() || IsRealTime()); + } + if (mInfo.mMetadataDuration.isSome() || mInfo.mMetadataEndTime.isSome()) { RecomputeDuration(); } @@ -2283,17 +2293,27 @@ MediaDecoderStateMachine::DecodeFirstFrame() NS_ENSURE_SUCCESS(res, res); } else { if (HasAudio()) { - mAudioDataRequest.Begin(ProxyMediaCall(DecodeTaskQueue(), mReader.get(), - __func__, &MediaDecoderReader::RequestAudioData) + mAudioDataRequest.Begin( + ProxyMediaCall(DecodeTaskQueue(), mReader.get(), __func__, + &MediaDecoderReader::RequestAudioData) + ->Then(TaskQueue(), __func__, mStartTimeRendezvous.get(), + &StartTimeRendezvous::ProcessFirstSample<AudioDataPromise>, + &StartTimeRendezvous::FirstSampleRejected<AudioData>) + ->CompletionPromise() ->Then(TaskQueue(), __func__, this, &MediaDecoderStateMachine::OnAudioDecoded, - &MediaDecoderStateMachine::OnAudioNotDecoded)); + &MediaDecoderStateMachine::OnAudioNotDecoded) + ); } if (HasVideo()) { mVideoDecodeStartTime = TimeStamp::Now(); - mVideoDataRequest.Begin(ProxyMediaCall(DecodeTaskQueue(), mReader.get(), - __func__, &MediaDecoderReader::RequestVideoData, false, - int64_t(0)) + mVideoDataRequest.Begin( + ProxyMediaCall(DecodeTaskQueue(), mReader.get(), __func__, + &MediaDecoderReader::RequestVideoData, false, int64_t(0)) + ->Then(TaskQueue(), __func__, mStartTimeRendezvous.get(), + &StartTimeRendezvous::ProcessFirstSample<VideoDataPromise>, + &StartTimeRendezvous::FirstSampleRejected<VideoData>) + ->CompletionPromise() ->Then(TaskQueue(), __func__, this, 
&MediaDecoderStateMachine::OnVideoDecoded, &MediaDecoderStateMachine::OnVideoNotDecoded)); diff --git a/dom/media/MediaDecoderStateMachine.h b/dom/media/MediaDecoderStateMachine.h index de9915df692e..c643bb210418 100644 --- a/dom/media/MediaDecoderStateMachine.h +++ b/dom/media/MediaDecoderStateMachine.h @@ -100,6 +100,9 @@ class AudioSegment; class MediaTaskQueue; class AudioSink; +extern PRLogModuleInfo* gMediaDecoderLog; +extern PRLogModuleInfo* gMediaSampleLog; + /* The state machine class. This manages the decoding and seeking in the MediaDecoderReader on the decode task queue, and A/V sync on the shared @@ -117,6 +120,8 @@ class MediaDecoderStateMachine friend class AudioSink; NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MediaDecoderStateMachine) public: + typedef MediaDecoderReader::AudioDataPromise AudioDataPromise; + typedef MediaDecoderReader::VideoDataPromise VideoDataPromise; typedef MediaDecoderOwner::NextFrameStatus NextFrameStatus; MediaDecoderStateMachine(MediaDecoder* aDecoder, MediaDecoderReader* aReader, @@ -782,6 +787,133 @@ public: } mDelayedScheduler; + // StartTimeRendezvous is a helper class that quarantines the first sample + // until it gets a sample from both channels, such that we can be guaranteed + // to know the start time by the time On{Audio,Video}Decoded is called. 
+ class StartTimeRendezvous { + public: + typedef MediaDecoderReader::AudioDataPromise AudioDataPromise; + typedef MediaDecoderReader::VideoDataPromise VideoDataPromise; + typedef MediaPromise<bool, bool, /* IsExclusive = */ false> HaveStartTimePromise; + + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(StartTimeRendezvous); + StartTimeRendezvous(AbstractThread* aOwnerThread, bool aHasAudio, bool aHasVideo, + bool aForceZeroStartTime) + : mOwnerThread(aOwnerThread) + { + if (aForceZeroStartTime) { + mAudioStartTime.emplace(0); + mVideoStartTime.emplace(0); + return; + } + + if (!aHasAudio) { + mAudioStartTime.emplace(INT64_MAX); + } + + if (!aHasVideo) { + mVideoStartTime.emplace(INT64_MAX); + } + } + + void Destroy() + { + mAudioStartTime = Some(mAudioStartTime.refOr(INT64_MAX)); + mVideoStartTime = Some(mVideoStartTime.refOr(INT64_MAX)); + mHaveStartTimePromise.RejectIfExists(false, __func__); + } + + nsRefPtr<HaveStartTimePromise> AwaitStartTime() + { + if (HaveStartTime()) { + return HaveStartTimePromise::CreateAndResolve(true, __func__); + } + return mHaveStartTimePromise.Ensure(__func__); + } + + template<typename PromiseType> + struct PromiseSampleType { + typedef typename PromiseType::ResolveValueType::element_type Type; + }; + + template<typename PromiseType> + nsRefPtr<PromiseType> ProcessFirstSample(typename PromiseSampleType<PromiseType>::Type* aData) + { + typedef typename PromiseSampleType<PromiseType>::Type DataType; + typedef typename PromiseType::Private PromisePrivate; + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + + MaybeSetChannelStartTime<DataType>(aData->mTime); + + nsRefPtr<PromisePrivate> p = new PromisePrivate(__func__); + nsRefPtr<DataType> data = aData; + nsRefPtr<StartTimeRendezvous> self = this; + AwaitStartTime()->Then(mOwnerThread, __func__, + [p, data, self] () -> void { + MOZ_ASSERT(self->mOwnerThread->IsCurrentThreadIn()); + p->Resolve(data, __func__); + }, + [p] () -> void { p->Reject(MediaDecoderReader::CANCELED, __func__); }); + + return p.forget(); + } + + template<typename SampleType> + void FirstSampleRejected(MediaDecoderReader::NotDecodedReason aReason) + { + MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); + if (aReason == 
MediaDecoderReader::DECODE_ERROR) { + mHaveStartTimePromise.RejectIfExists(false, __func__); + } else if (aReason == MediaDecoderReader::END_OF_STREAM) { + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, + ("StartTimeRendezvous=%p %s Has no samples.", this, SampleType::sTypeName)); + MaybeSetChannelStartTime<SampleType>(INT64_MAX); + } + } + + bool HaveStartTime() { return mAudioStartTime.isSome() && mVideoStartTime.isSome(); } + int64_t StartTime() + { + int64_t time = std::min(mAudioStartTime.ref(), mVideoStartTime.ref()); + return time == INT64_MAX ? 0 : time; + } + private: + virtual ~StartTimeRendezvous() {} + + template<typename SampleType> + void MaybeSetChannelStartTime(int64_t aStartTime) + { + if (ChannelStartTime(SampleType::sType).isSome()) { + // If we're initialized with aForceZeroStartTime=true, the channel start + // times are already set. + return; + } + + MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, + ("StartTimeRendezvous=%p Setting %s start time to %lld", + this, SampleType::sTypeName, aStartTime)); + + ChannelStartTime(SampleType::sType).emplace(aStartTime); + if (HaveStartTime()) { + mHaveStartTimePromise.ResolveIfExists(true, __func__); + } + } + + Maybe<int64_t>& ChannelStartTime(MediaData::Type aType) + { + return aType == MediaData::AUDIO_DATA ? mAudioStartTime : mVideoStartTime; + } + + MediaPromiseHolder<HaveStartTimePromise> mHaveStartTimePromise; + nsRefPtr<AbstractThread> mOwnerThread; + Maybe<int64_t> mAudioStartTime; + Maybe<int64_t> mVideoStartTime; + }; + nsRefPtr<StartTimeRendezvous> mStartTimeRendezvous; + + bool HaveStartTime() { return mStartTimeRendezvous && mStartTimeRendezvous->HaveStartTime(); } + int64_t StartTime() { return mStartTimeRendezvous->StartTime(); } + // Time at which the last video sample was requested. If it takes too long + // before the sample arrives, we will increase the amount of audio we buffer. + // This is necessary for legacy synchronous decoders to prevent underruns. 
diff --git a/dom/media/MediaFormatReader.cpp b/dom/media/MediaFormatReader.cpp index faea8880c103..109ca04ea7df 100644 --- a/dom/media/MediaFormatReader.cpp +++ b/dom/media/MediaFormatReader.cpp @@ -1474,6 +1474,12 @@ MediaFormatReader::NotifyDataRemoved() TaskQueue()->Dispatch(task.forget()); } +bool +MediaFormatReader::ForceZeroStartTime() const +{ + return !mDemuxer->ShouldComputeStartTime(); +} + int64_t MediaFormatReader::ComputeStartTime(const VideoData* aVideo, const AudioData* aAudio) { diff --git a/dom/media/MediaFormatReader.h b/dom/media/MediaFormatReader.h index 6abc3329d806..3b38cb9c911a 100644 --- a/dom/media/MediaFormatReader.h +++ b/dom/media/MediaFormatReader.h @@ -75,6 +75,8 @@ public: media::TimeIntervals GetBuffered() override; + virtual bool ForceZeroStartTime() const override; + // For Media Resource Management void SetIdle() override; bool IsDormantNeeded() override; diff --git a/dom/media/mediasource/MediaSourceReader.h b/dom/media/mediasource/MediaSourceReader.h index 2e68e2d5d3ee..0fd27ad4da46 100644 --- a/dom/media/mediasource/MediaSourceReader.h +++ b/dom/media/mediasource/MediaSourceReader.h @@ -94,6 +94,7 @@ public: // We can't compute a proper start time since we won't necessarily // have the first frame of the resource available. This does the same // as chrome/blink and assumes that we always start at t=0. + virtual bool ForceZeroStartTime() const override { return true; } virtual int64_t ComputeStartTime(const VideoData* aVideo, const AudioData* aAudio) override { return 0; } // Buffering heuristics don't make sense for MSE, because the arrival of data