Bug 1839391 - Use AudioTrimmer to trim audio in OGG containers. r=alwu

This removes all custom code there. The AudioTrimmer change allows handling decoders that have one packet of delay, such as some codecs when used through ffmpeg. There was a comment about this, but the code was gone somehow. We can verify that this happens when enabling `PlatformDecoderModule:4` and decoding using ffmpeg: lots of `EAGAIN` logs will be output, because ffmpeg requires more input. Differential Revision: https://phabricator.services.mozilla.com/D181522
2023-08-08 12:12:47 +00:00 · 2023-08-08 12:12:47 +00:00 · d2caeb9b64
--- a/dom/media/ogg/OggCodecState.cpp
+++ b/dom/media/ogg/OggCodecState.cpp
@ -1263,7 +1263,16 @@ already_AddRefed<MediaRawData> OpusState::PacketOutAsMediaRawData() {
    int64_t startFrame = mPrevPacketGranulepos;
    frames -= std::max<int64_t>(
        0, std::min(endFrame - startFrame, static_cast<int64_t>(frames)));
-    data->mDiscardPadding = frames;
+    TimeUnit toTrim = TimeUnit(frames, 48000);
+    LOG(LogLevel::Debug,
+        ("Trimming last opus packet: [%s, %s] to [%s, %s]",
+         data->mTime.ToString().get(), data->GetEndTime().ToString().get(),
+         data->mTime.ToString().get(),
+         (data->mTime + data->mDuration - toTrim).ToString().get()));
+
+    data->mOriginalPresentationWindow =
+        Some(media::TimeInterval{data->mTime, data->mTime + data->mDuration});
+    data->mDuration -= toTrim;
  }

  // Save this packet's granule position in case we need to perform end
--- a/dom/media/ogg/OggDemuxer.cpp
+++ b/dom/media/ogg/OggDemuxer.cpp
@ -8,6 +8,7 @@
 #include "OggRLBox.h"
 #include "MediaDataDemuxer.h"
 #include "OggCodecState.h"
+#include "TimeUnits.h"
 #include "XiphExtradata.h"
 #include "mozilla/AbstractThread.h"
 #include "mozilla/Atomics.h"
@ -1397,12 +1398,14 @@ RefPtr<OggTrackDemuxer::SeekPromise> OggTrackDemuxer::Seek(
 }

 RefPtr<MediaRawData> OggTrackDemuxer::NextSample() {
+  OGG_DEBUG("OggTrackDemuxer::NextSample");
  if (mQueuedSample) {
    RefPtr<MediaRawData> nextSample = mQueuedSample;
    mQueuedSample = nullptr;
    if (mType == TrackInfo::kAudioTrack) {
      nextSample->mTrackInfo = mParent->mSharedAudioTrackInfo;
    }
+    OGG_DEBUG("OggTrackDemuxer::NextSample (queued)");
    return nextSample;
  }
  ogg_packet* packet = mParent->GetNextPacket(mType);
@ -1435,6 +1438,41 @@ RefPtr<MediaRawData> OggTrackDemuxer::NextSample() {
  if (!data->mTime.IsValid()) {
    return nullptr;
  }
+  TimeUnit mediaStartTime = mParent->mStartTime.valueOr(TimeUnit::Zero());
+  TimeUnit mediaEndTime =
+      mediaStartTime +
+      mParent->mInfo.mMetadataDuration.valueOr(TimeUnit::FromInfinity());
+  // Trim packets that end after the media duration.
+  if (mType == TrackInfo::kAudioTrack) {
+    OGG_DEBUG("Check trimming %s > %s", data->GetEndTime().ToString().get(),
+              mediaEndTime.ToString().get());
+    // Because of a quirk of this demuxer, this needs to be >=. It looks
+    // useless, because `toTrim` is going to be 0, but it allows setting
+    // `mOriginalPresentationWindow`, so that the trimming logic will later
+    // remove extraneous frames.
+    // This demuxer sets the end time of a packet to be the end time that
+    // should be played, not the end time that corresponds to the number of
+    // decoded frames, that we can only have after decoding.
+    // >= allows detecting the last packet, and trimming it appropriately,
+    // after decoding has happened, with the AudioTrimmer.
+    if (data->GetEndTime() >= mediaEndTime) {
+      TimeUnit toTrim = data->GetEndTime() - mediaEndTime;
+      TimeUnit originalDuration = data->mDuration;
+      OGG_DEBUG(
+          "Demuxed past media end time, trimming: packet [%s,%s] to [%s,%s]",
+          data->mTime.ToString().get(), data->GetEndTime().ToString().get(),
+          data->mTime.ToString().get(),
+          (data->mTime + originalDuration).ToString().get());
+      data->mOriginalPresentationWindow =
+          Some(TimeInterval{data->mTime, data->GetEndTime()});
+      data->mDuration -= toTrim;
+    }
+  }
+
+  OGG_DEBUG("OGG packet demuxed: [%s,%s] (duration: %s, type: %s)",
+            data->mTime.ToString().get(), data->GetEndTime().ToString().get(),
+            data->mDuration.ToString().get(),
+            mType == TrackInfo::kAudioTrack ? "audio" : "video");

  return data;
 }
--- a/dom/media/platforms/wrappers/AudioTrimmer.cpp
+++ b/dom/media/platforms/wrappers/AudioTrimmer.cpp
@ -110,7 +110,9 @@ RefPtr<MediaDataDecoder::DecodePromise> AudioTrimmer::HandleDecodedResult(
    // the next call to Decode().
    LOGV("No sample returned for sample[%s, %s]", rawStart.ToString().get(),
         rawEnd.ToString().get());
+    return DecodePromise::CreateAndResolve(std::move(results), __func__);
  }
+
  for (uint32_t i = 0; i < results.Length();) {
    const RefPtr<MediaData>& data = results[i];
    MOZ_ASSERT(data->mType == MediaData::Type::AUDIO_DATA);
@ -184,11 +186,6 @@ RefPtr<MediaDataDecoder::DecodePromise> AudioTrimmer::DecodeBatch(
          ->Then(GetCurrentSerialEventTarget(), __func__,
                 [self = RefPtr{this}](
                     DecodePromise::ResolveOrRejectValue&& aValue) {
-                   // If the decoder returned less samples than what we fed it.
-                   // We can assume that this is due to the decoder encoding
-                   // delay and that all decoded frames have been shifted by n =
-                   // compressedSamples.Length() - decodedSamples.Length() and
-                   // that the first n compressed samples returned nothing.
                   return self->HandleDecodedResult(std::move(aValue), nullptr);
                 });
  return p;