Bug 1669503 - Handle mp3 encoder padding when the padding duration spans more than one packet. r=jya

We can't handle this at the decoder level, because the decoder doesn't know that a particular packet it's seeing is the second-to-last packet, and that it should start trimming the end of this packet because the encoder padding spans multiple packets. Differential Revision: https://phabricator.services.mozilla.com/D92645
2020-11-05 14:24:37 +00:00 · 2020-11-05 14:24:37 +00:00 · f8210c4ae7
--- a/dom/media/MediaData.cpp
+++ b/dom/media/MediaData.cpp
@ -13,6 +13,7 @@
 #include "mozilla/layers/ImageBridgeChild.h"
 #include "mozilla/layers/KnowsCompositor.h"
 #include "mozilla/layers/SharedRGBImage.h"
+#include "nsMathUtils.h"

 #include <stdint.h>

@ -73,15 +74,17 @@ bool AudioData::SetTrimWindow(const media::TimeInterval& aTrim) {
    return false;
  }

-  auto trimBefore = TimeUnitToFrames(aTrim.mStart - mOriginalTime, mRate);
+  auto roundToNearestFrame = [&](const TimeUnit& aTime) -> uint64_t {
+    MOZ_ASSERT((TimeUnitToFrames(aTime, mRate) + 1).isValid());
+    return NS_round(aTime.ToSeconds() * mRate);
+  };
+
+  uint64_t trimBefore = roundToNearestFrame(aTrim.mStart - mOriginalTime);
  auto trimAfter = aTrim.mEnd == GetEndTime()
                       ? originalFrames
-                       : TimeUnitToFrames(aTrim.mEnd - mOriginalTime, mRate);
-  if (!trimBefore.isValid() || !trimAfter.isValid()) {
-    // Overflow.
-    return false;
-  }
-  MOZ_DIAGNOSTIC_ASSERT(trimAfter.value() >= trimBefore.value(),
+                       : roundToNearestFrame(aTrim.mEnd - mOriginalTime);
+
+  MOZ_DIAGNOSTIC_ASSERT(trimAfter >= trimBefore,
                        "Something went wrong with trimming value");
  if (!mTrimWindow && trimBefore == 0 && trimAfter == originalFrames) {
    // Nothing to change, abort early to prevent rounding errors.
@ -89,13 +92,13 @@ bool AudioData::SetTrimWindow(const media::TimeInterval& aTrim) {
  }

  mTrimWindow = Some(aTrim);
-  mDataOffset = trimBefore.value() * mChannels;
+  mDataOffset = trimBefore * mChannels;
  MOZ_DIAGNOSTIC_ASSERT(mDataOffset <= mAudioData.Length(),
                        "Data offset outside original buffer");
-  mFrames = (trimAfter - trimBefore).value();
+  mFrames = trimAfter - trimBefore;
  MOZ_DIAGNOSTIC_ASSERT(mFrames <= originalFrames,
                        "More frames than found in container");
-  mTime = mOriginalTime + FramesToTimeUnit(trimBefore.value(), mRate);
+  mTime = mOriginalTime + FramesToTimeUnit(trimBefore, mRate);
  mDuration = FramesToTimeUnit(mFrames, mRate);

  return true;
--- a/dom/media/mp3/MP3Demuxer.cpp
+++ b/dom/media/mp3/MP3Demuxer.cpp
@ -6,11 +6,13 @@

 #include "MP3Demuxer.h"

-#include <algorithm>
 #include <inttypes.h>
+
+#include <algorithm>
 #include <limits>

 #include "ByteWriter.h"
+#include "Intervals.h"
 #include "TimeUnits.h"
 #include "VideoUtils.h"
 #include "mozilla/Assertions.h"
@ -22,6 +24,7 @@ extern mozilla::LazyLogModule gMediaDemuxerLog;
  DDMOZ_LOG(gMediaDemuxerLog, LogLevel::Verbose, msg, ##__VA_ARGS__)

 using mozilla::BufferReader;
+using mozilla::media::Interval;
 using mozilla::media::TimeInterval;
 using mozilla::media::TimeIntervals;
 using mozilla::media::TimeUnit;
@ -631,15 +634,6 @@ already_AddRefed<MediaRawData> MP3TrackDemuxer::GetNextFrame(

  UpdateState(aRange);

-  frame->mTime = Duration(mFrameIndex - 1);
-  frame->mDuration = Duration(1);
-  frame->mTimecode = frame->mTime;
-  frame->mKeyframe = true;
-  frame->mEOS = mEOS;
-
-  MOZ_ASSERT(!frame->mTime.IsNegative());
-  MOZ_ASSERT(frame->mDuration.IsPositive());
-
  if (mNumParsedFrames == 1) {
    // First frame parsed, let's read VBR info if available.
    BufferReader reader(frame->Data(), frame->Size());
@ -656,6 +650,31 @@ already_AddRefed<MediaRawData> MP3TrackDemuxer::GetNextFrame(
    }
  }

+  frame->mTime = Duration(mFrameIndex - 1);
+  frame->mDuration = Duration(1);
+  frame->mTimecode = frame->mTime;
+  frame->mKeyframe = true;
+  frame->mEOS = mEOS;
+
+  auto duration = Duration();
+  if (duration) {
+    auto actualFramesInterval = TimeInterval(
+        FramesToTimeUnit(mEncoderDelay, mSamplesPerSecond),
+        *duration - FramesToTimeUnit(mEncoderPadding, mSamplesPerSecond));
+
+    auto frameInterval = TimeInterval(frame->mTime, frame->GetEndTime());
+    auto realFrameInterval = actualFramesInterval.Intersection(frameInterval);
+    if (realFrameInterval != frameInterval) {
+      frame->mOriginalPresentationWindow = Some(frameInterval);
+      frame->mDuration = realFrameInterval.Length();
+      frame->mTime = realFrameInterval.mStart;
+    }
+  }
+
+  MOZ_ASSERT(!frame->mTime.IsNegative());
+  MOZ_ASSERT(frame->mDuration.IsPositive() ||
+             frame->mDuration.ToSeconds() == 0.);
+
  MP3LOGV("GetNext() End mOffset=%" PRIu64 " mNumParsedFrames=%" PRIu64
          " mFrameIndex=%" PRId64 " mTotalFrameLen=%" PRIu64
          " mSamplesPerFrame=%d mSamplesPerSecond=%d mChannels=%d, mEOS=%s",
--- a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.cpp
@ -22,14 +22,7 @@ FFmpegAudioDecoder<LIBAV_VER>::FFmpegAudioDecoder(FFmpegLibWrapper* aLib,
  if (aConfig.mCodecSpecificConfig && aConfig.mCodecSpecificConfig->Length()) {
    mExtraData = new MediaByteBuffer;
    mExtraData->AppendElements(*aConfig.mCodecSpecificConfig);
-    if (mCodecID == AV_CODEC_ID_MP3) {
-      BufferReader reader(mExtraData->Elements(), mExtraData->Length());
-      mEncoderDelay = reader.ReadU32().unwrapOr(0);
-      mEncoderPadding = reader.ReadU32().unwrapOr(0);
-      FFMPEG_LOG("FFmpegAudioDecoder, found encoder delay (%" PRIu32
-                 ") and padding values (%" PRIu32 ") in extra data",
-                 mEncoderDelay, mEncoderPadding);
-    }
+    BufferReader reader(mExtraData->Elements(), mExtraData->Length());
  }
 }

@ -236,25 +229,6 @@ MediaResult FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
      if (!audio) {
        return MediaResult(NS_ERROR_OUT_OF_MEMORY, __func__);
      }
-
-      bool trimmed = false;
-      if (mEncoderDelay) {
-        trimmed = true;
-        uint32_t toPop = std::min((uint32_t)mFrame->nb_samples, mEncoderDelay);
-        audio.PopFront(toPop * numChannels);
-        mFrame->nb_samples -= toPop;
-        mEncoderDelay -= toPop;
-      }
-
-      if (aSample->mEOS && mEncoderPadding) {
-        trimmed = true;
-        uint32_t toTrim =
-            std::min((uint32_t)mFrame->nb_samples, mEncoderPadding);
-        mEncoderPadding -= toTrim;
-        audio.PopBack(toTrim * numChannels);
-        mFrame->nb_samples = audio.Length() / numChannels;
-      }
-
      media::TimeUnit duration =
          FramesToTimeUnit(mFrame->nb_samples, samplingRate);
      if (!duration.IsValid()) {
@ -272,8 +246,7 @@ MediaResult FFmpegAudioDecoder<LIBAV_VER>::DoDecode(MediaRawData* aSample,
      RefPtr<AudioData> data =
          new AudioData(samplePosition, pts, std::move(audio), numChannels,
                        samplingRate, mCodecContext->channel_layout);
-      MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration || trimmed,
-                            "must be equal");
+      MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
      aResults.AppendElement(std::move(data));

      pts = newpts;
--- a/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.h
+++ b/dom/media/platforms/ffmpeg/FFmpegAudioDecoder.h
@ -38,8 +38,6 @@ class FFmpegAudioDecoder<LIBAV_VER>
 private:
  MediaResult DoDecode(MediaRawData* aSample, uint8_t* aData, int aSize,
                       bool* aGotFrame, DecodedData& aResults) override;
-  uint32_t mEncoderDelay = 0;
-  uint32_t mEncoderPadding = 0;
 };

 }  // namespace mozilla