Bug 1667728 - Define sample rate for GetPacketDuration(). r=bryce

AudioTrackEncoder uses GetPacketDuration() to signal upwards that data is available to be encoded. The data to be encoded is sampled at the input rate, whereas GetPacketDuration() returns the packet duration in the output rate. Meanwhile, OpusTrackEncoder uses GetPacketDuration() internally to decide how much data to encode. That happens after resampling, so there the output rate is the correct one. To support both cases, this patch adds NumOutputFramesPerPacket(), modeled on GetOutputSampleRate(), denoting the packet duration in the output rate. GetPacketDuration() is renamed to NumInputFramesPerPacket() and changed to return the packet duration in the input rate. Differential Revision: https://phabricator.services.mozilla.com/D91952
2020-10-08 15:24:34 +00:00 · 2020-10-08 15:24:34 +00:00 · 810d0f98b6
--- a/dom/media/encoder/OpusTrackEncoder.cpp
+++ b/dom/media/encoder/OpusTrackEncoder.cpp
@ -192,11 +192,15 @@ nsresult OpusTrackEncoder::Init(int aChannels) {
  return NS_OK;
 }

-int OpusTrackEncoder::GetOutputSampleRate() {
+int OpusTrackEncoder::GetOutputSampleRate() const {
  return mResampler ? kOpusSamplingRate : mTrackRate;
 }

-int OpusTrackEncoder::GetPacketDuration() {
+int OpusTrackEncoder::NumInputFramesPerPacket() const {
+  return mTrackRate * kFrameDurationMs / 1000;
+}
+
+int OpusTrackEncoder::NumOutputFramesPerPacket() const {
  return GetOutputSampleRate() * kFrameDurationMs / 1000;
 }

@ -257,19 +261,19 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
    // re-sampled frames left last time which didn't fit into an Opus packet
    // duration.
    const int framesLeft = mResampledLeftover.Length() / mChannels;
-    // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
-    // of kOpusSamplingRate. There is not precision loss in the integer division
-    // in computing framesToFetch. If frameLeft > 0, we need to add 1 to
-    // framesToFetch to ensure there will be at least n frames after
+    // When framesLeft is 0, (NumOutputFramesPerPacket() - framesLeft) is a
+    // multiple of kOpusSamplingRate. There is no precision loss in the integer
+    // division in computing framesToFetch. If framesLeft > 0, we need to add 1
+    // to framesToFetch to ensure there will be at least n frames after
    // re-sampling.
    const int frameRoundUp = framesLeft ? 1 : 0;

-    MOZ_ASSERT(GetPacketDuration() >= framesLeft);
+    MOZ_ASSERT(NumOutputFramesPerPacket() >= framesLeft);
    // Try to fetch m frames such that there will be n frames
-    // where (n + frameLeft) >= GetPacketDuration() after re-sampling.
+    // where (n + framesLeft) >= NumOutputFramesPerPacket() after re-sampling.
    const int framesToFetch = !mResampler
-                                  ? GetPacketDuration()
-                                  : (GetPacketDuration() - framesLeft) *
+                                  ? NumOutputFramesPerPacket()
+                                  : (NumOutputFramesPerPacket() - framesLeft) *
                                            mTrackRate / kOpusSamplingRate +
                                        frameRoundUp;

@ -287,7 +291,7 @@ nsresult OpusTrackEncoder::GetEncodedTrack(

    // Start encoding data.
    AutoTArray<AudioDataValue, 9600> pcm;
-    pcm.SetLength(GetPacketDuration() * mChannels);
+    pcm.SetLength(NumOutputFramesPerPacket() * mChannels);

    int frameCopied = 0;

@ -360,7 +364,8 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
              mResampledLeftover.Length());

      uint32_t outframesToCopy = std::min(
-          outframes, static_cast<uint32_t>(GetPacketDuration() - framesLeft));
+          outframes,
+          static_cast<uint32_t>(NumOutputFramesPerPacket() - framesLeft));

      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
                 outframesToCopy * mChannels);
@ -380,8 +385,8 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
    }

    // Remove the raw data which has been pulled to pcm buffer.
-    // The value of frameCopied should equal to (or smaller than, if eos)
-    // GetPacketDuration().
+    // The value of frameCopied should be equal to (or smaller than, if eos)
+    // NumOutputFramesPerPacket().
    mSourceSegment.RemoveLeading(frameCopied);

    // Has reached the end of input stream and all queued data has pulled for
@ -391,13 +396,13 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
      LOG("[Opus] Done encoding.");
    }

-    MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());
+    MOZ_ASSERT(mEosSetInEncoder || framesInPCM == NumOutputFramesPerPacket());

    // Append null data to pcm buffer if the leftover data is not enough for
    // opus encoder.
-    if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
+    if (framesInPCM < NumOutputFramesPerPacket() && mEosSetInEncoder) {
      PodZero(pcm.Elements() + framesInPCM * mChannels,
-              (GetPacketDuration() - framesInPCM) * mChannels);
+              (NumOutputFramesPerPacket() - framesInPCM) * mChannels);
    }
    auto frameData = MakeRefPtr<EncodedFrame::FrameData>();
    // Encode the data with Opus Encoder.
@ -406,11 +411,11 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
    result = 0;
 #ifdef MOZ_SAMPLE_TYPE_S16
    const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
-    result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
+    result = opus_encode(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
                         frameData->Elements(), MAX_DATA_BYTES);
 #else
    const float* pcmBuf = static_cast<float*>(pcm.Elements());
-    result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
+    result = opus_encode_float(mEncoder, pcmBuf, NumOutputFramesPerPacket(),
                               frameData->Elements(), MAX_DATA_BYTES);
 #endif
    frameData->SetLength(result >= 0 ? result : 0);
@ -432,7 +437,7 @@ nsresult OpusTrackEncoder::GetEncodedTrack(
        EncodedFrame::OPUS_AUDIO_FRAME, std::move(frameData)));

    mOutputTimeStamp +=
-        FramesToTimeUnit(GetPacketDuration(), kOpusSamplingRate);
+        FramesToTimeUnit(NumOutputFramesPerPacket(), kOpusSamplingRate);
    LOG("[Opus] mOutputTimeStamp %.3f.", mOutputTimeStamp.ToSeconds());
  }

--- a/dom/media/encoder/OpusTrackEncoder.h
+++ b/dom/media/encoder/OpusTrackEncoder.h
@ -40,15 +40,25 @@ class OpusTrackEncoder : public AudioTrackEncoder {
  media::TimeUnit GetCodecDelay() const { return mCodecDelay; }

 protected:
-  int GetPacketDuration() override;
+  /**
+   * The number of frames, in the input rate mTrackRate, needed to fill an
+   * encoded opus packet. A frame is a sample per channel.
+   */
+  int NumInputFramesPerPacket() const override;

  nsresult Init(int aChannels) override;

+  /**
+   * The number of frames, in the output rate (see GetOutputSampleRate), needed
+   * to fill an encoded opus packet. A frame is a sample per channel.
+   */
+  int NumOutputFramesPerPacket() const;
+
  /**
   * Get the samplerate of the data to be fed to the Opus encoder. This might be
   * different from the input samplerate if resampling occurs.
   */
-  int GetOutputSampleRate();
+  int GetOutputSampleRate() const;

 private:
  /**
--- a/dom/media/encoder/TrackEncoder.cpp
+++ b/dom/media/encoder/TrackEncoder.cpp
@ -142,7 +142,8 @@ void AudioTrackEncoder::AppendAudioSegment(AudioSegment&& aSegment) {
    mOutgoingBuffer.AppendFrom(&aSegment);
  }

-  if (mInitialized && mOutgoingBuffer.GetDuration() >= GetPacketDuration()) {
+  if (mInitialized &&
+      mOutgoingBuffer.GetDuration() >= NumInputFramesPerPacket()) {
    OnDataAvailable();
  }
 }
--- a/dom/media/encoder/TrackEncoder.h
+++ b/dom/media/encoder/TrackEncoder.h
@ -290,7 +290,7 @@ class AudioTrackEncoder : public TrackEncoder {
   * frame size required by audio encoder, and listeners will be notified when
   * at least this much data has been added to mOutgoingBuffer.
   */
-  virtual int GetPacketDuration() { return 0; }
+  virtual int NumInputFramesPerPacket() const { return 0; }

  /**
   * Initializes the audio encoder. The call of this method is delayed until we