diff --git a/dom/media/AudioConverter.cpp b/dom/media/AudioConverter.cpp index c9660d3555f9..162e7c730258 100644 --- a/dom/media/AudioConverter.cpp +++ b/dom/media/AudioConverter.cpp @@ -33,19 +33,7 @@ AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut) MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(), "planar audio format not supported"); mIn.Layout().MappingTable(mOut.Layout(), mChannelOrderMap); if (aIn.Rate() != aOut.Rate()) { - int error; - mResampler = speex_resampler_init(aOut.Channels(), - aIn.Rate(), - aOut.Rate(), - SPEEX_RESAMPLER_QUALITY_DEFAULT, - &error); - - if (error == RESAMPLER_ERR_SUCCESS) { - speex_resampler_skip_zeros(mResampler); - } else { - NS_WARNING("Failed to initialize resampler."); - mResampler = nullptr; - } + RecreateResampler(); } } @@ -282,6 +270,46 @@ AudioConverter::ResampleAudio(void* aOut, const void* aIn, size_t aFrames) return outframes; } +void +AudioConverter::RecreateResampler() +{ + if (mResampler) { + speex_resampler_destroy(mResampler); + } + int error; + mResampler = speex_resampler_init(mOut.Channels(), + mIn.Rate(), + mOut.Rate(), + SPEEX_RESAMPLER_QUALITY_DEFAULT, + &error); + + if (error == RESAMPLER_ERR_SUCCESS) { + speex_resampler_skip_zeros(mResampler); + } else { + NS_WARNING("Failed to initialize resampler."); + mResampler = nullptr; + } +} + +size_t +AudioConverter::DrainResampler(void* aOut) +{ + if (!mResampler) { + return 0; + } + int frames = speex_resampler_get_input_latency(mResampler); + AlignedByteBuffer buffer(FramesOutToSamples(frames) * + AudioConfig::SampleSize(mOut.Format())); + if (!buffer) { + // OOM + return 0; + } + frames = ResampleAudio(aOut, buffer.Data(), frames); + // Tear down the resampler as it's easier than handling follow-up. 
+ RecreateResampler(); + return frames; +} + size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const { @@ -327,7 +355,17 @@ AudioConverter::UpmixAudio(void* aOut, const void* aIn, size_t aFrames) const size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const { - return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1; + if (!aFrames && mIn.Rate() != mOut.Rate()) { + if (!mResampler) { + // The resampler failed to initialize: nothing is buffered to drain. + return 0; + } + // The resampler will be drained, account for frames currently buffered + // in the resampler. + return speex_resampler_get_output_latency(mResampler); + } else { + return (uint64_t)aFrames * mOut.Rate() / mIn.Rate() + 1; + } } size_t diff --git a/dom/media/AudioConverter.h b/dom/media/AudioConverter.h index e544eaeaf7ae..637ffcdc2f76 100644 --- a/dom/media/AudioConverter.h +++ b/dom/media/AudioConverter.h @@ -123,6 +123,8 @@ public: // Convert the AudioDataBuffer. // Conversion will be done in place if possible. Otherwise a new buffer will // be returned. + // If an empty buffer is provided and resampling is expected, the resampler + // will be drained. template AudioDataBuffer Process(AudioDataBuffer&& aBuffer) { @@ -152,7 +154,7 @@ public: return AudioDataBuffer(Move(temp1)); } frames = ProcessInternal(temp1.Data(), aBuffer.Data(), frames); - if (!frames || mIn.Rate() == mOut.Rate()) { + if (mIn.Rate() == mOut.Rate()) { temp1.SetLength(FramesOutToSamples(frames)); return AudioDataBuffer(Move(temp1)); } @@ -161,13 +163,17 @@ public: // If we are downsampling we can re-use it. AlignedBuffer* outputBuffer = &temp1; AlignedBuffer temp2; - if (mOut.Rate() > mIn.Rate()) { - // We are upsampling, we can't work in place. Allocate another temporary - // buffer where the upsampling will occur. + if (!frames || mOut.Rate() > mIn.Rate()) { + // We are upsampling or about to drain, we can't work in place. + // Allocate another temporary buffer where the upsampling will occur. 
temp2.SetLength(FramesOutToSamples(ResampleRecipientFrames(frames))); outputBuffer = &temp2; } - frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames); + if (!frames) { + frames = DrainResampler(outputBuffer->Data()); + } else { + frames = ResampleAudio(outputBuffer->Data(), temp1.Data(), frames); + } outputBuffer->SetLength(FramesOutToSamples(frames)); return AudioDataBuffer(Move(*outputBuffer)); } @@ -223,6 +229,8 @@ private: SpeexResamplerState* mResampler; size_t ResampleAudio(void* aOut, const void* aIn, size_t aFrames); size_t ResampleRecipientFrames(size_t aFrames) const; + void RecreateResampler(); + size_t DrainResampler(void* aOut); }; } // namespace mozilla diff --git a/dom/media/mediasink/DecodedAudioDataSink.cpp b/dom/media/mediasink/DecodedAudioDataSink.cpp index bb5b83a8019e..9ad133aed7df 100644 --- a/dom/media/mediasink/DecodedAudioDataSink.cpp +++ b/dom/media/mediasink/DecodedAudioDataSink.cpp @@ -321,6 +321,12 @@ DecodedAudioDataSink::NotifyAudioNeeded() return; } + if (AudioQueue().IsFinished() && !AudioQueue().GetSize()) { + // We have reached the end of the data, drain the resampler. + DrainConverter(); + return; + } + // Always ensure we have two processed frames pending to allow for processing // latency. while (AudioQueue().GetSize() && mProcessedQueue.GetSize() < 2) { @@ -339,6 +345,8 @@ DecodedAudioDataSink::NotifyAudioNeeded() mConverter ? mConverter->InputConfig().Rate() : 0, data->mChannels, data->mRate); + DrainConverter(); + // mFramesParsed indicates the current playtime in frames at the current // input sampling rate. Recalculate it per the new sampling rate. if (mFramesParsed) { @@ -385,10 +393,15 @@ DecodedAudioDataSink::NotifyAudioNeeded() // time. 
missingFrames = std::min(INT32_MAX, missingFrames.value()); mFramesParsed += missingFrames.value(); - AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels); - RefPtr silence = CreateAudioFromBuffer(Move(silenceData), data); - if (silence) { - mProcessedQueue.Push(silence); + // We need to insert silence, first use drained frames if any. + missingFrames -= DrainConverter(missingFrames.value()); + // Insert silence if still needed. + if (missingFrames.value()) { + AlignedAudioBuffer silenceData(missingFrames.value() * mOutputChannels); + RefPtr silence = CreateAudioFromBuffer(Move(silenceData), data); + if (silence) { + mProcessedQueue.Push(silence); + } } } @@ -404,6 +417,7 @@ DecodedAudioDataSink::NotifyAudioNeeded() } } mProcessedQueue.Push(data); + mLastProcessedPacket = Some(data); } } @@ -432,5 +446,36 @@ DecodedAudioDataSink::CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer, return data.forget(); } +uint32_t +DecodedAudioDataSink::DrainConverter(uint32_t aMaxFrames) +{ + MOZ_ASSERT(mProcessingThread->IsCurrentThreadIn()); + + if (!mConverter || !mLastProcessedPacket) { + // nothing to drain. + return 0; + } + + // To drain we simply provide an empty packet to the audio converter. + AlignedAudioBuffer convertedData = + mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget(); + + uint32_t frames = convertedData.Length() / mOutputChannels; + convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels); + + // We assume the start time of the drained data is just before the end of the + // previous packet. Ultimately, the start time doesn't really matter, however + // we do not want to trigger the gap detection in PopFrames. 
+ RefPtr data = CreateAudioFromBuffer(Move(convertedData), + mLastProcessedPacket.ref()); + mLastProcessedPacket.reset(); + + if (!data) { + return 0; + } + mProcessedQueue.Push(data); + return data->mFrames; +} + } // namespace media } // namespace mozilla diff --git a/dom/media/mediasink/DecodedAudioDataSink.h b/dom/media/mediasink/DecodedAudioDataSink.h index aa85a874fae9..e58349e19675 100644 --- a/dom/media/mediasink/DecodedAudioDataSink.h +++ b/dom/media/mediasink/DecodedAudioDataSink.h @@ -115,6 +115,9 @@ private: void OnAudioPopped(const RefPtr& aSample); void OnAudioPushed(const RefPtr& aSample); void NotifyAudioNeeded(); + // Drain the converter and add the output to the processed audio queue. + // A maximum of aMaxFrames will be added. + uint32_t DrainConverter(uint32_t aMaxFrames = UINT32_MAX); already_AddRefed CreateAudioFromBuffer(AlignedAudioBuffer&& aBuffer, AudioData* aReference); UniquePtr mConverter; @@ -125,6 +128,7 @@ private: // the input stream. It indicates the time in frames since playback started // at the current input framerate. int64_t mFramesParsed; + Maybe> mLastProcessedPacket; int64_t mLastEndTime; // Never modifed after construction. uint32_t mOutputRate;