/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim:set ts=2 sw=2 sts=2 et cindent: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "AudioSink.h" #include "AudioConverter.h" #include "MediaQueue.h" #include "VideoUtils.h" #include "mozilla/CheckedInt.h" #include "mozilla/DebugOnly.h" #include "mozilla/IntegerPrintfMacros.h" #include "mozilla/StaticPrefs.h" #include "nsPrintfCString.h" namespace mozilla { extern LazyLogModule gMediaDecoderLog; #define SINK_LOG(msg, ...) \ MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \ ("AudioSink=%p " msg, this, ##__VA_ARGS__)) #define SINK_LOG_V(msg, ...) \ MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, \ ("AudioSink=%p " msg, this, ##__VA_ARGS__)) // The amount of audio frames that is used to fuzz rounding errors. static const int64_t AUDIO_FUZZ_FRAMES = 1; // Amount of audio frames we will be processing ahead of use static const int32_t LOW_AUDIO_USECS = 300000; using media::TimeUnit; AudioSink::AudioSink(AbstractThread* aThread, MediaQueue& aAudioQueue, const TimeUnit& aStartTime, const AudioInfo& aInfo) : mStartTime(aStartTime), mInfo(aInfo), mPlaying(true), mMonitor("AudioSink"), mWritten(0), mErrored(false), mPlaybackComplete(false), mOwnerThread(aThread), mProcessedQueueLength(0), mFramesParsed(0), mIsAudioDataAudible(false), mAudioQueue(aAudioQueue) { bool resampling = StaticPrefs::MediaResamplingEnabled(); if (resampling) { mOutputRate = 48000; } else if (mInfo.mRate == 44100 || mInfo.mRate == 48000) { // The original rate is of good quality and we want to minimize unecessary // resampling. The common scenario being that the sampling rate is one or // the other, this allows to minimize audio quality regression and hoping // content provider want change from those rates mid-stream. mOutputRate = mInfo.mRate; } else { // We will resample all data to match cubeb's preferred sampling rate. mOutputRate = AudioStream::GetPreferredRate(); } MOZ_DIAGNOSTIC_ASSERT(mOutputRate, "output rate can't be 0."); mOutputChannels = DecideAudioPlaybackChannels(mInfo); } AudioSink::~AudioSink() {} nsresult AudioSink::Init(const PlaybackParams& aParams, RefPtr& aEndedPromise) { MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); mAudioQueueListener = mAudioQueue.PushEvent().Connect( mOwnerThread, this, &AudioSink::OnAudioPushed); mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect( mOwnerThread, this, &AudioSink::NotifyAudioNeeded); mProcessedQueueListener = mProcessedQueue.PopFrontEvent().Connect( mOwnerThread, this, &AudioSink::OnAudioPopped); // To ensure at least one audio packet will be popped from AudioQueue and // ready to be played. NotifyAudioNeeded(); aEndedPromise = mEndedPromise.Ensure(__func__); nsresult rv = InitializeAudioStream(aParams); if (NS_FAILED(rv)) { mEndedPromise.Reject(rv, __func__); } return rv; } TimeUnit AudioSink::GetPosition() { int64_t tmp; if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) { TimeUnit pos = TimeUnit::FromMicroseconds(tmp); NS_ASSERTION(pos >= mLastGoodPosition, "AudioStream position shouldn't go backward"); TimeUnit tmp = mStartTime + pos; if (!tmp.IsValid()) { mErrored = true; return mStartTime + mLastGoodPosition; } // Update the last good position when we got a good one. if (pos >= mLastGoodPosition) { mLastGoodPosition = pos; } } return mStartTime + mLastGoodPosition; } bool AudioSink::HasUnplayedFrames() { // Experimentation suggests that GetPositionInFrames() is zero-indexed, // so we need to add 1 here before comparing it to mWritten. int64_t total; { MonitorAutoLock mon(mMonitor); total = mWritten + (mCursor.get() ? mCursor->Available() : 0); } return mProcessedQueue.GetSize() || (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < total); } void AudioSink::Shutdown() { MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); mAudioQueueListener.Disconnect(); mAudioQueueFinishListener.Disconnect(); mProcessedQueueListener.Disconnect(); if (mAudioStream) { mAudioStream->Shutdown(); mAudioStream = nullptr; } mProcessedQueue.Reset(); mProcessedQueue.Finish(); mEndedPromise.ResolveIfExists(true, __func__); } void AudioSink::SetVolume(double aVolume) { if (mAudioStream) { mAudioStream->SetVolume(aVolume); } } void AudioSink::SetPlaybackRate(double aPlaybackRate) { MOZ_ASSERT(aPlaybackRate != 0, "Don't set the playbackRate to 0 on AudioStream"); if (mAudioStream) { mAudioStream->SetPlaybackRate(aPlaybackRate); } } void AudioSink::SetPreservesPitch(bool aPreservesPitch) { if (mAudioStream) { mAudioStream->SetPreservesPitch(aPreservesPitch); } } void AudioSink::SetPlaying(bool aPlaying) { if (!mAudioStream || mPlaying == aPlaying || mPlaybackComplete) { return; } // pause/resume AudioStream as necessary. if (!aPlaying) { mAudioStream->Pause(); } else if (aPlaying) { mAudioStream->Resume(); } mPlaying = aPlaying; } nsresult AudioSink::InitializeAudioStream(const PlaybackParams& aParams) { mAudioStream = new AudioStream(*this); // When AudioQueue is empty, there is no way to know the channel layout of // the coming audio data, so we use the predefined channel map instead. AudioConfig::ChannelLayout::ChannelMap channelMap = mConverter ? mConverter->OutputConfig().Layout().Map() : AudioConfig::ChannelLayout(mOutputChannels).Map(); // The layout map used here is already processed by mConverter with // mOutputChannels into SMPTE format, so there is no need to worry if // StaticPrefs::accessibility_monoaudio_enable() or // StaticPrefs::MediaForcestereoEnabled() is applied. nsresult rv = mAudioStream->Init(mOutputChannels, channelMap, mOutputRate, aParams.mSink); if (NS_FAILED(rv)) { mAudioStream->Shutdown(); mAudioStream = nullptr; return rv; } // Set playback params before calling Start() so they can take effect // as soon as the 1st DataCallback of the AudioStream fires. mAudioStream->SetVolume(aParams.mVolume); mAudioStream->SetPlaybackRate(aParams.mPlaybackRate); mAudioStream->SetPreservesPitch(aParams.mPreservesPitch); return mAudioStream->Start(); } TimeUnit AudioSink::GetEndTime() const { int64_t written; { MonitorAutoLock mon(mMonitor); written = mWritten; } TimeUnit played = FramesToTimeUnit(written, mOutputRate) + mStartTime; if (!played.IsValid()) { NS_WARNING("Int overflow calculating audio end time"); return TimeUnit::Zero(); } // As we may be resampling, rounding errors may occur. Ensure we never get // past the original end time. return std::min(mLastEndTime, played); } UniquePtr AudioSink::PopFrames(uint32_t aFrames) { class Chunk : public AudioStream::Chunk { public: Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData) : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {} Chunk() : mFrames(0), mData(nullptr) {} const AudioDataValue* Data() const override { return mData; } uint32_t Frames() const override { return mFrames; } uint32_t Channels() const override { return mBuffer ? mBuffer->mChannels : 0; } uint32_t Rate() const override { return mBuffer ? mBuffer->mRate : 0; } AudioDataValue* GetWritable() const override { return mData; } private: const RefPtr mBuffer; const uint32_t mFrames; AudioDataValue* const mData; }; bool needPopping = false; if (!mCurrentData) { // No data in the queue. Return an empty chunk. if (!mProcessedQueue.GetSize()) { return MakeUnique(); } // We need to update our values prior popping the processed queue in // order to prevent the pop event to fire too early (prior // mProcessedQueueLength being updated) or prevent HasUnplayedFrames // to incorrectly return true during the time interval betweeen the // when mProcessedQueue is read and mWritten is updated. needPopping = true; mCurrentData = mProcessedQueue.PeekFront(); { MonitorAutoLock mon(mMonitor); mCursor = MakeUnique(mCurrentData->Data(), mCurrentData->mChannels, mCurrentData->Frames()); } MOZ_ASSERT(mCurrentData->Frames() > 0); mProcessedQueueLength -= FramesToUsecs(mCurrentData->Frames(), mOutputRate).value(); } auto framesToPop = std::min(aFrames, mCursor->Available()); SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u", mCurrentData->mTime.ToMicroseconds(), mCurrentData->Frames() - mCursor->Available(), framesToPop); UniquePtr chunk = MakeUnique(mCurrentData, framesToPop, mCursor->Ptr()); { MonitorAutoLock mon(mMonitor); mWritten += framesToPop; mCursor->Advance(framesToPop); } // All frames are popped. Reset mCurrentData so we can pop new elements from // the audio queue in next calls to PopFrames(). if (!mCursor->Available()) { mCurrentData = nullptr; } if (needPopping) { // We can now safely pop the audio packet from the processed queue. // This will fire the popped event, triggering a call to NotifyAudioNeeded. RefPtr releaseMe = mProcessedQueue.PopFront(); CheckIsAudible(releaseMe); } return chunk; } bool AudioSink::Ended() const { // Return true when error encountered so AudioStream can start draining. return mProcessedQueue.IsFinished() || mErrored; } void AudioSink::Drained() { SINK_LOG("Drained"); mPlaybackComplete = true; mEndedPromise.ResolveIfExists(true, __func__); } void AudioSink::CheckIsAudible(const AudioData* aData) { MOZ_ASSERT(aData); bool isAudible = aData->IsAudible(); if (isAudible != mIsAudioDataAudible) { mIsAudioDataAudible = isAudible; mAudibleEvent.Notify(mIsAudioDataAudible); } } void AudioSink::OnAudioPopped(const RefPtr& aSample) { SINK_LOG_V("AudioStream has used an audio packet."); NotifyAudioNeeded(); } void AudioSink::OnAudioPushed(const RefPtr& aSample) { SINK_LOG_V("One new audio packet available."); NotifyAudioNeeded(); } void AudioSink::NotifyAudioNeeded() { MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(), "Not called from the owner's thread"); // Always ensure we have two processed frames pending to allow for processing // latency. while (mAudioQueue.GetSize() && (mAudioQueue.IsFinished() || mProcessedQueueLength < LOW_AUDIO_USECS || mProcessedQueue.GetSize() < 2)) { RefPtr data = mAudioQueue.PopFront(); // Ignore the element with 0 frames and try next. if (!data->Frames()) { continue; } if (!mConverter || (data->mRate != mConverter->InputConfig().Rate() || data->mChannels != mConverter->InputConfig().Channels())) { SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz", mConverter ? mConverter->InputConfig().Channels() : 0, mConverter ? mConverter->InputConfig().Rate() : 0, data->mChannels, data->mRate); DrainConverter(); // mFramesParsed indicates the current playtime in frames at the current // input sampling rate. Recalculate it per the new sampling rate. if (mFramesParsed) { // We minimize overflow. uint32_t oldRate = mConverter->InputConfig().Rate(); uint32_t newRate = data->mRate; CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate); if (!result.isValid()) { NS_WARNING("Int overflow in AudioSink"); mErrored = true; return; } mFramesParsed = result.value(); } const AudioConfig::ChannelLayout inputLayout = data->mChannelMap ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap) : AudioConfig::ChannelLayout(data->mChannels); const AudioConfig::ChannelLayout outputLayout = mOutputChannels == data->mChannels ? inputLayout : AudioConfig::ChannelLayout(mOutputChannels); mConverter = MakeUnique( AudioConfig(inputLayout, data->mChannels, data->mRate), AudioConfig(outputLayout, mOutputChannels, mOutputRate)); } // See if there's a gap in the audio. If there is, push silence into the // audio hardware, so we can play across the gap. // Calculate the timestamp of the next chunk of audio in numbers of // samples. CheckedInt64 sampleTime = TimeUnitToFrames(data->mTime - mStartTime, data->mRate); // Calculate the number of frames that have been pushed onto the audio // hardware. CheckedInt64 missingFrames = sampleTime - mFramesParsed; if (!missingFrames.isValid() || !sampleTime.isValid()) { NS_WARNING("Int overflow in AudioSink"); mErrored = true; return; } if (missingFrames.value() > AUDIO_FUZZ_FRAMES) { // The next audio packet begins some time after the end of the last packet // we pushed to the audio hardware. We must push silence into the audio // hardware so that the next audio packet begins playback at the correct // time. missingFrames = std::min(INT32_MAX, missingFrames.value()); mFramesParsed += missingFrames.value(); RefPtr silenceData; AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels); if (!silenceBuffer) { NS_WARNING("OOM in AudioSink"); mErrored = true; return; } if (mConverter->InputConfig() != mConverter->OutputConfig()) { AlignedAudioBuffer convertedData = mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer))) .Forget(); silenceData = CreateAudioFromBuffer(std::move(convertedData), data); } else { silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data); } PushProcessedAudio(silenceData); } mLastEndTime = data->GetEndTime(); mFramesParsed += data->Frames(); if (mConverter->InputConfig() != mConverter->OutputConfig()) { AlignedAudioBuffer buffer(data->MoveableData()); AlignedAudioBuffer convertedData = mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget(); data = CreateAudioFromBuffer(std::move(convertedData), data); } if (PushProcessedAudio(data)) { mLastProcessedPacket = Some(data); } } if (mAudioQueue.IsFinished()) { // We have reached the end of the data, drain the resampler. DrainConverter(); mProcessedQueue.Finish(); } } uint32_t AudioSink::PushProcessedAudio(AudioData* aData) { if (!aData || !aData->Frames()) { return 0; } mProcessedQueue.Push(aData); mProcessedQueueLength += FramesToUsecs(aData->Frames(), mOutputRate).value(); return aData->Frames(); } already_AddRefed AudioSink::CreateAudioFromBuffer( AlignedAudioBuffer&& aBuffer, AudioData* aReference) { uint32_t frames = aBuffer.Length() / mOutputChannels; if (!frames) { return nullptr; } auto duration = FramesToTimeUnit(frames, mOutputRate); if (!duration.IsValid()) { NS_WARNING("Int overflow in AudioSink"); mErrored = true; return nullptr; } RefPtr data = new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer), mOutputChannels, mOutputRate); MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal"); return data.forget(); } uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) { MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); if (!mConverter || !mLastProcessedPacket || !aMaxFrames) { // nothing to drain. return 0; } RefPtr lastPacket = mLastProcessedPacket.ref(); mLastProcessedPacket.reset(); // To drain we simply provide an empty packet to the audio converter. AlignedAudioBuffer convertedData = mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget(); uint32_t frames = convertedData.Length() / mOutputChannels; if (!convertedData.SetLength(std::min(frames, aMaxFrames) * mOutputChannels)) { // This can never happen as we were reducing the length of convertData. mErrored = true; return 0; } RefPtr data = CreateAudioFromBuffer(std::move(convertedData), lastPacket); if (!data) { return 0; } mProcessedQueue.Push(data); return data->Frames(); } void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) { MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn()); aInfo.mAudioSink.mStartTime = mStartTime.ToMicroseconds(); aInfo.mAudioSink.mLastGoodPosition = mLastGoodPosition.ToMicroseconds(); aInfo.mAudioSink.mIsPlaying = mPlaying; aInfo.mAudioSink.mOutputRate = mOutputRate; aInfo.mAudioSink.mWritten = mWritten; aInfo.mAudioSink.mHasErrored = bool(mErrored); aInfo.mAudioSink.mPlaybackComplete = bool(mPlaybackComplete); } } // namespace mozilla