From c906c38e32fe0af8c1ef289c31e96227058093cd Mon Sep 17 00:00:00 2001
From: Paul Adenot
Date: Mon, 24 Mar 2014 11:06:06 +0100
Subject: [PATCH] Bug 982490 - Ensure for MSG cycle that each MediaStream
 write the same number of frames to their AudioStream. r=jesup,roc

---
 content/media/AudioMixer.h                    |  85 +++++++++
 content/media/AudioSampleFormat.h             |  14 +-
 content/media/AudioSegment.cpp                | 110 ++++++------
 content/media/AudioSegment.h                  |   3 +-
 content/media/MediaSegment.h                  |   5 +-
 content/media/MediaStreamGraph.cpp            | 165 +++++++++++++-----
 content/media/MediaStreamGraph.h              |   7 +
 content/media/MediaStreamGraphImpl.h          |  15 +-
 content/media/compiledtest/TestAudioMixer.cpp | 155 ++++++++++++++++
 content/media/compiledtest/moz.build          |  16 ++
 content/media/moz.build                       |   3 +
 .../media/webrtc/MediaEngineWebRTCAudio.cpp   |   2 +
 12 files changed, 475 insertions(+), 105 deletions(-)
 create mode 100644 content/media/AudioMixer.h
 create mode 100644 content/media/compiledtest/TestAudioMixer.cpp
 create mode 100644 content/media/compiledtest/moz.build

diff --git a/content/media/AudioMixer.h b/content/media/AudioMixer.h
new file mode 100644
index 000000000000..0c6e6799b4aa
--- /dev/null
+++ b/content/media/AudioMixer.h
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MOZILLA_AUDIOMIXER_H_
+#define MOZILLA_AUDIOMIXER_H_
+
+#include "AudioSampleFormat.h"
+#include "nsTArray.h"
+#include "mozilla/PodOperations.h"
+
+namespace mozilla {
+typedef void(*MixerFunc)(AudioDataValue* aMixedBuffer,
+                         AudioSampleFormat aFormat,
+                         uint32_t aChannels,
+                         uint32_t aFrames);
+
+/**
+ * This class mixes multiple streams of audio together to output a single
+ * audio stream.
+ *
+ * AudioMixer::Mix is to be called repeatedly with buffers that have the same
+ * length, sample rate, sample format and channel count.
+ *
+ * When all the tracks have been mixed, calling FinishMixing will call back
+ * with a buffer containing the mixed audio data.
+ *
+ * This class is not thread safe.
+ */
+class AudioMixer
+{
+public:
+  AudioMixer(MixerFunc aCallback)
+    : mCallback(aCallback),
+      mFrames(0),
+      mChannels(0)
+  { }
+
+  /* Get the data from the mixer. This is supposed to be called when all the
+   * tracks have been mixed in. The caller should not hold onto the data. */
+  void FinishMixing() {
+    mCallback(mMixedAudio.Elements(),
+              AudioSampleTypeToFormat<AudioDataValue>::Format,
+              mChannels,
+              mFrames);
+    PodZero(mMixedAudio.Elements(), mMixedAudio.Length());
+    mChannels = mFrames = 0;
+  }
+
+  /* Add a buffer to the mix. aSamples is interleaved. */
+  void Mix(AudioDataValue* aSamples, uint32_t aChannels, uint32_t aFrames) {
+    if (!mFrames && !mChannels) {
+      mFrames = aFrames;
+      mChannels = aChannels;
+      EnsureCapacityAndSilence();
+    }
+
+    MOZ_ASSERT(aFrames == mFrames);
+    MOZ_ASSERT(aChannels == mChannels);
+
+    for (uint32_t i = 0; i < aFrames * aChannels; i++) {
+      mMixedAudio[i] += aSamples[i];
+    }
+  }
+private:
+  void EnsureCapacityAndSilence() {
+    if (mFrames * mChannels > mMixedAudio.Length()) {
+      mMixedAudio.SetLength(mFrames * mChannels);
+    }
+    PodZero(mMixedAudio.Elements(), mMixedAudio.Length());
+  }
+
+  /* Function that is called when the mixing is done. */
+  MixerFunc mCallback;
+  /* Number of frames for this mixing block. */
+  uint32_t mFrames;
+  /* Number of channels for this mixing block. */
+  uint32_t mChannels;
+  /* Buffer containing the mixed audio data. */
+  nsTArray<AudioDataValue> mMixedAudio;
+};
+}
+
+#endif // MOZILLA_AUDIOMIXER_H_
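For readers new to this interface, here is a minimal usage sketch of the AudioMixer API added above. The callback and helper names are illustrative only, not part of the patch:

```cpp
// Sketch: mix two stereo buffers through the AudioMixer defined above.
#include "AudioMixer.h"
#include <cstdio>

static void MixingDone(mozilla::AudioDataValue* aMixed,
                       mozilla::AudioSampleFormat aFormat,
                       uint32_t aChannels, uint32_t aFrames)
{
  // The mixed buffer is only valid for the duration of the callback.
  fprintf(stderr, "mixed %u frames x %u channels (format %d)\n",
          aFrames, aChannels, (int)aFormat);
}

static void MixOneBlock(mozilla::AudioDataValue* aFirst,
                        mozilla::AudioDataValue* aSecond, uint32_t aFrames)
{
  mozilla::AudioMixer mixer(MixingDone);
  // Every Mix() call for one block must use the same frame and channel count.
  mixer.Mix(aFirst, 2, aFrames);
  mixer.Mix(aSecond, 2, aFrames);
  mixer.FinishMixing(); // invokes MixingDone with the summed samples
}
```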
diff --git a/content/media/AudioSampleFormat.h b/content/media/AudioSampleFormat.h
index f35c83712335..89a38ff9f550 100644
--- a/content/media/AudioSampleFormat.h
+++ b/content/media/AudioSampleFormat.h
@@ -49,7 +49,19 @@ public:
 
 typedef AudioSampleTraits<AUDIO_OUTPUT_FORMAT>::Type AudioDataValue;
 
-// Single-sample conversion
+template <typename T> class AudioSampleTypeToFormat;
+
+template <> class AudioSampleTypeToFormat<float> {
+public:
+  static const AudioSampleFormat Format = AUDIO_FORMAT_FLOAT32;
+};
+
+template <> class AudioSampleTypeToFormat<short> {
+public:
+  static const AudioSampleFormat Format = AUDIO_FORMAT_S16;
+};
+
+// Single-sample conversion
 /*
  * Use "2^N" conversion since it's simple, fast, "bit transparent", used by
  * many other libraries and apparently behaves reasonably.
diff --git a/content/media/AudioSegment.cpp b/content/media/AudioSegment.cpp
index 68b80765b601..359bc43e379e 100644
--- a/content/media/AudioSegment.cpp
+++ b/content/media/AudioSegment.cpp
@@ -6,6 +6,7 @@
 
 #include "AudioSegment.h"
 #include "AudioStream.h"
+#include "AudioMixer.h"
 #include "AudioChannelFormat.h"
 #include "Latency.h"
 #include "speex/speex_resampler.h"
@@ -134,69 +135,74 @@ void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler)
 }
 
 void
-AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput)
+AudioSegment::WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer)
 {
   uint32_t outputChannels = aOutput->GetChannels();
   nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
   nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
 
+  if (!GetDuration()) {
+    return;
+  }
+
+  uint32_t outBufferLength = GetDuration() * outputChannels;
+  buf.SetLength(outBufferLength);
+
+  // Offset in the buffer that will end up sent to the AudioStream.
+  uint32_t offset = 0;
+
   for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
     AudioChunk& c = *ci;
-    TrackTicks offset = 0;
-    while (offset < c.mDuration) {
-      TrackTicks durationTicks =
-        std::min(c.mDuration - offset, AUDIO_PROCESSING_FRAMES);
-      if (uint64_t(outputChannels)*durationTicks > INT32_MAX || offset > INT32_MAX) {
-        NS_ERROR("Buffer overflow");
-        return;
-      }
+    uint32_t frames = c.mDuration;
 
-      uint32_t duration = uint32_t(durationTicks);
-
-      // If we have written data in the past, or we have real (non-silent) data
-      // to write, we can proceed. Otherwise, it means we just started the
-      // AudioStream, and we don't have real data to write to it (just silence).
-      // To avoid overbuffering in the AudioStream, we simply drop the silence,
-      // here. The stream will underrun and output silence anyways.
-      if (c.mBuffer || aOutput->GetWritten()) {
-        buf.SetLength(outputChannels*duration);
-        if (c.mBuffer) {
-          channelData.SetLength(c.mChannelData.Length());
-          for (uint32_t i = 0; i < channelData.Length(); ++i) {
-            channelData[i] =
-              AddAudioSampleOffset(c.mChannelData[i], c.mBufferFormat, int32_t(offset));
-          }
-
-          if (channelData.Length() < outputChannels) {
-            // Up-mix. Note that this might actually make channelData have more
-            // than outputChannels temporarily.
-            AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
-          }
-
-          if (channelData.Length() > outputChannels) {
-            // Down-mix.
-            DownmixAndInterleave(channelData, c.mBufferFormat, duration,
-                                 c.mVolume, outputChannels, buf.Elements());
-          } else {
-            InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
-                                       duration, c.mVolume,
-                                       outputChannels,
-                                       buf.Elements());
-          }
-        } else {
-          // Assumes that a bit pattern of zeroes == 0.0f
-          memset(buf.Elements(), 0, buf.Length()*sizeof(AudioDataValue));
-        }
-        aOutput->Write(buf.Elements(), int32_t(duration), &(c.mTimeStamp));
-      }
-      if(!c.mTimeStamp.IsNull()) {
-        TimeStamp now = TimeStamp::Now();
-        // would be more efficient to c.mTimeStamp to ms on create time then pass here
-        LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
-                (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
-      }
-      offset += duration;
-    }
+    // If we have written data in the past, or we have real (non-silent) data
+    // to write, we can proceed. Otherwise, it means we just started the
+    // AudioStream, and we don't have real data to write to it (just silence).
+    // To avoid overbuffering in the AudioStream, we simply drop the silence
+    // here. The stream will underrun and output silence anyway.
+    if (c.mBuffer || aOutput->GetWritten()) {
+      if (c.mBuffer) {
+        channelData.SetLength(c.mChannelData.Length());
+        for (uint32_t i = 0; i < channelData.Length(); ++i) {
+          channelData[i] = c.mChannelData[i];
+        }
+
+        if (channelData.Length() < outputChannels) {
+          // Up-mix. Note that this might actually make channelData have more
+          // than outputChannels temporarily.
+          AudioChannelsUpMix(&channelData, outputChannels, gZeroChannel);
+        }
+
+        if (channelData.Length() > outputChannels) {
+          // Down-mix.
+          DownmixAndInterleave(channelData, c.mBufferFormat, frames,
+                               c.mVolume, outputChannels, buf.Elements() + offset);
+        } else {
+          InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
+                                     frames, c.mVolume,
+                                     outputChannels,
+                                     buf.Elements() + offset);
+        }
+      } else {
+        // Assumes that a bit pattern of zeroes == 0.0f
+        memset(buf.Elements() + offset, 0, outputChannels * frames * sizeof(AudioDataValue));
+      }
+    }
+
+    offset += frames * outputChannels;
+
+    if (!c.mTimeStamp.IsNull()) {
+      TimeStamp now = TimeStamp::Now();
+      // It would be more efficient to convert c.mTimeStamp to milliseconds at
+      // creation time and pass that here.
+      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
+              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
+    }
+  }
+
+  aOutput->Write(buf.Elements(), GetDuration(), &(mChunks[mChunks.Length() - 1].mTimeStamp));
+
+  if (aMixer) {
+    aMixer->Mix(buf.Elements(), outputChannels, GetDuration());
+  }
   aOutput->Start();
 }
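The reworked WriteTo() above interleaves every chunk into one buffer at a running offset and then issues a single Write for the whole segment. As a rough model of the interleaving step only (assuming float samples and unit volume; the real code goes through InterleaveAndConvertBuffer and DownmixAndInterleave):

```cpp
// Simplified model of writing one chunk's planar channel data into the
// single interleaved output buffer at the current offset.
#include <cstdint>
#include <vector>

static void InterleaveChunk(const std::vector<const float*>& aChannels,
                            uint32_t aFrames,
                            float* aOutput /* i.e. buf.Elements() + offset */)
{
  for (uint32_t frame = 0; frame < aFrames; ++frame) {
    for (uint32_t channel = 0; channel < aChannels.size(); ++channel) {
      // Interleaved layout: all channels of frame 0, then frame 1, ...
      *aOutput++ = aChannels[channel][frame];
    }
  }
}
```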
diff --git a/content/media/AudioSegment.h b/content/media/AudioSegment.h
index c80a3ae45bc3..d30d0051722f 100644
--- a/content/media/AudioSegment.h
+++ b/content/media/AudioSegment.h
@@ -27,6 +27,7 @@ public:
 };
 
 class AudioStream;
+class AudioMixer;
 
 /**
  * For auto-arrays etc, guess this as the common number of channels.
@@ -215,7 +216,7 @@ public:
     return chunk;
   }
   void ApplyVolume(float aVolume);
-  void WriteTo(uint64_t aID, AudioStream* aOutput);
+  void WriteTo(uint64_t aID, AudioStream* aOutput, AudioMixer* aMixer = nullptr);
 
   int ChannelCount() {
     NS_WARN_IF_FALSE(!mChunks.IsEmpty(),
diff --git a/content/media/MediaSegment.h b/content/media/MediaSegment.h
index bbad4ec1397c..ef3793aa1c21 100644
--- a/content/media/MediaSegment.h
+++ b/content/media/MediaSegment.h
@@ -267,9 +267,8 @@ protected:
   void AppendSliceInternal(const MediaSegmentBase<C, Chunk>& aSource,
                            TrackTicks aStart, TrackTicks aEnd)
   {
-    NS_ASSERTION(aStart <= aEnd, "Endpoints inverted");
-    NS_WARN_IF_FALSE(aStart >= 0 && aEnd <= aSource.mDuration,
-                     "Slice out of range");
+    MOZ_ASSERT(aStart <= aEnd, "Endpoints inverted");
+    MOZ_ASSERT(aStart >= 0 && aEnd <= aSource.mDuration, "Slice out of range");
     mDuration += aEnd - aStart;
     TrackTicks offset = 0;
     for (uint32_t i = 0; i < aSource.mChunks.Length() && offset < aEnd; ++i) {
diff --git a/content/media/MediaStreamGraph.cpp b/content/media/MediaStreamGraph.cpp
index db17f4d7df27..108fd6648631 100644
--- a/content/media/MediaStreamGraph.cpp
+++ b/content/media/MediaStreamGraph.cpp
@@ -577,17 +577,30 @@ MediaStreamGraphImpl::UpdateStreamOrderForStream(mozilla::LinkedList<MediaStream>* aStack,
 void
 MediaStreamGraphImpl::UpdateStreamOrder()
 {
+  bool shouldMix = false;
   for (uint32_t i = 0; i < mOldStreams.Length(); ++i) {
     MediaStream* stream = mOldStreams[i];
     stream->mHasBeenOrdered = false;
     stream->mIsConsumed = false;
     stream->mIsOnOrderingStack = false;
     stream->mInBlockingSet = false;
+    if (stream->AsSourceStream() &&
+        stream->AsSourceStream()->NeedsMixing()) {
+      shouldMix = true;
+    }
     ProcessedMediaStream* ps = stream->AsProcessedStream();
     if (ps) {
       ps->mInCycle = false;
@@ -598,6 +611,12 @@ MediaStreamGraphImpl::UpdateStreamOrder()
     }
   }
 
+  if (!mMixer && shouldMix) {
+    mMixer = new AudioMixer(AudioMixerCallback);
+  } else if (mMixer && !shouldMix) {
+    mMixer = nullptr;
+  }
+
   mozilla::LinkedList<MediaStream> stack;
   for (uint32_t i = 0; i < mOldStreams.Length(); ++i) {
     nsRefPtr<MediaStream>& s = mOldStreams[i];
@@ -810,6 +829,7 @@ MediaStreamGraphImpl::CreateOrDestroyAudioStreams(GraphTime aAudioOutputStartTim
       aStream->mAudioOutputStreams.AppendElement();
     audioOutputStream->mAudioPlaybackStartTime = aAudioOutputStartTime;
     audioOutputStream->mBlockedAudioTime = 0;
+    audioOutputStream->mLastTickWritten = 0;
    audioOutputStream->mStream = new AudioStream();
     // XXX for now, allocate stereo output. But we need to fix this to
     // match the system's ideal channel configuration.
@@ -831,14 +851,22 @@ MediaStreamGraphImpl::CreateOrDestroyAudioStreams(GraphTime aAudioOutputStartTim
   }
 }
 
-void
+TrackTicks
 MediaStreamGraphImpl::PlayAudio(MediaStream* aStream,
                                 GraphTime aFrom, GraphTime aTo)
 {
   MOZ_ASSERT(mRealtime, "Should only attempt to play audio in realtime mode");
 
+  TrackTicks ticksWritten = 0;
+  // We compute the number of needed ticks by converting a difference of
+  // graph time, rather than by subtracting two converted stream times, to
+  // ensure that the rounding between {Graph,Stream}Time and track ticks does
+  // not depend on the absolute value of the {Graph,Stream}Time, and so that
+  // the number of ticks to play is the same for each cycle.
+  TrackTicks ticksNeeded = TimeToTicksRoundDown(IdealAudioRate(), aTo) - TimeToTicksRoundDown(IdealAudioRate(), aFrom);
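To make the rounding argument in the comment above concrete, here is a small standalone program. It assumes the 1 << 20 fixed-point media-time unit used by MediaSegment.h, and the interval and offsets are arbitrary: converting per-stream (offset) times yields 46 ticks for one stream but 45 for another over the same interval, while converting the shared graph times once gives every stream the same count.

```cpp
// Illustration only: per-stream time conversion can round differently per
// stream; the graph-time difference cannot.
#include <cstdint>
#include <cstdio>
#include <initializer_list>

typedef int64_t TrackTicks;

// Same rounding as TimeToTicksRoundDown, assuming 1 second == 1 << 20 units.
static TrackTicks ToTicksRoundDown(int64_t aRate, int64_t aTime)
{
  return (aTime * aRate) >> 20;
}

int main()
{
  const int64_t rate = 48000;
  const int64_t from = 100000, to = 101000; // one graph-time interval
  // Two hypothetical streams whose stream times lag graph time differently.
  for (int64_t streamOffset : { 0, 10 }) {
    TrackTicks ticks = ToTicksRoundDown(rate, to - streamOffset) -
                       ToTicksRoundDown(rate, from - streamOffset);
    printf("stream offset %2lld -> %lld ticks\n",
           (long long)streamOffset, (long long)ticks); // prints 46, then 45
  }
  // Converting the graph times once (as ticksNeeded does above) avoids this.
  return 0;
}
```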
+
   if (aStream->mAudioOutputStreams.IsEmpty()) {
-    return;
+    return 0;
   }
 
   // When we're playing multiple copies of this stream at the same time, they're
@@ -852,6 +880,25 @@ MediaStreamGraphImpl::PlayAudio(MediaStream* aStream,
     MediaStream::AudioOutputStream& audioOutput = aStream->mAudioOutputStreams[i];
     StreamBuffer::Track* track = aStream->mBuffer.FindTrack(audioOutput.mTrackID);
     AudioSegment* audio = track->Get<AudioSegment>();
+    AudioSegment output;
+    MOZ_ASSERT(track->GetRate() == IdealAudioRate());
+
+    // offset and audioOutput.mLastTickWritten can differ by at most one
+    // sample because of the rounding issue described above. We track this to
+    // ensure that we neither skip a sample nor play a sample twice.
+    TrackTicks offset = track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, aFrom));
+    if (!audioOutput.mLastTickWritten) {
+      audioOutput.mLastTickWritten = offset;
+    }
+    if (audioOutput.mLastTickWritten != offset) {
+      // If there is a global underrun of the MSG, this property won't hold,
+      // and we reset the sample count tracking.
+      if (std::abs(audioOutput.mLastTickWritten - offset) != 1) {
+        audioOutput.mLastTickWritten = offset;
+      } else {
+        offset = audioOutput.mLastTickWritten;
+      }
+    }
 
     // We don't update aStream->mBufferStartTime here to account for
     // time spent blocked. Instead, we'll update it in UpdateCurrentTime after the
     // blocked period has completed. But we do need to make sure we play from the
     // right offsets in the stream buffer, even if we've already written silence for
     // some amount of blocked time after the current time.
     GraphTime t = aFrom;
-    while (t < aTo) {
+    while (ticksNeeded) {
       GraphTime end;
       bool blocked = aStream->mBlocked.GetAt(t, &end);
       end = std::min(end, aTo);
 
-      AudioSegment output;
-      if (blocked) {
-        // Track total blocked time in aStream->mBlockedAudioTime so that
-        // the amount of silent samples we've inserted for blocking never gets
-        // more than one sample away from the ideal amount.
-        TrackTicks startTicks =
-          TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
-        audioOutput.mBlockedAudioTime += end - t;
-        TrackTicks endTicks =
-          TimeToTicksRoundDown(IdealAudioRate(), audioOutput.mBlockedAudioTime);
-
-        output.InsertNullDataAtStart(endTicks - startTicks);
-        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing blocking-silence samples for %f to %f",
-                                    aStream, MediaTimeToSeconds(t), MediaTimeToSeconds(end)));
+      // Check how many ticks of sound we can provide if we are blocked some
+      // time in the middle of this cycle.
+      TrackTicks toWrite = 0;
+      if (end >= aTo) {
+        toWrite = ticksNeeded;
       } else {
-        TrackTicks startTicks =
-          track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, t));
-        TrackTicks endTicks =
-          track->TimeToTicksRoundDown(GraphTimeToStreamTime(aStream, end));
-
-        // If startTicks is before the track start, then that part of 'audio'
-        // will just be silence, which is fine here. But if endTicks is after
-        // the track end, then 'audio' won't be long enough, so we'll need
-        // to explicitly play silence.
-        TrackTicks sliceEnd = std::min(endTicks, audio->GetDuration());
-        if (sliceEnd > startTicks) {
-          output.AppendSlice(*audio, startTicks, sliceEnd);
-        }
-        // Play silence where the track has ended
-        output.AppendNullData(endTicks - sliceEnd);
-        NS_ASSERTION(endTicks == sliceEnd || track->IsEnded(),
-                     "Ran out of data but track not ended?");
-        output.ApplyVolume(volume);
-        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing samples for %f to %f (samples %lld to %lld)",
-                                    aStream, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
-                                    startTicks, endTicks));
+        toWrite = TimeToTicksRoundDown(IdealAudioRate(), end - aFrom);
+      }
+
+      if (blocked) {
+        output.InsertNullDataAtStart(toWrite);
+        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing %ld blocking-silence samples for %f to %f (%ld to %ld)\n",
+                                    aStream, toWrite, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
+                                    offset, offset + toWrite));
+        ticksNeeded -= toWrite;
+      } else {
+        TrackTicks endTicksNeeded = offset + toWrite;
+        TrackTicks endTicksAvailable = audio->GetDuration();
+        if (endTicksNeeded <= endTicksAvailable) {
+          output.AppendSlice(*audio, offset, endTicksNeeded);
+        } else {
+          MOZ_ASSERT(track->IsEnded(), "Not enough data, and track not ended.");
+          // If we are at the end of the track, write out the remaining
+          // samples, if any, and pad the rest with silence.
+          if (endTicksNeeded > endTicksAvailable &&
+              offset < endTicksAvailable) {
+            output.AppendSlice(*audio, offset, endTicksAvailable);
+            ticksNeeded -= endTicksAvailable - offset;
+            toWrite -= endTicksAvailable - offset;
+          }
+          output.AppendNullData(toWrite);
+        }
+        output.ApplyVolume(volume);
+        STREAM_LOG(PR_LOG_DEBUG+1, ("MediaStream %p writing %ld samples for %f to %f (samples %ld to %ld)\n",
+                                    aStream, toWrite, MediaTimeToSeconds(t), MediaTimeToSeconds(end),
+                                    offset, endTicksNeeded));
+        ticksNeeded -= toWrite;
       }
-      // Need unique id for stream & track - and we want it to match the inserter
-      output.WriteTo(LATENCY_STREAM_ID(aStream, track->GetID()),
-                     audioOutput.mStream);
       t = end;
+      offset += toWrite;
+      audioOutput.mLastTickWritten += toWrite;
+      ticksWritten += toWrite;
     }
+
+    // Need unique id for stream & track - and we want it to match the inserter
+    output.WriteTo(LATENCY_STREAM_ID(aStream, track->GetID()),
+                   audioOutput.mStream, mMixer);
   }
+  return ticksWritten;
 }
 
 static void
@@ -1241,6 +1293,9 @@ MediaStreamGraphImpl::RunThread()
   bool allBlockedForever = true;
   // True when we've done ProcessInput for all processed streams.
   bool doneAllProducing = false;
+  // This is the number of frames that are written to the AudioStreams for
+  // this cycle.
+  TrackTicks ticksPlayed = 0;
   // Figure out what each stream wants to do
   for (uint32_t i = 0; i < mStreams.Length(); ++i) {
     MediaStream* stream = mStreams[i];
@@ -1277,7 +1332,13 @@ MediaStreamGraphImpl::RunThread()
       if (mRealtime) {
         // Only playback audio and video in real-time mode
         CreateOrDestroyAudioStreams(prevComputedTime, stream);
-        PlayAudio(stream, prevComputedTime, mStateComputedTime);
+        TrackTicks ticksPlayedForThisStream = PlayAudio(stream, prevComputedTime, mStateComputedTime);
+        if (!ticksPlayed) {
+          ticksPlayed = ticksPlayedForThisStream;
+        } else {
+          MOZ_ASSERT(!ticksPlayedForThisStream || ticksPlayedForThisStream == ticksPlayed,
+                     "Each stream should have the same number of frames.");
+        }
         PlayVideo(stream);
       }
       SourceMediaStream* is = stream->AsSourceStream();
@@ -1289,6 +1350,11 @@ MediaStreamGraphImpl::RunThread()
       allBlockedForever = false;
     }
   }
+
+  if (mMixer) {
+    mMixer->FinishMixing();
+  }
+
   if (ensureNextIteration || !allBlockedForever) {
     EnsureNextIteration();
   }
@@ -2317,6 +2383,20 @@ SourceMediaStream::GetBufferedTicks(TrackID aID)
   return 0;
 }
 
+void
+SourceMediaStream::RegisterForAudioMixing()
+{
+  MutexAutoLock lock(mMutex);
+  mNeedsMixing = true;
+}
+
+bool
+SourceMediaStream::NeedsMixing()
+{
+  MutexAutoLock lock(mMutex);
+  return mNeedsMixing;
+}
+
 void
 MediaInputPort::Init()
 {
@@ -2501,6 +2581,7 @@ MediaStreamGraphImpl::MediaStreamGraphImpl(bool aRealtime)
   , mNonRealtimeProcessing(false)
   , mStreamOrderDirty(false)
   , mLatencyLog(AsyncLatencyLogger::Get())
+  , mMixer(nullptr)
 {
 #ifdef PR_LOGGING
   if (!gMediaStreamGraphLog) {
diff --git a/content/media/MediaStreamGraph.h b/content/media/MediaStreamGraph.h
index 0889a9c2fde5..cf2dd23df0d7 100644
--- a/content/media/MediaStreamGraph.h
+++ b/content/media/MediaStreamGraph.h
@@ -18,6 +18,7 @@
 #include "MainThreadUtils.h"
 #include "nsAutoRef.h"
 #include "speex/speex_resampler.h"
+#include "AudioMixer.h"
 
 class nsIRunnable;
 
@@ -572,6 +573,8 @@ protected:
     // Amount of time that we've wanted to play silence because of the stream
     // blocking.
     MediaTime mBlockedAudioTime;
+    // Last tick written to the audio output.
+    TrackTicks mLastTickWritten;
     nsAutoPtr<AudioStream> mStream;
     TrackID mTrackID;
   };
@@ -782,6 +785,9 @@ public:
     bool mHaveEnough;
   };
 
+  void RegisterForAudioMixing();
+  bool NeedsMixing();
+
 protected:
   TrackData* FindDataForTrack(TrackID aID)
   {
@@ -815,6 +821,7 @@ protected:
   bool mPullEnabled;
   bool mUpdateFinished;
   bool mDestroyed;
+  bool mNeedsMixing;
 };
 
 /**
diff --git a/content/media/MediaStreamGraphImpl.h b/content/media/MediaStreamGraphImpl.h
index f6e1ea1db64d..e0beef184d17 100644
--- a/content/media/MediaStreamGraphImpl.h
+++ b/content/media/MediaStreamGraphImpl.h
@@ -13,12 +13,15 @@
 #include "nsIThread.h"
 #include "nsIRunnable.h"
 #include "Latency.h"
+#include "mozilla/WeakPtr.h"
 
 namespace mozilla {
 
 template <typename T> class LinkedList;
 
+class AudioMixer;
+
 /**
  * Assume we can run an iteration of the MediaStreamGraph loop in this much time
  * or less.
@@ -52,10 +55,6 @@ static const int AUDIO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
 static const int VIDEO_TARGET_MS = 2*MEDIA_GRAPH_TARGET_PERIOD_MS +
       SCHEDULE_SAFETY_MARGIN_MS;
 
-/**
- * Rate at which we run the video tracks.
- */
-
 /**
  * A per-stream update message passed from the media graph thread to the
  * main thread.
@@ -327,9 +326,9 @@ public:
                                    MediaStream* aStream);
   /**
    * Queue audio (mix of stream audio and silence for blocked intervals)
-   * to the audio output stream.
+   * to the audio output stream. Returns the number of frames played.
    */
-  void PlayAudio(MediaStream* aStream, GraphTime aFrom, GraphTime aTo);
+  TrackTicks PlayAudio(MediaStream* aStream, GraphTime aFrom, GraphTime aTo);
   /**
    * Set the correct current video frame for stream aStream.
    */
@@ -575,6 +574,10 @@ public:
   /**
    * Hold a ref to the Latency logger
    */
   nsRefPtr<AsyncLatencyLogger> mLatencyLog;
+  /**
+   * If this is not null, all the audio output for the MSG will be mixed down.
+   */
+  nsAutoPtr<AudioMixer> mMixer;
 };
 
 }
diff --git a/content/media/compiledtest/TestAudioMixer.cpp b/content/media/compiledtest/TestAudioMixer.cpp
new file mode 100644
index 000000000000..10f6cb8354b2
--- /dev/null
+++ b/content/media/compiledtest/TestAudioMixer.cpp
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "AudioMixer.h"
+#include <cstdio>
+
+using mozilla::AudioDataValue;
+using mozilla::AudioSampleFormat;
+
+/* In this test, the different audio streams and channels are always created
+ * so that they cancel each other out. */
+void MixingDone(AudioDataValue* aData, AudioSampleFormat aFormat, uint32_t aChannels, uint32_t aFrames)
+{
+  bool silent = true;
+  for (uint32_t i = 0; i < aChannels * aFrames; i++) {
+    if (aData[i] != 0.0) {
+      if (aFormat == mozilla::AUDIO_FORMAT_S16) {
+        fprintf(stderr, "Sample at %d is not silent: %d\n", i, (short)aData[i]);
+      } else {
+        fprintf(stderr, "Sample at %d is not silent: %f\n", i, (float)aData[i]);
+      }
+      silent = false;
+    }
+  }
+  if (!silent) {
+    MOZ_CRASH();
+  }
+}
+
+/* Helper functions to give us the maximum and minimum values that don't
+ * clip, for a given sample format (integer or floating-point).
+ */
+template <typename T>
+T GetLowValue();
+
+template <typename T>
+T GetHighValue();
+
+template<>
+float GetLowValue<float>() {
+  return -1.0;
+}
+
+template<>
+short GetLowValue<short>() {
+  return -INT16_MAX;
+}
+
+template<>
+float GetHighValue<float>() {
+  return 1.0;
+}
+
+template<>
+short GetHighValue<short>() {
+  return INT16_MAX;
+}
+
+void FillBuffer(AudioDataValue* aBuffer, uint32_t aLength, AudioDataValue aValue)
+{
+  AudioDataValue* end = aBuffer + aLength;
+  while (aBuffer != end) {
+    *aBuffer++ = aValue;
+  }
+}
+
+int main(int argc, char* argv[]) {
+  const uint32_t CHANNEL_LENGTH = 256;
+  AudioDataValue a[CHANNEL_LENGTH * 2];
+  AudioDataValue b[CHANNEL_LENGTH * 2];
+  FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+  FillBuffer(a + CHANNEL_LENGTH, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+  FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+  FillBuffer(b + CHANNEL_LENGTH, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+
+  {
+    int iterations = 2;
+    mozilla::AudioMixer mixer(MixingDone);
+
+    fprintf(stderr, "Test AudioMixer constant buffer length.\n");
+
+    while (iterations--) {
+      mixer.Mix(a, 2, CHANNEL_LENGTH);
+      mixer.Mix(b, 2, CHANNEL_LENGTH);
+      mixer.FinishMixing();
+    }
+  }
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+
+    fprintf(stderr, "Test AudioMixer variable buffer length.\n");
+
+    FillBuffer(a, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH / 2);
+    mixer.Mix(b, 2, CHANNEL_LENGTH / 2);
+    mixer.FinishMixing();
+    FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    FillBuffer(a, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(a + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetLowValue<AudioDataValue>());
+    FillBuffer(b, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    FillBuffer(b + CHANNEL_LENGTH / 2, CHANNEL_LENGTH / 2, GetHighValue<AudioDataValue>());
+    mixer.Mix(a, 2, CHANNEL_LENGTH / 2);
+    mixer.Mix(b, 2, CHANNEL_LENGTH / 2);
+    mixer.FinishMixing();
+  }
+
+  FillBuffer(a, CHANNEL_LENGTH, GetLowValue<AudioDataValue>());
+  FillBuffer(b, CHANNEL_LENGTH, GetHighValue<AudioDataValue>());
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+    fprintf(stderr, "Test AudioMixer variable channel count.\n");
+
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 1, CHANNEL_LENGTH);
+    mixer.Mix(b, 1, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+  }
+
+  {
+    mozilla::AudioMixer mixer(MixingDone);
+    fprintf(stderr, "Test AudioMixer variable stream count.\n");
+
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+    mixer.Mix(a, 2, CHANNEL_LENGTH);
+    mixer.Mix(b, 2, CHANNEL_LENGTH);
+    mixer.FinishMixing();
+  }
+
+  return 0;
+}
diff --git a/content/media/compiledtest/moz.build b/content/media/compiledtest/moz.build
new file mode 100644
index 000000000000..8a1cffa507b9
--- /dev/null
+++ b/content/media/compiledtest/moz.build
@@ -0,0 +1,16 @@
+# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
+# vim: set filetype=python:
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+CPP_UNIT_TESTS += [
+    'TestAudioMixer.cpp',
+]
+
+FAIL_ON_WARNINGS = True
+
+LOCAL_INCLUDES += [
+    '..',
+]
+
diff --git a/content/media/moz.build b/content/media/moz.build
index 868ab143c0ae..f48fce4940e0 100644
--- a/content/media/moz.build
+++ b/content/media/moz.build
@@ -12,6 +12,8 @@ PARALLEL_DIRS += [
   'webvtt'
 ]
 
+TEST_TOOL_DIRS += ['compiledtest']
+
 if CONFIG['MOZ_RAW']:
     PARALLEL_DIRS += ['raw']
 
@@ -57,6 +59,7 @@ EXPORTS += [
     'AudioChannelFormat.h',
     'AudioCompactor.h',
     'AudioEventTimeline.h',
+    'AudioMixer.h',
     'AudioNodeEngine.h',
     'AudioNodeExternalInputStream.h',
    'AudioNodeStream.h',
diff --git a/content/media/webrtc/MediaEngineWebRTCAudio.cpp b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
index 9b69d22985aa..09eb4e207d5e 100644
--- a/content/media/webrtc/MediaEngineWebRTCAudio.cpp
+++ b/content/media/webrtc/MediaEngineWebRTCAudio.cpp
@@ -158,6 +158,8 @@ MediaEngineWebRTCAudioSource::Start(SourceMediaStream* aStream, TrackID aID)
   AudioSegment* segment = new AudioSegment();
   aStream->AddTrack(aID, SAMPLE_FREQUENCY, 0, segment);
   aStream->AdvanceKnownTracksTime(STREAM_TIME_MAX);
+  // XXX Make this based on the pref.
+  aStream->RegisterForAudioMixing();
  LOG(("Start audio for stream %p", aStream));
 
   if (mState == kStarted) {
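One thing this excerpt does not show is the definition of the AudioMixerCallback passed to the AudioMixer constructor in UpdateStreamOrder(). Purely as a hypothetical sketch, any function matching the MixerFunc typedef from AudioMixer.h would fit:

```cpp
// Hypothetical example only: the real AudioMixerCallback added by this bug
// is not part of the excerpt above.
#include "AudioMixer.h"
#include <cstdio>

static void AudioMixerCallback(mozilla::AudioDataValue* aMixedBuffer,
                               mozilla::AudioSampleFormat aFormat,
                               uint32_t aChannels,
                               uint32_t aFrames)
{
  // A real callback would hand the mixed block to an output or logging
  // sink; this one only traces the block's geometry.
  fprintf(stderr, "mixed block: %u channels, %u frames, format %d\n",
          aChannels, aFrames, (int)aFormat);
}
```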