Bug 1428392 - Remove AudioOutputObserver, and feed the reverse-stream directly to the AEC. r=pehrsons

MozReview-Commit-ID: EVpH5LUOZ6g

--HG--
extra : rebase_source : d172cd10dffee89ac48b9ec6b6435dcf0c0581d4
Paul Adenot 2018-01-05 18:10:23 +01:00
Parent 0b9f42aad6
Commit e3c5088847
7 changed files with 93 additions and 303 deletions
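For context, the "reverse stream" (far-end audio) is the mixed output being played to the speakers; the AEC needs it so it can subtract the echo of that playback from the microphone signal. The WebRTC.org AudioProcessing module (APM) takes it through ProcessReverseStream(), one 10 ms planar float packet at a time. A minimal sketch of that call, assuming an already-configured webrtc::AudioProcessing instance; the helper name and buffer layout are illustrative and not part of this patch:

#include <cstddef>
#include <vector>

#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Hypothetical helper: hand one 10 ms packet of played-out audio to the APM.
// `aPlanar` holds one deinterleaved float buffer per channel, each
// `aRate / 100` frames long (i.e. 10 ms of audio).
int FeedReverseStream(webrtc::AudioProcessing* aApm,
                      const std::vector<float*>& aPlanar,
                      int aRate, size_t aChannels)
{
  webrtc::StreamConfig config(aRate, aChannels, false /* has_keyboard */);
  // Using the same config and the same pointers for source and destination
  // lets the APM process in place and avoid a copy.
  return aApm->ProcessReverseStream(aPlanar.data(), config, config,
                                    aPlanar.data());
}

The new NotifyOutputData() in this patch does essentially this, after re-chunking the graph's output callback buffers into 10 ms packets.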

View file

@@ -30,9 +30,6 @@
#include "VideoFrameContainer.h"
#include "mozilla/AbstractThread.h"
#include "mozilla/Unused.h"
#ifdef MOZ_WEBRTC
#include "AudioOutputObserver.h"
#endif
#include "mtransport/runnable_utils.h"
#include "VideoUtils.h"
@@ -3569,9 +3566,6 @@ MediaStreamGraphImpl::MediaStreamGraphImpl(GraphDriverType aDriverRequested,
, mLatencyLog(AsyncLatencyLogger::Get())
, mAbstractMainThread(aMainThread)
, mThreadPool(GetMediaThreadPool(MediaThreadType::MSG_CONTROL))
#ifdef MOZ_WEBRTC
, mFarendObserverRef(nullptr)
#endif
, mSelfRef(this)
, mOutputChannels(std::min<uint32_t>(8, CubebUtils::MaxNumberOfChannels()))
#ifdef DEBUG

View file

@@ -32,9 +32,6 @@ class ShutdownTicket;
template <typename T>
class LinkedList;
#ifdef MOZ_WEBRTC
class AudioOutputObserver;
#endif
/**
* A per-stream update message passed from the media graph thread to the
@@ -818,9 +815,6 @@ public:
AudioMixer mMixer;
const RefPtr<AbstractThread> mAbstractMainThread;
RefPtr<SharedThreadPool> mThreadPool;
#ifdef MOZ_WEBRTC
RefPtr<AudioOutputObserver> mFarendObserverRef;
#endif
// used to limit graph shutdown time
// Only accessed on the main thread.

View file

@@ -1,4 +1,3 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
@@ -29,9 +28,6 @@
#include <algorithm>
#include "DOMMediaStream.h"
#include "GeckoProfiler.h"
#ifdef MOZ_WEBRTC
#include "AudioOutputObserver.h"
#endif
using namespace mozilla::layers;
using namespace mozilla::dom;

View file

@@ -1,61 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef AUDIOOUTPUTOBSERVER_H_
#define AUDIOOUTPUTOBSERVER_H_
#include "mozilla/StaticPtr.h"
#include "nsAutoPtr.h"
#include "AudioMixer.h"
#include "MediaData.h"
namespace webrtc {
class SingleRwFifo;
}
namespace mozilla {
typedef struct FarEndAudioChunk_ {
size_t mSamples;
bool mOverrun;
AudioDataValue mData[1]; // variable-length
} FarEndAudioChunk;
// This class is used to packetize and send the mixed audio from an MSG, in
// float, to the AEC module of WebRTC.org.
class AudioOutputObserver
{
public:
AudioOutputObserver();
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioOutputObserver);
void Clear();
void InsertFarEnd(const AudioDataValue *aBuffer, uint32_t aFrames, bool aOverran,
int aFreq, int aChannels);
uint32_t PlayoutFrequency() { return mPlayoutFreq; }
uint32_t PlayoutChannels() { return mPlayoutChannels; }
FarEndAudioChunk *Pop();
uint32_t Size();
private:
virtual ~AudioOutputObserver();
uint32_t mPlayoutFreq;
uint32_t mPlayoutChannels;
nsAutoPtr<webrtc::SingleRwFifo> mPlayoutFifo;
uint32_t mChunkSize;
// chunking to 10ms support
FarEndAudioChunk *mSaved; // can't be nsAutoPtr since we need to use free(), not delete
uint32_t mSamplesSaved;
AlignedAudioBuffer mDownmixBuffer;
};
extern StaticRefPtr<AudioOutputObserver> gFarendObserver;
}
#endif

View file

@@ -59,7 +59,6 @@
#include "webrtc/modules/video_capture/video_capture_defines.h"
#include "NullTransport.h"
#include "AudioOutputObserver.h"
namespace mozilla {
@@ -522,10 +521,10 @@ private:
static int sChannelsOpen;
const UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
const RefPtr<AudioOutputObserver> mAudioOutputObserver;
// accessed from the GraphDriver thread except for deletion
// accessed from the GraphDriver thread except for deletion.
nsAutoPtr<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
nsAutoPtr<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
// mMonitor protects mSources[] and mPrinicpalIds[] access/changes, and
// transitions of mState from kStarted to kStopped (which are combined with
@@ -557,9 +556,12 @@ private:
// To only update microphone when needed, we keep track of previous settings.
MediaEnginePrefs mLastPrefs;
// Stores the mixed audio output for the reverse-stream of the AEC.
AlignedFloatBuffer mOutputBuffer;
AlignedFloatBuffer mInputBuffer;
AlignedFloatBuffer mDeinterleavedBuffer;
AlignedAudioBuffer mInputDownmixBuffer;
AlignedFloatBuffer mInputDownmixBuffer;
};
class MediaEngineWebRTC : public MediaEngine

View file

@@ -1,3 +1,4 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
@@ -53,134 +54,6 @@ NS_IMPL_ISUPPORTS0(MediaEngineWebRTCAudioCaptureSource)
int MediaEngineWebRTCMicrophoneSource::sChannelsOpen = 0;
AudioOutputObserver::AudioOutputObserver()
: mPlayoutFreq(0)
, mPlayoutChannels(0)
, mChunkSize(0)
, mSaved(nullptr)
, mSamplesSaved(0)
, mDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100)
{
// Buffers of 10ms chunks
mPlayoutFifo = new SingleRwFifo(MAX_AEC_FIFO_DEPTH/10);
}
AudioOutputObserver::~AudioOutputObserver()
{
Clear();
free(mSaved);
mSaved = nullptr;
}
void
AudioOutputObserver::Clear()
{
while (mPlayoutFifo->size() > 0) {
free(mPlayoutFifo->Pop());
}
// we'd like to touch mSaved here, but we can't if we might still be getting callbacks
}
FarEndAudioChunk *
AudioOutputObserver::Pop()
{
return (FarEndAudioChunk *) mPlayoutFifo->Pop();
}
uint32_t
AudioOutputObserver::Size()
{
return mPlayoutFifo->size();
}
// static
void
AudioOutputObserver::InsertFarEnd(const AudioDataValue *aBuffer, uint32_t aFrames, bool aOverran,
int aFreq, int aChannels)
{
// Prepare for downmix if needed
int channels = aChannels;
if (aChannels > MAX_CHANNELS) {
channels = MAX_CHANNELS;
}
if (mPlayoutChannels != 0) {
if (mPlayoutChannels != static_cast<uint32_t>(channels)) {
MOZ_CRASH();
}
} else {
MOZ_ASSERT(channels <= MAX_CHANNELS);
mPlayoutChannels = static_cast<uint32_t>(channels);
}
if (mPlayoutFreq != 0) {
if (mPlayoutFreq != static_cast<uint32_t>(aFreq)) {
MOZ_CRASH();
}
} else {
MOZ_ASSERT(aFreq <= MAX_SAMPLING_FREQ);
MOZ_ASSERT(!(aFreq % 100), "Sampling rate for far end data should be multiple of 100.");
mPlayoutFreq = aFreq;
mChunkSize = aFreq/100; // 10ms
}
#ifdef LOG_FAREND_INSERTION
static FILE *fp = fopen("insertfarend.pcm","wb");
#endif
if (mSaved) {
// flag overrun as soon as possible, and only once
mSaved->mOverrun = aOverran;
aOverran = false;
}
// Rechunk to 10ms.
// The AnalyzeReverseStream() and WebRtcAec_BufferFarend() functions insist on 10ms
// samples per call. Annoying...
while (aFrames) {
if (!mSaved) {
mSaved = (FarEndAudioChunk *) moz_xmalloc(sizeof(FarEndAudioChunk) +
(mChunkSize * channels - 1)*sizeof(AudioDataValue));
mSaved->mSamples = mChunkSize;
mSaved->mOverrun = aOverran;
aOverran = false;
}
uint32_t to_copy = mChunkSize - mSamplesSaved;
if (to_copy > aFrames) {
to_copy = aFrames;
}
AudioDataValue* dest = &(mSaved->mData[mSamplesSaved * channels]);
if (aChannels > MAX_CHANNELS) {
AudioConverter converter(AudioConfig(aChannels, 0), AudioConfig(channels, 0));
converter.Process(mDownmixBuffer, aBuffer, to_copy);
ConvertAudioSamples(mDownmixBuffer.Data(), dest, to_copy * channels);
} else {
ConvertAudioSamples(aBuffer, dest, to_copy * channels);
}
#ifdef LOG_FAREND_INSERTION
if (fp) {
fwrite(&(mSaved->mData[mSamplesSaved * aChannels]), to_copy * aChannels, sizeof(AudioDataValue), fp);
}
#endif
aFrames -= to_copy;
mSamplesSaved += to_copy;
aBuffer += to_copy * aChannels;
if (mSamplesSaved >= mChunkSize) {
int free_slots = mPlayoutFifo->capacity() - mPlayoutFifo->size();
if (free_slots <= 0) {
// XXX We should flag an overrun for the reader. We can't drop data from it due to
// thread safety issues.
break;
} else {
mPlayoutFifo->Push((int8_t *) mSaved); // takes ownership
mSaved = nullptr;
mSamplesSaved = 0;
}
}
}
}
MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource(
mozilla::AudioInput* aAudioInput,
int aIndex,
@@ -191,7 +64,6 @@ MediaEngineWebRTCMicrophoneSource::MediaEngineWebRTCMicrophoneSource(
: MediaEngineAudioSource(kReleased)
, mAudioInput(aAudioInput)
, mAudioProcessing(AudioProcessing::Create())
, mAudioOutputObserver(new AudioOutputObserver())
, mMonitor("WebRTCMic.Monitor")
, mCapIndex(aIndex)
, mDelayAgnostic(aDelayAgnostic)
@@ -633,8 +505,6 @@ MediaEngineWebRTCMicrophoneSource::Start(SourceMediaStream *aStream,
// Make sure logger starts before capture
AsyncLatencyLogger::Get(true);
mAudioOutputObserver->Clear();
mAudioInput->StartRecording(aStream, mListener);
return NS_OK;
@@ -697,9 +567,88 @@ MediaEngineWebRTCMicrophoneSource::NotifyOutputData(MediaStreamGraph* aGraph,
TrackRate aRate,
uint32_t aChannels)
{
if (!PassThrough()) {
mAudioOutputObserver->InsertFarEnd(aBuffer, aFrames, false,
aRate, aChannels);
if (!mPacketizerOutput ||
mPacketizerOutput->PacketSize() != aRate/100u ||
mPacketizerOutput->Channels() != aChannels) {
// It's OK to drop the audio still in the packetizer here: if the rate or
// channel count changed, the output device has likely changed as well.
mPacketizerOutput =
new AudioPacketizer<AudioDataValue, float>(aRate/100, aChannels);
}
mPacketizerOutput->Input(aBuffer, aFrames);
while (mPacketizerOutput->PacketsAvailable()) {
uint32_t samplesPerPacket = mPacketizerOutput->PacketSize() *
mPacketizerOutput->Channels();
if (mOutputBuffer.Length() < samplesPerPacket) {
mOutputBuffer.SetLength(samplesPerPacket);
}
if (mDeinterleavedBuffer.Length() < samplesPerPacket) {
mDeinterleavedBuffer.SetLength(samplesPerPacket);
}
float* packet = mOutputBuffer.Data();
mPacketizerOutput->Output(packet);
AutoTArray<float*, MAX_CHANNELS> deinterleavedPacketDataChannelPointers;
float* interleavedFarend = nullptr;
uint32_t channelCountFarend = 0;
uint32_t framesPerPacketFarend = 0;
// Downmix from aChannels to MAX_CHANNELS if needed. We always have floats
// here; the packetizer performed the conversion.
if (aChannels > MAX_CHANNELS) {
AudioConverter converter(AudioConfig(aChannels, 0, AudioConfig::FORMAT_FLT),
AudioConfig(MAX_CHANNELS, 0, AudioConfig::FORMAT_FLT));
framesPerPacketFarend = mPacketizerOutput->PacketSize();
framesPerPacketFarend =
converter.Process(mInputDownmixBuffer,
packet,
framesPerPacketFarend);
interleavedFarend = mInputDownmixBuffer.Data();
channelCountFarend = MAX_CHANNELS;
deinterleavedPacketDataChannelPointers.SetLength(MAX_CHANNELS);
} else {
interleavedFarend = packet;
channelCountFarend = aChannels;
framesPerPacketFarend = mPacketizerOutput->PacketSize();
deinterleavedPacketDataChannelPointers.SetLength(aChannels);
}
MOZ_ASSERT(interleavedFarend &&
(channelCountFarend == 1 || channelCountFarend == 2) &&
framesPerPacketFarend);
if (mInputBuffer.Length() < framesPerPacketFarend * channelCountFarend) {
mInputBuffer.SetLength(framesPerPacketFarend * channelCountFarend);
}
size_t offset = 0;
for (size_t i = 0; i < deinterleavedPacketDataChannelPointers.Length(); ++i) {
deinterleavedPacketDataChannelPointers[i] = mInputBuffer.Data() + offset;
offset += framesPerPacketFarend;
}
// Deinterleave, prepare a channel pointers array, with enough storage for
// the frames.
DeinterleaveAndConvertBuffer(interleavedFarend,
framesPerPacketFarend,
channelCountFarend,
deinterleavedPacketDataChannelPointers.Elements());
// Having the same config for input and output means we potentially save
// some CPU.
StreamConfig inputConfig(aRate, channelCountFarend, false);
StreamConfig outputConfig = inputConfig;
// Passing the same pointers here saves a copy inside this function.
DebugOnly<int> err =
mAudioProcessing->ProcessReverseStream(deinterleavedPacketDataChannelPointers.Elements(),
inputConfig,
outputConfig,
deinterleavedPacketDataChannelPointers.Elements());
MOZ_ASSERT(!err, "Could not process the reverse stream.");
}
}
@@ -722,94 +671,11 @@ MediaEngineWebRTCMicrophoneSource::PacketizeAndProcess(MediaStreamGraph* aGraph,
new AudioPacketizer<AudioDataValue, float>(aRate/100, aChannels);
}
// On initial capture, throw away all far-end data except the most recent sample
// since it's already irrelevant and we want to keep avoid confusing the AEC far-end
// input code with "old" audio.
// On initial capture, throw away all far-end data except the most recent
// sample since it's already irrelevant and we want to avoid confusing the AEC
// far-end input code with "old" audio.
if (!mStarted) {
mStarted = true;
while (mAudioOutputObserver->Size() > 1) {
free(mAudioOutputObserver->Pop()); // only call if size() > 0
}
}
// Feed the far-end audio data (speakers) to the feedback input of the AEC.
while (mAudioOutputObserver->Size() > 0) {
// Bug 1414837: This will call `free()`, and we should remove it.
// Pop gives ownership.
nsAutoPtr<FarEndAudioChunk> buffer(mAudioOutputObserver->Pop()); // only call if size() > 0
if (!buffer) {
continue;
}
AudioDataValue* packetDataPointer = buffer->mData;
AutoTArray<float*, MAX_CHANNELS> deinterleavedPacketDataChannelPointers;
AudioDataValue* interleavedFarend = nullptr;
uint32_t channelCountFarend = 0;
uint32_t framesPerPacketFarend = 0;
// Downmix from aChannels to MAX_CHANNELS if needed
if (mAudioOutputObserver->PlayoutChannels() > MAX_CHANNELS) {
AudioConverter converter(AudioConfig(aChannels, 0, AudioConfig::FORMAT_DEFAULT),
AudioConfig(MAX_CHANNELS, 0, AudioConfig::FORMAT_DEFAULT));
framesPerPacketFarend =
buffer->mSamples;
framesPerPacketFarend =
converter.Process(mInputDownmixBuffer,
packetDataPointer,
framesPerPacketFarend);
interleavedFarend = mInputDownmixBuffer.Data();
channelCountFarend = MAX_CHANNELS;
deinterleavedPacketDataChannelPointers.SetLength(MAX_CHANNELS);
} else {
uint32_t outputChannels = mAudioOutputObserver->PlayoutChannels();
interleavedFarend = packetDataPointer;
channelCountFarend = outputChannels;
framesPerPacketFarend = buffer->mSamples;
deinterleavedPacketDataChannelPointers.SetLength(outputChannels);
}
MOZ_ASSERT(interleavedFarend &&
(channelCountFarend == 1 || channelCountFarend == 2) &&
framesPerPacketFarend);
if (mInputBuffer.Length() < framesPerPacketFarend * channelCountFarend) {
mInputBuffer.SetLength(framesPerPacketFarend * channelCountFarend);
}
offset = 0;
for (size_t i = 0; i < deinterleavedPacketDataChannelPointers.Length(); ++i) {
deinterleavedPacketDataChannelPointers[i] = mInputBuffer.Data() + offset;
offset += framesPerPacketFarend;
}
// Deinterleave, prepare a channel pointers array, with enough storage for
// the frames.
//
// If this is a platform that uses s16 for audio input and output,
// convert to floats, the APM API we use only accepts floats.
DeinterleaveAndConvertBuffer(interleavedFarend,
framesPerPacketFarend,
channelCountFarend,
deinterleavedPacketDataChannelPointers.Elements());
// Having the same config for input and output means we potentially save
// some CPU.
StreamConfig inputConfig(mAudioOutputObserver->PlayoutFrequency(),
channelCountFarend,
false /* we don't use typing detection*/);
StreamConfig outputConfig = inputConfig;
// Passing the same pointers here saves a copy inside this function.
int err =
mAudioProcessing->ProcessReverseStream(deinterleavedPacketDataChannelPointers.Elements(),
inputConfig,
outputConfig,
deinterleavedPacketDataChannelPointers.Elements());
if (err) {
MOZ_LOG(GetMediaManagerLog(), LogLevel::Error,
("error in audio ProcessReverseStream(): %d", err));
return;
}
}
// Packetize our input data into 10ms chunks, deinterleave into planar channel
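The input (near-end) path that this comment introduces relies on the same re-chunking pattern as the new NotifyOutputData() above: buffer whatever frame count the callback delivers, then drain it in exact 10 ms packets. A sketch of that AudioPacketizer loop, with illustrative names; AudioPacketizer<AudioDataValue, float> also converts the samples to float on output when Gecko is built with integer audio:

#include <cstdint>

#include "AudioPacketizer.h"
#include "AudioSampleFormat.h" // AudioDataValue
#include "nsTArray.h"

using namespace mozilla;

// Illustrative only: re-chunk an arbitrarily sized callback buffer into the
// exact 10 ms packets the WebRTC.org APM expects per call.
static void RechunkTo10ms(AudioPacketizer<AudioDataValue, float>& aPacketizer,
                          const AudioDataValue* aBuffer, uint32_t aFrames,
                          nsTArray<float>& aScratch)
{
  aPacketizer.Input(aBuffer, aFrames); // keeps any partial packet buffered
  while (aPacketizer.PacketsAvailable()) {
    uint32_t samplesPerPacket =
      aPacketizer.PacketSize() * aPacketizer.Channels();
    if (aScratch.Length() < samplesPerPacket) {
      aScratch.SetLength(samplesPerPacket);
    }
    // One full 10 ms packet, interleaved, already converted to float.
    aPacketizer.Output(aScratch.Elements());
    // ... deinterleave and hand to ProcessStream() / ProcessReverseStream() ...
  }
}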

View file

@@ -22,8 +22,7 @@ EXPORTS += [
]
if CONFIG['MOZ_WEBRTC']:
EXPORTS += ['AudioOutputObserver.h',
'MediaEngineRemoteVideoSource.h',
EXPORTS += ['MediaEngineRemoteVideoSource.h',
'MediaEngineWebRTC.h']
EXPORTS.mozilla.dom += [ 'RTCIdentityProviderRegistrar.h' ]
UNIFIED_SOURCES += [