gecko-dev/dom/media/webrtc/MediaEngineWebRTC.h

695 строки
21 KiB
C++

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MEDIAENGINEWEBRTC_H_
#define MEDIAENGINEWEBRTC_H_
#include "prcvar.h"
#include "prthread.h"
#include "nsIThread.h"
#include "nsIRunnable.h"
#include "mozilla/dom/File.h"
#include "mozilla/Mutex.h"
#include "mozilla/StaticMutex.h"
#include "mozilla/Monitor.h"
#include "mozilla/Sprintf.h"
#include "mozilla/UniquePtr.h"
#include "nsAutoPtr.h"
#include "nsCOMPtr.h"
#include "nsThreadUtils.h"
#include "DOMMediaStream.h"
#include "nsDirectoryServiceDefs.h"
#include "nsComponentManagerUtils.h"
#include "nsRefPtrHashtable.h"
#include "ipc/IPCMessageUtils.h"
#include "VideoUtils.h"
#include "MediaEngineCameraVideoSource.h"
#include "VideoSegment.h"
#include "AudioSegment.h"
#include "StreamTracks.h"
#include "MediaStreamGraph.h"
#include "cubeb/cubeb.h"
#include "CubebUtils.h"
#include "AudioPacketizer.h"
#include "MediaEngineWrapper.h"
#include "mozilla/dom/MediaStreamTrackBinding.h"
#include "CamerasChild.h"
// WebRTC library includes follow
// Audio Engine
#include "webrtc/voice_engine/include/voe_base.h"
#include "webrtc/voice_engine/include/voe_codec.h"
#include "webrtc/voice_engine/include/voe_network.h"
#include "webrtc/voice_engine/include/voe_audio_processing.h"
#include "webrtc/voice_engine/include/voe_volume_control.h"
#include "webrtc/voice_engine/include/voe_external_media.h"
#include "webrtc/voice_engine/include/voe_audio_processing.h"
#include "webrtc/modules/audio_device/include/audio_device.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
// Video Engine
// conflicts with #include of scoped_ptr.h
#undef FF
// WebRTC imports
#include "webrtc/modules/video_capture/video_capture_defines.h"
#include "NullTransport.h"
#include "AudioOutputObserver.h"
namespace mozilla {
class MediaEngineWebRTCAudioCaptureSource : public MediaEngineAudioSource
{
public:
NS_DECL_THREADSAFE_ISUPPORTS
explicit MediaEngineWebRTCAudioCaptureSource(const char* aUuid)
: MediaEngineAudioSource(kReleased)
{
}
void GetName(nsAString& aName) const override;
void GetUUID(nsACString& aUUID) const override;
nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
const MediaEnginePrefs& aPrefs,
const nsString& aDeviceId,
const mozilla::ipc::PrincipalInfo& aPrincipalInfo,
AllocationHandle** aOutHandle,
const char** aOutBadConstraint) override
{
// Nothing to do here, everything is managed in MediaManager.cpp
*aOutHandle = nullptr;
return NS_OK;
}
nsresult Deallocate(AllocationHandle* aHandle) override
{
// Nothing to do here, everything is managed in MediaManager.cpp
MOZ_ASSERT(!aHandle);
return NS_OK;
}
nsresult Start(SourceMediaStream* aMediaStream,
TrackID aId,
const PrincipalHandle& aPrincipalHandle) override;
nsresult Stop(SourceMediaStream* aMediaStream, TrackID aId) override;
nsresult Restart(AllocationHandle* aHandle,
const dom::MediaTrackConstraints& aConstraints,
const MediaEnginePrefs &aPrefs,
const nsString& aDeviceId,
const char** aOutBadConstraint) override;
void NotifyOutputData(MediaStreamGraph* aGraph,
AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override
{}
void DeviceChanged() override
{}
void NotifyInputData(MediaStreamGraph* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override
{}
void NotifyPull(MediaStreamGraph* aGraph,
SourceMediaStream* aSource,
TrackID aID,
StreamTime aDesiredTime,
const PrincipalHandle& aPrincipalHandle) override
{}
dom::MediaSourceEnum GetMediaSource() const override
{
return dom::MediaSourceEnum::AudioCapture;
}
bool IsFake() override
{
return false;
}
nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override
{
return NS_ERROR_NOT_IMPLEMENTED;
}
uint32_t GetBestFitnessDistance(
const nsTArray<const NormalizedConstraintSet*>& aConstraintSets,
const nsString& aDeviceId) const override;
protected:
virtual ~MediaEngineWebRTCAudioCaptureSource() {}
nsCString mUUID;
};
// Small subset of VoEHardware
class AudioInput
{
public:
explicit AudioInput(webrtc::VoiceEngine* aVoiceEngine) : mVoiceEngine(aVoiceEngine) {};
// Threadsafe because it's referenced from an MicrophoneSource, which can
// had references to it on other threads.
NS_INLINE_DECL_THREADSAFE_REFCOUNTING(AudioInput)
virtual int GetNumOfRecordingDevices(int& aDevices) = 0;
virtual int GetRecordingDeviceName(int aIndex, char (&aStrNameUTF8)[128],
char aStrGuidUTF8[128]) = 0;
virtual int GetRecordingDeviceStatus(bool& aIsAvailable) = 0;
virtual void GetChannelCount(uint32_t& aChannels) = 0;
virtual int GetMaxAvailableChannels(uint32_t& aChannels) = 0;
virtual void StartRecording(SourceMediaStream *aStream, AudioDataListener *aListener) = 0;
virtual void StopRecording(SourceMediaStream *aStream) = 0;
virtual int SetRecordingDevice(int aIndex) = 0;
virtual void SetUserChannelCount(uint32_t aChannels) = 0;
protected:
// Protected destructor, to discourage deletion outside of Release():
virtual ~AudioInput() {}
webrtc::VoiceEngine* mVoiceEngine;
};
class AudioInputCubeb final : public AudioInput
{
public:
explicit AudioInputCubeb(webrtc::VoiceEngine* aVoiceEngine, int aIndex = 0) :
AudioInput(aVoiceEngine), mSelectedDevice(aIndex), mInUseCount(0)
{
if (!mDeviceIndexes) {
mDeviceIndexes = new nsTArray<int>;
mDeviceNames = new nsTArray<nsCString>;
mDefaultDevice = -1;
}
}
static void CleanupGlobalData()
{
cubeb_device_collection_destroy(CubebUtils::GetCubebContext(), &mDevices);
delete mDeviceIndexes;
mDeviceIndexes = nullptr;
delete mDeviceNames;
mDeviceNames = nullptr;
}
int GetNumOfRecordingDevices(int& aDevices)
{
#ifdef MOZ_WIDGET_ANDROID
// OpenSL ES does not support enumerate device.
aDevices = 1;
#else
UpdateDeviceList();
aDevices = mDeviceIndexes->Length();
#endif
return 0;
}
static int32_t DeviceIndex(int aIndex)
{
// -1 = system default if any
if (aIndex == -1) {
if (mDefaultDevice == -1) {
aIndex = 0;
} else {
aIndex = mDefaultDevice;
}
}
MOZ_ASSERT(mDeviceIndexes);
if (aIndex < 0 || aIndex >= (int) mDeviceIndexes->Length()) {
return -1;
}
// Note: if the device is gone, this will be -1
return (*mDeviceIndexes)[aIndex]; // translate to mDevices index
}
static StaticMutex& Mutex()
{
return sMutex;
}
static bool GetDeviceID(int aDeviceIndex, CubebUtils::AudioDeviceID &aID)
{
// Assert sMutex is held
sMutex.AssertCurrentThreadOwns();
#ifdef MOZ_WIDGET_ANDROID
aID = nullptr;
return true;
#else
int dev_index = DeviceIndex(aDeviceIndex);
if (dev_index != -1) {
aID = mDevices.device[dev_index].devid;
return true;
}
return false;
#endif
}
int GetRecordingDeviceName(int aIndex, char (&aStrNameUTF8)[128],
char aStrGuidUTF8[128])
{
#ifdef MOZ_WIDGET_ANDROID
aStrNameUTF8[0] = '\0';
aStrGuidUTF8[0] = '\0';
#else
int32_t devindex = DeviceIndex(aIndex);
if (mDevices.count == 0 || devindex < 0) {
return 1;
}
SprintfLiteral(aStrNameUTF8, "%s%s", aIndex == -1 ? "default: " : "",
mDevices.device[devindex].friendly_name);
aStrGuidUTF8[0] = '\0';
#endif
return 0;
}
int GetRecordingDeviceStatus(bool& aIsAvailable)
{
// With cubeb, we only expose devices of type CUBEB_DEVICE_TYPE_INPUT,
// so unless it was removed, say it's available
aIsAvailable = true;
return 0;
}
void GetChannelCount(uint32_t& aChannels)
{
GetUserChannelCount(mSelectedDevice, aChannels);
}
static void GetUserChannelCount(int aDeviceIndex, uint32_t& aChannels)
{
aChannels = sUserChannelCount;
}
int GetMaxAvailableChannels(uint32_t& aChannels)
{
return GetDeviceMaxChannels(mSelectedDevice, aChannels);
}
static int GetDeviceMaxChannels(int aDeviceIndex, uint32_t& aChannels)
{
#ifdef MOZ_WIDGET_ANDROID
aChannels = 1;
#else
int32_t devindex = DeviceIndex(aDeviceIndex);
if (mDevices.count == 0 || devindex < 0) {
return 1;
}
aChannels = mDevices.device[devindex].max_channels;
#endif
return 0;
}
void SetUserChannelCount(uint32_t aChannels)
{
if (GetDeviceMaxChannels(mSelectedDevice, sUserChannelCount)) {
sUserChannelCount = 1; // error capture mono
return;
}
if (aChannels && aChannels < sUserChannelCount) {
sUserChannelCount = aChannels;
}
}
void StartRecording(SourceMediaStream *aStream, AudioDataListener *aListener)
{
#ifdef MOZ_WIDGET_ANDROID
// OpenSL ES does not support enumerating devices.
MOZ_ASSERT(mDevices.count == 0);
#else
MOZ_ASSERT(mDevices.count > 0);
#endif
if (mInUseCount == 0) {
ScopedCustomReleasePtr<webrtc::VoEExternalMedia> ptrVoEXMedia;
ptrVoEXMedia = webrtc::VoEExternalMedia::GetInterface(mVoiceEngine);
if (ptrVoEXMedia) {
ptrVoEXMedia->SetExternalRecordingStatus(true);
}
mAnyInUse = true;
}
mInUseCount++;
// Always tell the stream we're using it for input
aStream->OpenAudioInput(mSelectedDevice, aListener);
}
void StopRecording(SourceMediaStream *aStream)
{
aStream->CloseAudioInput();
if (--mInUseCount == 0) {
mAnyInUse = false;
}
}
int SetRecordingDevice(int aIndex)
{
mSelectedDevice = aIndex;
return 0;
}
protected:
~AudioInputCubeb() {
MOZ_RELEASE_ASSERT(mInUseCount == 0);
}
private:
// It would be better to watch for device-change notifications
void UpdateDeviceList();
// We have an array, which consists of indexes to the current mDevices
// list. This is updated on mDevices updates. Many devices in mDevices
// won't be included in the array (wrong type, etc), or if a device is
// removed it will map to -1 (and opens of this device will need to check
// for this - and be careful of threading access. The mappings need to
// updated on each re-enumeration.
int mSelectedDevice;
uint32_t mInUseCount;
// pointers to avoid static constructors
static nsTArray<int>* mDeviceIndexes;
static int mDefaultDevice; // -1 == not set
static nsTArray<nsCString>* mDeviceNames;
static cubeb_device_collection mDevices;
static bool mAnyInUse;
static StaticMutex sMutex;
static uint32_t sUserChannelCount;
};
class AudioInputWebRTC final : public AudioInput
{
public:
explicit AudioInputWebRTC(webrtc::VoiceEngine* aVoiceEngine) : AudioInput(aVoiceEngine) {}
int GetNumOfRecordingDevices(int& aDevices)
{
ScopedCustomReleasePtr<webrtc::VoEBase> ptrVoEBase;
ptrVoEBase = webrtc::VoEBase::GetInterface(mVoiceEngine);
if (!ptrVoEBase) {
return 1;
}
aDevices = ptrVoEBase->audio_device_module()->RecordingDevices();
return 0;
}
int GetRecordingDeviceName(int aIndex, char (&aStrNameUTF8)[128],
char aStrGuidUTF8[128])
{
ScopedCustomReleasePtr<webrtc::VoEBase> ptrVoEBase;
ptrVoEBase = webrtc::VoEBase::GetInterface(mVoiceEngine);
if (!ptrVoEBase) {
return 1;
}
return ptrVoEBase->audio_device_module()->RecordingDeviceName(aIndex,
aStrNameUTF8,
aStrGuidUTF8);
}
int GetRecordingDeviceStatus(bool& aIsAvailable)
{
ScopedCustomReleasePtr<webrtc::VoEBase> ptrVoEBase;
ptrVoEBase = webrtc::VoEBase::GetInterface(mVoiceEngine);
if (!ptrVoEBase) {
return 1;
}
return ptrVoEBase->audio_device_module()->RecordingIsAvailable(&aIsAvailable);
}
void GetChannelCount(uint32_t& aChannels)
{
aChannels = 1; // default to mono
}
int GetMaxAvailableChannels(uint32_t& aChannels)
{
aChannels = 1;
return 0;
}
void SetUserChannelCount(uint32_t aChannels)
{}
void StartRecording(SourceMediaStream *aStream, AudioDataListener *aListener) {}
void StopRecording(SourceMediaStream *aStream) {}
int SetRecordingDevice(int aIndex)
{
ScopedCustomReleasePtr<webrtc::VoEBase> ptrVoEBase;
ptrVoEBase = webrtc::VoEBase::GetInterface(mVoiceEngine);
if (!ptrVoEBase) {
return 1;
}
return ptrVoEBase->audio_device_module()->SetRecordingDevice(aIndex);
}
protected:
// Protected destructor, to discourage deletion outside of Release():
~AudioInputWebRTC() {}
};
class WebRTCAudioDataListener : public AudioDataListener
{
protected:
// Protected destructor, to discourage deletion outside of Release():
virtual ~WebRTCAudioDataListener() {}
public:
explicit WebRTCAudioDataListener(MediaEngineAudioSource* aAudioSource)
: mMutex("WebRTCAudioDataListener")
, mAudioSource(aAudioSource)
{}
// AudioDataListenerInterface methods
virtual void NotifyOutputData(MediaStreamGraph* aGraph,
AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override
{
MutexAutoLock lock(mMutex);
if (mAudioSource) {
mAudioSource->NotifyOutputData(aGraph, aBuffer, aFrames, aRate, aChannels);
}
}
virtual void NotifyInputData(MediaStreamGraph* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override
{
MutexAutoLock lock(mMutex);
if (mAudioSource) {
mAudioSource->NotifyInputData(aGraph, aBuffer, aFrames, aRate, aChannels);
}
}
virtual void DeviceChanged() override
{
MutexAutoLock lock(mMutex);
if (mAudioSource) {
mAudioSource->DeviceChanged();
}
}
void Shutdown()
{
MutexAutoLock lock(mMutex);
mAudioSource = nullptr;
}
private:
Mutex mMutex;
RefPtr<MediaEngineAudioSource> mAudioSource;
};
class MediaEngineWebRTCMicrophoneSource : public MediaEngineAudioSource,
public webrtc::VoEMediaProcess
{
typedef MediaEngineAudioSource Super;
public:
MediaEngineWebRTCMicrophoneSource(webrtc::VoiceEngine* aVoiceEnginePtr,
mozilla::AudioInput* aAudioInput,
int aIndex,
const char* name,
const char* uuid,
bool aDelayAgnostic,
bool aExtendedFilter);
void GetName(nsAString& aName) const override;
void GetUUID(nsACString& aUUID) const override;
nsresult Deallocate(AllocationHandle* aHandle) override;
nsresult Start(SourceMediaStream* aStream,
TrackID aID,
const PrincipalHandle& aPrincipalHandle) override;
nsresult Stop(SourceMediaStream* aSource, TrackID aID) override;
nsresult Restart(AllocationHandle* aHandle,
const dom::MediaTrackConstraints& aConstraints,
const MediaEnginePrefs &aPrefs,
const nsString& aDeviceId,
const char** aOutBadConstraint) override;
void NotifyPull(MediaStreamGraph* aGraph,
SourceMediaStream* aSource,
TrackID aId,
StreamTime aDesiredTime,
const PrincipalHandle& aPrincipalHandle) override;
// AudioDataListenerInterface methods
void NotifyOutputData(MediaStreamGraph* aGraph,
AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override;
void NotifyInputData(MediaStreamGraph* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels) override;
void DeviceChanged() override;
bool IsFake() override {
return false;
}
dom::MediaSourceEnum GetMediaSource() const override {
return dom::MediaSourceEnum::Microphone;
}
nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override
{
return NS_ERROR_NOT_IMPLEMENTED;
}
uint32_t GetBestFitnessDistance(
const nsTArray<const NormalizedConstraintSet*>& aConstraintSets,
const nsString& aDeviceId) const override;
// VoEMediaProcess.
virtual void Process(int channel, webrtc::ProcessingTypes type,
int16_t audio10ms[], size_t length,
int samplingFreq, bool isStereo) override;
void Shutdown() override;
NS_DECL_THREADSAFE_ISUPPORTS
protected:
~MediaEngineWebRTCMicrophoneSource() {}
private:
nsresult
UpdateSingleSource(const AllocationHandle* aHandle,
const NormalizedConstraints& aNetConstraints,
const MediaEnginePrefs& aPrefs,
const nsString& aDeviceId,
const char** aOutBadConstraint) override;
void SetLastPrefs(const MediaEnginePrefs& aPrefs);
// These allocate/configure and release the channel
bool AllocChannel();
void FreeChannel();
// These start/stop VoEBase and associated interfaces
bool InitEngine();
void DeInitEngine();
// This is true when all processing is disabled, we can skip
// packetization, resampling and other processing passes.
bool PassThrough() {
return mSkipProcessing;
}
template<typename T>
void InsertInGraph(const T* aBuffer,
size_t aFrames,
uint32_t aChannels);
void PacketizeAndProcess(MediaStreamGraph* aGraph,
const AudioDataValue* aBuffer,
size_t aFrames,
TrackRate aRate,
uint32_t aChannels);
webrtc::VoiceEngine* mVoiceEngine;
RefPtr<mozilla::AudioInput> mAudioInput;
RefPtr<WebRTCAudioDataListener> mListener;
RefPtr<AudioOutputObserver> mAudioOutputObserver;
// Note: shared across all microphone sources - we don't want to Terminate()
// the VoEBase until there are no active captures
static int sChannelsOpen;
static ScopedCustomReleasePtr<webrtc::VoEBase> mVoEBase;
static ScopedCustomReleasePtr<webrtc::VoEExternalMedia> mVoERender;
static ScopedCustomReleasePtr<webrtc::VoENetwork> mVoENetwork;
static ScopedCustomReleasePtr<webrtc::VoEAudioProcessing> mVoEProcessing;
// accessed from the GraphDriver thread except for deletion
nsAutoPtr<AudioPacketizer<AudioDataValue, int16_t>> mPacketizer;
ScopedCustomReleasePtr<webrtc::VoEExternalMedia> mVoERenderListener;
// mMonitor protects mSources[] and mPrinicpalIds[] access/changes, and
// transitions of mState from kStarted to kStopped (which are combined with
// EndTrack()). mSources[] and mPrincipalHandles[] are accessed from webrtc
// threads.
Monitor mMonitor;
nsTArray<RefPtr<SourceMediaStream>> mSources;
nsTArray<PrincipalHandle> mPrincipalHandles; // Maps to mSources.
int mCapIndex;
int mChannel;
bool mDelayAgnostic;
bool mExtendedFilter;
MOZ_INIT_OUTSIDE_CTOR TrackID mTrackID;
bool mStarted;
nsString mDeviceName;
nsCString mDeviceUUID;
int32_t mSampleFrequency;
uint64_t mTotalFrames;
uint64_t mLastLogFrames;
int32_t mPlayoutDelay;
NullTransport *mNullTransport;
nsTArray<int16_t> mInputBuffer;
// mSkipProcessing is true if none of the processing passes are enabled,
// because of prefs or constraints. This allows simply copying the audio into
// the MSG, skipping resampling and the whole webrtc.org code.
bool mSkipProcessing;
// To only update microphone when needed, we keep track of previous settings.
MediaEnginePrefs mLastPrefs;
AlignedShortBuffer mInputDownmixBuffer;
};
class MediaEngineWebRTC : public MediaEngine
{
typedef MediaEngine Super;
public:
explicit MediaEngineWebRTC(MediaEnginePrefs& aPrefs);
virtual void SetFakeDeviceChangeEvents() override;
// Clients should ensure to clean-up sources video/audio sources
// before invoking Shutdown on this class.
void Shutdown() override;
// Returns whether the host supports duplex audio stream.
bool SupportsDuplex();
void EnumerateVideoDevices(dom::MediaSourceEnum,
nsTArray<RefPtr<MediaEngineVideoSource>>*) override;
void EnumerateAudioDevices(dom::MediaSourceEnum,
nsTArray<RefPtr<MediaEngineAudioSource>>*) override;
private:
~MediaEngineWebRTC() {}
nsCOMPtr<nsIThread> mThread;
// gUM runnables can e.g. Enumerate from multiple threads
Mutex mMutex;
webrtc::VoiceEngine* mVoiceEngine;
RefPtr<mozilla::AudioInput> mAudioInput;
bool mFullDuplex;
bool mDelayAgnostic;
bool mExtendedFilter;
bool mHasTabVideoSource;
// Store devices we've already seen in a hashtable for quick return.
// Maps UUID to MediaEngineSource (one set for audio, one for video).
nsRefPtrHashtable<nsStringHashKey, MediaEngineVideoSource> mVideoSources;
nsRefPtrHashtable<nsStringHashKey, MediaEngineAudioSource> mAudioSources;
};
}
#endif /* NSMEDIAENGINEWEBRTC_H_ */