Bug 1741959 - Move audio data processing to ProcessInput r=padenot,pehrsons

The interface by which AudioInputProcessing in AudioInputTrack receives its
source data is moved from AudioInputProcessing::NotifyInputData to
::ProcessInput, which takes an AudioSegment forwarded from the
AudioInputTrack's source track.
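
In sketch form, the resulting data flow (function names as they appear in the
diff below):

  NativeInputTrack::NotifyInputData()  - buffers mic frames into mPendingData
  NativeInputTrack::ProcessInput()     - moves mPendingData into the track's
                                         AudioSegment output
  AudioInputTrack::ProcessInput()      - slices that segment via
                                         GetInputSourceData()
  AudioInputProcessing::Process()      - forwards it (pass-through) or
                                         packetizes and processes it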

Depends on D131870

Differential Revision: https://phabricator.services.mozilla.com/D122513
Chun-Min Chang 2021-12-18 15:09:46 +00:00
Parent 8d5a436659
Commit 082eb01c54
8 changed files with 729 additions and 499 deletions

View file

@@ -61,6 +61,60 @@ void AudioSegment::ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
}
}
size_t AudioSegment::WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
uint32_t aChannels) const {
size_t offset = 0;
if (GetDuration() <= 0) {
MOZ_ASSERT(GetDuration() == 0);
return offset;
}
// Calculate how many samples are in this segment
size_t frames = static_cast<size_t>(GetDuration());
CheckedInt<size_t> samples(frames);
samples *= static_cast<size_t>(aChannels);
MOZ_ASSERT(samples.isValid());
if (!samples.isValid()) {
return offset;
}
// Enlarge buffer space if needed
if (samples.value() > aBuffer.Capacity()) {
aBuffer.SetCapacity(samples.value());
}
aBuffer.SetLengthAndRetainStorage(samples.value());
aBuffer.ClearAndRetainStorage();
// Convert the de-interleaved chunks into an interleaved buffer. Note that
// we may upmix or downmix the audio data if the channel count in the chunks
// mismatches aChannels
for (ConstChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
const AudioChunk& c = *ci;
size_t samplesInChunk = static_cast<size_t>(c.mDuration) * aChannels;
switch (c.mBufferFormat) {
case AUDIO_FORMAT_S16:
WriteChunk<int16_t>(c, aChannels, c.mVolume,
aBuffer.Elements() + offset);
break;
case AUDIO_FORMAT_FLOAT32:
WriteChunk<float>(c, aChannels, c.mVolume, aBuffer.Elements() + offset);
break;
case AUDIO_FORMAT_SILENCE:
PodZero(aBuffer.Elements() + offset, samplesInChunk);
break;
default:
MOZ_ASSERT_UNREACHABLE("Unknown format");
PodZero(aBuffer.Elements() + offset, samplesInChunk);
break;
}
offset += samplesInChunk;
}
MOZ_DIAGNOSTIC_ASSERT(samples.value() == offset,
"Segment's duration is incorrect");
aBuffer.SetLengthAndRetainStorage(offset);
return offset;
}
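// A usage sketch (illustrative; the segment is assumed to have been filled
// elsewhere, e.g. via AppendFromInterleavedBuffer):
//
//   AudioSegment segment;
//   nsTArray<AudioDataValue> interleaved;
//   size_t samples = segment.WriteToInterleavedBuffer(interleaved, 2);
//   size_t frames = samples / 2;  // the return value is frames * aChannels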
// This helps to safely get a pointer to the position we want to start
// writing a planar audio buffer, depending on the channel and the offset in the
// buffer.

View file

@@ -422,6 +422,12 @@ class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
MOZ_ASSERT(aChannels == channels.Length());
AppendFrames(buffer.forget(), channels, aFrames, aPrincipalHandle);
}
// Write the segment data into an interleaved buffer. Do mixing if the
// AudioChunk's channel count in the segment is different from aChannels.
// Returns the sample count of the converted audio data. The converted data
// will be stored in aBuffer.
size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
uint32_t aChannels) const;
// Consumes aChunk, and returns a pointer to the persistent copy of aChunk
// in the segment.
AudioChunk* AppendAndConsumeChunk(AudioChunk&& aChunk) {
@@ -486,8 +492,8 @@ class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
};
template <typename SrcT>
void WriteChunk(AudioChunk& aChunk, uint32_t aOutputChannels, float aVolume,
AudioDataValue* aOutputBuffer) {
void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,
float aVolume, AudioDataValue* aOutputBuffer) {
AutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;
channelData = aChunk.ChannelData<SrcT>().Clone();

View file

@@ -11,11 +11,45 @@
namespace mozilla {
#ifdef LOG_INTERNAL
# undef LOG_INTERNAL
#endif // LOG_INTERNAL
#define LOG_INTERNAL(level, msg, ...) \
MOZ_LOG(gMediaTrackGraphLog, LogLevel::level, (msg, ##__VA_ARGS__))
#ifdef LOG
# undef LOG
#endif // LOG
#define LOG(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)
// This can only be called on the graph thread, since mGraph->CurrentDriver()
// is graph-thread only
#ifdef TRACK_GRAPH_LOG_INTERNAL
# undef TRACK_GRAPH_LOG_INTERNAL
#endif // TRACK_GRAPH_LOG_INTERNAL
#define TRACK_GRAPH_LOG_INTERNAL(level, msg, ...) \
LOG_INTERNAL(level, "(Graph %p, Driver %p) NativeInputTrack %p, " msg, \
this->mGraph, this->mGraph->CurrentDriver(), this, \
##__VA_ARGS__)
#ifdef TRACK_GRAPH_LOG
# undef TRACK_GRAPH_LOG
#endif // TRACK_GRAPH_LOG
#define TRACK_GRAPH_LOG(msg, ...) \
TRACK_GRAPH_LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)
#ifdef TRACK_GRAPH_LOGV
# undef TRACK_GRAPH_LOGV
#endif // TRACK_GRAPH_LOGV
#define TRACK_GRAPH_LOGV(msg, ...) \
TRACK_GRAPH_LOG_INTERNAL(Verbose, msg, ##__VA_ARGS__)
/* static */
NativeInputTrack* NativeInputTrack::Create(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(NS_IsMainThread());
NativeInputTrack* track = new NativeInputTrack(aGraph->GraphRate());
LOG("Create NativeInputTrack %p in MTG %p", track, aGraph);
aGraph->AddTrack(track);
return track;
}
@@ -35,7 +69,7 @@ size_t NativeInputTrack::RemoveUser() {
void NativeInputTrack::DestroyImpl() {
MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
mInputData.Clear();
mPendingData.Clear();
ProcessedMediaTrack::DestroyImpl();
}
@@ -44,23 +78,27 @@ void NativeInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
TRACE_COMMENT("NativeInputTrack::ProcessInput", "%p", this);
if (mInputData.IsEmpty()) {
TRACK_GRAPH_LOGV("ProcessInput from %" PRId64 " to %" PRId64
", needs %" PRId64 " frames",
aFrom, aTo, aTo - aFrom);
TrackTime from = GraphTimeToTrackTime(aFrom);
TrackTime to = GraphTimeToTrackTime(aTo);
if (from >= to) {
return;
}
// The number of NotifyInputData and ProcessInput calls could differ. We
// always process the input data from NotifyInputData in the first
// ProcessInput call after that NotifyInputData.
MOZ_ASSERT_IF(!mIsBufferingAppended, mPendingData.IsEmpty());
// The mSegment will be the de-interleaved audio data converted from
// mInputData
TrackTime need = to - from;
TrackTime dataNeed = std::min(mPendingData.GetDuration(), need);
TrackTime silenceNeed = std::max(need - dataNeed, (TrackTime)0);
GetData<AudioSegment>()->Clear();
GetData<AudioSegment>()->AppendFromInterleavedBuffer(
mInputData.Data(), mInputData.FrameCount(), mInputData.Channels(),
PRINCIPAL_HANDLE_NONE);
MOZ_ASSERT_IF(dataNeed > 0, silenceNeed == 0);
mInputData.Clear();
GetData<AudioSegment>()->AppendSlice(mPendingData, 0, dataNeed);
mPendingData.RemoveLeading(dataNeed);
GetData<AudioSegment>()->AppendNullData(silenceNeed);
}
uint32_t NativeInputTrack::NumberOfChannels() const {
@@ -82,8 +120,10 @@ void NativeInputTrack::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
MOZ_ASSERT(aGraph == mGraph,
"Receive input stopped signal from another graph");
TRACK_GRAPH_LOG("NotifyInputStopped");
mInputChannels = 0;
mInputData.Clear();
mIsBufferingAppended = false;
mPendingData.Clear();
for (auto& listener : mDataUsers) {
listener->NotifyInputStopped(aGraph);
}
@@ -96,12 +136,30 @@ void NativeInputTrack::NotifyInputData(MediaTrackGraphImpl* aGraph,
uint32_t aAlreadyBuffered) {
MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
MOZ_ASSERT(aGraph == mGraph, "Receive input data from another graph");
TRACK_GRAPH_LOGV(
"NotifyInputData: frames=%zu, rate=%d, channel=%u, alreadyBuffered=%u",
aFrames, aRate, aChannels, aAlreadyBuffered);
if (!mIsBufferingAppended) {
// First time we see live frames getting added. Use what's already buffered
// in the driver's scratch buffer as a starting point.
MOZ_ASSERT(mPendingData.IsEmpty());
constexpr TrackTime buffering = WEBAUDIO_BLOCK_SIZE;
const TrackTime remaining =
buffering - static_cast<TrackTime>(aAlreadyBuffered);
mPendingData.AppendNullData(remaining);
mIsBufferingAppended = true;
TRACK_GRAPH_LOG("Set mIsBufferingAppended by appending %" PRId64 " frames.",
remaining);
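// e.g., with WEBAUDIO_BLOCK_SIZE = 128 and aAlreadyBuffered = 48, this
// prepends 128 - 48 = 80 frames of silence, so the buffered lead-in always
// totals exactly one block.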
}
MOZ_ASSERT(aChannels);
if (!mInputChannels) {
mInputChannels = aChannels;
}
mInputData.Push(aBuffer, aFrames, aRate, aChannels);
mPendingData.AppendFromInterleavedBuffer(aBuffer, aFrames, aChannels,
PRINCIPAL_HANDLE_NONE);
for (auto& listener : mDataUsers) {
listener->NotifyInputData(aGraph, aBuffer, aFrames, aRate, aChannels,
aAlreadyBuffered);
@@ -112,10 +170,16 @@ void NativeInputTrack::DeviceChanged(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
MOZ_ASSERT(aGraph == mGraph,
"Receive device changed signal from another graph");
mInputData.Clear();
TRACK_GRAPH_LOG("DeviceChanged");
for (auto& listener : mDataUsers) {
listener->DeviceChanged(aGraph);
}
}
#undef LOG_INTERNAL
#undef LOG
#undef TRACK_GRAPH_LOG_INTERNAL
#undef TRACK_GRAPH_LOG
#undef TRACK_GRAPH_LOGV
} // namespace mozilla

Просмотреть файл

@@ -17,7 +17,8 @@ class NativeInputTrack : public ProcessedMediaTrack {
~NativeInputTrack() = default;
explicit NativeInputTrack(TrackRate aSampleRate)
: ProcessedMediaTrack(aSampleRate, MediaSegment::AUDIO,
new AudioSegment()) {}
new AudioSegment()),
mIsBufferingAppended(false) {}
public:
// Main Thread API
@@ -49,9 +50,13 @@ class NativeInputTrack : public ProcessedMediaTrack {
nsTArray<RefPtr<AudioDataListener>> mDataUsers;
private:
// Indicates whether extra frames have been appended to mPendingData. The
// number of extra frames is in the [0, WEBAUDIO_BLOCK_SIZE] range.
bool mIsBufferingAppended;
// Queues the audio input data coming from NotifyInputData. Used on the graph
// thread only.
AudioInputSamples mInputData;
AudioSegment mPendingData;
// Only accessed on the graph thread.
uint32_t mInputChannels = 0;

View file

@@ -37,286 +37,163 @@ class MockGraph : public MediaTrackGraphImpl {
~MockGraph() = default;
};
TEST(TestAudioInputProcessing, UnaccountedPacketizerBuffering)
// AudioInputProcessing will insert extra frames as pre-buffering data to
// avoid glitches in non-pass-through mode. The main goal of the test is to
// check how many frames are left in AudioInputProcessing's mSegment in
// various situations after the input data has been processed.
TEST(TestAudioInputProcessing, Buffering)
{
const TrackRate rate = 48000;
const uint32_t channels = 2;
auto graph = MakeRefPtr<NiceMock<MockGraph>>(48000, 2);
auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
AudioGenerator<AudioDataValue> generator(channels, rate);
// The packetizer takes 480 frames. To trigger this we need to populate the
// packetizer without filling it completely the first iteration, then trigger
// the unbounded-buffering-assertion on the second iteration.
const size_t nrFrames = 440;
const size_t bufferSize = nrFrames * channels;
GraphTime processedTime;
GraphTime nextTime;
nsTArray<AudioDataValue> buffer(bufferSize);
buffer.AppendElements(bufferSize);
AudioSegment segment;
bool ended;
aip->Start();
{
// First iteration.
// 440 does not fill the packetizer but accounts for pre-silence buffering.
// Iterations have processed 72 frames more than provided by callbacks:
// 512 - 440 = 72
// Thus the total amount of pre-silence buffering added is:
// 480 + 128 - 72 = 536
// The iteration pulls in 512 frames of silence, leaving 24 frames buffered.
processedTime = 0;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(nrFrames);
generator.GenerateInterleaved(buffer.Elements(), nrFrames);
aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
nextTime - nrFrames);
aip->ProcessInput(graph, nullptr);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), 24U);
}
{
// Second iteration.
// 880 fills a packet of 480 frames. 400 left in the packetizer.
// Last iteration left 24 frames buffered, making this iteration have 504
// frames in the buffer while pulling 384 frames.
// That leaves 120 frames buffered, which must be no more than the total
// intended buffering of 480 + 128 = 608 frames.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * nrFrames);
generator.GenerateInterleaved(buffer.Elements(), nrFrames);
aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
nextTime - (2 * nrFrames));
aip->ProcessInput(graph, nullptr);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), 120U);
}
graph->Destroy();
}
TEST(TestAudioInputProcessing, InputDataCapture)
{
// This test simulates an audio cut issue that happens when using Redmi
// AirDots. Similar issues could happen when using other Bluetooth devices like
// the Bose QC 35 II or Sony WH-XB900N.
const TrackRate rate = 8000; // So the packetizer takes 80 frames
const TrackRate rate = 8000; // So packet size is 80
const uint32_t channels = 1;
auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate, channels);
auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
AudioGenerator<AudioDataValue> generator(channels, rate);
const size_t frames = 72;
const size_t bufferSize = frames * channels;
nsTArray<AudioDataValue> buffer(bufferSize);
buffer.AppendElements(bufferSize);
AudioGenerator<AudioDataValue> generator(channels, rate);
GraphTime processedTime;
GraphTime nextTime;
AudioSegment segment;
bool ended;
AudioSegment output;
aip->Start();
// Toggle pass-through mode without starting
{
EXPECT_EQ(aip->PassThrough(graph), false);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
aip->SetPassThrough(graph, true);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
aip->SetPassThrough(graph, false);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
aip->SetPassThrough(graph, true);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
}
{
// First iteration.
// aip will fill (WEBAUDIO_BLOCK_SIZE + packetizer-size) = 128 + 80 = 208
// silence frames at the beginning of its data storage. The iteration will
// take (nextTime - segment-duration) = (128 - 0) = 128 frames to the segment,
// leaving 208 - 128 = 80 silence frames.
const TrackTime bufferedFrames = 80U;
// Need (nextTime - processedTime) = 128 - 0 = 128 frames this round.
// aip has not been started or set to processing mode yet, so the output will
// be filled with silence directly.
processedTime = 0;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels, 0);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, nullptr);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
}
// Set aip to processing/non-pass-through mode
aip->SetPassThrough(graph, false);
{
// Second iteration.
// We will packetize 80 frames to aip's data storage. The last round left 80
// frames so we have 80 + 80 = 160 frames. The iteration will take (nextTime
// - segment-duration) = (256 - 128) = 128 frames to segment, leaving 160 -
// 128 = 32 frames.
const TrackTime bufferedFrames = 32U;
// Need (nextTime - processedTime) = 256 - 128 = 128 frames this round.
// aip has not been started yet, so the output will be filled with silence
// directly.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, nullptr);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
}
// aip has been started and set to processing mode so it will insert 80 frames
// into aip's internal buffer as pre-buffering.
aip->Start(graph);
{
// Third iteration.
// Sometimes AudioCallbackDriver's buffer, whose type is
// AudioCallbackBufferWrapper, could be unavailable, and therefore
// ProcessInput won't be called. In this case, we should queue the audio
// data and process them when ProcessInput can be called again.
// Need (nextTime - processedTime) = 256 - 256 = 0 frames this round.
// In Process(), aip will take 0 frames from the input, packetize and process
// them into 0 80-frame packets (0 frames left in the packetizer), insert the
// packets into aip's internal buffer, then move 0 frames from the internal
// buffer to the output, leaving 80 + 0 - 0 = 80 frames in aip's internal
// buffer.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
// Note that processedTime is *equal* to nextTime (processedTime ==
// nextTime) now but it's ok since we don't call ProcessInput here.
generator.GenerateInterleaved(buffer.Elements(), frames);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
Unused << processedTime;
buffer.ClearAndRetainStorage();
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 80);
}
{
// Fourth iteration.
// We will packetize 80 (previous round) + 80 (this round) = 160 frames to
// aip's data storage. 32 frames are left after the second iteration, so we
// have 160 + 32 = 192 frames. The iteration will take (nextTime
// - segment-duration) = (384 - 256) = 128 frames to segment, leaving 192 -
// 128 = 64 frames.
const TrackTime bufferedFrames = 64U;
// Need (nextTime - processedTime) = 384 - 256 = 128 frames this round.
// In Process(), aip will take 128 frames from the input, packetize and
// process them into floor(128/80) = 1 80-frame packet (48 frames left in the
// packetizer), insert the packet into aip's internal buffer, then move 128
// frames from the internal buffer to the output, leaving 80 + 80 - 128 = 32
// frames in aip's internal buffer.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, nullptr);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
}
graph->Destroy();
}
{
// Need (nextTime - processedTime) = 384 - 384 = 0 frames this round.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(5 * frames);
TEST(TestAudioInputProcessing, InputDataCapturePassThrough)
{
// This test simulates an audio cut issue that happens when using Redmi
// AirDots. Similar issues could happen when using other Bluetooth devices like
// the Bose QC 35 II or Sony WH-XB900N.
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
const TrackRate rate = 8000; // So the packetizer takes 80 frames
const uint32_t channels = 1;
auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate, channels);
auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
AudioGenerator<AudioDataValue> generator(channels, rate);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
}
const size_t frames = 72;
const size_t bufferSize = frames * channels;
nsTArray<AudioDataValue> buffer(bufferSize);
buffer.AppendElements(bufferSize);
{
// Need (nextTime - processedTime) = 512 - 384 = 128 frames this round.
// In Process(), aip will take 128 frames from the input, packetize and
// process them into floor((128 + 48)/80) = 2 80-frame packets (16 frames left
// in the packetizer), insert the packets into aip's internal buffer, then
// move 128 frames from the internal buffer to the output, leaving
// 32 + 2*80 - 128 = 64 frames in aip's internal buffer.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(6 * frames);
GraphTime processedTime;
GraphTime nextTime;
AudioSegment segment;
AudioSegment source;
bool ended;
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), nextTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 64);
}
aip->SetPassThrough(graph, true);
aip->Start();
{
// First iteration.
// aip will fill (WEBAUDIO_BLOCK_SIZE + frames) = 128 + 72 = 200 silence
// frames at the beginning of its data storage. The iteration will take
// (nextTime - segment-duration) = (128 - 0) = 128 frames to the segment,
// leaving 200 - 128 = 72 silence frames.
const TrackTime bufferedFrames = 72U;
processedTime = 0;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
PRINCIPAL_HANDLE_NONE);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels, 0);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, &source);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
source.Clear();
}
{
// Second iteration.
// We will feed 72 frames to aip's data storage. The last round left 72
// frames so we have 72 + 72 = 144 frames. The iteration will take (nextTime
// - segment-duration) = (256 - 128) = 128 frames to segment, leaving 144 -
// 128 = 16 frames.
const TrackTime bufferedFrames = 16U;
// Need (nextTime - processedTime) = 512 - 512 = 0 frames this round.
// No buffering in pass-through mode
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(7 * frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
PRINCIPAL_HANDLE_NONE);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, &source);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
source.Clear();
}
{
// Third iteration.
// Sometimes AudioCallbackDriver's buffer, whose type is
// AudioCallbackBufferWrapper, could be unavailable, and therefore
// ProcessInput won't be called. In this case, we should queue the audio
// data and process them when ProcessInput can be called again.
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
// Note that processedTime is *equal* to nextTime (processedTime ==
// nextTime) now but it's ok since we don't call ProcessInput here.
generator.GenerateInterleaved(buffer.Elements(), frames);
source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
PRINCIPAL_HANDLE_NONE);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
Unused << processedTime;
buffer.ClearAndRetainStorage();
}
{
// Fourth iteration.
// We will feed 72 (previous round) + 72 (this round) = 144 frames to aip's
// data storage. 16 frames are left after the second iteration, so we have
// 144 + 16 = 160 frames. The iteration will take (nextTime -
// segment-duration) = (384 - 256) = 128 frames to segment, leaving 160 -
// 128 = 32 frames.
const TrackTime bufferedFrames = 32U;
processedTime = nextTime;
nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
generator.GenerateInterleaved(buffer.Elements(), frames);
source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
PRINCIPAL_HANDLE_NONE);
aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
0 /* ignored */);
buffer.ClearAndRetainStorage();
aip->ProcessInput(graph, &source);
aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
true, &ended);
EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
source.Clear();
AudioSegment input;
generator.Generate(input, nextTime - processedTime);
aip->Process(graph, processedTime, nextTime, &input, &output);
EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
EXPECT_EQ(output.GetDuration(), processedTime);
EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
}
aip->Stop(graph);
graph->Destroy();
}

View file

@@ -48,15 +48,16 @@ struct StartInputProcessing : public ControlMessage {
: ControlMessage(aTrack),
mInputTrack(aTrack),
mInputProcessing(aInputProcessing) {}
void Run() override { mInputProcessing->Start(); }
void Run() override { mInputProcessing->Start(mTrack->GraphImpl()); }
};
struct StopInputProcessing : public ControlMessage {
const RefPtr<AudioInputProcessing> mInputProcessing;
explicit StopInputProcessing(AudioInputProcessing* aInputProcessing)
: ControlMessage(nullptr), mInputProcessing(aInputProcessing) {}
void Run() override { mInputProcessing->Stop(); }
explicit StopInputProcessing(AudioInputTrack* aTrack,
AudioInputProcessing* aInputProcessing)
: ControlMessage(aTrack), mInputProcessing(aInputProcessing) {}
void Run() override { mInputProcessing->Stop(mTrack->GraphImpl()); }
};
struct SetPassThrough : public ControlMessage {
@@ -279,7 +280,7 @@ TEST(TestAudioTrackGraph, ErrorCallback)
// Clean up.
DispatchFunction([&] {
inputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(listener));
MakeUnique<StopInputProcessing>(inputTrack, listener));
inputTrack->CloseAudioInput();
inputTrack->Destroy();
});
@@ -348,7 +349,7 @@ TEST(TestAudioTrackGraph, AudioInputTrack)
outputTrack->Destroy();
port->Destroy();
inputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(listener));
MakeUnique<StopInputProcessing>(inputTrack, listener));
inputTrack->CloseAudioInput();
inputTrack->Destroy();
});
@@ -363,7 +364,7 @@ TEST(TestAudioTrackGraph, AudioInputTrack)
EXPECT_EQ(estimatedFreq, inputFrequency);
std::cerr << "PreSilence: " << preSilenceSamples << std::endl;
// We buffer 128 frames in passthrough mode. See AudioInputProcessing::Pull.
// We buffer 128 frames. See DeviceInputTrack::ProcessInput.
EXPECT_GE(preSilenceSamples, 128U);
// If the fallback system clock driver is doing a graph iteration before the
// first audio driver iteration comes in, that iteration is ignored and
@@ -485,7 +486,7 @@ TEST(TestAudioTrackGraph, ReOpenAudioInput)
outputTrack->Destroy();
port->Destroy();
inputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(listener));
MakeUnique<StopInputProcessing>(inputTrack, listener));
inputTrack->CloseAudioInput();
inputTrack->Destroy();
});
@@ -501,7 +502,8 @@ TEST(TestAudioTrackGraph, ReOpenAudioInput)
EXPECT_EQ(estimatedFreq, inputFrequency);
std::cerr << "PreSilence: " << preSilenceSamples << std::endl;
// We buffer 10ms worth of frames in non-passthrough mode, plus up to 128
// frames as we round up to the nearest block. See AudioInputProcessing::Pull.
// frames as we round up to the nearest block. See
// AudioInputProcessing::Process and DeviceInputTrack::ProcessInput.
EXPECT_GE(preSilenceSamples, 128U + inputRate / 100);
// If the fallback system clock driver is doing a graph iteration before the
// first audio driver iteration comes in, that iteration is ignored and
@@ -605,7 +607,7 @@ TEST(TestAudioTrackGraph, AudioInputTrackDisabling)
outputTrack->Destroy();
port->Destroy();
inputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(listener));
MakeUnique<StopInputProcessing>(inputTrack, listener));
inputTrack->CloseAudioInput();
inputTrack->Destroy();
});
@@ -686,7 +688,7 @@ struct AudioTrackSet {
mOutputTrack->Destroy();
mPort->Destroy();
mInputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(mListener));
MakeUnique<StopInputProcessing>(mInputTrack, mListener));
mInputTrack->CloseAudioInput();
mInputTrack->Destroy();
@@ -1028,7 +1030,7 @@ void TestCrossGraphPort(uint32_t aInputRate, uint32_t aOutputRate,
transmitter->Destroy();
port->Destroy();
inputTrack->GraphImpl()->AppendMessage(
MakeUnique<StopInputProcessing>(listener));
MakeUnique<StopInputProcessing>(inputTrack, listener));
inputTrack->CloseAudioInput();
inputTrack->Destroy();
});

View file

@@ -388,18 +388,19 @@ class StartStopMessage : public ControlMessage {
public:
enum StartStop { Start, Stop };
StartStopMessage(AudioInputProcessing* aInputProcessing, StartStop aAction)
: ControlMessage(nullptr),
StartStopMessage(MediaTrack* aTrack, AudioInputProcessing* aInputProcessing,
StartStop aAction)
: ControlMessage(aTrack),
mInputProcessing(aInputProcessing),
mAction(aAction) {}
void Run() override {
if (mAction == StartStopMessage::Start) {
TRACE("InputProcessing::Start")
mInputProcessing->Start();
mInputProcessing->Start(mTrack->GraphImpl());
} else if (mAction == StartStopMessage::Stop) {
TRACE("InputProcessing::Stop")
mInputProcessing->Stop();
mInputProcessing->Stop(mTrack->GraphImpl());
} else {
MOZ_CRASH("Invalid enum value");
}
@@ -439,7 +440,7 @@ nsresult MediaEngineWebRTCMicrophoneSource::Start() {
}
track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>(
inputProcessing, StartStopMessage::Start));
track, inputProcessing, StartStopMessage::Start));
track->OpenAudioInput(deviceID, inputProcessing);
}));
@@ -470,7 +471,7 @@ nsresult MediaEngineWebRTCMicrophoneSource::Stop() {
}
track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>(
inputProcessing, StartStopMessage::Stop));
track, inputProcessing, StartStopMessage::Stop));
MOZ_ASSERT(track->DeviceId().value() == deviceInfo->DeviceID());
track->CloseAudioInput();
}));
@@ -493,7 +494,6 @@ AudioInputProcessing::AudioInputProcessing(
mRequestedInputChannelCount(aMaxChannelCount),
mSkipProcessing(false),
mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100),
mLiveBufferingAppended(Nothing()),
mPrincipal(aPrincipalHandle),
mEnabled(false),
mEnded(false),
@@ -513,22 +513,24 @@ void AudioInputProcessing::SetPassThrough(MediaTrackGraphImpl* aGraph,
bool aPassThrough) {
MOZ_ASSERT(aGraph->OnGraphThread());
if (!mSkipProcessing && aPassThrough) {
// Reset AudioProcessing so that if we resume processing in the future it
// doesn't depend on old state.
mAudioProcessing->Initialize();
if (mPacketizerInput) {
MOZ_ASSERT(mPacketizerInput->PacketsAvailable() == 0);
LOG_FRAME(
"AudioInputProcessing %p Appending %u frames of null data for data "
"discarded in the packetizer",
this, mPacketizerInput->FramesAvailable());
mSegment.AppendNullData(mPacketizerInput->FramesAvailable());
mPacketizerInput->Clear();
}
if (aPassThrough == mSkipProcessing) {
return;
}
mSkipProcessing = aPassThrough;
if (!mEnabled) {
MOZ_ASSERT(!mPacketizerInput);
return;
}
if (aPassThrough) {
// Turn on pass-through
ResetAudioProcessing(aGraph);
} else {
// Turn off pass-through
MOZ_ASSERT(!mPacketizerInput);
EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);
}
}
uint32_t AudioInputProcessing::GetRequestedInputChannelCount() {
@@ -542,104 +544,222 @@ void AudioInputProcessing::SetRequestedInputChannelCount(
aGraph->ReevaluateInputDevice();
}
void AudioInputProcessing::Start() {
mEnabled = true;
mLiveBufferingAppended = Nothing();
}
void AudioInputProcessing::Stop() { mEnabled = false; }
void AudioInputProcessing::Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom,
GraphTime aTo, GraphTime aTrackEnd,
AudioSegment* aSegment,
bool aLastPullThisIteration, bool* aEnded) {
void AudioInputProcessing::Start(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThread());
if (mEnded) {
*aEnded = true;
if (mEnabled) {
return;
}
mEnabled = true;
if (mSkipProcessing) {
return;
}
TrackTime delta = aTo - aTrackEnd;
MOZ_ASSERT(delta >= 0, "We shouldn't append more than requested");
TrackTime buffering = 0;
MOZ_ASSERT(!mPacketizerInput);
EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);
}
// Add the amount of buffering required to not underrun and glitch.
void AudioInputProcessing::Stop(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThread());
// Make sure there's at least one extra block buffered until audio callbacks
// come in, since we round graph iteration durations up to the nearest block.
buffering += WEBAUDIO_BLOCK_SIZE;
if (!PassThrough(aGraph) && mPacketizerInput) {
// Processing is active and is processed in chunks of 10ms through the
// input packetizer. We allow for 10ms of silence on the track to
// accomodate the buffering worst-case.
buffering += mPacketizerInput->mPacketSize;
}
if (delta <= 0) {
if (!mEnabled) {
return;
}
if (MOZ_LIKELY(mLiveBufferingAppended)) {
if (MOZ_UNLIKELY(buffering > *mLiveBufferingAppended)) {
// We need to buffer more data. This could happen the first time we pull
// input data, or the first iteration after starting to use the
// packetizer.
TrackTime silence = buffering - *mLiveBufferingAppended;
LOG_FRAME("AudioInputProcessing %p Inserting %" PRId64
" frames of silence due to buffer increase",
this, silence);
mSegment.InsertNullDataAtStart(silence);
mLiveBufferingAppended = Some(buffering);
} else if (MOZ_UNLIKELY(buffering < *mLiveBufferingAppended)) {
// We need to clear some buffered data to reduce latency now that the
// packetizer is no longer used.
MOZ_ASSERT(PassThrough(aGraph), "Must have turned on passthrough");
TrackTime removal = *mLiveBufferingAppended - buffering;
MOZ_ASSERT(mSegment.GetDuration() >= removal);
TrackTime frames = std::min(mSegment.GetDuration(), removal);
LOG_FRAME("AudioInputProcessing %p Removing %" PRId64
" frames of silence due to buffer decrease",
this, frames);
*mLiveBufferingAppended -= frames;
mSegment.RemoveLeading(frames);
}
}
mEnabled = false;
if (mSegment.GetDuration() > 0) {
MOZ_ASSERT(buffering == *mLiveBufferingAppended);
TrackTime frames = std::min(mSegment.GetDuration(), delta);
LOG_FRAME("AudioInputProcessing %p Appending %" PRId64
" frames of real data for %u channels.",
this, frames, mRequestedInputChannelCount);
aSegment->AppendSlice(mSegment, 0, frames);
mSegment.RemoveLeading(frames);
delta -= frames;
// Assert that the amount of data buffered doesn't grow unboundedly.
MOZ_ASSERT_IF(aLastPullThisIteration, mSegment.GetDuration() <= buffering);
}
if (delta <= 0) {
if (mSegment.GetDuration() == 0) {
mLiveBufferingAppended = Some(-delta);
}
if (mSkipProcessing) {
return;
}
LOG_FRAME("AudioInputProcessing %p Pulling %" PRId64
" frames of silence for %u channels.",
this, delta, mRequestedInputChannelCount);
// Packetizer is active and we were just stopped. Stop the packetizer and
// processing.
ResetAudioProcessing(aGraph);
}
// This assertion fails if we append silence here after having appended live
// frames. Before appending live frames we should add sufficient buffering to
// not have to glitch (aka append silence). Failing this meant the buffering
// was not sufficient.
MOZ_ASSERT_IF(mEnabled, !mLiveBufferingAppended);
mLiveBufferingAppended = Nothing();
// The following describes how Process() works in pass-through and
// non-pass-through mode. In both modes, Process() outputs the same number of
// frames as it receives as input.
//
// I. In non-pass-through mode:
//
// We use webrtc::AudioProcessing to process the input audio data in this
// mode. webrtc::AudioProcessing consumes its input in 10ms chunks, while the
// input data passed to Process() is not necessarily a multiple of the
// 10ms-chunk length. mPacketizerInput is introduced to divide the input data
// into 10ms chunks.
//
// We add one 10ms chunk of silence into the internal buffer before Process()
// starts working. Those extra frames are called pre-buffering. The aim is to
// avoid glitches when producing data via mPacketizerInput. Without
// pre-buffering, when the input data length is not a multiple of 10ms, we
// could end up with not enough data for the output, since mPacketizerInput
// would keep the remainder of the last incomplete 10ms chunk. To force
// processing of the data left in mPacketizerInput, we would need to append
// extra frames so mPacketizerInput can produce a full 10ms chunk. For
// example, if the sample rate is 44100 Hz, the packet size is 441 frames.
// When we only have 384 input frames, we would need to append an additional
// 57 frames to mPacketizerInput to produce a packet. However, those extra 57
// frames would be heard as a glitch.
//
// By adding one 10ms chunk of silence to the internal buffer in advance, we
// never need to inject extra frames into the input data, no matter what
// length it has. The only drawback is that the input data is not processed
// and sent to the output immediately: Process() consumes the pre-buffered
// data for its output first. The following describes how it works:
//
//
// Process()
// +-----------------------------+
// input D(N) | +--------+ +--------+ | output D(N)
// --------------|-->| P(N) |-->| S(N) |---|-------------->
// | +--------+ +--------+ |
// | packetizer mSegment |
// +-----------------------------+
// <------ internal buffer ------>
//
//
// D(N): number of frames the input provides, and the output needs, in round N
// Z: number of frames in a 10ms chunk (packet) in mPacketizerInput, Z >= 1
// (if Z = 1, the packetizer has no effect)
// P(N): number of frames left in mPacketizerInput after round N. Once the
// frames in the packetizer reach Z, the packetizer produces a packet to
// mSegment, so P(N) = (P(N-1) + D(N)) % Z, 0 <= P(N) <= Z-1
// S(N): number of frames left in mSegment after round N. The input's D(N)
// frames are passed to mPacketizerInput first, and then
// mPacketizerInput may append some packets to mSegment, so
// S(N) = S(N-1) + Z * floor((P(N-1) + D(N)) / Z) - D(N)
//
// Initially, we set P(0) = 0 and S(0) = X, where X >= Z-1. X is the
// pre-buffering put in the internal buffer. With these settings,
// P(K) + S(K) = X always holds.
//
// Intuitively, this seems true: we put X frames in the internal buffer at
// first. If no data gets stuck in the packetizer, then after Process() the
// internal buffer should still hold X frames, since the number of frames
// coming from the input equals what the output needs. The key to having
// enough data for the output while input data piles up in the packetizer is
// putting in at least Z-1 frames as pre-buffering, since the maximum number
// of frames stuck in the packetizer before it can emit a packet is
// packet-size - 1. Otherwise, we would not have enough data for the output
// whenever the new input data plus the data left in the packetizer forms a
// smaller-than-10ms chunk, which stays in the packetizer. Thus we must have
// some pre-buffered frames in mSegment to make up the length of that leftover
// chunk for the output. This can also be shown by induction:
// (1) This holds when K = 0
// (2) Assume this holds when K = N: so P(N) + S(N) = X
// => P(N) + S(N) = X >= Z-1 => S(N) >= Z-1-P(N)
// (3) When K = N+1, D(N+1) input frames come in:
// a. if P(N) + D(N+1) < Z, the packetizer does not have enough data for one
// packet. No data is produced by the packetizer, so mSegment still has
// S(N) >= Z-1-P(N) frames. The output needs D(N+1) < Z-P(N) frames, i.e.,
// at most Z-P(N)-1 frames, so mSegment has enough frames for the output.
// Then, P(N+1) = P(N) + D(N+1) and S(N+1) = S(N) - D(N+1)
// => P(N+1) + S(N+1) = P(N) + S(N) = X
// b. if P(N) + D(N+1) = Z, the packetizer will produce one packet for
// mSegment, so mSegment now has S(N) + Z frames. The output needs
// D(N+1) = Z-P(N) frames. mSegment has at least Z-1-P(N)+Z >= Z-P(N)
// frames, since Z >= 1, so it has enough frames for the output. Then,
// P(N+1) = 0 and S(N+1) = S(N) + Z - D(N+1) = S(N) + P(N)
// => P(N+1) + S(N+1) = P(N) + S(N) = X
// c. if P(N) + D(N+1) > Z, let P(N) + D(N+1) = q * Z + r, where q >= 1
// and 0 <= r <= Z-1. The packetizer can then produce q packets for
// mSegment. The output needs D(N+1) = q * Z - P(N) + r frames, and mSegment
// has S(N) + q * Z >= q * Z - P(N) + Z-1 >= q * Z - P(N) + r frames, since
// r <= Z-1. So mSegment has enough frames for the output. Then,
// P(N+1) = r and S(N+1) = S(N) + q * Z - D(N+1)
// => P(N+1) + S(N+1) = S(N) + (q * Z + r - D(N+1)) = S(N) + P(N) = X
// => P(K) + S(K) = X always holds
//
// Since P(K) + S(K) = X and P(K) is in [0, Z-1], S(K) is in the [X-Z+1, X]
// range. In our implementation, X is set to Z, so S(K) is in [1, Z].
// By the above workflow, we always have enough data for the output and never
// put extra frames into the packetizer. That means we don't have any glitch!
//
// II. In pass-through mode:
//
// Process()
// +--------+
// input D(N) | | output D(N)
// -------------->-------->--------------->
// | |
// +--------+
//
// The D(N) frames of data are just forwarded from input to output without any
// processing.
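//
// A minimal, self-contained sketch (an illustration assuming X = Z, i.e. one
// packet of pre-buffering) that simulates the bookkeeping above and checks
// the invariant P(N) + S(N) = X:
//
//   const int Z = 441;  // packet size at 44100 Hz
//   int P = 0;          // frames held in the packetizer
//   int S = Z;          // frames in mSegment, pre-buffered with one packet
//   for (int D : {384, 512, 128, 441, 1}) {   // arbitrary D(N) values
//     int produced = ((P + D) / Z) * Z;       // whole packets emitted
//     P = (P + D) % Z;
//     S += produced - D;                      // D frames go to the output
//     assert(P + S == Z && S >= 1 && S <= Z); // invariant and S(K) range
//   }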
void AudioInputProcessing::Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom,
GraphTime aTo, AudioSegment* aInput,
AudioSegment* aOutput) {
MOZ_ASSERT(aGraph->OnGraphThread());
MOZ_ASSERT(aFrom <= aTo);
MOZ_ASSERT(!mEnded);
aSegment->AppendNullData(delta);
TrackTime need = aTo - aFrom;
if (need == 0) {
return;
}
if (!mEnabled) {
LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64
" frames of silence to output (disabled)",
aGraph, aGraph->CurrentDriver(), this, need);
aOutput->AppendNullData(need);
return;
}
MOZ_ASSERT(aInput->GetDuration() == need,
"Wrong data length from input port source");
if (PassThrough(aGraph)) {
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64
" frames of input data to output directly (PassThrough)",
aGraph, aGraph->CurrentDriver(), this, aInput->GetDuration());
aOutput->AppendSegment(aInput, mPrincipal);
return;
}
// SetPassThrough(false) must be called before reaching here.
MOZ_ASSERT(mPacketizerInput);
// If mRequestedInputChannelCount is updated, create a new packetizer. No
// need to change the pre-buffering since the rate is always the same. The
// frames left in the packetizer would be replaced by null data and then
// transferred to mSegment.
EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);
// Preconditions of the audio-processing logic.
MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
mPacketizerInput->FramesAvailable() ==
mPacketizerInput->mPacketSize);
// We pre-buffer mPacketSize frames, but the maximum number of frames stuck in
// the packetizer before it can emit a packet is mPacketSize-1. Thus at least
// one frame will always remain in mSegment.
MOZ_ASSERT(mSegment.GetDuration() >= 1);
MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);
PacketizeAndProcess(aGraph, *aInput);
LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64
" frames of data now, after packetizing and processing",
aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration());
// By setting the pre-buffering to the number of frames in one packet, and
// because the maximum number of frames stuck in the packetizer before
// it can emit a packet is mPacketSize-1, we always have at least
// one more frame than the output needs.
MOZ_ASSERT(mSegment.GetDuration() > need);
aOutput->AppendSlice(mSegment, 0, need);
mSegment.RemoveLeading(need);
LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64
" frames of data to output, leaving %" PRId64 " frames in buffer",
aGraph, aGraph->CurrentDriver(), this, need,
mSegment.GetDuration());
// Postconditions of the audio-processing logic.
MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
mPacketizerInput->FramesAvailable() ==
mPacketizerInput->mPacketSize);
MOZ_ASSERT(mSegment.GetDuration() >= 1);
MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);
}
void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
@@ -653,12 +773,13 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
return;
}
if (!mPacketizerOutput || mPacketizerOutput->mPacketSize != aRate / 100u ||
if (!mPacketizerOutput ||
mPacketizerOutput->mPacketSize != GetPacketSize(aRate) ||
mPacketizerOutput->mChannels != aChannels) {
// It's ok to drop the audio still in the packetizer here: if this changes,
// we changed devices or something.
mPacketizerOutput = Nothing();
mPacketizerOutput.emplace(aRate / 100, aChannels);
mPacketizerOutput.emplace(GetPacketSize(aRate), aChannels);
}
mPacketizerOutput->Input(aBuffer, aFrames);
@@ -736,27 +857,35 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
// Only called if we're not in passthrough mode
void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
uint32_t aChannels) {
const AudioSegment& aSegment) {
MOZ_ASSERT(!PassThrough(aGraph),
"This should be bypassed when in PassThrough mode.");
MOZ_ASSERT(mEnabled);
size_t offset = 0;
MOZ_ASSERT(mPacketizerInput);
MOZ_ASSERT(mPacketizerInput->mPacketSize ==
GetPacketSize(aGraph->GraphRate()));
if (!mPacketizerInput || mPacketizerInput->mPacketSize != aRate / 100u ||
mPacketizerInput->mChannels != aChannels) {
// It's ok to drop the audio still in the packetizer here.
mPacketizerInput = Nothing();
mPacketizerInput.emplace(aRate / 100, aChannels);
}
LOG_FRAME("AudioInputProcessing %p Appending %zu frames to packetizer", this,
aFrames);
// WriteToInterleavedBuffer will upmix or downmix if the channel count in
// aSegment's chunks differs from mPacketizerInput->mChannels.
// WriteToInterleavedBuffer could be avoided once Bug 1729041 is done.
size_t sampleCount = aSegment.WriteToInterleavedBuffer(
mInterleavedBuffer, mPacketizerInput->mChannels);
size_t frameCount =
sampleCount / static_cast<size_t>(mPacketizerInput->mChannels);
// Packetize our input data into 10ms chunks, deinterleave into planar channel
// buffers, process, and append to the right MediaStreamTrack.
mPacketizerInput->Input(aBuffer, static_cast<uint32_t>(aFrames));
mPacketizerInput->Input(mInterleavedBuffer.Elements(),
static_cast<uint32_t>(frameCount));
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. "
"Packetizer has %u frames (enough for %u packets) now",
aGraph, aGraph->CurrentDriver(), this, frameCount,
mPacketizerInput->FramesAvailable(),
mPacketizerInput->PacketsAvailable());
size_t offset = 0;
while (mPacketizerInput->PacketsAvailable()) {
mPacketCount++;
@@ -771,15 +900,15 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
float* packet = mInputBuffer.Data();
mPacketizerInput->Output(packet);
// Downmix from aChannels to mono if needed. We always have floats
// here, the packetizer performed the conversion. This handles sound cards
// with multiple physical jacks exposed as a single device with _n_
// discrete channels, where only a single mic is plugged in. Those channels
// are not correlated temporally since they are discrete channels, mixing is
// just a sum.
// Downmix from mPacketizerInput->mChannels to mono if needed. We always
// have floats here, the packetizer performed the conversion. This handles
// sound cards with multiple physical jacks exposed as a single device with
// _n_ discrete channels, where only a single mic is plugged in. Those
// channels are not correlated temporally since they are discrete channels,
// mixing is just a sum.
AutoTArray<float*, 8> deinterleavedPacketizedInputDataChannelPointers;
uint32_t channelCountInput = 0;
if (aChannels > MAX_CHANNELS) {
if (mPacketizerInput->mChannels > MAX_CHANNELS) {
channelCountInput = MONO;
deinterleavedPacketizedInputDataChannelPointers.SetLength(
channelCountInput);
@@ -790,12 +919,12 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
size_t readIndex = 0;
for (size_t i = 0; i < mPacketizerInput->mPacketSize; i++) {
mDeinterleavedBuffer.Data()[i] = 0.;
for (size_t j = 0; j < aChannels; j++) {
for (size_t j = 0; j < mPacketizerInput->mChannels; j++) {
mDeinterleavedBuffer.Data()[i] += packet[readIndex++];
}
}
} else {
channelCountInput = aChannels;
channelCountInput = mPacketizerInput->mChannels;
// Deinterleave the input data
// Prepare an array pointing to deinterleaved channels.
deinterleavedPacketizedInputDataChannelPointers.SetLength(
@@ -812,7 +941,7 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
deinterleavedPacketizedInputDataChannelPointers.Elements());
}
StreamConfig inputConfig(aRate, channelCountInput,
StreamConfig inputConfig(aGraph->GraphRate(), channelCountInput,
false /* we don't use typing detection*/);
StreamConfig outputConfig = inputConfig;
@@ -873,8 +1002,11 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
continue;
}
LOG_FRAME("AudioInputProcessing %p Appending %u frames of packetized audio",
this, mPacketizerInput->mPacketSize);
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of "
"packetized audio, leaving %u frames in packetizer",
aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize,
mPacketizerInput->FramesAvailable());
// We already have planar audio data of the right format. Insert into the
// MTG.
@@ -886,54 +1018,10 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
}
}
void AudioInputProcessing::ProcessInput(MediaTrackGraphImpl* aGraph,
const AudioSegment* aSegment) {
MOZ_ASSERT(aGraph);
MOZ_ASSERT(aGraph->OnGraphThread());
if (mEnded || !mEnabled || !mLiveBufferingAppended ||
mPendingData.IsEmpty()) {
return;
}
// The number of NotifyInputData and ProcessInput calls could differ. We
// always process the input data from NotifyInputData in the first
// ProcessInput call after that NotifyInputData.
// If some processing is necessary, packetize and insert in the WebRTC.org
// code. Otherwise, directly insert the mic data in the MTG, bypassing all
// processing.
if (PassThrough(aGraph)) {
if (aSegment && !aSegment->IsEmpty()) {
mSegment.AppendSegment(aSegment, mPrincipal);
} else {
mSegment.AppendFromInterleavedBuffer(mPendingData.Data(),
mPendingData.FrameCount(),
mPendingData.Channels(), mPrincipal);
}
} else {
MOZ_ASSERT(aGraph->GraphRate() == mPendingData.Rate());
// Bug 1729041: Feed aSegment to PacketizeAndProcess so mPendingData can be
// removed, and save a copy.
PacketizeAndProcess(aGraph, mPendingData.Data(), mPendingData.FrameCount(),
mPendingData.Rate(), mPendingData.Channels());
}
mPendingData.Clear();
}
void AudioInputProcessing::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThread());
// This is called when an AudioCallbackDriver switch has happened for any
// reason, including other reasons than starting this audio input stream. We
// reset state when this happens, as a fallback driver may have fiddled with
// the amount of buffered silence during the switch.
mLiveBufferingAppended = Nothing();
mSegment.Clear();
if (mPacketizerInput) {
mPacketizerInput->Clear();
}
mPendingData.Clear();
// reason, including other reasons than starting this audio input stream.
}
// Called back on GraphDriver thread!
@@ -944,17 +1032,9 @@ void AudioInputProcessing::NotifyInputData(MediaTrackGraphImpl* aGraph,
uint32_t aChannels,
uint32_t aAlreadyBuffered) {
MOZ_ASSERT(aGraph->OnGraphThread());
TRACE("AudioInputProcessing::NotifyInputData");
MOZ_ASSERT(aGraph->GraphRate() == aRate);
MOZ_ASSERT(mEnabled);
if (!mLiveBufferingAppended) {
// First time we see live frames getting added. Use what's already buffered
// in the driver's scratch buffer as a starting point.
mLiveBufferingAppended = Some(aAlreadyBuffered);
}
mPendingData.Push(aBuffer, aFrames, aRate, aChannels);
TRACE("AudioInputProcessing::NotifyInputData");
}
void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) {
@@ -962,6 +1042,10 @@ void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) {
// Reset some processing
mAudioProcessing->Initialize();
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Reinitializing audio "
"processing",
aGraph, aGraph->CurrentDriver(), this);
}
void AudioInputProcessing::ApplyConfig(MediaTrackGraphImpl* aGraph,
@@ -973,7 +1057,6 @@ void AudioInputProcessing::ApplyConfig(MediaTrackGraphImpl* aGraph,
void AudioInputProcessing::End() {
mEnded = true;
mSegment.Clear();
mPendingData.Clear();
}
TrackTime AudioInputProcessing::NumBufferedFrames(
@@ -982,6 +1065,74 @@ TrackTime AudioInputProcessing::NumBufferedFrames(
return mSegment.GetDuration();
}
void AudioInputProcessing::EnsureAudioProcessing(MediaTrackGraphImpl* aGraph,
uint32_t aChannels) {
MOZ_ASSERT(aGraph->OnGraphThread());
MOZ_ASSERT(aChannels > 0);
MOZ_ASSERT(mEnabled);
MOZ_ASSERT(!mSkipProcessing);
if (mPacketizerInput && mPacketizerInput->mChannels == aChannels) {
return;
}
// If mPacketizerInput exists but with a different channel count, there is no
// need to change the pre-buffering, since the packet size is the same as the
// old one (the rate is a constant).
MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize ==
GetPacketSize(aGraph->GraphRate()));
bool needPreBuffering = !mPacketizerInput;
if (mPacketizerInput) {
const TrackTime numBufferedFrames =
static_cast<TrackTime>(mPacketizerInput->FramesAvailable());
mSegment.AppendNullData(numBufferedFrames);
mPacketizerInput = Nothing();
}
mPacketizerInput.emplace(GetPacketSize(aGraph->GraphRate()), aChannels);
if (needPreBuffering) {
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of "
"silence as pre-buffering",
aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize);
AudioSegment buffering;
buffering.AppendNullData(
static_cast<TrackTime>(mPacketizerInput->mPacketSize));
PacketizeAndProcess(aGraph, buffering);
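// Net effect (e.g., at 48000 Hz): the packetizer immediately emits the
// 480-frame silent packet into mSegment and is left empty, i.e., P(0) = 0
// and S(0) = Z, matching the invariant described above Process().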
}
}
void AudioInputProcessing::ResetAudioProcessing(MediaTrackGraphImpl* aGraph) {
MOZ_ASSERT(aGraph->OnGraphThread());
MOZ_ASSERT(mSkipProcessing || !mEnabled);
MOZ_ASSERT(mPacketizerInput);
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Resetting audio "
"processing",
aGraph, aGraph->CurrentDriver(), this);
// Reset AudioProcessing so that if we resume processing in the future it
// doesn't depend on old state.
mAudioProcessing->Initialize();
MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
mPacketizerInput->FramesAvailable() ==
mPacketizerInput->mPacketSize);
// It's ok to clear all the internal buffer here since we won't use mSegment
// in pass-through mode or when audio processing is disabled.
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputProcessing %p Emptying out %" PRId64
" frames of data",
aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration());
mSegment.Clear();
mPacketizerInput = Nothing();
}
void AudioInputTrack::Destroy() {
MOZ_ASSERT(NS_IsMainThread());
CloseAudioInput();
@@ -1031,38 +1182,101 @@ void AudioInputTrack::DestroyImpl() {
void AudioInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
uint32_t aFlags) {
TRACE_COMMENT("AudioInputTrack::ProcessInput", "AudioInputTrack %p", this);
MOZ_ASSERT(mInputProcessing);
// Check if there is a connected NativeInputTrack
NativeInputTrack* source = nullptr;
if (!mInputs.IsEmpty()) {
for (const MediaInputPort* input : mInputs) {
MOZ_ASSERT(input->GetSource());
if (input->GetSource()->AsNativeInputTrack()) {
source = input->GetSource()->AsNativeInputTrack();
break;
}
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputTrack %p ProcessInput from %" PRId64
" to %" PRId64 ", needs %" PRId64 " frames",
mGraph, mGraph->CurrentDriver(), this, aFrom, aTo, aTo - aFrom);
if (aFrom >= aTo) {
return;
}
if (!mInputProcessing->IsEnded()) {
MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aFrom);
if (mInputs.IsEmpty()) {
GetData<AudioSegment>()->AppendNullData(aTo - aFrom);
LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Filling %" PRId64
" frames of null data (no input source)",
mGraph, mGraph->CurrentDriver(), this, aTo - aFrom);
} else {
MOZ_ASSERT(mInputs.Length() == 1);
AudioSegment data;
GetInputSourceData(data, mInputProcessing->GetPrincipalHandle(),
mInputs[0], aFrom, aTo);
mInputProcessing->Process(GraphImpl(), aFrom, aTo, &data,
GetData<AudioSegment>());
}
}
MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo);
// Push the input data from the connected NativeInputTrack to mInputProcessing
if (source) {
MOZ_ASSERT(source->GraphImpl() == GraphImpl());
MOZ_ASSERT(source->mSampleRate == mSampleRate);
MOZ_ASSERT(GraphImpl()->GraphRate() == mSampleRate);
mInputProcessing->ProcessInput(GraphImpl(),
source->GetData<AudioSegment>());
}
bool ended = false;
mInputProcessing->Pull(
GraphImpl(), aFrom, aTo, TrackTimeToGraphTime(GetEnd()),
GetData<AudioSegment>(), aTo == GraphImpl()->mStateComputedTime, &ended);
ApplyTrackDisabling(mSegment.get());
if (ended && (aFlags & ALLOW_END)) {
ApplyTrackDisabling(mSegment.get());
} else if (aFlags & ALLOW_END) {
mEnded = true;
}
}
void AudioInputTrack::GetInputSourceData(AudioSegment& aOutput,
const PrincipalHandle& aPrincipal,
const MediaInputPort* aPort,
GraphTime aFrom, GraphTime aTo) const {
MOZ_ASSERT(mGraph->OnGraphThread());
MOZ_ASSERT(aOutput.IsEmpty());
MediaTrack* source = aPort->GetSource();
GraphTime next;
for (GraphTime t = aFrom; t < aTo; t = next) {
MediaInputPort::InputInterval interval =
MediaInputPort::GetNextInputInterval(aPort, t);
interval.mEnd = std::min(interval.mEnd, aTo);
const bool inputEnded =
source->Ended() &&
source->GetEnd() <=
source->GraphTimeToTrackTimeWithBlocking(interval.mStart);
TrackTime ticks = interval.mEnd - interval.mStart;
next = interval.mEnd;
if (interval.mStart >= interval.mEnd) {
break;
}
if (inputEnded) {
aOutput.AppendNullData(ticks);
LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
" ticks of null data from input port source (ended input)",
mGraph, mGraph->CurrentDriver(), this, ticks);
} else if (interval.mInputIsBlocked) {
aOutput.AppendNullData(ticks);
LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
" ticks of null data from input port source (blocked input)",
mGraph, mGraph->CurrentDriver(), this, ticks);
} else if (source->IsSuspended()) {
aOutput.AppendNullData(ticks);
LOG_FRAME(
"(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
" ticks of null data from input port source (source is suspended)",
mGraph, mGraph->CurrentDriver(), this, ticks);
} else {
TrackTime start =
source->GraphTimeToTrackTimeWithBlocking(interval.mStart);
TrackTime end = source->GraphTimeToTrackTimeWithBlocking(interval.mEnd);
MOZ_ASSERT(source->GetData<AudioSegment>()->GetDuration() >= end);
AudioSegment data;
data.AppendSlice(*source->GetData<AudioSegment>(), start, end);
// Replace the principal
aOutput.AppendSegment(&data, aPrincipal);
LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
" ticks of real data from input port source %p",
mGraph, mGraph->CurrentDriver(), this, end - start, source);
}
}
}
void AudioInputTrack::SetInputProcessingImpl(
RefPtr<AudioInputProcessing> aInputProcessing) {
MOZ_ASSERT(GraphImpl()->OnGraphThread());

View file

@@ -126,10 +126,8 @@ class AudioInputProcessing : public AudioDataListener {
public:
AudioInputProcessing(uint32_t aMaxChannelCount,
const PrincipalHandle& aPrincipalHandle);
void Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
GraphTime aTrackEnd, AudioSegment* aSegment,
bool aLastPullThisIteration, bool* aEnded);
void Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
AudioSegment* aInput, AudioSegment* aOutput);
void NotifyOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
size_t aFrames, TrackRate aRate,
@@ -146,8 +144,8 @@ class AudioInputProcessing : public AudioDataListener {
return !PassThrough(aGraph);
}
void Start();
void Stop();
void Start(MediaTrackGraphImpl* aGraph);
void Stop(MediaTrackGraphImpl* aGraph);
void DeviceChanged(MediaTrackGraphImpl* aGraph) override;
@@ -157,12 +155,8 @@ class AudioInputProcessing : public AudioDataListener {
void Disconnect(MediaTrackGraphImpl* aGraph) override;
// aSegment stores the unprocessed non-interleaved audio input data from the mic
void ProcessInput(MediaTrackGraphImpl* aGraph, const AudioSegment* aSegment);
void PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
const AudioDataValue* aBuffer, size_t aFrames,
TrackRate aRate, uint32_t aChannels);
const AudioSegment& aSegment);
void SetPassThrough(MediaTrackGraphImpl* aGraph, bool aPassThrough);
uint32_t GetRequestedInputChannelCount();
@@ -182,8 +176,19 @@ class AudioInputProcessing : public AudioDataListener {
TrackTime NumBufferedFrames(MediaTrackGraphImpl* aGraph) const;
// The packet size is the number of frames contained in 10ms of audio. The
// unit of aRate is Hz.
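// e.g., GetPacketSize(48000) == 480 and GetPacketSize(44100) == 441.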
constexpr static uint32_t GetPacketSize(TrackRate aRate) {
return static_cast<uint32_t>(aRate) / 100u;
}
bool IsEnded() const { return mEnded; }
const PrincipalHandle& GetPrincipalHandle() const { return mPrincipal; }
private:
~AudioInputProcessing() = default;
void EnsureAudioProcessing(MediaTrackGraphImpl* aGraph, uint32_t aChannels);
void ResetAudioProcessing(MediaTrackGraphImpl* aGraph);
// This implements the processing algorithm to apply to the input (e.g. a
// microphone). If all algorithms are disabled, this class is not used. This
// class only accepts audio chunks of 10ms. It has two inputs and one output:
@@ -215,13 +220,6 @@ class AudioInputProcessing : public AudioDataListener {
AlignedFloatBuffer mInputDownmixBuffer;
// Stores data waiting to be pulled.
AudioSegment mSegment;
// Set to Nothing() by Start(). Once live frames have been appended from the
// audio callback, this is the number of frames appended as pre-buffer for
// that data, to avoid underruns. Buffering in the track might be needed
// because of the AUDIO_BLOCK interval at which we run the graph, the
// packetizer keeping some input data. Care must be taken when turning on and
// off the packetizer.
Maybe<TrackTime> mLiveBufferingAppended;
// Principal for the data that flows through this class.
const PrincipalHandle mPrincipal;
// Whether or not this MediaEngine is enabled. If it's not enabled, it
@@ -230,11 +228,15 @@ class AudioInputProcessing : public AudioDataListener {
bool mEnabled;
// Whether or not we've ended and removed the AudioInputTrack.
bool mEnded;
// Store the unprocessed interleaved audio input data
AudioInputSamples mPendingData;
// When processing is enabled, the number of packets received by this
// instance, to implement periodic logging.
uint64_t mPacketCount;
// Storage holding the interleaved audio data converted from the AudioSegment.
// It is used as an input parameter for PacketizeAndProcess. This should be
// removed once bug 1729041 is done.
AutoTArray<AudioDataValue,
SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
mInterleavedBuffer;
};
// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
@@ -283,6 +285,12 @@ class AudioInputTrack : public ProcessedMediaTrack {
"Must set mInputProcessing before exposing to content");
return mInputProcessing->GetRequestedInputChannelCount();
}
// Gets the data in [aFrom, aTo) from aPort->GetSource() into aOutput. aOutput
// must be empty.
void GetInputSourceData(AudioSegment& aOutput,
const PrincipalHandle& aPrincipal,
const MediaInputPort* aPort, GraphTime aFrom,
GraphTime aTo) const;
// Any thread
AudioInputTrack* AsAudioInputTrack() override { return this; }