Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1741959 - Move audio data processing to ProcessInput r=padenot,pehrsons
The interface for getting the data source of the AudioInputProcessing in AudioInputTrack is moved from AudioInputProcessing::NotifyInputData to ::ProcessInput, which takes an AudioSegment forwarded from the AudioInputTrack's source track.

Depends on D131870

Differential Revision: https://phabricator.services.mozilla.com/D122513
This commit is contained in:
Parent
8d5a436659
Commit
082eb01c54
@@ -61,6 +61,60 @@ void AudioSegment::ResampleChunks(nsAutoRef<SpeexResamplerState>& aResampler,
  }
}

size_t AudioSegment::WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
                                              uint32_t aChannels) const {
  size_t offset = 0;
  if (GetDuration() <= 0) {
    MOZ_ASSERT(GetDuration() == 0);
    return offset;
  }

  // Calculate how many samples are in this segment
  size_t frames = static_cast<size_t>(GetDuration());
  CheckedInt<size_t> samples(frames);
  samples *= static_cast<size_t>(aChannels);
  MOZ_ASSERT(samples.isValid());
  if (!samples.isValid()) {
    return offset;
  }

  // Enlarge buffer space if needed
  if (samples.value() > aBuffer.Capacity()) {
    aBuffer.SetCapacity(samples.value());
  }
  aBuffer.SetLengthAndRetainStorage(samples.value());
  aBuffer.ClearAndRetainStorage();

  // Convert the de-interleaved chunks into an interleaved buffer. Note that
  // we may upmix or downmix the audio data if the channel count in the
  // chunks mismatches with aChannels
  for (ConstChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    const AudioChunk& c = *ci;
    size_t samplesInChunk = static_cast<size_t>(c.mDuration) * aChannels;
    switch (c.mBufferFormat) {
      case AUDIO_FORMAT_S16:
        WriteChunk<int16_t>(c, aChannels, c.mVolume,
                            aBuffer.Elements() + offset);
        break;
      case AUDIO_FORMAT_FLOAT32:
        WriteChunk<float>(c, aChannels, c.mVolume, aBuffer.Elements() + offset);
        break;
      case AUDIO_FORMAT_SILENCE:
        PodZero(aBuffer.Elements() + offset, samplesInChunk);
        break;
      default:
        MOZ_ASSERT_UNREACHABLE("Unknown format");
        PodZero(aBuffer.Elements() + offset, samplesInChunk);
        break;
    }
    offset += samplesInChunk;
  }
  MOZ_DIAGNOSTIC_ASSERT(samples.value() == offset,
                        "Segment's duration is incorrect");
  aBuffer.SetLengthAndRetainStorage(offset);
  return offset;
}
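
// A usage sketch, not part of this patch: converting an AudioSegment into an
// interleaved buffer with the helper above. `segment` and the helper name are
// hypothetical; the return value is in samples, so dividing by the channel
// count recovers the frame count.
static size_t InterleaveForTwoChannels(const AudioSegment& segment,
                                       nsTArray<AudioDataValue>& interleaved) {
  const uint32_t kChannels = 2;  // chunks are up/down-mixed to this count
  size_t samples = segment.WriteToInterleavedBuffer(interleaved, kChannels);
  return samples / kChannels;  // number of frames written
}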

// This helps to safely get a pointer to the position we want to start
// writing a planar audio buffer, depending on the channel and the offset in
// the buffer.
@@ -422,6 +422,12 @@ class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
    MOZ_ASSERT(aChannels == channels.Length());
    AppendFrames(buffer.forget(), channels, aFrames, aPrincipalHandle);
  }
  // Write the segment data into an interleaved buffer. Do mixing if the
  // AudioChunk's channel count in the segment is different from aChannels.
  // Returns the sample count of the converted audio data. The converted data
  // will be stored into aBuffer.
  size_t WriteToInterleavedBuffer(nsTArray<AudioDataValue>& aBuffer,
                                  uint32_t aChannels) const;
  // Consumes aChunk, and returns a pointer to the persistent copy of aChunk
  // in the segment.
  AudioChunk* AppendAndConsumeChunk(AudioChunk&& aChunk) {
@@ -486,8 +492,8 @@ class AudioSegment : public MediaSegmentBase<AudioSegment, AudioChunk> {
};

template <typename SrcT>
void WriteChunk(AudioChunk& aChunk, uint32_t aOutputChannels, float aVolume,
                AudioDataValue* aOutputBuffer) {
void WriteChunk(const AudioChunk& aChunk, uint32_t aOutputChannels,
                float aVolume, AudioDataValue* aOutputBuffer) {
  AutoTArray<const SrcT*, GUESS_AUDIO_CHANNELS> channelData;

  channelData = aChunk.ChannelData<SrcT>().Clone();
@@ -11,11 +11,45 @@

namespace mozilla {

#ifdef LOG_INTERNAL
#  undef LOG_INTERNAL
#endif  // LOG_INTERNAL
#define LOG_INTERNAL(level, msg, ...) \
  MOZ_LOG(gMediaTrackGraphLog, LogLevel::level, (msg, ##__VA_ARGS__))

#ifdef LOG
#  undef LOG
#endif  // LOG
#define LOG(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)

// This can only be used on the graph thread since mGraph->CurrentDriver() is
// graph-thread only
#ifdef TRACK_GRAPH_LOG_INTERNAL
#  undef TRACK_GRAPH_LOG_INTERNAL
#endif  // TRACK_GRAPH_LOG_INTERNAL
#define TRACK_GRAPH_LOG_INTERNAL(level, msg, ...)                        \
  LOG_INTERNAL(level, "(Graph %p, Driver %p) NativeInputTrack %p, " msg, \
               this->mGraph, this->mGraph->CurrentDriver(), this,        \
               ##__VA_ARGS__)

#ifdef TRACK_GRAPH_LOG
#  undef TRACK_GRAPH_LOG
#endif  // TRACK_GRAPH_LOG
#define TRACK_GRAPH_LOG(msg, ...) \
  TRACK_GRAPH_LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)

#ifdef TRACK_GRAPH_LOGV
#  undef TRACK_GRAPH_LOGV
#endif  // TRACK_GRAPH_LOGV
#define TRACK_GRAPH_LOGV(msg, ...) \
  TRACK_GRAPH_LOG_INTERNAL(Verbose, msg, ##__VA_ARGS__)

/* static */
NativeInputTrack* NativeInputTrack::Create(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(NS_IsMainThread());

  NativeInputTrack* track = new NativeInputTrack(aGraph->GraphRate());
  LOG("Create NativeInputTrack %p in MTG %p", track, aGraph);
  aGraph->AddTrack(track);
  return track;
}
@@ -35,7 +69,7 @@ size_t NativeInputTrack::RemoveUser() {

void NativeInputTrack::DestroyImpl() {
  MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
  mInputData.Clear();
  mPendingData.Clear();
  ProcessedMediaTrack::DestroyImpl();
}
@@ -44,23 +78,27 @@ void NativeInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
  MOZ_ASSERT(mGraph->OnGraphThreadOrNotRunning());
  TRACE_COMMENT("NativeInputTrack::ProcessInput", "%p", this);

  if (mInputData.IsEmpty()) {
  TRACK_GRAPH_LOGV("ProcessInput from %" PRId64 " to %" PRId64
                   ", needs %" PRId64 " frames",
                   aFrom, aTo, aTo - aFrom);

  TrackTime from = GraphTimeToTrackTime(aFrom);
  TrackTime to = GraphTimeToTrackTime(aTo);
  if (from >= to) {
    return;
  }

  // The number of NotifyInputData and ProcessInput calls could be different.
  // We always process the input data from NotifyInputData in the first
  // ProcessInput call after the NotifyInputData
  MOZ_ASSERT_IF(!mIsBufferingAppended, mPendingData.IsEmpty());

  // The mSegment will be the de-interleaved audio data converted from
  // mInputData
  TrackTime need = to - from;
  TrackTime dataNeed = std::min(mPendingData.GetDuration(), need);
  TrackTime silenceNeed = std::max(need - dataNeed, (TrackTime)0);

  GetData<AudioSegment>()->Clear();
  GetData<AudioSegment>()->AppendFromInterleavedBuffer(
      mInputData.Data(), mInputData.FrameCount(), mInputData.Channels(),
      PRINCIPAL_HANDLE_NONE);
  MOZ_ASSERT_IF(dataNeed > 0, silenceNeed == 0);

  mInputData.Clear();
  GetData<AudioSegment>()->AppendSlice(mPendingData, 0, dataNeed);
  mPendingData.RemoveLeading(dataNeed);
  GetData<AudioSegment>()->AppendNullData(silenceNeed);
}
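
// A standalone sketch, not part of this patch, of the split computed in
// ProcessInput above: given `buffered` frames of pending real input and a
// request for `need` frames, the track is filled with real data first and
// padded with silence. Names are illustrative only.
static std::pair<TrackTime, TrackTime> SplitDataAndSilence(TrackTime buffered,
                                                           TrackTime need) {
  TrackTime data = std::min(buffered, need);
  TrackTime silence = need - data;  // >= 0 because data <= need
  return {data, silence};
}
// e.g. SplitDataAndSilence(80, 128) == {80, 48}: 80 real frames, 48 silent.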

uint32_t NativeInputTrack::NumberOfChannels() const {
@@ -82,8 +120,10 @@ void NativeInputTrack::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
  MOZ_ASSERT(aGraph == mGraph,
             "Receive input stopped signal from another graph");
  TRACK_GRAPH_LOG("NotifyInputStopped");
  mInputChannels = 0;
  mInputData.Clear();
  mIsBufferingAppended = false;
  mPendingData.Clear();
  for (auto& listener : mDataUsers) {
    listener->NotifyInputStopped(aGraph);
  }
@@ -96,12 +136,30 @@ void NativeInputTrack::NotifyInputData(MediaTrackGraphImpl* aGraph,
                                       uint32_t aAlreadyBuffered) {
  MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
  MOZ_ASSERT(aGraph == mGraph, "Receive input data from another graph");
  TRACK_GRAPH_LOGV(
      "NotifyInputData: frames=%zu, rate=%d, channel=%u, alreadyBuffered=%u",
      aFrames, aRate, aChannels, aAlreadyBuffered);

  if (!mIsBufferingAppended) {
    // First time we see live frames getting added. Use what's already buffered
    // in the driver's scratch buffer as a starting point.
    MOZ_ASSERT(mPendingData.IsEmpty());
    constexpr TrackTime buffering = WEBAUDIO_BLOCK_SIZE;
    const TrackTime remaining =
        buffering - static_cast<TrackTime>(aAlreadyBuffered);
    mPendingData.AppendNullData(remaining);
    mIsBufferingAppended = true;
    TRACK_GRAPH_LOG("Set mIsBufferingAppended by appending %" PRId64 " frames.",
                    remaining);
  }

  MOZ_ASSERT(aChannels);
  if (!mInputChannels) {
    mInputChannels = aChannels;
  }
  mInputData.Push(aBuffer, aFrames, aRate, aChannels);
  mPendingData.AppendFromInterleavedBuffer(aBuffer, aFrames, aChannels,
                                           PRINCIPAL_HANDLE_NONE);

  for (auto& listener : mDataUsers) {
    listener->NotifyInputData(aGraph, aBuffer, aFrames, aRate, aChannels,
                              aAlreadyBuffered);
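
// A sketch, not part of this patch, of the one-time pre-buffering above: the
// driver already holds aAlreadyBuffered frames, so only the difference up to
// one WebAudio block (128 frames) is appended as silence.
static TrackTime PreBufferingToAppend(uint32_t aAlreadyBuffered) {
  constexpr TrackTime buffering = WEBAUDIO_BLOCK_SIZE;  // 128 frames
  return buffering - static_cast<TrackTime>(aAlreadyBuffered);
}
// e.g. PreBufferingToAppend(72) == 56 silent frames appended to mPendingData.
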
@@ -112,10 +170,16 @@ void NativeInputTrack::DeviceChanged(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThreadOrNotRunning());
  MOZ_ASSERT(aGraph == mGraph,
             "Receive device changed signal from another graph");
  mInputData.Clear();
  TRACK_GRAPH_LOG("DeviceChanged");
  for (auto& listener : mDataUsers) {
    listener->DeviceChanged(aGraph);
  }
}

#undef LOG_INTERNAL
#undef LOG
#undef TRACK_GRAPH_LOG_INTERNAL
#undef TRACK_GRAPH_LOG
#undef TRACK_GRAPH_LOGV

}  // namespace mozilla
@@ -17,7 +17,8 @@ class NativeInputTrack : public ProcessedMediaTrack {
  ~NativeInputTrack() = default;
  explicit NativeInputTrack(TrackRate aSampleRate)
      : ProcessedMediaTrack(aSampleRate, MediaSegment::AUDIO,
                            new AudioSegment()) {}
                            new AudioSegment()),
        mIsBufferingAppended(false) {}

 public:
  // Main Thread API
@@ -49,9 +50,13 @@ class NativeInputTrack : public ProcessedMediaTrack {
  nsTArray<RefPtr<AudioDataListener>> mDataUsers;

 private:
  // Indicates whether we have appended extra frames in mPendingData. The
  // extra number of frames is in the [0, WEBAUDIO_BLOCK_SIZE] range.
  bool mIsBufferingAppended;

  // Queues the audio input data coming from NotifyInputData. Used on the
  // graph thread only.
  AudioInputSamples mInputData;
  AudioSegment mPendingData;

  // Only accessed on the graph thread.
  uint32_t mInputChannels = 0;
@@ -37,286 +37,163 @@ class MockGraph : public MediaTrackGraphImpl {
  ~MockGraph() = default;
};

TEST(TestAudioInputProcessing, UnaccountedPacketizerBuffering)
// AudioInputProcessing will put extra frames as pre-buffering data to avoid
// glitches in non-pass-through mode. The main goal of the test is to check how
// many frames are left in the AudioInputProcessing's mSegment in various
// situations after input data has been processed.
TEST(TestAudioInputProcessing, Buffering)
{
  const TrackRate rate = 48000;
  const uint32_t channels = 2;
  auto graph = MakeRefPtr<NiceMock<MockGraph>>(48000, 2);
  auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
  AudioGenerator<AudioDataValue> generator(channels, rate);

  // The packetizer takes 480 frames. To trigger this we need to populate the
  // packetizer without filling it completely the first iteration, then trigger
  // the unbounded-buffering-assertion on the second iteration.

  const size_t nrFrames = 440;
  const size_t bufferSize = nrFrames * channels;
  GraphTime processedTime;
  GraphTime nextTime;
  nsTArray<AudioDataValue> buffer(bufferSize);
  buffer.AppendElements(bufferSize);
  AudioSegment segment;
  bool ended;

  aip->Start();

  {
    // First iteration.
    // 440 does not fill the packetizer but accounts for pre-silence buffering.
    // Iterations have processed 72 frames more than provided by callbacks:
    // 512 - 440 = 72
    // Thus the total amount of pre-silence buffering added is:
    // 480 + 128 - 72 = 536
    // The iteration pulls in 512 frames of silence, leaving 24 frames buffered.
    processedTime = 0;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(nrFrames);
    generator.GenerateInterleaved(buffer.Elements(), nrFrames);
    aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
                         nextTime - nrFrames);
    aip->ProcessInput(graph, nullptr);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 24U);
  }

  {
    // Second iteration.
    // 880 fills a packet of 480 frames. 400 are left in the packetizer.
    // The last iteration left 24 frames buffered, making this iteration have
    // 504 frames in the buffer while pulling 384 frames.
    // That leaves 120 frames buffered, which must be no more than the total
    // intended buffering of 480 + 128 = 608 frames.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * nrFrames);
    generator.GenerateInterleaved(buffer.Elements(), nrFrames);
    aip->NotifyInputData(graph, buffer.Elements(), nrFrames, rate, channels,
                         nextTime - (2 * nrFrames));
    aip->ProcessInput(graph, nullptr);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 120U);
  }

  graph->Destroy();
}
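
// Worked numbers for TEST(Buffering) above, restating the in-test comments
// (rate 48000 -> packet size 480, WEBAUDIO_BLOCK_SIZE = 128):
//   pre-silence      = 480 + 128 - (512 - 440)   = 536 frames in mSegment
//   first iteration  : 536 buffered - 512 pulled = 24 frames left
//   second iteration : packetizer 440 + 440 emits one 480-frame packet;
//                      24 + 480 = 504 buffered - (896 - 512) pulled = 120 left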

TEST(TestAudioInputProcessing, InputDataCapture)
{
  // This test simulates an audio-cut issue that happens when using Redmi
  // AirDots. Similar issues could happen when using other Bluetooth devices
  // like Bose QC 35 II or Sony WH-XB900N.

  const TrackRate rate = 8000;  // So the packetizer takes 80 frames
  const TrackRate rate = 8000;  // So packet size is 80
  const uint32_t channels = 1;
  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate, channels);
  auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
  AudioGenerator<AudioDataValue> generator(channels, rate);

  const size_t frames = 72;
  const size_t bufferSize = frames * channels;
  nsTArray<AudioDataValue> buffer(bufferSize);
  buffer.AppendElements(bufferSize);

  AudioGenerator<AudioDataValue> generator(channels, rate);
  GraphTime processedTime;
  GraphTime nextTime;
  AudioSegment segment;
  bool ended;
  AudioSegment output;

  aip->Start();
  // Toggle pass-through mode without starting
  {
    EXPECT_EQ(aip->PassThrough(graph), false);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    aip->SetPassThrough(graph, true);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    aip->SetPassThrough(graph, false);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);

    aip->SetPassThrough(graph, true);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
  }

  {
    // First iteration.
    // aip will fill (WEBAUDIO_BLOCK_SIZE + packetizer-size) = 128 + 80 = 208
    // silence frames at the beginning of its data storage. The iteration will
    // take (nextTime - segment-duration) = (128 - 0) = 128 frames to segment,
    // leaving 208 - 128 = 80 silence frames.
    const TrackTime bufferedFrames = 80U;
    // Need (nextTime - processedTime) = 128 - 0 = 128 frames this round.
    // aip has not been started and set to processing mode yet, so output will
    // be filled with silence data directly.
    processedTime = 0;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);

    generator.GenerateInterleaved(buffer.Elements(), frames);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels, 0);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, nullptr);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
  }

  // Set aip to processing/non-pass-through mode
  aip->SetPassThrough(graph, false);
  {
    // Second iteration.
    // We will packetize 80 frames to aip's data storage. The last round left
    // 80 frames, so we have 80 + 80 = 160 frames. The iteration will take
    // (nextTime - segment-duration) = (256 - 128) = 128 frames to segment,
    // leaving 160 - 128 = 32 frames.
    const TrackTime bufferedFrames = 32U;
    // Need (nextTime - processedTime) = 256 - 128 = 128 frames this round.
    // aip has not been started yet, so output will be filled with silence data
    // directly.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);

    generator.GenerateInterleaved(buffer.Elements(), frames);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, nullptr);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
  }

  // aip has now been started and set to processing mode, so it will insert 80
  // frames into aip's internal buffer as pre-buffering.
  aip->Start(graph);
  {
    // Third iteration.
    // Sometimes AudioCallbackDriver's buffer, whose type is
    // AudioCallbackBufferWrapper, could be unavailable, and therefore
    // ProcessInput won't be called. In this case, we should queue the audio
    // data and process it when ProcessInput can be called again.
    // Need (nextTime - processedTime) = 256 - 256 = 0 frames this round.
    // In Process(), aip will take 0 frames from input, packetize and process
    // these frames into zero 80-frame packets (0 frames left in the
    // packetizer), insert the packets into aip's internal buffer, then move 0
    // frames from the internal buffer to output, leaving 80 + 0 - 0 = 80
    // frames in aip's internal buffer.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
    // Note that processedTime is *equal* to nextTime (processedTime ==
    // nextTime) now, but that is ok since we don't call ProcessInput here.

    generator.GenerateInterleaved(buffer.Elements(), frames);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    Unused << processedTime;
    buffer.ClearAndRetainStorage();
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 80);
  }

  {
    // Fourth iteration.
    // We will packetize 80 (previous round) + 80 (this round) = 160 frames to
    // aip's data storage. 32 frames are left after the second iteration, so we
    // have 160 + 32 = 192 frames. The iteration will take (nextTime -
    // segment-duration) = (384 - 256) = 128 frames to segment, leaving 192 -
    // 128 = 64 frames.
    const TrackTime bufferedFrames = 64U;
    // Need (nextTime - processedTime) = 384 - 256 = 128 frames this round.
    // In Process(), aip will take 128 frames from input, packetize and process
    // these frames into floor(128 / 80) = 1 80-frame packet (48 frames left in
    // the packetizer), insert the packet into aip's internal buffer, then move
    // 128 frames from the internal buffer to output, leaving 80 + 80 - 128 =
    // 32 frames in aip's internal buffer.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
    generator.GenerateInterleaved(buffer.Elements(), frames);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, nullptr);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);

    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
  }

  graph->Destroy();
}
  {
    // Need (nextTime - processedTime) = 384 - 384 = 0 frames this round.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(5 * frames);

TEST(TestAudioInputProcessing, InputDataCapturePassThrough)
{
  // This test simulates an audio-cut issue that happens when using Redmi
  // AirDots. Similar issues could happen when using other Bluetooth devices
  // like Bose QC 35 II or Sony WH-XB900N.
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

  const TrackRate rate = 8000;  // So the packetizer takes 80 frames
  const uint32_t channels = 1;
  auto graph = MakeRefPtr<NiceMock<MockGraph>>(rate, channels);
  auto aip = MakeRefPtr<AudioInputProcessing>(channels, PRINCIPAL_HANDLE_NONE);
  AudioGenerator<AudioDataValue> generator(channels, rate);
    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 32);
  }

  const size_t frames = 72;
  const size_t bufferSize = frames * channels;
  nsTArray<AudioDataValue> buffer(bufferSize);
  buffer.AppendElements(bufferSize);
  {
    // Need (nextTime - processedTime) = 512 - 384 = 128 frames this round.
    // In Process(), aip will take 128 frames from input, packetize and process
    // these frames into floor((128 + 48) / 80) = 2 80-frame packets (16 frames
    // left in the packetizer), insert the packets into aip's internal buffer,
    // then move 128 frames from the internal buffer to output, leaving
    // 32 + 2 * 80 - 128 = 64 frames in aip's internal buffer.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(6 * frames);

  GraphTime processedTime;
  GraphTime nextTime;
  AudioSegment segment;
  AudioSegment source;
  bool ended;
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), nextTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 64);
  }

  aip->SetPassThrough(graph, true);
  aip->Start();

  {
    // First iteration.
    // aip will fill (WEBAUDIO_BLOCK_SIZE + frames) = 128 + 72 = 200 frames at
    // the beginning of its data storage. The iteration will take (nextTime -
    // segment-duration) = (128 - 0) = 128 frames to segment, leaving 200 - 128
    // = 72 silence frames.
    const TrackTime bufferedFrames = 72U;
    processedTime = 0;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(frames);

    generator.GenerateInterleaved(buffer.Elements(), frames);
    source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
                                       PRINCIPAL_HANDLE_NONE);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels, 0);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, &source);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
    source.Clear();
  }

  {
    // Second iteration.
    // We will feed 72 frames to aip's data storage. The last round left 72
    // frames, so we have 72 + 72 = 144 frames. The iteration will take
    // (nextTime - segment-duration) = (256 - 128) = 128 frames to segment,
    // leaving 144 - 128 = 16 frames.
    const TrackTime bufferedFrames = 16U;
    // Need (nextTime - processedTime) = 512 - 512 = 0 frames this round.
    // No buffering in pass-through mode
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(2 * frames);
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(7 * frames);

    generator.GenerateInterleaved(buffer.Elements(), frames);
    source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
                                       PRINCIPAL_HANDLE_NONE);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, &source);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
    source.Clear();
  }

  {
    // Third iteration.
    // Sometimes AudioCallbackDriver's buffer, whose type is
    // AudioCallbackBufferWrapper, could be unavailable, and therefore
    // ProcessInput won't be called. In this case, we should queue the audio
    // data and process it when ProcessInput can be called again.
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(3 * frames);
    // Note that processedTime is *equal* to nextTime (processedTime ==
    // nextTime) now, but that is ok since we don't call ProcessInput here.

    generator.GenerateInterleaved(buffer.Elements(), frames);
    source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
                                       PRINCIPAL_HANDLE_NONE);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    Unused << processedTime;
    buffer.ClearAndRetainStorage();
  }

  {
    // Fourth iteration.
    // We will feed 72 (previous round) + 72 (this round) = 144 frames to aip's
    // data storage. 16 frames are left after the second iteration, so we have
    // 144 + 16 = 160 frames. The iteration will take (nextTime -
    // segment-duration) = (384 - 256) = 128 frames to segment, leaving 160 -
    // 128 = 32 frames.
    const TrackTime bufferedFrames = 32U;
    processedTime = nextTime;
    nextTime = MediaTrackGraphImpl::RoundUpToEndOfAudioBlock(4 * frames);
    generator.GenerateInterleaved(buffer.Elements(), frames);
    source.AppendFromInterleavedBuffer(buffer.Elements(), frames, channels,
                                       PRINCIPAL_HANDLE_NONE);
    aip->NotifyInputData(graph, buffer.Elements(), frames, rate, channels,
                         0 /* ignored */);
    buffer.ClearAndRetainStorage();
    aip->ProcessInput(graph, &source);
    aip->Pull(graph, processedTime, nextTime, segment.GetDuration(), &segment,
              true, &ended);
    EXPECT_EQ(aip->NumBufferedFrames(graph), bufferedFrames);
    source.Clear();
    AudioSegment input;
    generator.Generate(input, nextTime - processedTime);

    aip->Process(graph, processedTime, nextTime, &input, &output);
    EXPECT_EQ(input.GetDuration(), nextTime - processedTime);
    EXPECT_EQ(output.GetDuration(), processedTime);
    EXPECT_EQ(aip->NumBufferedFrames(graph), 0);
  }

  aip->Stop(graph);
  graph->Destroy();
}
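
// Worked numbers for the pass-through iterations above (no packetizer, so
// input frames go straight into aip's internal segment), restating the
// in-test comments:
//   iteration 1: 128 pre-silence + 72 input - 128 pulled = 72 left
//   iteration 2: 72 + 72 - 128                           = 16 left
//   iteration 3: ProcessInput not called; 72 frames stay queued
//   iteration 4: 16 + 72 + 72 - 128                      = 32 left
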
@@ -48,15 +48,16 @@ struct StartInputProcessing : public ControlMessage {
      : ControlMessage(aTrack),
        mInputTrack(aTrack),
        mInputProcessing(aInputProcessing) {}
  void Run() override { mInputProcessing->Start(); }
  void Run() override { mInputProcessing->Start(mTrack->GraphImpl()); }
};

struct StopInputProcessing : public ControlMessage {
  const RefPtr<AudioInputProcessing> mInputProcessing;

  explicit StopInputProcessing(AudioInputProcessing* aInputProcessing)
      : ControlMessage(nullptr), mInputProcessing(aInputProcessing) {}
  void Run() override { mInputProcessing->Stop(); }
  explicit StopInputProcessing(AudioInputTrack* aTrack,
                               AudioInputProcessing* aInputProcessing)
      : ControlMessage(aTrack), mInputProcessing(aInputProcessing) {}
  void Run() override { mInputProcessing->Stop(mTrack->GraphImpl()); }
};

struct SetPassThrough : public ControlMessage {
@@ -279,7 +280,7 @@ TEST(TestAudioTrackGraph, ErrorCallback)
  // Clean up.
  DispatchFunction([&] {
    inputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(listener));
        MakeUnique<StopInputProcessing>(inputTrack, listener));
    inputTrack->CloseAudioInput();
    inputTrack->Destroy();
  });
@@ -348,7 +349,7 @@ TEST(TestAudioTrackGraph, AudioInputTrack)
    outputTrack->Destroy();
    port->Destroy();
    inputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(listener));
        MakeUnique<StopInputProcessing>(inputTrack, listener));
    inputTrack->CloseAudioInput();
    inputTrack->Destroy();
  });
@@ -363,7 +364,7 @@ TEST(TestAudioTrackGraph, AudioInputTrack)

  EXPECT_EQ(estimatedFreq, inputFrequency);
  std::cerr << "PreSilence: " << preSilenceSamples << std::endl;
  // We buffer 128 frames in passthrough mode. See AudioInputProcessing::Pull.
  // We buffer 128 frames. See DeviceInputTrack::ProcessInput.
  EXPECT_GE(preSilenceSamples, 128U);
  // If the fallback system clock driver is doing a graph iteration before the
  // first audio driver iteration comes in, that iteration is ignored and
@@ -485,7 +486,7 @@ TEST(TestAudioTrackGraph, ReOpenAudioInput)
    outputTrack->Destroy();
    port->Destroy();
    inputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(listener));
        MakeUnique<StopInputProcessing>(inputTrack, listener));
    inputTrack->CloseAudioInput();
    inputTrack->Destroy();
  });
@@ -501,7 +502,8 @@ TEST(TestAudioTrackGraph, ReOpenAudioInput)
  EXPECT_EQ(estimatedFreq, inputFrequency);
  std::cerr << "PreSilence: " << preSilenceSamples << std::endl;
  // We buffer 10ms worth of frames in non-passthrough mode, plus up to 128
  // frames as we round up to the nearest block. See AudioInputProcessing::Pull.
  // frames as we round up to the nearest block. See
  // AudioInputProcessing::Process and DeviceInputTrack::ProcessInput.
  EXPECT_GE(preSilenceSamples, 128U + inputRate / 100);
  // If the fallback system clock driver is doing a graph iteration before the
  // first audio driver iteration comes in, that iteration is ignored and
@@ -605,7 +607,7 @@ TEST(TestAudioTrackGraph, AudioInputTrackDisabling)
    outputTrack->Destroy();
    port->Destroy();
    inputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(listener));
        MakeUnique<StopInputProcessing>(inputTrack, listener));
    inputTrack->CloseAudioInput();
    inputTrack->Destroy();
  });
@@ -686,7 +688,7 @@ struct AudioTrackSet {
    mOutputTrack->Destroy();
    mPort->Destroy();
    mInputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(mListener));
        MakeUnique<StopInputProcessing>(mInputTrack, mListener));
    mInputTrack->CloseAudioInput();
    mInputTrack->Destroy();
@@ -1028,7 +1030,7 @@ void TestCrossGraphPort(uint32_t aInputRate, uint32_t aOutputRate,
    transmitter->Destroy();
    port->Destroy();
    inputTrack->GraphImpl()->AppendMessage(
        MakeUnique<StopInputProcessing>(listener));
        MakeUnique<StopInputProcessing>(inputTrack, listener));
    inputTrack->CloseAudioInput();
    inputTrack->Destroy();
  });
@@ -388,18 +388,19 @@ class StartStopMessage : public ControlMessage {
 public:
  enum StartStop { Start, Stop };

  StartStopMessage(AudioInputProcessing* aInputProcessing, StartStop aAction)
      : ControlMessage(nullptr),
  StartStopMessage(MediaTrack* aTrack, AudioInputProcessing* aInputProcessing,
                   StartStop aAction)
      : ControlMessage(aTrack),
        mInputProcessing(aInputProcessing),
        mAction(aAction) {}

  void Run() override {
    if (mAction == StartStopMessage::Start) {
      TRACE("InputProcessing::Start")
      mInputProcessing->Start();
      mInputProcessing->Start(mTrack->GraphImpl());
    } else if (mAction == StartStopMessage::Stop) {
      TRACE("InputProcessing::Stop")
      mInputProcessing->Stop();
      mInputProcessing->Stop(mTrack->GraphImpl());
    } else {
      MOZ_CRASH("Invalid enum value");
    }
@@ -439,7 +440,7 @@ nsresult MediaEngineWebRTCMicrophoneSource::Start() {
        }

        track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>(
            inputProcessing, StartStopMessage::Start));
            track, inputProcessing, StartStopMessage::Start));
        track->OpenAudioInput(deviceID, inputProcessing);
      }));
@@ -470,7 +471,7 @@ nsresult MediaEngineWebRTCMicrophoneSource::Stop() {
        }

        track->GraphImpl()->AppendMessage(MakeUnique<StartStopMessage>(
            inputProcessing, StartStopMessage::Stop));
            track, inputProcessing, StartStopMessage::Stop));
        MOZ_ASSERT(track->DeviceId().value() == deviceInfo->DeviceID());
        track->CloseAudioInput();
      }));
@@ -493,7 +494,6 @@ AudioInputProcessing::AudioInputProcessing(
      mRequestedInputChannelCount(aMaxChannelCount),
      mSkipProcessing(false),
      mInputDownmixBuffer(MAX_SAMPLING_FREQ * MAX_CHANNELS / 100),
      mLiveBufferingAppended(Nothing()),
      mPrincipal(aPrincipalHandle),
      mEnabled(false),
      mEnded(false),
@@ -513,22 +513,24 @@ void AudioInputProcessing::SetPassThrough(MediaTrackGraphImpl* aGraph,
                                          bool aPassThrough) {
  MOZ_ASSERT(aGraph->OnGraphThread());

  if (!mSkipProcessing && aPassThrough) {
    // Reset AudioProcessing so that if we resume processing in the future it
    // doesn't depend on old state.
    mAudioProcessing->Initialize();

    if (mPacketizerInput) {
      MOZ_ASSERT(mPacketizerInput->PacketsAvailable() == 0);
      LOG_FRAME(
          "AudioInputProcessing %p Appending %u frames of null data for data "
          "discarded in the packetizer",
          this, mPacketizerInput->FramesAvailable());
      mSegment.AppendNullData(mPacketizerInput->FramesAvailable());
      mPacketizerInput->Clear();
    }
  if (aPassThrough == mSkipProcessing) {
    return;
  }
  mSkipProcessing = aPassThrough;

  if (!mEnabled) {
    MOZ_ASSERT(!mPacketizerInput);
    return;
  }

  if (aPassThrough) {
    // Turn on pass-through
    ResetAudioProcessing(aGraph);
  } else {
    // Turn off pass-through
    MOZ_ASSERT(!mPacketizerInput);
    EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);
  }
}

uint32_t AudioInputProcessing::GetRequestedInputChannelCount() {
@@ -542,104 +544,222 @@ void AudioInputProcessing::SetRequestedInputChannelCount(
  aGraph->ReevaluateInputDevice();
}

void AudioInputProcessing::Start() {
  mEnabled = true;
  mLiveBufferingAppended = Nothing();
}

void AudioInputProcessing::Stop() { mEnabled = false; }

void AudioInputProcessing::Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom,
                                GraphTime aTo, GraphTime aTrackEnd,
                                AudioSegment* aSegment,
                                bool aLastPullThisIteration, bool* aEnded) {
void AudioInputProcessing::Start(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThread());

  if (mEnded) {
    *aEnded = true;
  if (mEnabled) {
    return;
  }
  mEnabled = true;

  if (mSkipProcessing) {
    return;
  }

  TrackTime delta = aTo - aTrackEnd;
  MOZ_ASSERT(delta >= 0, "We shouldn't append more than requested");
  TrackTime buffering = 0;
  MOZ_ASSERT(!mPacketizerInput);
  EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);
}

  // Add the amount of buffering required to not underrun and glitch.
void AudioInputProcessing::Stop(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThread());

  // Make sure there's at least one extra block buffered until audio callbacks
  // come in, since we round graph iteration durations up to the nearest block.
  buffering += WEBAUDIO_BLOCK_SIZE;

  if (!PassThrough(aGraph) && mPacketizerInput) {
    // Processing is active and is processed in chunks of 10ms through the
    // input packetizer. We allow for 10ms of silence on the track to
    // accommodate the buffering worst-case.
    buffering += mPacketizerInput->mPacketSize;
  }

  if (delta <= 0) {
  if (!mEnabled) {
    return;
  }

  if (MOZ_LIKELY(mLiveBufferingAppended)) {
    if (MOZ_UNLIKELY(buffering > *mLiveBufferingAppended)) {
      // We need to buffer more data. This could happen the first time we pull
      // input data, or the first iteration after starting to use the
      // packetizer.
      TrackTime silence = buffering - *mLiveBufferingAppended;
      LOG_FRAME("AudioInputProcessing %p Inserting %" PRId64
                " frames of silence due to buffer increase",
                this, silence);
      mSegment.InsertNullDataAtStart(silence);
      mLiveBufferingAppended = Some(buffering);
    } else if (MOZ_UNLIKELY(buffering < *mLiveBufferingAppended)) {
      // We need to clear some buffered data to reduce latency now that the
      // packetizer is no longer used.
      MOZ_ASSERT(PassThrough(aGraph), "Must have turned on passthrough");
      TrackTime removal = *mLiveBufferingAppended - buffering;
      MOZ_ASSERT(mSegment.GetDuration() >= removal);
      TrackTime frames = std::min(mSegment.GetDuration(), removal);
      LOG_FRAME("AudioInputProcessing %p Removing %" PRId64
                " frames of silence due to buffer decrease",
                this, frames);
      *mLiveBufferingAppended -= frames;
      mSegment.RemoveLeading(frames);
    }
  }
  mEnabled = false;

  if (mSegment.GetDuration() > 0) {
    MOZ_ASSERT(buffering == *mLiveBufferingAppended);
    TrackTime frames = std::min(mSegment.GetDuration(), delta);
    LOG_FRAME("AudioInputProcessing %p Appending %" PRId64
              " frames of real data for %u channels.",
              this, frames, mRequestedInputChannelCount);
    aSegment->AppendSlice(mSegment, 0, frames);
    mSegment.RemoveLeading(frames);
    delta -= frames;

    // Assert that the amount of data buffered doesn't grow unboundedly.
    MOZ_ASSERT_IF(aLastPullThisIteration, mSegment.GetDuration() <= buffering);
  }

  if (delta <= 0) {
    if (mSegment.GetDuration() == 0) {
      mLiveBufferingAppended = Some(-delta);
    }
  if (mSkipProcessing) {
    return;
  }

  LOG_FRAME("AudioInputProcessing %p Pulling %" PRId64
            " frames of silence for %u channels.",
            this, delta, mRequestedInputChannelCount);
  // Packetizer is active and we were just stopped. Stop the packetizer and
  // processing.
  ResetAudioProcessing(aGraph);
}

  // This assertion fails if we append silence here after having appended live
  // frames. Before appending live frames we should add sufficient buffering to
  // not have to glitch (aka append silence). Failing this meant the buffering
  // was not sufficient.
  MOZ_ASSERT_IF(mEnabled, !mLiveBufferingAppended);
  mLiveBufferingAppended = Nothing();
// The following is how Process() works in pass-through and non-pass-through
// mode. In both modes, Process() outputs the same number of frames as its
// input data.
//
// I. In non-pass-through mode:
//
// We use webrtc::AudioProcessing to process the input audio data in this
// mode. The data fed to webrtc::AudioProcessing needs to be 10ms chunks,
// while the input data passed to Process() does not necessarily have a
// length that is a multiple of a 10ms chunk. To divide the input data into
// 10ms chunks, mPacketizerInput is introduced.
//
// We add one 10ms chunk of silence into the internal buffer before Process()
// runs. Those extra frames are called pre-buffering. It aims to avoid the
// glitches we may have when producing data in mPacketizerInput. Without
// pre-buffering, when the input data length is not a multiple of 10ms, we
// could end up not having enough data for the output needs, since
// mPacketizerInput would keep some input data, namely the remainder of the
// 10ms-chunk length. To force processing the data left in mPacketizerInput,
// we would need to add some extra frames to make mPacketizerInput produce a
// 10ms chunk. For example, if the sample rate is 44100 Hz, then the packet
// size is 441 frames. When we only have 384 input frames, we would need to
// put an additional 57 frames into mPacketizerInput to produce a packet.
// However, those extra 57 frames result in a glitch sound.
//
// By adding one 10ms chunk of silence in advance to the internal buffer, we
// won't need to add extra frames between the input data no matter what
// length it is. The only drawback is that the input data won't be processed
// and sent to the output immediately. Process() will consume the
// pre-buffering data for its output first. The below describes how it works:
//
//
//                          Process()
//               +-----------------------------+
//   input D(N)  |  +--------+    +--------+   |  output D(N)
// --------------|->|  P(N)  |--->|  S(N)  |---|-------------->
//               |  +--------+    +--------+   |
//               |  packetizer    mSegment     |
//               +-----------------------------+
//               <------ internal buffer ------>
//
//
// D(N): number of frames from the input and the output needs in round N
// Z:    number of frames of a 10ms chunk (packet) in mPacketizerInput, Z >= 1
//       (if Z = 1, the packetizer has no effect)
// P(N): number of frames left in mPacketizerInput after round N. Once the
//       frames in the packetizer >= Z, the packetizer will produce a packet
//       to mSegment, so P(N) = (P(N-1) + D(N)) % Z, 0 <= P(N) <= Z-1
// S(N): number of frames left in mSegment after round N. The input D(N)
//       frames will be passed to mPacketizerInput first, and then
//       mPacketizerInput may append some packets to mSegment, so
//       S(N) = S(N-1) + Z * floor((P(N-1) + D(N)) / Z) - D(N)
//
// Initially, we set P(0) = 0, S(0) = X, where X >= Z-1. X is the
// pre-buffering put in the internal buffer. With these settings,
// P(K) + S(K) = X always holds.
//
// Intuitively, this seems true: we put X frames in the internal buffer at
// first. If the data won't be blocked in the packetizer, then after
// Process() the internal buffer should still hold X frames, since the number
// of frames coming from the input is the same as what the output needs. The
// key to having enough data for the output needs, while the input data piles
// up in the packetizer, is to put in at least Z-1 frames as pre-buffering,
// since the maximum number of frames stuck in the packetizer before it can
// emit a packet is packet-size - 1. Otherwise, we don't have enough data for
// the output if the new input data plus the data left in the packetizer
// produce a smaller-than-10ms chunk, which will be left in the packetizer.
// Thus we must have some pre-buffering frames in mSegment to make up the
// length of the leftover chunk we need for output. This can also be shown by
// induction:
// (1) This holds when K = 0
// (2) Assume this holds when K = N: so P(N) + S(N) = X
//     => P(N) + S(N) = X >= Z-1 => S(N) >= Z-1-P(N)
// (3) When K = N+1, D(N+1) input frames come in
//     a. if P(N) + D(N+1) < Z, then the packetizer doesn't have enough data
//        for one packet. No data is produced by the packetizer, so mSegment
//        still has S(N) >= Z-1-P(N) frames. The output needs
//        D(N+1) < Z-P(N) frames, so it needs at most Z-P(N)-1 frames, and
//        mSegment has enough frames for the output. Then,
//        P(N+1) = P(N) + D(N+1) and S(N+1) = S(N) - D(N+1)
//        => P(N+1) + S(N+1) = P(N) + S(N) = X
//     b. if P(N) + D(N+1) = Z, then the packetizer will produce one packet
//        for mSegment, so mSegment now has S(N) + Z frames. The output needs
//        D(N+1) = Z-P(N) frames. mSegment has at least
//        Z-1-P(N) + Z >= Z-P(N) frames, since Z >= 1, so mSegment has enough
//        frames for the output. Then, P(N+1) = 0 and
//        S(N+1) = S(N) + Z - D(N+1) = S(N) + P(N)
//        => P(N+1) + S(N+1) = P(N) + S(N) = X
//     c. if P(N) + D(N+1) > Z, let P(N) + D(N+1) = q * Z + r, where q >= 1
//        and 0 <= r <= Z-1. Then the packetizer can produce q packets for
//        mSegment. The output needs D(N+1) = q * Z - P(N) + r frames, and
//        mSegment has S(N) + q * Z >= q * Z - P(N) + Z - 1
//        >= q * Z - P(N) + r, since r <= Z-1, so mSegment has enough frames
//        for the output. Then, P(N+1) = r and
//        S(N+1) = S(N) + q * Z - D(N+1)
//        => P(N+1) + S(N+1) = S(N) + (q * Z + r - D(N+1)) = S(N) + P(N) = X
// => P(K) + S(K) = X always holds
//
// Since P(K) + S(K) = X and P(K) is in the [0, Z-1] range, S(K) is in the
// [X-Z+1, X] range. In our implementation, X is set to Z, so S(K) is in
// [1, Z]. By the above workflow, we always have enough data for the output
// and no extra frames are put into the packetizer. It means we don't have
// any glitch!
//
// II. In pass-through mode:
//
//              Process()
//             +---------+
// input D(N)  |         |  output D(N)
// ------------|-------->|-------------->
//             |         |
//             +---------+
//
// The D(N) frames of data are just forwarded from input to output without
// any processing.
void AudioInputProcessing::Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom,
                                   GraphTime aTo, AudioSegment* aInput,
                                   AudioSegment* aOutput) {
  MOZ_ASSERT(aGraph->OnGraphThread());
  MOZ_ASSERT(aFrom <= aTo);
  MOZ_ASSERT(!mEnded);

  aSegment->AppendNullData(delta);
  TrackTime need = aTo - aFrom;
  if (need == 0) {
    return;
  }

  if (!mEnabled) {
    LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Filling %" PRId64
              " frames of silence to output (disabled)",
              aGraph, aGraph->CurrentDriver(), this, need);
    aOutput->AppendNullData(need);
    return;
  }

  MOZ_ASSERT(aInput->GetDuration() == need,
             "Wrong data length from input port source");

  if (PassThrough(aGraph)) {
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p Forwarding %" PRId64
        " frames of input data to output directly (PassThrough)",
        aGraph, aGraph->CurrentDriver(), this, aInput->GetDuration());
    aOutput->AppendSegment(aInput, mPrincipal);
    return;
  }

  // SetPassThrough(false) must be called before reaching here.
  MOZ_ASSERT(mPacketizerInput);
  // If mRequestedInputChannelCount is updated, create a new packetizer. No
  // need to change the pre-buffering since the rate is always the same. The
  // frames left in the packetizer would be replaced by null data and then
  // transferred to mSegment.
  EnsureAudioProcessing(aGraph, mRequestedInputChannelCount);

  // Preconditions of the audio-processing logic.
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  // We pre-buffer mPacketSize frames, but the maximum number of frames stuck
  // in the packetizer before it can emit a packet is mPacketSize-1. Thus that
  // remaining 1 frame will always be present in mSegment.
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);

  PacketizeAndProcess(aGraph, *aInput);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p Buffer has %" PRId64
            " frames of data now, after packetizing and processing",
            aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration());

  // By setting pre-buffering to the number of frames of one packet, and
  // because the maximum number of frames stuck in the packetizer before
  // it can emit a packet is mPacketSize-1, we always have at least
  // one more frame than the output needs.
  MOZ_ASSERT(mSegment.GetDuration() > need);
  aOutput->AppendSlice(mSegment, 0, need);
  mSegment.RemoveLeading(need);
  LOG_FRAME("(Graph %p, Driver %p) AudioInputProcessing %p moving %" PRId64
            " frames of data to output, leaving %" PRId64 " frames in buffer",
            aGraph, aGraph->CurrentDriver(), this, need,
            mSegment.GetDuration());

  // Postconditions of the audio-processing logic.
  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);
  MOZ_ASSERT(mSegment.GetDuration() >= 1);
  MOZ_ASSERT(mSegment.GetDuration() <= mPacketizerInput->mPacketSize);
}
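
// A standalone sketch, not part of this patch, that numerically checks the
// invariant P(K) + S(K) = X derived in the comment above Process(): with
// pre-buffering X = Z, any sequence of input sizes keeps the packetizer
// remainder P plus the segment level S constant, so the output request can
// always be satisfied. Compile it separately with any C++11 compiler.
//
//   #include <cassert>
//   #include <cstdint>
//
//   int main() {
//     const int64_t Z = 441;  // 10ms packet at 44100 Hz
//     int64_t P = 0;          // frames stuck in the packetizer
//     int64_t S = Z;          // pre-buffered frames, X = Z
//     const int64_t inputs[] = {384, 512, 441, 1, 1000};  // arbitrary D(N)
//     for (int64_t D : inputs) {
//       int64_t packets = (P + D) / Z;  // whole packets emitted this round
//       P = (P + D) % Z;
//       S += packets * Z - D;      // packets flow in, D frames flow out
//       assert(S >= 1 && S <= Z);  // output always satisfiable
//       assert(P + S == Z);        // the invariant
//     }
//     return 0;
//   }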

void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
@@ -653,12 +773,13 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,
    return;
  }

  if (!mPacketizerOutput || mPacketizerOutput->mPacketSize != aRate / 100u ||
  if (!mPacketizerOutput ||
      mPacketizerOutput->mPacketSize != GetPacketSize(aRate) ||
      mPacketizerOutput->mChannels != aChannels) {
    // It's ok to drop the audio still in the packetizer here: if this changes,
    // we changed devices or something.
    mPacketizerOutput = Nothing();
    mPacketizerOutput.emplace(aRate / 100, aChannels);
    mPacketizerOutput.emplace(GetPacketSize(aRate), aChannels);
  }

  mPacketizerOutput->Input(aBuffer, aFrames);
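
// GetPacketSize is not shown in this excerpt. Since it replaces the literal
// `aRate / 100u` above, it presumably returns the number of frames in a 10ms
// packet; a sketch of what such a helper could look like:
static uint32_t GetPacketSize(TrackRate aRate) {
  return static_cast<uint32_t>(aRate) / 100;  // 10ms worth of frames
}
// e.g. GetPacketSize(48000) == 480, GetPacketSize(8000) == 80.
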
@@ -736,27 +857,35 @@ void AudioInputProcessing::NotifyOutputData(MediaTrackGraphImpl* aGraph,

// Only called if we're not in passthrough mode
void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
                                               const AudioDataValue* aBuffer,
                                               size_t aFrames, TrackRate aRate,
                                               uint32_t aChannels) {
                                               const AudioSegment& aSegment) {
  MOZ_ASSERT(!PassThrough(aGraph),
             "This should be bypassed when in PassThrough mode.");
  MOZ_ASSERT(mEnabled);
  size_t offset = 0;
  MOZ_ASSERT(mPacketizerInput);
  MOZ_ASSERT(mPacketizerInput->mPacketSize ==
             GetPacketSize(aGraph->GraphRate()));

  if (!mPacketizerInput || mPacketizerInput->mPacketSize != aRate / 100u ||
      mPacketizerInput->mChannels != aChannels) {
    // It's ok to drop the audio still in the packetizer here.
    mPacketizerInput = Nothing();
    mPacketizerInput.emplace(aRate / 100, aChannels);
  }

  LOG_FRAME("AudioInputProcessing %p Appending %zu frames to packetizer", this,
            aFrames);
  // WriteToInterleavedBuffer will do upmixing or downmixing if the channel
  // count in aSegment's chunks is different from mPacketizerInput->mChannels.
  // WriteToInterleavedBuffer could be avoided once Bug 1729041 is done.
  size_t sampleCount = aSegment.WriteToInterleavedBuffer(
      mInterleavedBuffer, mPacketizerInput->mChannels);
  size_t frameCount =
      sampleCount / static_cast<size_t>(mPacketizerInput->mChannels);

  // Packetize our input data into 10ms chunks, deinterleave into planar
  // channel buffers, process, and append to the right MediaStreamTrack.
  mPacketizerInput->Input(aBuffer, static_cast<uint32_t>(aFrames));
  mPacketizerInput->Input(mInterleavedBuffer.Elements(),
                          static_cast<uint32_t>(frameCount));

  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Packetizing %zu frames. "
      "Packetizer has %u frames (enough for %u packets) now",
      aGraph, aGraph->CurrentDriver(), this, frameCount,
      mPacketizerInput->FramesAvailable(),
      mPacketizerInput->PacketsAvailable());

  size_t offset = 0;

  while (mPacketizerInput->PacketsAvailable()) {
    mPacketCount++;
@@ -771,15 +900,15 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
    float* packet = mInputBuffer.Data();
    mPacketizerInput->Output(packet);

    // Downmix from aChannels to mono if needed. We always have floats
    // here, the packetizer performed the conversion. This handles sound cards
    // with multiple physical jacks exposed as a single device with _n_
    // discrete channels, where only a single mic is plugged in. Those channels
    // are not correlated temporally since they are discrete channels; mixing
    // is just a sum.
    // Downmix from mPacketizerInput->mChannels to mono if needed. We always
    // have floats here, the packetizer performed the conversion. This handles
    // sound cards with multiple physical jacks exposed as a single device with
    // _n_ discrete channels, where only a single mic is plugged in. Those
    // channels are not correlated temporally since they are discrete channels;
    // mixing is just a sum.
    AutoTArray<float*, 8> deinterleavedPacketizedInputDataChannelPointers;
    uint32_t channelCountInput = 0;
    if (aChannels > MAX_CHANNELS) {
    if (mPacketizerInput->mChannels > MAX_CHANNELS) {
      channelCountInput = MONO;
      deinterleavedPacketizedInputDataChannelPointers.SetLength(
          channelCountInput);
@@ -790,12 +919,12 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
      size_t readIndex = 0;
      for (size_t i = 0; i < mPacketizerInput->mPacketSize; i++) {
        mDeinterleavedBuffer.Data()[i] = 0.;
        for (size_t j = 0; j < aChannels; j++) {
        for (size_t j = 0; j < mPacketizerInput->mChannels; j++) {
          mDeinterleavedBuffer.Data()[i] += packet[readIndex++];
        }
      }
    } else {
      channelCountInput = aChannels;
      channelCountInput = mPacketizerInput->mChannels;
      // Deinterleave the input data
      // Prepare an array pointing to deinterleaved channels.
      deinterleavedPacketizedInputDataChannelPointers.SetLength(

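The comment above describes downmixing uncorrelated discrete channels by plain summation. A self-contained sketch of the same inner loop outside of Gecko (function and variable names are hypothetical, mono output only):

#include <cstddef>
#include <vector>

// Sum an interleaved N-channel packet down to mono, mirroring the
// readIndex loop above: no scaling, mixing is just a sum.
std::vector<float> DownmixToMono(const float* interleaved, size_t frames,
                                 size_t channels) {
  std::vector<float> mono(frames, 0.0f);
  size_t readIndex = 0;
  for (size_t i = 0; i < frames; ++i) {
    for (size_t j = 0; j < channels; ++j) {
      mono[i] += interleaved[readIndex++];
    }
  }
  return mono;
}
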
@@ -812,7 +941,7 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
          deinterleavedPacketizedInputDataChannelPointers.Elements());
    }

    StreamConfig inputConfig(aRate, channelCountInput,
    StreamConfig inputConfig(aGraph->GraphRate(), channelCountInput,
                             false /* we don't use typing detection */);
    StreamConfig outputConfig = inputConfig;

@@ -873,8 +1002,11 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
      continue;
    }

    LOG_FRAME("AudioInputProcessing %p Appending %u frames of packetized audio",
              this, mPacketizerInput->mPacketSize);
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p Appending %u frames of "
        "packetized audio, leaving %u frames in packetizer",
        aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize,
        mPacketizerInput->FramesAvailable());

    // We already have planar audio data of the right format. Insert into the
    // MTG.

@@ -886,54 +1018,10 @@ void AudioInputProcessing::PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
  }
}

void AudioInputProcessing::ProcessInput(MediaTrackGraphImpl* aGraph,
                                        const AudioSegment* aSegment) {
  MOZ_ASSERT(aGraph);
  MOZ_ASSERT(aGraph->OnGraphThread());

  if (mEnded || !mEnabled || !mLiveBufferingAppended ||
      mPendingData.IsEmpty()) {
    return;
  }

  // The number of NotifyInputData and ProcessInput calls could be different.
  // We always process the input data from NotifyInputData in the first
  // ProcessInput after the NotifyInputData.

  // If some processing is necessary, packetize and insert in the WebRTC.org
  // code. Otherwise, directly insert the mic data in the MTG, bypassing all
  // processing.
  if (PassThrough(aGraph)) {
    if (aSegment && !aSegment->IsEmpty()) {
      mSegment.AppendSegment(aSegment, mPrincipal);
    } else {
      mSegment.AppendFromInterleavedBuffer(mPendingData.Data(),
                                           mPendingData.FrameCount(),
                                           mPendingData.Channels(), mPrincipal);
    }
  } else {
    MOZ_ASSERT(aGraph->GraphRate() == mPendingData.Rate());
    // Bug 1729041: Feed aSegment to PacketizeAndProcess so mPendingData can be
    // removed, and save a copy.
    PacketizeAndProcess(aGraph, mPendingData.Data(), mPendingData.FrameCount(),
                        mPendingData.Rate(), mPendingData.Channels());
  }

  mPendingData.Clear();
}

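ProcessInput either hands the raw segment straight to the output (pass-through) or routes it into the packetized WebRTC processing path. A rough stand-alone illustration of that branch, with std::vector standing in for AudioSegment and both helpers hypothetical:

#include <vector>

using Segment = std::vector<float>;  // stand-in for AudioSegment

// Route input either directly to the output or into a processing queue,
// mirroring the PassThrough branch above (illustrative only).
void RouteInput(bool passThrough, const Segment& input, Segment& output,
                std::vector<Segment>& toPacketize) {
  if (passThrough) {
    output.insert(output.end(), input.begin(), input.end());  // append as-is
  } else {
    toPacketize.push_back(input);  // later packetized into 10ms chunks
  }
}
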
void AudioInputProcessing::NotifyInputStopped(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThread());
  // This is called when an AudioCallbackDriver switch has happened for any
  // reason, including other reasons than starting this audio input stream. We
  // reset state when this happens, as a fallback driver may have fiddled with
  // the amount of buffered silence during the switch.
  mLiveBufferingAppended = Nothing();
  mSegment.Clear();
  if (mPacketizerInput) {
    mPacketizerInput->Clear();
  }
  mPendingData.Clear();
  // reason, including other reasons than starting this audio input stream.
}

// Called back on GraphDriver thread!

@@ -944,17 +1032,9 @@ void AudioInputProcessing::NotifyInputData(MediaTrackGraphImpl* aGraph,
                                           uint32_t aChannels,
                                           uint32_t aAlreadyBuffered) {
  MOZ_ASSERT(aGraph->OnGraphThread());
  TRACE("AudioInputProcessing::NotifyInputData");

  MOZ_ASSERT(aGraph->GraphRate() == aRate);
  MOZ_ASSERT(mEnabled);

  if (!mLiveBufferingAppended) {
    // First time we see live frames getting added. Use what's already buffered
    // in the driver's scratch buffer as a starting point.
    mLiveBufferingAppended = Some(aAlreadyBuffered);
  }

  mPendingData.Push(aBuffer, aFrames, aRate, aChannels);
  TRACE("AudioInputProcessing::NotifyInputData");
}

void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) {

@@ -962,6 +1042,10 @@ void AudioInputProcessing::DeviceChanged(MediaTrackGraphImpl* aGraph) {

  // Reset some processing
  mAudioProcessing->Initialize();
  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Reinitializing audio "
      "processing",
      aGraph, aGraph->CurrentDriver(), this);
}

void AudioInputProcessing::ApplyConfig(MediaTrackGraphImpl* aGraph,

@@ -973,7 +1057,6 @@ void AudioInputProcessing::ApplyConfig(MediaTrackGraphImpl* aGraph,
void AudioInputProcessing::End() {
  mEnded = true;
  mSegment.Clear();
  mPendingData.Clear();
}

TrackTime AudioInputProcessing::NumBufferedFrames(

@@ -982,6 +1065,74 @@ TrackTime AudioInputProcessing::NumBufferedFrames(
  return mSegment.GetDuration();
}

void AudioInputProcessing::EnsureAudioProcessing(MediaTrackGraphImpl* aGraph,
                                                 uint32_t aChannels) {
  MOZ_ASSERT(aGraph->OnGraphThread());
  MOZ_ASSERT(aChannels > 0);
  MOZ_ASSERT(mEnabled);
  MOZ_ASSERT(!mSkipProcessing);

  if (mPacketizerInput && mPacketizerInput->mChannels == aChannels) {
    return;
  }

  // If mPacketizerInput exists but with a different channel count, there is no
  // need to change the pre-buffering: the packet size is the same as the old
  // one, because the rate is constant.
  MOZ_ASSERT_IF(mPacketizerInput, mPacketizerInput->mPacketSize ==
                                      GetPacketSize(aGraph->GraphRate()));
  bool needPreBuffering = !mPacketizerInput;
  if (mPacketizerInput) {
    const TrackTime numBufferedFrames =
        static_cast<TrackTime>(mPacketizerInput->FramesAvailable());
    mSegment.AppendNullData(numBufferedFrames);
    mPacketizerInput = Nothing();
  }

  mPacketizerInput.emplace(GetPacketSize(aGraph->GraphRate()), aChannels);

  if (needPreBuffering) {
    LOG_FRAME(
        "(Graph %p, Driver %p) AudioInputProcessing %p: Adding %u frames of "
        "silence as pre-buffering",
        aGraph, aGraph->CurrentDriver(), this, mPacketizerInput->mPacketSize);

    AudioSegment buffering;
    buffering.AppendNullData(
        static_cast<TrackTime>(mPacketizerInput->mPacketSize));
    PacketizeAndProcess(aGraph, buffering);
  }
}

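When the packetizer is created for the first time, one packet of silence is processed up front so the consumer never underruns while the packetizer holds back a partial packet. A simplified sketch of that pre-buffering decision (plain C++, all types and names illustrative):

#include <cstdint>
#include <vector>

struct PacketizerSketch {
  uint32_t packetSize;         // frames per 10ms packet, rate / 100
  std::vector<float> pending;  // frames queued but not yet emitted
};

// On first creation, push one packet of silence through the pipeline so a
// full packet is buffered ahead of the first real pull.
void PreBufferIfNeeded(bool firstCreation, PacketizerSketch& p) {
  if (!firstCreation) {
    return;  // a channel-count change keeps the same packet size
  }
  p.pending.assign(p.packetSize, 0.0f);  // one packet of silence
}
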
void AudioInputProcessing::ResetAudioProcessing(MediaTrackGraphImpl* aGraph) {
  MOZ_ASSERT(aGraph->OnGraphThread());
  MOZ_ASSERT(mSkipProcessing || !mEnabled);
  MOZ_ASSERT(mPacketizerInput);

  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Resetting audio "
      "processing",
      aGraph, aGraph->CurrentDriver(), this);

  // Reset AudioProcessing so that if we resume processing in the future it
  // doesn't depend on old state.
  mAudioProcessing->Initialize();

  MOZ_ASSERT(static_cast<uint32_t>(mSegment.GetDuration()) +
                 mPacketizerInput->FramesAvailable() ==
             mPacketizerInput->mPacketSize);

  // It's ok to clear all the internal buffer here since we won't use mSegment
  // in pass-through mode or when audio processing is disabled.
  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputProcessing %p Emptying out %" PRId64
      " frames of data",
      aGraph, aGraph->CurrentDriver(), this, mSegment.GetDuration());
  mSegment.Clear();

  mPacketizerInput = Nothing();
}

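The assertion above encodes the pre-buffering invariant: the frames still buffered in mSegment plus the partial packet held by the packetizer always total exactly one packet. In numbers (values here are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t packetSize = 480;    // 10ms at 48kHz
  uint32_t framesInPacketizer = 128;  // partial packet not yet emitted
  uint32_t framesInSegment = packetSize - framesInPacketizer;  // 352 buffered
  assert(framesInSegment + framesInPacketizer == packetSize);
  return 0;
}
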
void AudioInputTrack::Destroy() {
  MOZ_ASSERT(NS_IsMainThread());
  CloseAudioInput();

@@ -1031,38 +1182,101 @@ void AudioInputTrack::DestroyImpl() {
void AudioInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
                                   uint32_t aFlags) {
  TRACE_COMMENT("AudioInputTrack::ProcessInput", "AudioInputTrack %p", this);
  MOZ_ASSERT(mInputProcessing);

  // Check if there is a connected NativeInputTrack
  NativeInputTrack* source = nullptr;
  if (!mInputs.IsEmpty()) {
    for (const MediaInputPort* input : mInputs) {
      MOZ_ASSERT(input->GetSource());
      if (input->GetSource()->AsNativeInputTrack()) {
        source = input->GetSource()->AsNativeInputTrack();
        break;
      }
  LOG_FRAME(
      "(Graph %p, Driver %p) AudioInputTrack %p ProcessInput from %" PRId64
      " to %" PRId64 ", needs %" PRId64 " frames",
      mGraph, mGraph->CurrentDriver(), this, aFrom, aTo, aTo - aFrom);

  if (aFrom >= aTo) {
    return;
  }

  if (!mInputProcessing->IsEnded()) {
    MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aFrom);
    if (mInputs.IsEmpty()) {
      GetData<AudioSegment>()->AppendNullData(aTo - aFrom);
      LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Filling %" PRId64
                " frames of null data (no input source)",
                mGraph, mGraph->CurrentDriver(), this, aTo - aFrom);
    } else {
      MOZ_ASSERT(mInputs.Length() == 1);
      AudioSegment data;
      GetInputSourceData(data, mInputProcessing->GetPrincipalHandle(),
                         mInputs[0], aFrom, aTo);
      mInputProcessing->Process(GraphImpl(), aFrom, aTo, &data,
                                GetData<AudioSegment>());
    }
  }
  MOZ_ASSERT(TrackTimeToGraphTime(GetEnd()) == aTo);

  // Push the input data from the connected NativeInputTrack to mInputProcessing
  if (source) {
    MOZ_ASSERT(source->GraphImpl() == GraphImpl());
    MOZ_ASSERT(source->mSampleRate == mSampleRate);
    MOZ_ASSERT(GraphImpl()->GraphRate() == mSampleRate);
    mInputProcessing->ProcessInput(GraphImpl(),
                                   source->GetData<AudioSegment>());
  }

  bool ended = false;
  mInputProcessing->Pull(
      GraphImpl(), aFrom, aTo, TrackTimeToGraphTime(GetEnd()),
      GetData<AudioSegment>(), aTo == GraphImpl()->mStateComputedTime, &ended);
  ApplyTrackDisabling(mSegment.get());
  if (ended && (aFlags & ALLOW_END)) {
    ApplyTrackDisabling(mSegment.get());
  } else if (aFlags & ALLOW_END) {
    mEnded = true;
  }
}

void AudioInputTrack::GetInputSourceData(AudioSegment& aOutput,
                                         const PrincipalHandle& aPrincipal,
                                         const MediaInputPort* aPort,
                                         GraphTime aFrom, GraphTime aTo) const {
  MOZ_ASSERT(mGraph->OnGraphThread());
  MOZ_ASSERT(aOutput.IsEmpty());

  MediaTrack* source = aPort->GetSource();
  GraphTime next;
  for (GraphTime t = aFrom; t < aTo; t = next) {
    MediaInputPort::InputInterval interval =
        MediaInputPort::GetNextInputInterval(aPort, t);
    interval.mEnd = std::min(interval.mEnd, aTo);

    const bool inputEnded =
        source->Ended() &&
        source->GetEnd() <=
            source->GraphTimeToTrackTimeWithBlocking(interval.mStart);

    TrackTime ticks = interval.mEnd - interval.mStart;
    next = interval.mEnd;

    if (interval.mStart >= interval.mEnd) {
      break;
    }

    if (inputEnded) {
      aOutput.AppendNullData(ticks);
      LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
                " ticks of null data from input port source (ended input)",
                mGraph, mGraph->CurrentDriver(), this, ticks);
    } else if (interval.mInputIsBlocked) {
      aOutput.AppendNullData(ticks);
      LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
                " ticks of null data from input port source (blocked input)",
                mGraph, mGraph->CurrentDriver(), this, ticks);
    } else if (source->IsSuspended()) {
      aOutput.AppendNullData(ticks);
      LOG_FRAME(
          "(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
          " ticks of null data from input port source (source is suspended)",
          mGraph, mGraph->CurrentDriver(), this, ticks);
    } else {
      TrackTime start =
          source->GraphTimeToTrackTimeWithBlocking(interval.mStart);
      TrackTime end = source->GraphTimeToTrackTimeWithBlocking(interval.mEnd);
      MOZ_ASSERT(source->GetData<AudioSegment>()->GetDuration() >= end);

      AudioSegment data;
      data.AppendSlice(*source->GetData<AudioSegment>(), start, end);

      // Replace the principal
      aOutput.AppendSegment(&data, aPrincipal);

      LOG_FRAME("(Graph %p, Driver %p) AudioInputTrack %p Getting %" PRId64
                " ticks of real data from input port source %p",
                mGraph, mGraph->CurrentDriver(), this, end - start, source);
    }
  }
}

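GetInputSourceData walks [aFrom, aTo) one input interval at a time, appending silence for ended, blocked, or suspended intervals and a slice of real data otherwise. A reduced sketch of the interval walk (Interval is a hypothetical stand-in for MediaInputPort::InputInterval):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Interval {
  int64_t start, end;
  bool blocked;  // collapses the ended/blocked/suspended cases above
};

// Count how many ticks in [from, to) come out as null (silent) data.
int64_t CountNullTicks(const std::vector<Interval>& intervals, int64_t from,
                       int64_t to) {
  int64_t nullTicks = 0;
  for (Interval i : intervals) {
    i.start = std::max(i.start, from);
    i.end = std::min(i.end, to);  // clamp to the requested range
    if (i.start >= i.end) {
      continue;
    }
    if (i.blocked) {
      nullTicks += i.end - i.start;  // would be AppendNullData(ticks)
    }
  }
  return nullTicks;
}
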
void AudioInputTrack::SetInputProcessingImpl(
    RefPtr<AudioInputProcessing> aInputProcessing) {
  MOZ_ASSERT(GraphImpl()->OnGraphThread());

@@ -126,10 +126,8 @@ class AudioInputProcessing : public AudioDataListener {
 public:
  AudioInputProcessing(uint32_t aMaxChannelCount,
                       const PrincipalHandle& aPrincipalHandle);

  void Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
            GraphTime aTrackEnd, AudioSegment* aSegment,
            bool aLastPullThisIteration, bool* aEnded);
  void Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
               AudioSegment* aInput, AudioSegment* aOutput);

  void NotifyOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
                        size_t aFrames, TrackRate aRate,

@@ -146,8 +144,8 @@ class AudioInputProcessing : public AudioDataListener {
    return !PassThrough(aGraph);
  }

  void Start();
  void Stop();
  void Start(MediaTrackGraphImpl* aGraph);
  void Stop(MediaTrackGraphImpl* aGraph);

  void DeviceChanged(MediaTrackGraphImpl* aGraph) override;

@@ -157,12 +155,8 @@ class AudioInputProcessing : public AudioDataListener {

  void Disconnect(MediaTrackGraphImpl* aGraph) override;

  // aSegment stores the unprocessed non-interleaved audio input data from mic
  void ProcessInput(MediaTrackGraphImpl* aGraph, const AudioSegment* aSegment);

  void PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
                           const AudioDataValue* aBuffer, size_t aFrames,
                           TrackRate aRate, uint32_t aChannels);
                           const AudioSegment& aSegment);

  void SetPassThrough(MediaTrackGraphImpl* aGraph, bool aPassThrough);
  uint32_t GetRequestedInputChannelCount();

@@ -182,8 +176,19 @@ class AudioInputProcessing : public AudioDataListener {

  TrackTime NumBufferedFrames(MediaTrackGraphImpl* aGraph) const;

  // The packet size is the number of frames in 10ms. The unit of aRate is Hz.
  constexpr static uint32_t GetPacketSize(TrackRate aRate) {
    return static_cast<uint32_t>(aRate) / 100u;
  }

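Since the packetizer always works in 10ms packets, the packet size is simply the rate divided by 100. A standalone restatement with the arithmetic checked at compile time (the helper name is illustrative):

#include <cstdint>

constexpr uint32_t PacketSize(uint32_t aRateHz) { return aRateHz / 100u; }

static_assert(PacketSize(48000) == 480, "10ms at 48kHz is 480 frames");
static_assert(PacketSize(44100) == 441, "10ms at 44.1kHz is 441 frames");
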
  bool IsEnded() const { return mEnded; }

  const PrincipalHandle& GetPrincipalHandle() const { return mPrincipal; }

 private:
  ~AudioInputProcessing() = default;
  void EnsureAudioProcessing(MediaTrackGraphImpl* aGraph, uint32_t aChannels);
  void ResetAudioProcessing(MediaTrackGraphImpl* aGraph);
  // This implements the processing algorithm to apply to the input (e.g. a
  // microphone). If all algorithms are disabled, this class is not used. This
  // class only accepts audio chunks of 10ms. It has two inputs and one output:

@@ -215,13 +220,6 @@ class AudioInputProcessing : public AudioDataListener {
  AlignedFloatBuffer mInputDownmixBuffer;
  // Stores data waiting to be pulled.
  AudioSegment mSegment;
  // Set to Nothing() by Start(). Once live frames have been appended from the
  // audio callback, this is the number of frames appended as pre-buffer for
  // that data, to avoid underruns. Buffering in the track might be needed
  // because of the AUDIO_BLOCK interval at which we run the graph, and because
  // the packetizer keeps some input data. Care must be taken when turning the
  // packetizer on and off.
  Maybe<TrackTime> mLiveBufferingAppended;
  // Principal for the data that flows through this class.
  const PrincipalHandle mPrincipal;
  // Whether or not this MediaEngine is enabled. If it's not enabled, it

@@ -230,11 +228,15 @@ class AudioInputProcessing : public AudioDataListener {
  bool mEnabled;
  // Whether or not we've ended and removed the AudioInputTrack.
  bool mEnded;
  // Stores the unprocessed interleaved audio input data.
  AudioInputSamples mPendingData;
  // When processing is enabled, the number of packets received by this
  // instance, to implement periodic logging.
  uint64_t mPacketCount;
  // A storage holding the interleaved audio data converted from the
  // AudioSegment. This will be used as an input parameter for
  // PacketizeAndProcess. This should be removed once bug 1729041 is done.
  AutoTArray<AudioDataValue,
             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
      mInterleavedBuffer;
};

// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.

@@ -283,6 +285,12 @@ class AudioInputTrack : public ProcessedMediaTrack {
               "Must set mInputProcessing before exposing to content");
    return mInputProcessing->GetRequestedInputChannelCount();
  }
  // Get the data in [aFrom, aTo) from aPort->GetSource() to aOutput. aOutput
  // needs to be empty.
  void GetInputSourceData(AudioSegment& aOutput,
                          const PrincipalHandle& aPrincipal,
                          const MediaInputPort* aPort, GraphTime aFrom,
                          GraphTime aTo) const;

  // Any thread
  AudioInputTrack* AsAudioInputTrack() override { return this; }