Bug 1637235 - Implement a dynamic resampler with preallocated input and output memory. r=padenot

Create a dynamic resampler that will be able to preallocate its internal buffers and provide the exact requested amount of data in the output. The resampler makes use of the AudioSegment data structure to convey the audio frames to make it easier to be used from MTG.

Differential Revision: https://phabricator.services.mozilla.com/D74883
This commit is contained in:
Alex Chronopoulos 2020-06-01 15:53:30 +00:00
Родитель c267d99198
Коммит a9a18f56ea
5 изменённых файлов: 2326 добавлений и 0 удалений

Просмотреть файл

@ -0,0 +1,461 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "DynamicResampler.h"
namespace mozilla {
/**
 * Store the input/output rates and the requested amount of pre-buffering,
 * then set up the speex state and the input buffers for a stereo stream.
 * Both rates must be non-zero.
 */
DynamicResampler::DynamicResampler(int aInRate, int aOutRate,
                                   uint32_t aPreBufferFrames)
    : mInRate(aInRate),
      mOutRate(aOutRate),
      mPreBufferFrames(aPreBufferFrames) {
  MOZ_ASSERT(aInRate);
  MOZ_ASSERT(aOutRate);
  // mChannels is still 0 here, so this takes the channel-change path of
  // UpdateResampler() and creates the resampler state and input buffers.
  UpdateResampler(aOutRate, STEREO);
}
/** Destroy the speex resampler state, if one was ever created. */
DynamicResampler::~DynamicResampler() {
  if (!mResampler) {
    return;
  }
  speex_resampler_destroy(mResampler);
}
/**
 * Fix the sample format (S16 or FLOAT32). May only be called while the format
 * is still AUDIO_FORMAT_SILENCE, i.e. once. The format is propagated to every
 * internal input ring buffer, and the pre-buffering requested at construction
 * (if any) is then written as silence.
 */
void DynamicResampler::SetSampleFormat(AudioSampleFormat aFormat) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE);
  MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32);

  mSampleFormat = aFormat;
  for (AudioRingBuffer& ring : mInternalInBuffer) {
    ring.SetSampleFormat(aFormat);
  }

  // The ring buffers could not take data before the format was known, so the
  // pre-buffer silence is appended only now.
  if (mPreBufferFrames > 0) {
    AppendInputSilence(mPreBufferFrames);
  }
}
bool DynamicResampler::Resample(float* aOutBuffer, uint32_t* aOutFrames,
int aChannelIndex) {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_FLOAT32);
return ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex);
}
bool DynamicResampler::Resample(int16_t* aOutBuffer, uint32_t* aOutFrames,
int aChannelIndex) {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16);
return ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex);
}
/**
 * Run the speex resampler on one channel of float samples.
 *
 * @param aInBuffer     Input samples for the channel.
 * @param aInFrames     In: frames available in `aInBuffer`. Passed by pointer
 *                      to speex, which may update it.
 * @param aOutBuffer    Destination for the resampled samples.
 * @param aOutFrames    In: capacity of `aOutBuffer` in frames. Passed by
 *                      pointer to speex, which may update it.
 * @param aChannelIndex Zero-based channel index, must be < mChannels.
 */
void DynamicResampler::ResampleInternal(const float* aInBuffer,
                                        uint32_t* aInFrames, float* aOutBuffer,
                                        uint32_t* aOutFrames,
                                        int aChannelIndex) {
  MOZ_ASSERT(mResampler);
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(mInRate);
  MOZ_ASSERT(mOutRate);

  MOZ_ASSERT(aInBuffer);
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(*aInFrames > 0);
  MOZ_ASSERT(aOutBuffer);
  MOZ_ASSERT(aOutFrames);
  MOZ_ASSERT(*aOutFrames > 0);

  // The resampler state was created for exactly mChannels channels, so the
  // valid indices are [0, mChannels). The bound must be strict.
  MOZ_ASSERT(aChannelIndex >= 0);
  MOZ_ASSERT(aChannelIndex < mChannels);

  // `rv` only exists in debug builds, where the assertion below consumes it.
#ifdef DEBUG
  int rv =
#endif
      speex_resampler_process_float(mResampler, aChannelIndex, aInBuffer,
                                    aInFrames, aOutBuffer, aOutFrames);
  MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
}
/**
 * Run the speex resampler on one channel of 16-bit integer samples.
 *
 * @param aInBuffer     Input samples for the channel.
 * @param aInFrames     In: frames available in `aInBuffer`. Passed by pointer
 *                      to speex, which may update it.
 * @param aOutBuffer    Destination for the resampled samples.
 * @param aOutFrames    In: capacity of `aOutBuffer` in frames. Passed by
 *                      pointer to speex, which may update it.
 * @param aChannelIndex Zero-based channel index, must be < mChannels.
 */
void DynamicResampler::ResampleInternal(const int16_t* aInBuffer,
                                        uint32_t* aInFrames,
                                        int16_t* aOutBuffer,
                                        uint32_t* aOutFrames,
                                        int aChannelIndex) {
  MOZ_ASSERT(mResampler);
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(mInRate);
  MOZ_ASSERT(mOutRate);

  MOZ_ASSERT(aInBuffer);
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(*aInFrames > 0);
  MOZ_ASSERT(aOutBuffer);
  MOZ_ASSERT(aOutFrames);
  MOZ_ASSERT(*aOutFrames > 0);

  // The resampler state was created for exactly mChannels channels, so the
  // valid indices are [0, mChannels). The bound must be strict.
  MOZ_ASSERT(aChannelIndex >= 0);
  MOZ_ASSERT(aChannelIndex < mChannels);

  // `rv` only exists in debug builds, where the assertion below consumes it.
#ifdef DEBUG
  int rv =
#endif
      speex_resampler_process_int(mResampler, aChannelIndex, aInBuffer,
                                  aInFrames, aOutBuffer, aOutFrames);
  MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
}
void DynamicResampler::UpdateResampler(int aOutRate, int aChannels) {
MOZ_ASSERT(aOutRate);
MOZ_ASSERT(aChannels);
if (mChannels != aChannels) {
mResampler = speex_resampler_init(aChannels, mInRate, aOutRate,
SPEEX_RESAMPLER_QUALITY_MIN, nullptr);
MOZ_ASSERT(mResampler);
mChannels = aChannels;
mOutRate = aOutRate;
// Between mono and stereo changes, keep always allocated 2 channels to
// avoid reallocations in the most common case.
if ((mChannels == STEREO || mChannels == 1) &&
mInternalInBuffer.Length() == STEREO) {
// Don't worry if format is not set it will write silence then.
if ((mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32) &&
mChannels == STEREO) {
// The mono channel is allways up to date. When we are going from mono
// to stereo upmix the mono to stereo channel
int bufferedDuration = mInternalInBuffer[0].AvailableRead();
mInternalInBuffer[1].Clear();
if (bufferedDuration) {
mInternalInBuffer[1].Write(mInternalInBuffer[0], bufferedDuration);
}
}
// Maintain stereo size
mInputTail.SetLength(STEREO);
WarmUpResampler(false);
return;
}
// upmix or downmix, for now just clear but it has to be updated
// because allocates and this is executed in audio thread.
mInternalInBuffer.Clear();
for (int i = 0; i < mChannels; ++i) {
// Pre-allocate something big, 100ms of audio.
AudioRingBuffer* b =
mInternalInBuffer.AppendElement(sizeof(float) * mInRate / 10);
if (mSampleFormat != AUDIO_FORMAT_SILENCE) {
// In ctor this update is not needed
b->SetSampleFormat(mSampleFormat);
}
}
mInputTail.SetLength(mChannels);
return;
}
if (mOutRate != aOutRate) {
// If the rates was the same the resampler was not being used so warm up.
if (mOutRate == mInRate) {
WarmUpResampler(true);
}
#ifdef DEBUG
int rv =
#endif
speex_resampler_set_rate(mResampler, mInRate, aOutRate);
MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
mOutRate = aOutRate;
}
}
void DynamicResampler::WarmUpResampler(bool aSkipLatency) {
MOZ_ASSERT(mInputTail.Length());
for (int i = 0; i < mChannels; ++i) {
if (!mInputTail[i].Length()) {
continue;
}
uint32_t inFrames = mInputTail[i].Length();
uint32_t outFrames = 5 * TailBuffer::MAXSIZE; // something big
if (mSampleFormat == AUDIO_FORMAT_S16) {
short outBuffer[5 * TailBuffer::MAXSIZE] = {};
ResampleInternal(mInputTail[i].Buffer<short>(), &inFrames, outBuffer,
&outFrames, i);
MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length());
} else {
float outBuffer[100] = {};
ResampleInternal(mInputTail[i].Buffer<float>(), &inFrames, outBuffer,
&outFrames, i);
MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length());
}
}
if (aSkipLatency) {
int inputLatency = speex_resampler_get_input_latency(mResampler);
MOZ_ASSERT(inputLatency > 0);
uint32_t ratioNum, ratioDen;
speex_resampler_get_ratio(mResampler, &ratioNum, &ratioDen);
// Ratio at this point is one so only skip the input latency. No special
// calculations are needed.
speex_resampler_set_skip_frac_num(mResampler, inputLatency * ratioDen);
}
}
void DynamicResampler::AppendInput(const nsTArray<const float*>& aInBuffer,
uint32_t aInFrames) {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_FLOAT32);
AppendInputInternal(aInBuffer, aInFrames);
}
void DynamicResampler::AppendInput(const nsTArray<const int16_t*>& aInBuffer,
uint32_t aInFrames) {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16);
AppendInputInternal(aInBuffer, aInFrames);
}
/**
 * Tell whether channel `aChannelIndex` has buffered enough input frames to
 * produce `aOutFrames` output frames at the current rates.
 *
 * - Equal rates: one input frame per output frame.
 * - Output rate an integer multiple of the input rate, and `aOutFrames` a
 *   multiple of that factor: exactly aOutFrames / factor input frames.
 * - Input rate an integer multiple of the output rate: exactly
 *   aOutFrames * factor input frames.
 * - Otherwise: strictly more than floor(aOutFrames * mInRate / mOutRate).
 */
bool DynamicResampler::EnoughInFrames(uint32_t aOutFrames,
                                      int aChannelIndex) const {
  const uint64_t buffered = InFramesBuffered(aChannelIndex);

  if (mInRate == mOutRate) {
    return buffered >= aOutFrames;
  }

  if (!(mOutRate % mInRate)) {
    // Integer upsampling. The divisibility test must be against the factor:
    // `aOutFrames % mOutRate / mInRate` parses as
    // `(aOutFrames % mOutRate) / mInRate`, which accepted frame counts that
    // are not a multiple of the factor and then rounded the requirement down.
    const uint32_t factor = static_cast<uint32_t>(mOutRate / mInRate);
    if (!(aOutFrames % factor)) {
      return buffered >= aOutFrames / factor;
    }
  }

  // 64-bit product so a large `aOutFrames` cannot wrap around.
  const uint64_t scaled =
      static_cast<uint64_t>(aOutFrames) * mInRate / mOutRate;

  if (!(mInRate % mOutRate)) {
    // Integer downsampling: the division above is exact.
    return buffered >= scaled;
  }

  // Non-integer ratio (or an upsampling request that is not a multiple of the
  // factor): `scaled` was rounded down, so require strictly more.
  return buffered > scaled;
}
bool DynamicResampler::CanResample(uint32_t aOutFrames) const {
for (int i = 0; i < mChannels; ++i) {
if (!EnoughInFrames(aOutFrames, i)) {
return false;
}
}
return true;
}
/**
 * Write `aInFrames` frames of silence into the input ring buffer of each of
 * the mChannels active channels.
 */
void DynamicResampler::AppendInputSilence(const uint32_t aInFrames) {
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(mInternalInBuffer.Length() >= (uint32_t)mChannels);

  // Only the active channels are touched; the buffer array may hold more
  // entries than mChannels.
  int channel = 0;
  while (channel < mChannels) {
    mInternalInBuffer[channel].WriteSilence(aInFrames);
    ++channel;
  }
}
/**
 * Return the number of frames available for reading in the input ring buffer
 * of channel `aChannelIndex`.
 *
 * @param aChannelIndex Zero-based channel index, must be < mChannels.
 */
uint32_t DynamicResampler::InFramesBuffered(int aChannelIndex) const {
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(aChannelIndex >= 0);
  // The index is used to subscript mInternalInBuffer right below, so both
  // bounds must be strict: an index equal to the length is already out of
  // range.
  MOZ_ASSERT(aChannelIndex < mChannels);
  MOZ_ASSERT((uint32_t)aChannelIndex < mInternalInBuffer.Length());
  return mInternalInBuffer[aChannelIndex].AvailableRead();
}
/**
 * Pre-allocate enough chunks of mChunkCapacity frames each to hold
 * `aTotalDuration` frames, rounding the number of chunks up, with `aChannels`
 * channels per chunk.
 */
AudioChunkList::AudioChunkList(int aTotalDuration, int aChannels) {
  const int fullChunks = aTotalDuration / mChunkCapacity;
  // A remainder needs one extra, partially used chunk.
  const int extraChunk = (aTotalDuration % mChunkCapacity) ? 1 : 0;
  CreateChunks(fullChunks + extraChunk, aChannels);
}
/**
 * Fill the (empty) chunk list with `aNumOfChunks` chunks, each owning
 * `aChannels` channel buffers of mChunkCapacity float elements.
 */
void AudioChunkList::CreateChunks(int aNumOfChunks, int aChannels) {
  MOZ_ASSERT(!mChunks.Length());
  MOZ_ASSERT(aNumOfChunks);
  MOZ_ASSERT(aChannels);

  mChunks.AppendElements(aNumOfChunks);
  for (AudioChunk& chunk : mChunks) {
    // Allocate the per-channel storage and remember where each channel
    // starts before the storage is handed over to the shared buffer.
    AutoTArray<nsTArray<float>, STEREO> channelStorage;
    channelStorage.AppendElements(aChannels);
    AutoTArray<const float*, STEREO> channelStarts;
    channelStarts.AppendElements(aChannels);
    for (int ch = 0; ch < aChannels; ++ch) {
      channelStarts[ch] = channelStorage[ch].AppendElements(mChunkCapacity);
    }

    chunk.mBuffer = new mozilla::SharedChannelArrayBuffer(&channelStorage);

    // Publish the recorded channel pointers on the chunk.
    chunk.mChannelData.AppendElements(aChannels);
    for (int ch = 0; ch < aChannels; ++ch) {
      chunk.mChannelData[ch] = channelStarts[ch];
    }
  }
}
void AudioChunkList::UpdateToMonoOrStereo(int aChannels) {
MOZ_ASSERT(mChunks.Length());
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32);
MOZ_ASSERT(aChannels == 1 || aChannels == 2);
for (AudioChunk& chunk : mChunks) {
MOZ_ASSERT(chunk.ChannelCount() != (uint32_t)aChannels);
MOZ_ASSERT(chunk.ChannelCount() == 1 || chunk.ChannelCount() == 2);
chunk.mChannelData.SetLengthAndRetainStorage(aChannels);
if (mSampleFormat == AUDIO_FORMAT_S16) {
SharedChannelArrayBuffer<short>* channelArray =
static_cast<SharedChannelArrayBuffer<short>*>(chunk.mBuffer.get());
channelArray->mBuffers.SetLengthAndRetainStorage(aChannels);
if (aChannels == 2) {
// This an indirect allocation, unfortunately.
channelArray->mBuffers[1].SetLength(mChunkCapacity);
chunk.mChannelData[1] = channelArray->mBuffers[1].Elements();
}
} else {
SharedChannelArrayBuffer<float>* channelArray =
static_cast<SharedChannelArrayBuffer<float>*>(chunk.mBuffer.get());
channelArray->mBuffers.SetLengthAndRetainStorage(aChannels);
if (aChannels == 2) {
// This an indirect allocation, unfortunately.
channelArray->mBuffers[1].SetLength(mChunkCapacity);
chunk.mChannelData[1] = channelArray->mBuffers[1].Elements();
}
}
}
}
/**
 * Fix the sample format (S16 or FLOAT32). May only be called while the format
 * is still AUDIO_FORMAT_SILENCE. For S16 the per-chunk capacity, counted in
 * frames, is doubled.
 */
void AudioChunkList::SetSampleFormat(AudioSampleFormat aFormat) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE);
  MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32);

  mSampleFormat = aFormat;
  if (aFormat == AUDIO_FORMAT_S16) {
    mChunkCapacity *= 2;
  }
}
/**
 * Return a reference to the chunk at the current position, reset to zero
 * duration, unit volume, no principal handle and the list's sample format,
 * and advance the position. The chunk's buffer must not be shared.
 */
AudioChunk& AudioChunkList::GetNext() {
  AudioChunk& next = mChunks[mIndex];
  MOZ_ASSERT(!next.mChannelData.IsEmpty());
  MOZ_ASSERT(next.mBuffer);
  MOZ_ASSERT(!next.mBuffer->IsShared());
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
             mSampleFormat == AUDIO_FORMAT_FLOAT32);

  // Reset the metadata; the pre-allocated buffers are left untouched.
  next.mDuration = 0;
  next.mVolume = 1.0f;
  next.mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  next.mBufferFormat = mSampleFormat;

  IncrementIndex();
  return next;
}
/**
 * Make every chunk carry `aChannels` channels. Nothing happens when the count
 * is unchanged. Changes within mono/stereo reuse the existing chunks; any
 * other change recreates the same number of chunks.
 */
void AudioChunkList::Update(int aChannels) {
  MOZ_ASSERT(mChunks.Length());

  const auto currentChannels = mChunks[0].ChannelCount();
  if (currentChannels == (uint32_t)aChannels) {
    return;
  }

  // Special handling between mono and stereo to avoid reallocations.
  const bool withinMonoStereo = aChannels <= 2 && currentChannels <= 2;
  if (withinMonoStereo) {
    UpdateToMonoOrStereo(aChannels);
    return;
  }

  const int chunkCount = static_cast<int>(mChunks.Length());
  mChunks.ClearAndRetainStorage();
  CreateChunks(chunkCount, aChannels);
}
/**
 * Set up the underlying resampler and a stereo output chunk list sized for
 * aOutRate / 10 frames.
 */
AudioResampler::AudioResampler(int aInRate, int aOutRate,
                               uint32_t aPreBufferFrames)
    : mResampler(aInRate, aOutRate, aPreBufferFrames),
      mOutputChunks(aOutRate / 10, STEREO) {
}
void AudioResampler::AppendInput(const AudioSegment& aInSegment) {
MOZ_ASSERT(aInSegment.GetDuration());
for (AudioSegment::ConstChunkIterator iter(aInSegment); !iter.IsEnded();
iter.Next()) {
const AudioChunk& chunk = *iter;
if (!mIsSampleFormatSet) {
// We don't know the format yet and all buffers are empty.
if (chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
// Only silence has been received and the format is unkown. Igonre it,
// if Resampler() is called it will return silence too.
continue;
}
// First no silence data, set the format once for lifetime and let it
// continue the rest of the flow. We will not get in here again.
mOutputChunks.SetSampleFormat(chunk.mBufferFormat);
mResampler.SetSampleFormat(chunk.mBufferFormat);
mIsSampleFormatSet = true;
}
MOZ_ASSERT(mIsSampleFormatSet);
if (chunk.IsNull()) {
mResampler.AppendInputSilence(chunk.GetDuration());
continue;
}
// Make sure the channel is up to date. An AudioSegment can contain chunks
// with different channel count.
UpdateChannels(chunk.mChannelData.Length());
if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
mResampler.AppendInput(chunk.ChannelData<float>(), chunk.GetDuration());
} else {
mResampler.AppendInput(chunk.ChannelData<int16_t>(), chunk.GetDuration());
}
}
}
/**
 * Produce `aOutFrames` frames of resampled audio as an AudioSegment.
 *
 * - Sample format not set yet: returns `aOutFrames` frames of null data.
 * - Not enough buffered input: returns an empty segment.
 * - Otherwise: fills pre-allocated output chunks, at most ChunkCapacity()
 *   frames each, and appends a copy of each one to the returned segment.
 */
AudioSegment AudioResampler::Resample(uint32_t aOutFrames) {
  MOZ_ASSERT(aOutFrames);
  AudioSegment output;

  // We don't know what to do yet and we have only received silence, if
  // anything: just return what was asked for and leave.
  if (!mIsSampleFormatSet) {
    output.AppendNullData(aOutFrames);
    return output;
  }

  // Not enough input frames, abort.
  if (!mResampler.CanResample(aOutFrames)) {
    return output;
  }

  int remaining = aOutFrames;
  while (remaining) {
    MOZ_ASSERT(remaining > 0);
    AudioChunk& chunk = mOutputChunks.GetNext();
    const int framesThisChunk =
        std::min(remaining, mOutputChunks.ChunkCapacity());
    remaining -= framesThisChunk;

    for (uint32_t ch = 0; ch < chunk.ChannelCount(); ++ch) {
      uint32_t framesWritten = framesThisChunk;
      // `rv` only exists in debug builds, where the assertion consumes it.
      if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
#ifdef DEBUG
        bool rv =
#endif
            mResampler.Resample(chunk.ChannelDataForWrite<float>(ch),
                                &framesWritten, ch);
        MOZ_ASSERT(rv);
      } else {
#ifdef DEBUG
        bool rv =
#endif
            mResampler.Resample(chunk.ChannelDataForWrite<int16_t>(ch),
                                &framesWritten, ch);
        MOZ_ASSERT(rv);
      }
      MOZ_ASSERT(framesWritten == (uint32_t)framesThisChunk);
      chunk.mDuration = framesThisChunk;
    }

    // Create a copy in order to consume that copy and not the pre-allocated
    // chunk.
    AudioChunk consumable = chunk;
    output.AppendAndConsumeChunk(&consumable);
  }

  return output;
}
/**
 * Apply the output rate and channel count to the resampler, then apply the
 * channel count to the output chunk list.
 */
void AudioResampler::Update(int aOutRate, int aChannels) {
  // Resampler first, output chunks second.
  mResampler.UpdateResampler(aOutRate, aChannels);
  mOutputChunks.Update(aChannels);
}
/**
 * Return the number of frames buffered for channel 0 of the input, or 0 while
 * the sample format has not been set.
 */
int AudioResampler::InputDuration() const {
  if (mIsSampleFormatSet) {
    const int buffered = static_cast<int>(mResampler.InFramesBuffered(0));
    MOZ_ASSERT(buffered >= 0);
    return buffered;
  }
  return 0;
}
} // namespace mozilla

Просмотреть файл

@ -0,0 +1,393 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef MOZILLA_DYNAMIC_RESAMPLER_H_
#define MOZILLA_DYNAMIC_RESAMPLER_H_
#include "AudioRingBuffer.h"
#include "AudioSegment.h"
#include <speex/speex_resampler.h>
namespace mozilla {
// Channel count of a stereo stream. Also used as the inline capacity of the
// per-channel AutoTArrays in this file.
constexpr int STEREO = 2;
/**
* DynamicResampler allows updating on the fly the output sample rate and the
* number of channels. In addition to that, it maintains an internal buffer for
* the input data and allows pre-buffering as well. The Resample() method
* strives to provide the requested number of output frames by using the input
* data including any pre-buffering. If this is not possible then it will not
* attempt to resample and it will return failure.
*
* Input data buffering makes use of the AudioRingBuffer. The capacity of the
* buffer is 100ms of float audio and it is pre-allocated at the constructor.
* No extra allocations take place when the input is appended. In addition to
* that, due to special feature of AudioRingBuffer, no extra copies take place
* when the input data is fed to the resampler.
*
* The sample format must be set before using any method. If the provided sample
* format is of type short the pre-allocated capacity of the input buffer
* becomes 200ms of short audio.
*
* The DynamicResampler is not thread-safe, so all the methods apart from the
* constructor must be called on the same thread.
*/
class DynamicResampler final {
 public:
  /**
   * Provide the initial input and output rate and the amount of pre-buffering.
   * The channel count will be set to stereo. Memory allocation will take
   * place. The input buffer is non-interleaved.
   */
  DynamicResampler(int aInRate, int aOutRate, uint32_t aPreBufferFrames = 0);
  ~DynamicResampler();

  /**
   * Set the sample format type to float or short.
   */
  void SetSampleFormat(AudioSampleFormat aFormat);

  /** Current output sample rate. */
  int GetOutRate() const { return mOutRate; }
  /** Current channel count. */
  int GetChannels() const { return mChannels; }

  /**
   * Append `aInFrames` number of frames from `aInBuffer` to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInput(const nsTArray<const float*>& aInBuffer, uint32_t aInFrames);
  void AppendInput(const nsTArray<const int16_t*>& aInBuffer,
                   uint32_t aInFrames);

  /**
   * Append `aInFrames` number of frames of silence to the internal input
   * buffer. Memory copy/move takes place.
   */
  void AppendInputSilence(const uint32_t aInFrames);

  /**
   * Return the number of frames stored in the internal input buffer.
   */
  uint32_t InFramesBuffered(int aChannelIndex) const;

  /**
   * Resample as many frames as needed from the internal input buffer to
   * `aOutBuffer` in order to provide all `aOutFrames`, and return true. If
   * there are not enough input frames to provide the requested output frames
   * no resampling is attempted, `*aOutFrames` is set to 0 and false is
   * returned.
   */
  bool Resample(float* aOutBuffer, uint32_t* aOutFrames, int aChannelIndex);
  bool Resample(int16_t* aOutBuffer, uint32_t* aOutFrames, int aChannelIndex);

  /**
   * Update the output rate or/and the channel count. If a value is not updated
   * compared to the current one nothing happens. Changing the `aOutRate`
   * results in recalculation in the resampler. Changing `aChannels` results in
   * the reallocation of the internal input buffer with the exception of
   * changes between mono to stereo and vice versa where no reallocation takes
   * place. A stereo internal input buffer is always maintained even if the
   * sound is mono.
   */
  void UpdateResampler(int aOutRate, int aChannels);

  /**
   * Returns true if the resampler has enough input data to provide to the
   * output of the `Resample()` method `aOutFrames` number of frames. This is a
   * way to know in advance if the `Resample()` method will return true or
   * false given that nothing changes in between.
   */
  bool CanResample(uint32_t aOutFrames) const;

 private:
  /** Push `aInFrames` frames of every channel into its input ring buffer. */
  template <typename T>
  void AppendInputInternal(const nsTArray<const T*>& aInBuffer,
                           uint32_t aInFrames) {
    MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
    for (int i = 0; i < mChannels; ++i) {
      PushInFrames(aInBuffer[i], aInFrames, i);
    }
  }

  /** Thin wrappers around the speex processing call for each sample type. */
  void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
                        float* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex);
  void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
                        int16_t* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex);

  /**
   * Shared implementation of Resample(): produce `*aOutFrames` frames for
   * channel `aChannelIndex` from the internal input buffer. When the rates are
   * equal the data is copied straight through; otherwise it goes through the
   * speex resampler. In both cases the tail of the input is remembered so the
   * resampler can be warmed up later.
   */
  template <typename T>
  bool ResampleInternal(T* aOutBuffer, uint32_t* aOutFrames,
                        int aChannelIndex) {
    MOZ_ASSERT(mInRate);
    MOZ_ASSERT(mOutRate);
    MOZ_ASSERT(mChannels);
    // The index subscripts mInternalInBuffer and mInputTail below, so the
    // bounds must be strict.
    MOZ_ASSERT(aChannelIndex >= 0);
    MOZ_ASSERT(aChannelIndex < mChannels);
    MOZ_ASSERT((uint32_t)aChannelIndex < mInternalInBuffer.Length());
    MOZ_ASSERT(aOutFrames);
    MOZ_ASSERT(*aOutFrames);

    // Not enough input, don't do anything
    if (!EnoughInFrames(*aOutFrames, aChannelIndex)) {
      *aOutFrames = 0;
      return false;
    }

    if (mInRate == mOutRate) {
      mInternalInBuffer[aChannelIndex].Read(MakeSpan(aOutBuffer, *aOutFrames));
      // Workaround to avoid discontinuity when the speex resampler operates
      // again. Feed it with the last 20 frames to warm up the internal memory
      // of the resampler and then skip memory equals to resampler's input
      // latency.
      mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, *aOutFrames);
      return true;
    }

    uint32_t totalOutFramesNeeded = *aOutFrames;

    mInternalInBuffer[aChannelIndex].ReadNoCopy(
        [this, &aOutBuffer, &totalOutFramesNeeded,
         aChannelIndex](const Span<const T>& aInBuffer) -> int {
          if (!totalOutFramesNeeded) {
            return 0;
          }
          uint32_t outFramesResampled = totalOutFramesNeeded;
          uint32_t inFrames = aInBuffer.Length();
          ResampleInternal(aInBuffer.data(), &inFrames, aOutBuffer,
                           &outFramesResampled, aChannelIndex);
          aOutBuffer += outFramesResampled;
          totalOutFramesNeeded -= outFramesResampled;
          // Remember only the frames reported as consumed (`inFrames` is
          // also what this callback returns). Anything past `inFrames` has
          // not been through the resampler yet and must not end up in the
          // warm-up tail.
          mInputTail[aChannelIndex].StoreTail<T>(aInBuffer.data(), inFrames);
          return inFrames;
        });

    MOZ_ASSERT(totalOutFramesNeeded == 0);
    return true;
  }

  /** See CanResample(); this is the per-channel check. */
  bool EnoughInFrames(uint32_t aOutFrames, int aChannelIndex) const;

  /** Write `aInFrames` frames into the input ring buffer of one channel. */
  template <typename T>
  void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
                    int aChannelIndex) {
    MOZ_ASSERT(aInBuffer);
    MOZ_ASSERT(aInFrames);
    MOZ_ASSERT(mChannels);
    // Strict bounds: the index subscripts mInternalInBuffer below.
    MOZ_ASSERT(aChannelIndex >= 0);
    MOZ_ASSERT(aChannelIndex < mChannels);
    MOZ_ASSERT((uint32_t)aChannelIndex < mInternalInBuffer.Length());
    mInternalInBuffer[aChannelIndex].Write(MakeSpan(aInBuffer, aInFrames));
  }

  /** Feed the stored input tails to the resampler to prime its state. */
  void WarmUpResampler(bool aSkipLatency);

 private:
  int mChannels = 0;
  const int mInRate;
  int mOutRate;

  AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;

  SpeexResamplerState* mResampler = nullptr;
  AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
  const uint32_t mPreBufferFrames;

  /**
   * Small fixed-size store for the last (up to MAXSIZE) frames of one channel.
   * The storage is an array of float that is reinterpreted as the requested
   * sample type.
   */
  class TailBuffer {
   public:
    template <typename T>
    T* Buffer() {
      return reinterpret_cast<T*>(mBuffer);
    }
    /* Store the MAXSIZE last elements of the buffer. */
    template <typename T>
    void StoreTail(const Span<const T>& aInBuffer) {
      StoreTail(aInBuffer.data(), aInBuffer.size());
    }
    template <typename T>
    void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
      if (aInFrames >= MAXSIZE) {
        // Keep only the last MAXSIZE frames.
        PodCopy(Buffer<T>(), aInBuffer + aInFrames - MAXSIZE, MAXSIZE);
        mSize = MAXSIZE;
      } else {
        PodCopy(Buffer<T>(), aInBuffer, aInFrames);
        mSize = static_cast<int>(aInFrames);
      }
    }
    /** Number of frames currently stored. */
    int Length() const { return mSize; }
    static const int MAXSIZE = 20;

   private:
    float mBuffer[MAXSIZE] = {};
    int mSize = 0;
  };
  AutoTArray<TailBuffer, STEREO> mInputTail;
};
/**
* AudioChunkList provides a way to have preallocated audio buffers in
* AudioSegment. The idea is that the amount of AudioChunks is created in
* advance. Each AudioChunk is able to hold a specific amount of audio
* (capacity). The total capacity of AudioChunkList is specified by the number
* of AudioChunks. The important aspect of the AudioChunkList is that
* preallocates everything and reuse the same chunks similar to a ring buffer.
*
* Why the whole AudioChunk is preallocated and not some raw memory buffer? This
* is due to the limitations of MediaTrackGraph. The way that MTG works depends
* on `AudioSegment`s to convey the actual audio data. An AudioSegment consists
* of AudioChunks. The AudioChunk is built in a way, that owns and allocates the
* audio buffers. Thus, since the use of AudioSegment is mandatory if the audio
* data was in a different form, the only way to use it from the audio thread
* would be to create the AudioChunk there. That would result in a copy
* operation (not very important) and most of all an allocation of the audio
* buffer in the audio thread. This happens in many places inside MTG it's a bad
* practice, though, and it has been avoided due to the AudioChunkList.
*
* After construction the sample format must be set, when it is available. It
* can be set in the audio thread. Before setting the sample format is not
* possible to use any method of AudioChunkList.
*
* Every AudioChunk in the AudioChunkList is preallocated with a capacity of 128
* frames of float audio. Nevertheless, the sample format is not available at
* that point. Thus if the sample format is set to short, the capacity of each
* chunk changes to 256 frames, and the total duration becomes twice as big.
* There are methods to get the chunk capacity and total capacity in frames,
* and they must always be used.
*
* Two things to note. First, when the channel count changes everything is
* recreated which means reallocations. Second, the total capacity might differ
* from the requested total capacity for two reasons. First, if the sample
* format is set to short and second because the number of chunks in the list
* divides exactly the final total capacity. The corresponding method must
* always be used to query the total capacity.
*/
class AudioChunkList {
public:
/**
* Constructor, the final total duration might be different from the requested
* `aTotalDuration`. Memory allocation takes place.
*/
AudioChunkList(int aTotalDuration, int aChannels);
AudioChunkList(const AudioChunkList&) = delete;
AudioChunkList(AudioChunkList&&) = delete;
~AudioChunkList() = default;
/**
* Set sample format. It must be done before any other method being used.
*/
void SetSampleFormat(AudioSampleFormat aFormat);
/**
* Get the next available AudioChunk. The duration of the chunk will be zero
* and the volume 1.0. However, the buffers will be there ready to be written.
* Please note, that a reference of the preallocated chunk is returned. Thus
* it _must not be consumed_ directly. If the chunk needs to be consumed it
* must be copied to a temporary chunk first. For example:
* ```
* AudioChunk& chunk = audioChunklist.GetNext();
* // Set up the chunk
* AudioChunk tmp = chunk;
* audioSegment.AppendAndConsumeChunk(&tmp);
* ```
* This way no memory allocation or copy, takes place.
*/
AudioChunk& GetNext();
/**
* Get the capacity of each individual AudioChunk in the list.
*/
int ChunkCapacity() const {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32);
return mChunkCapacity;
}
/**
* Get the total capacity of AudioChunkList.
*/
int TotalCapacity() const {
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32);
return CheckedInt<int>(mChunkCapacity * mChunks.Length()).value();
}
/**
* Update the channel count of the AudioChunkList. Memory allocation is
* taking place.
*/
void Update(int aChannels);
private:
void IncrementIndex() {
++mIndex;
mIndex = CheckedInt<int>(mIndex % mChunks.Length()).value();
}
void CreateChunks(int aNumOfChunks, int aChannels);
void UpdateToMonoOrStereo(int aChannels);
private:
nsTArray<AudioChunk> mChunks;
int mIndex = 0;
int mChunkCapacity = 128;
AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
};
/**
* Audio Resampler is a resampler able to change the output rate and channels
* count on the fly. The API is simple and it is based on AudioSegment in order
* to be used by MTG. All memory allocations, for input and output buffers, happen
* in the constructor and when channel count changes. The memory is recycled in
* order to avoid reallocations. It also supports prebuffering of silence. It
* consists of DynamicResampler and AudioChunkList so please read their
* documentation if you are interested in more details.
*
* The output buffer is preallocated and returned in the form of AudioSegment.
* The intention is to be used directly in a MediaTrack. Since an AudioChunk
* must not be "shared" in order to be written, the AudioSegment returned by
* resampler method must be cleaned up in order to be able for the `AudioChunk`s
* that it consists of to be reused. For `MediaTrack::mSegment` this happens
* every ~50ms (look at MediaTrack::AdvanceTimeVaryingValuesToCurrentTime). Thus
* memory capacity of 100ms has been preallocated for internal input and output
* buffering.
*/
class AudioResampler final {
 public:
  AudioResampler(int aInRate, int aOutRate, uint32_t aPreBufferFrames = 0);

  /**
   * Append input data into the resampler's internal buffer. Copy/move of the
   * memory takes place. The channel count follows the channel count of the
   * chunks being appended.
   */
  void AppendInput(const AudioSegment& aInSegment);

  /**
   * Duration of the internal input buffer, in frames.
   */
  int InputDuration() const;

  /**
   * Request `aOutFrames` of audio at the output sample rate. The internally
   * buffered input is used. If there is not enough input for that amount of
   * output, an empty AudioSegment is returned.
   */
  AudioSegment Resample(uint32_t aOutFrames);

  /**
   * Change the output rate used by the resampler; the channel count is kept.
   */
  void UpdateOutRate(int aOutRate) {
    const int currentChannels = mResampler.GetChannels();
    Update(aOutRate, currentChannels);
  }

 private:
  /** Change the channel count; the output rate is kept. */
  void UpdateChannels(int aChannels) {
    const int currentOutRate = mResampler.GetOutRate();
    Update(currentOutRate, aChannels);
  }
  void Update(int aOutRate, int aChannels);

 private:
  DynamicResampler mResampler;
  AudioChunkList mOutputChunks;
  // Set once the first non-silent chunk has fixed the sample format.
  bool mIsSampleFormatSet = false;
};
} // namespace mozilla
#endif // MOZILLA_DYNAMIC_RESAMPLER_H_

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -33,6 +33,7 @@ UNIFIED_SOURCES += [
'TestDataMutex.cpp',
'TestDecoderBenchmark.cpp',
'TestDriftCompensation.cpp',
'TestDynamicResampler.cpp',
'TestGMPUtils.cpp',
'TestGroupId.cpp',
'TestIntervalSet.cpp',

Просмотреть файл

@ -129,6 +129,7 @@ EXPORTS += [
'DecoderTraits.h',
'DOMMediaStream.h',
'DriftCompensation.h',
'DynamicResampler.h',
'FileBlockCache.h',
'ForwardedInputTrack.h',
'FrameStatistics.h',
@ -252,6 +253,7 @@ UNIFIED_SOURCES += [
'ChannelMediaResource.cpp',
'CloneableWithRangeMediaResource.cpp',
'DOMMediaStream.cpp',
'DynamicResampler.cpp',
'FileBlockCache.cpp',
'FileMediaResource.cpp',
'ForwardedInputTrack.cpp',