gecko-dev/dom/media/DynamicResampler.cpp

466 строки
16 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "DynamicResampler.h"
namespace mozilla {
// Stores the input/output rates and the requested pre-buffer length, then
// creates the initial speex state for a stereo layout.
DynamicResampler::DynamicResampler(int aInRate, int aOutRate,
                                   uint32_t aPreBufferFrames)
    : mInRate(aInRate),
      mPreBufferFrames(aPreBufferFrames),
      mOutRate(aOutRate) {
  // Both rates must be non-zero.
  MOZ_ASSERT(mInRate);
  MOZ_ASSERT(mOutRate);

  UpdateResampler(aOutRate, STEREO);
}
// Releases the speex resampler state, if there is one.
DynamicResampler::~DynamicResampler() {
  if (!mResampler) {
    return;
  }
  speex_resampler_destroy(mResampler);
}
// Sets the sample format once: the current format must still be
// AUDIO_FORMAT_SILENCE and the new one must be S16 or FLOAT32. The format is
// pushed to every internal input buffer and, when a pre-buffer was requested,
// that many frames of silence are appended to the input.
void DynamicResampler::SetSampleFormat(AudioSampleFormat aFormat) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE);
  MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32);

  mSampleFormat = aFormat;
  for (auto& ring : mInternalInBuffer) {
    ring.SetSampleFormat(mSampleFormat);
  }

  if (!mPreBufferFrames) {
    return;
  }
  AppendInputSilence(mPreBufferFrames);
}
// Float entry point: only valid once the format has been set to FLOAT32.
// Forwards to the three-argument ResampleInternal() and returns its result.
bool DynamicResampler::Resample(float* aOutBuffer, uint32_t* aOutFrames,
                                int aChannelIndex) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_FLOAT32);
  const bool resampled =
      ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex);
  return resampled;
}
// 16-bit integer entry point: only valid once the format has been set to S16.
// Forwards to the three-argument ResampleInternal() and returns its result.
bool DynamicResampler::Resample(int16_t* aOutBuffer, uint32_t* aOutFrames,
                                int aChannelIndex) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16);
  const bool resampled =
      ResampleInternal(aOutBuffer, aOutFrames, aChannelIndex);
  return resampled;
}
// Runs the speex float resampler on one channel.
//
// aInBuffer / aOutBuffer: non-null input and output sample buffers.
// aInFrames / aOutFrames: non-null, non-zero frame counts handed to speex by
//   pointer; callers in this file inspect them again after the call.
// aChannelIndex: channel to process, must be in [0, mChannels).
//
// A speex failure is only reported through a debug assertion.
void DynamicResampler::ResampleInternal(const float* aInBuffer,
                                        uint32_t* aInFrames, float* aOutBuffer,
                                        uint32_t* aOutFrames,
                                        int aChannelIndex) {
  MOZ_ASSERT(mResampler);
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(mInRate);
  MOZ_ASSERT(mOutRate);

  MOZ_ASSERT(aInBuffer);
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(*aInFrames > 0);
  MOZ_ASSERT(aOutBuffer);
  MOZ_ASSERT(aOutFrames);
  MOZ_ASSERT(*aOutFrames > 0);

  // Valid channel indices are 0 .. mChannels - 1; mChannels itself is one
  // past the end and must be rejected.
  MOZ_ASSERT(aChannelIndex >= 0);
  MOZ_ASSERT(aChannelIndex < mChannels);

  // `rv` exists only in DEBUG builds, where MOZ_ASSERT evaluates it.
#ifdef DEBUG
  int rv =
#endif
      speex_resampler_process_float(mResampler, aChannelIndex, aInBuffer,
                                    aInFrames, aOutBuffer, aOutFrames);
  MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
}
// Runs the speex integer (16-bit) resampler on one channel.
//
// aInBuffer / aOutBuffer: non-null input and output sample buffers.
// aInFrames / aOutFrames: non-null, non-zero frame counts handed to speex by
//   pointer; callers in this file inspect them again after the call.
// aChannelIndex: channel to process, must be in [0, mChannels).
//
// A speex failure is only reported through a debug assertion.
void DynamicResampler::ResampleInternal(const int16_t* aInBuffer,
                                        uint32_t* aInFrames,
                                        int16_t* aOutBuffer,
                                        uint32_t* aOutFrames,
                                        int aChannelIndex) {
  MOZ_ASSERT(mResampler);
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(mInRate);
  MOZ_ASSERT(mOutRate);

  MOZ_ASSERT(aInBuffer);
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(*aInFrames > 0);
  MOZ_ASSERT(aOutBuffer);
  MOZ_ASSERT(aOutFrames);
  MOZ_ASSERT(*aOutFrames > 0);

  // Valid channel indices are 0 .. mChannels - 1; mChannels itself is one
  // past the end and must be rejected.
  MOZ_ASSERT(aChannelIndex >= 0);
  MOZ_ASSERT(aChannelIndex < mChannels);

  // `rv` exists only in DEBUG builds, where MOZ_ASSERT evaluates it.
#ifdef DEBUG
  int rv =
#endif
      speex_resampler_process_int(mResampler, aChannelIndex, aInBuffer,
                                  aInFrames, aOutBuffer, aOutFrames);
  MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
}
void DynamicResampler::UpdateResampler(int aOutRate, int aChannels) {
MOZ_ASSERT(aOutRate);
MOZ_ASSERT(aChannels);
if (mChannels != aChannels) {
mResampler = speex_resampler_init(aChannels, mInRate, aOutRate,
SPEEX_RESAMPLER_QUALITY_MIN, nullptr);
MOZ_ASSERT(mResampler);
mChannels = aChannels;
mOutRate = aOutRate;
// Between mono and stereo changes, keep always allocated 2 channels to
// avoid reallocations in the most common case.
if ((mChannels == STEREO || mChannels == 1) &&
mInternalInBuffer.Length() == STEREO) {
// Don't worry if format is not set it will write silence then.
if ((mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32) &&
mChannels == STEREO) {
// The mono channel is always up to date. When we are going from mono
// to stereo upmix the mono to stereo channel
int bufferedDuration = mInternalInBuffer[0].AvailableRead();
mInternalInBuffer[1].Clear();
if (bufferedDuration) {
mInternalInBuffer[1].Write(mInternalInBuffer[0], bufferedDuration);
}
}
// Maintain stereo size
mInputTail.SetLength(STEREO);
WarmUpResampler(false);
return;
}
// upmix or downmix, for now just clear but it has to be updated
// because allocates and this is executed in audio thread.
mInternalInBuffer.Clear();
for (int i = 0; i < mChannels; ++i) {
// Pre-allocate something big, twice the pre-buffer, or at least 100ms.
AudioRingBuffer* b = mInternalInBuffer.AppendElement(
sizeof(float) *
std::max(2 * mPreBufferFrames, static_cast<uint32_t>(mInRate) / 10));
if (mSampleFormat != AUDIO_FORMAT_SILENCE) {
// In ctor this update is not needed
b->SetSampleFormat(mSampleFormat);
}
}
mInputTail.SetLength(mChannels);
return;
}
if (mOutRate != aOutRate) {
// If the rates was the same the resampler was not being used so warm up.
if (mOutRate == mInRate) {
WarmUpResampler(true);
}
#ifdef DEBUG
int rv =
#endif
speex_resampler_set_rate(mResampler, mInRate, aOutRate);
MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
mOutRate = aOutRate;
}
}
void DynamicResampler::WarmUpResampler(bool aSkipLatency) {
MOZ_ASSERT(mInputTail.Length());
for (int i = 0; i < mChannels; ++i) {
if (!mInputTail[i].Length()) {
continue;
}
uint32_t inFrames = mInputTail[i].Length();
uint32_t outFrames = 5 * TailBuffer::MAXSIZE; // something big
if (mSampleFormat == AUDIO_FORMAT_S16) {
short outBuffer[5 * TailBuffer::MAXSIZE] = {};
ResampleInternal(mInputTail[i].Buffer<short>(), &inFrames, outBuffer,
&outFrames, i);
MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length());
} else {
float outBuffer[100] = {};
ResampleInternal(mInputTail[i].Buffer<float>(), &inFrames, outBuffer,
&outFrames, i);
MOZ_ASSERT(inFrames == (uint32_t)mInputTail[i].Length());
}
}
if (aSkipLatency) {
int inputLatency = speex_resampler_get_input_latency(mResampler);
MOZ_ASSERT(inputLatency > 0);
uint32_t ratioNum, ratioDen;
speex_resampler_get_ratio(mResampler, &ratioNum, &ratioDen);
// Ratio at this point is one so only skip the input latency. No special
// calculations are needed.
speex_resampler_set_skip_frac_num(mResampler, inputLatency * ratioDen);
}
}
// Float input entry point: the format must already be FLOAT32. The per-channel
// pointers and frame count are passed straight to AppendInputInternal().
void DynamicResampler::AppendInput(const nsTArray<const float*>& aInBuffer,
                                   uint32_t aInFrames) {
  MOZ_ASSERT(AUDIO_FORMAT_FLOAT32 == mSampleFormat);

  AppendInputInternal(aInBuffer, aInFrames);
}
// 16-bit input entry point: the format must already be S16. The per-channel
// pointers and frame count are passed straight to AppendInputInternal().
void DynamicResampler::AppendInput(const nsTArray<const int16_t*>& aInBuffer,
                                   uint32_t aInFrames) {
  MOZ_ASSERT(AUDIO_FORMAT_S16 == mSampleFormat);

  AppendInputInternal(aInBuffer, aInFrames);
}
// Returns true when channel aChannelIndex has enough buffered input frames to
// produce aOutFrames output frames at the current in/out rates.
//
// - Equal rates: one input frame per output frame.
// - Output rate an integer multiple of the input rate, and aOutFrames a
//   multiple of that factor: exactly aOutFrames / factor input frames.
// - Input rate an integer multiple of the output rate: exactly
//   aOutFrames * factor input frames.
// - Anything else: the required count is fractional, so strictly more than
//   the truncated quotient is demanded.
bool DynamicResampler::EnoughInFrames(uint32_t aOutFrames,
                                      int aChannelIndex) const {
  const uint32_t buffered = InFramesBuffered(aChannelIndex);

  if (mInRate == mOutRate) {
    return buffered >= aOutFrames;
  }

  if (!(mOutRate % mInRate)) {
    // Integer upsampling. The remainder has to be taken against the factor
    // (mOutRate / mInRate); `aOutFrames % mOutRate / mInRate` groups as
    // `(aOutFrames % mOutRate) / mInRate` and tests something else.
    const uint32_t factor = static_cast<uint32_t>(mOutRate / mInRate);
    if (!(aOutFrames % factor)) {
      return buffered >= aOutFrames / factor;
    }
  }

  // Multiply in 64 bits so a large request cannot wrap around.
  const uint64_t needed = static_cast<uint64_t>(aOutFrames) *
                          static_cast<uint64_t>(mInRate) /
                          static_cast<uint64_t>(mOutRate);

  if (!(mInRate % mOutRate)) {
    // Integer downsampling: the quotient above is exact.
    return buffered >= needed;
  }
  return buffered > needed;
}
bool DynamicResampler::CanResample(uint32_t aOutFrames) const {
for (int i = 0; i < mChannels; ++i) {
if (!EnoughInFrames(aOutFrames, i)) {
return false;
}
}
return true;
}
// Writes aInFrames frames of silence into the input buffer of each of the
// mChannels active channels. aInFrames must be non-zero.
void DynamicResampler::AppendInputSilence(const uint32_t aInFrames) {
  MOZ_ASSERT(aInFrames);
  MOZ_ASSERT(mChannels);
  // More buffers than active channels may be allocated, never fewer.
  MOZ_ASSERT(static_cast<uint32_t>(mChannels) <= mInternalInBuffer.Length());

  for (int channel = 0; channel != mChannels && channel < mChannels;
       ++channel) {
    mInternalInBuffer[channel].WriteSilence(aInFrames);
  }
}
// Returns what the input ring buffer of channel aChannelIndex reports as
// available to read. aChannelIndex must be in [0, mChannels).
uint32_t DynamicResampler::InFramesBuffered(int aChannelIndex) const {
  MOZ_ASSERT(mChannels);
  MOZ_ASSERT(aChannelIndex >= 0);
  // Strict comparisons: an index equal to the channel count (or to the number
  // of allocated buffers) is already one past the end.
  MOZ_ASSERT(aChannelIndex < mChannels);
  MOZ_ASSERT((uint32_t)aChannelIndex < mInternalInBuffer.Length());
  return mInternalInBuffer[aChannelIndex].AvailableRead();
}
// Creates as many chunks as are needed to hold aTotalDuration, rounding up
// when the duration is not a whole multiple of the chunk capacity.
AudioChunkList::AudioChunkList(int aTotalDuration, int aChannels) {
  const int wholeChunks = aTotalDuration / mChunkCapacity;
  const int partialChunk = (aTotalDuration % mChunkCapacity) ? 1 : 0;
  CreateChunks(wholeChunks + partialChunk, aChannels);
}
// Fills the (empty) chunk list with aNumOfChunks chunks, each owning
// aChannels float arrays of mChunkCapacity elements, and points each chunk's
// channel data at those arrays.
void AudioChunkList::CreateChunks(int aNumOfChunks, int aChannels) {
  MOZ_ASSERT(mChunks.Length() == 0);
  MOZ_ASSERT(aNumOfChunks);
  MOZ_ASSERT(aChannels);

  mChunks.AppendElements(aNumOfChunks);
  for (AudioChunk& chunk : mChunks) {
    AutoTArray<nsTArray<float>, STEREO> storage;
    storage.AppendElements(aChannels);

    // Remember where each channel's samples start before `storage` is moved
    // into the shared buffer below.
    AutoTArray<const float*, STEREO> channelStarts;
    channelStarts.AppendElements(aChannels);
    for (int channel = 0; channel < aChannels; ++channel) {
      float* samples = storage[channel].AppendElements(mChunkCapacity);
      channelStarts[channel] = samples;
    }

    chunk.mBuffer = new mozilla::SharedChannelArrayBuffer(std::move(storage));

    chunk.mChannelData.AppendElements(aChannels);
    for (int channel = 0; channel < aChannels; ++channel) {
      chunk.mChannelData[channel] = channelStarts[channel];
    }
  }
}
void AudioChunkList::UpdateToMonoOrStereo(int aChannels) {
MOZ_ASSERT(mChunks.Length());
MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
mSampleFormat == AUDIO_FORMAT_FLOAT32);
MOZ_ASSERT(aChannels == 1 || aChannels == 2);
for (AudioChunk& chunk : mChunks) {
MOZ_ASSERT(chunk.ChannelCount() != (uint32_t)aChannels);
MOZ_ASSERT(chunk.ChannelCount() == 1 || chunk.ChannelCount() == 2);
chunk.mChannelData.SetLengthAndRetainStorage(aChannels);
if (mSampleFormat == AUDIO_FORMAT_S16) {
SharedChannelArrayBuffer<short>* channelArray =
static_cast<SharedChannelArrayBuffer<short>*>(chunk.mBuffer.get());
channelArray->mBuffers.SetLengthAndRetainStorage(aChannels);
if (aChannels == 2) {
// This an indirect allocation, unfortunately.
channelArray->mBuffers[1].SetLength(mChunkCapacity);
chunk.mChannelData[1] = channelArray->mBuffers[1].Elements();
}
} else {
SharedChannelArrayBuffer<float>* channelArray =
static_cast<SharedChannelArrayBuffer<float>*>(chunk.mBuffer.get());
channelArray->mBuffers.SetLengthAndRetainStorage(aChannels);
if (aChannels == 2) {
// This an indirect allocation, unfortunately.
channelArray->mBuffers[1].SetLength(mChunkCapacity);
chunk.mChannelData[1] = channelArray->mBuffers[1].Elements();
}
}
}
}
// Sets the sample format once (it must still be AUDIO_FORMAT_SILENCE). For
// S16 the chunk capacity is doubled.
void AudioChunkList::SetSampleFormat(AudioSampleFormat aFormat) {
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_SILENCE);
  MOZ_ASSERT(aFormat == AUDIO_FORMAT_S16 || aFormat == AUDIO_FORMAT_FLOAT32);

  mSampleFormat = aFormat;
  if (mSampleFormat != AUDIO_FORMAT_S16) {
    return;
  }
  mChunkCapacity *= 2;
}
// Returns the chunk at the current index, reset for reuse (zero duration,
// unit volume, no principal, current sample format), and advances the index.
AudioChunk& AudioChunkList::GetNext() {
  AudioChunk& next = mChunks[mIndex];

  MOZ_ASSERT(!next.mChannelData.IsEmpty());
  MOZ_ASSERT(next.mBuffer);
  // The buffer is about to be overwritten, so nobody else may hold it.
  MOZ_ASSERT(!next.mBuffer->IsShared());
  MOZ_ASSERT(mSampleFormat == AUDIO_FORMAT_S16 ||
             mSampleFormat == AUDIO_FORMAT_FLOAT32);

  next.mBufferFormat = mSampleFormat;
  next.mPrincipalHandle = PRINCIPAL_HANDLE_NONE;
  next.mVolume = 1.0f;
  next.mDuration = 0;

  IncrementIndex();
  return next;
}
// Adapts all chunks to aChannels channels. Nothing happens when the first
// chunk already has that count. Mono <-> stereo switches take the cheaper
// in-place path; any other change rebuilds the same number of chunks.
void AudioChunkList::Update(int aChannels) {
  MOZ_ASSERT(mChunks.Length());

  const auto currentChannels = mChunks[0].ChannelCount();
  if (currentChannels == (uint32_t)aChannels) {
    return;
  }

  // Special handling between mono and stereo to avoid reallocations.
  const bool monoStereoSwitch = aChannels <= 2 && currentChannels <= 2;
  if (monoStereoSwitch) {
    UpdateToMonoOrStereo(aChannels);
    return;
  }

  const int chunkCount = static_cast<int>(mChunks.Length());
  mChunks.ClearAndRetainStorage();
  CreateChunks(chunkCount, aChannels);
}
// Sets up the inner DynamicResampler with the given rates and pre-buffer, and
// a stereo output chunk list sized for aOutRate / 10 frames.
AudioResampler::AudioResampler(int aInRate, int aOutRate,
                               uint32_t aPreBufferFrames)
    : mResampler(aInRate, aOutRate, aPreBufferFrames),
      mOutputChunks(aOutRate / 10, STEREO) {
}
void AudioResampler::AppendInput(const AudioSegment& aInSegment) {
MOZ_ASSERT(aInSegment.GetDuration());
for (AudioSegment::ConstChunkIterator iter(aInSegment); !iter.IsEnded();
iter.Next()) {
const AudioChunk& chunk = *iter;
if (!mIsSampleFormatSet) {
// We don't know the format yet and all buffers are empty.
if (chunk.mBufferFormat == AUDIO_FORMAT_SILENCE) {
// Only silence has been received and the format is unkown. Igonre it,
// if Resampler() is called it will return silence too.
continue;
}
// First no silence data, set the format once for lifetime and let it
// continue the rest of the flow. We will not get in here again.
mOutputChunks.SetSampleFormat(chunk.mBufferFormat);
mResampler.SetSampleFormat(chunk.mBufferFormat);
mIsSampleFormatSet = true;
}
MOZ_ASSERT(mIsSampleFormatSet);
if (chunk.IsNull()) {
mResampler.AppendInputSilence(chunk.GetDuration());
continue;
}
// Make sure the channel is up to date. An AudioSegment can contain chunks
// with different channel count.
UpdateChannels(chunk.mChannelData.Length());
if (chunk.mBufferFormat == AUDIO_FORMAT_FLOAT32) {
mResampler.AppendInput(chunk.ChannelData<float>(), chunk.GetDuration());
} else {
mResampler.AppendInput(chunk.ChannelData<int16_t>(), chunk.GetDuration());
}
}
}
// Produces aOutFrames resampled frames as an AudioSegment.
//
// - Format not yet known: returns aOutFrames of null data.
// - Not enough buffered input: returns an empty segment.
// - Otherwise: fills pre-allocated output chunks channel by channel and
//   appends a copy of each to the returned segment.
AudioSegment AudioResampler::Resample(uint32_t aOutFrames) {
  MOZ_ASSERT(aOutFrames);

  AudioSegment result;

  // Nothing but silence (if anything) has been received so far: hand back the
  // requested amount of null data.
  if (!mIsSampleFormatSet) {
    result.AppendNullData(aOutFrames);
    return result;
  }

  // Bail out when input is short. The check asks for one extra frame so that
  // each resample iteration below has enough input even if one of them
  // consumes an additional frame.
  if (!mResampler.CanResample(aOutFrames + 1)) {
    return result;
  }

  int remaining = aOutFrames;
  while (remaining != 0) {
    MOZ_ASSERT(remaining > 0);

    AudioChunk& chunk = mOutputChunks.GetNext();
    const int framesThisChunk =
        std::min(remaining, mOutputChunks.ChunkCapacity());
    remaining -= framesThisChunk;

    for (uint32_t channel = 0; channel < chunk.ChannelCount(); ++channel) {
      uint32_t framesWritten = framesThisChunk;
      if (chunk.mBufferFormat != AUDIO_FORMAT_FLOAT32) {
#ifdef DEBUG
        bool rv =
#endif
            mResampler.Resample(chunk.ChannelDataForWrite<int16_t>(channel),
                                &framesWritten, channel);
        MOZ_ASSERT(rv);
      } else {
#ifdef DEBUG
        bool rv =
#endif
            mResampler.Resample(chunk.ChannelDataForWrite<float>(channel),
                                &framesWritten, channel);
        MOZ_ASSERT(rv);
      }
      MOZ_ASSERT(framesWritten == (uint32_t)framesThisChunk);
      chunk.mDuration = framesThisChunk;
    }

    // Consume a copy so that the pre-allocated chunk itself stays in the
    // list.
    AudioChunk consumable(chunk);
    result.AppendAndConsumeChunk(&consumable);
  }

  return result;
}
// Applies a new output rate and channel count: first to the inner resampler,
// then the channel count to the output chunk list.
void AudioResampler::Update(int aOutRate, int aChannels) {
  // Resampler state first.
  mResampler.UpdateResampler(aOutRate, aChannels);

  // Then the output chunks, which only depend on the channel count.
  mOutputChunks.Update(aChannels);
}
// Number of input frames currently held. Before the sample format is known
// this is the configured pre-buffer length; afterwards it is what channel 0
// of the inner resampler has buffered.
int AudioResampler::InputDuration() const {
  if (mIsSampleFormatSet) {
    const int buffered = (int)mResampler.InFramesBuffered(0);
    MOZ_ASSERT(buffered >= 0);
    return buffered;
  }
  return (int)mResampler.mPreBufferFrames;
}
} // namespace mozilla