bug 1221836 return output pointer from FFTConvolver::process() to save a buffer copy r=padenot

--HG--
extra : rebase_source : 99568b7c3095f44a061f783cefd5d6cbeebfa496
This commit is contained in:
Karl Tomlinson 2015-11-04 21:23:18 +13:00
Родитель 754b0bf4f3
Коммит de1e2244a1
7 изменённых файлов: 67 добавлений и 89 удалений

Просмотреть файл

@ -37,6 +37,7 @@ FFTConvolver::FFTConvolver(size_t fftSize, size_t renderPhase)
: m_frame(fftSize)
, m_readWriteIndex(renderPhase % (fftSize / 2))
{
MOZ_ASSERT(fftSize >= 2 * WEBAUDIO_BLOCK_SIZE);
m_inputBuffer.SetLength(fftSize);
PodZero(m_inputBuffer.Elements(), fftSize);
m_outputBuffer.SetLength(fftSize);
@ -60,67 +61,47 @@ size_t FFTConvolver::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) co
return aMallocSizeOf(this) + sizeOfExcludingThis(aMallocSizeOf);
}
void FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP, float* destP)
const float* FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP)
{
size_t halfSize = fftSize() / 2;
// WEBAUDIO_BLOCK_SIZE must be an exact multiple of halfSize,
// or halfSize is a multiple of WEBAUDIO_BLOCK_SIZE when halfSize > WEBAUDIO_BLOCK_SIZE.
MOZ_ASSERT(!(halfSize % WEBAUDIO_BLOCK_SIZE &&
WEBAUDIO_BLOCK_SIZE % halfSize));
// halfSize must be a multiple of WEBAUDIO_BLOCK_SIZE
// and >= WEBAUDIO_BLOCK_SIZE.
MOZ_ASSERT(halfSize % WEBAUDIO_BLOCK_SIZE == 0 &&
WEBAUDIO_BLOCK_SIZE <= halfSize);
size_t numberOfDivisions = halfSize <= WEBAUDIO_BLOCK_SIZE ?
(WEBAUDIO_BLOCK_SIZE / halfSize) : 1;
size_t divisionSize = numberOfDivisions == 1 ?
WEBAUDIO_BLOCK_SIZE : halfSize;
// Copy samples to input buffer (note constraint above!)
float* inputP = m_inputBuffer.Elements();
for (size_t i = 0; i < numberOfDivisions; ++i, sourceP += divisionSize, destP += divisionSize) {
// Copy samples to input buffer (note constraint above!)
float* inputP = m_inputBuffer.Elements();
MOZ_ASSERT(sourceP && inputP && m_readWriteIndex + WEBAUDIO_BLOCK_SIZE <= m_inputBuffer.Length());
// Sanity check
bool isCopyGood1 = sourceP && inputP && m_readWriteIndex + divisionSize <= m_inputBuffer.Length();
MOZ_ASSERT(isCopyGood1);
if (!isCopyGood1)
return;
memcpy(inputP + m_readWriteIndex, sourceP, sizeof(float) * WEBAUDIO_BLOCK_SIZE);
memcpy(inputP + m_readWriteIndex, sourceP, sizeof(float) * divisionSize);
float* outputP = m_outputBuffer.Elements();
m_readWriteIndex += WEBAUDIO_BLOCK_SIZE;
float* outputP = m_outputBuffer.Elements();
m_readWriteIndex += divisionSize;
// Check if it's time to perform the next FFT
if (m_readWriteIndex == halfSize) {
// The input buffer is now filled (get frequency-domain version)
m_frame.PerformFFT(m_inputBuffer.Elements());
m_frame.Multiply(*fftKernel);
m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
// Check if it's time to perform the next FFT
if (m_readWriteIndex == halfSize) {
// The input buffer is now filled (get frequency-domain version)
m_frame.PerformFFT(m_inputBuffer.Elements());
m_frame.Multiply(*fftKernel);
m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
// Overlap-add 1st half from previous time
AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
m_outputBuffer.Elements(), halfSize);
// Overlap-add 1st half from previous time
AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
m_outputBuffer.Elements(), halfSize);
// Finally, save 2nd half of result
MOZ_ASSERT(m_outputBuffer.Length() == 2 * halfSize && m_lastOverlapBuffer.Length() == halfSize);
// Finally, save 2nd half of result
bool isCopyGood3 = m_outputBuffer.Length() == 2 * halfSize && m_lastOverlapBuffer.Length() == halfSize;
MOZ_ASSERT(isCopyGood3);
if (!isCopyGood3)
return;
memcpy(m_lastOverlapBuffer.Elements(), m_outputBuffer.Elements() + halfSize, sizeof(float) * halfSize);
memcpy(m_lastOverlapBuffer.Elements(), m_outputBuffer.Elements() + halfSize, sizeof(float) * halfSize);
// Reset index back to start for next time
m_readWriteIndex = 0;
}
// Sanity check
bool isCopyGood2 = destP && outputP && m_readWriteIndex + divisionSize <= m_outputBuffer.Length();
MOZ_ASSERT(isCopyGood2);
if (!isCopyGood2)
return;
// Copy samples from output buffer
memcpy(destP, outputP + m_readWriteIndex, sizeof(float) * divisionSize);
// Reset index back to start for next time
m_readWriteIndex = 0;
}
return outputP + m_readWriteIndex;
}
void FFTConvolver::reset()

Просмотреть файл

@ -47,14 +47,13 @@ public:
// FFTs at the same time.
explicit FFTConvolver(size_t fftSize, size_t renderPhase = 0);
// Process WEBAUDIO_BLOCK_SIZE elements of array |sourceP| to |destP|.
// Process WEBAUDIO_BLOCK_SIZE elements of array |sourceP| and return a
// pointer to an output array of the same size.
//
// |fftKernel| must be pre-scaled for FFTBlock::GetInverseWithoutScaling().
//
// FIXME: Later, we can do more sophisticated buffering to relax this requirement...
//
// Processing in-place is allowed...
void process(FFTBlock* fftKernel, const float* sourceP, float* destP);
const float* process(FFTBlock* fftKernel, const float* sourceP);
void reset();

Просмотреть файл

@ -76,19 +76,19 @@ size_t HRTFElevation::fftSizeForSampleRate(float sampleRate)
// This is the size if we were to use all raw response samples.
unsigned resampledLength =
floorf(ResponseFrameSize * sampleRate / rawSampleRate);
// Keep things semi-sane, with max FFT size of 1024 and minimum of 4.
// "size |= 3" ensures a minimum of 4 (with the size++ below) and sets the
// 2 least significant bits for rounding up to the next power of 2 below.
// Keep things semi-sane, with max FFT size of 1024.
unsigned size = min(resampledLength, 1023U);
size |= 3;
// Ensure a minimum of 2 * WEBAUDIO_BLOCK_SIZE (with the size++ below) for
// FFTConvolver and set the 8 least significant bits for rounding up to
// the next power of 2 below.
size |= 2 * WEBAUDIO_BLOCK_SIZE - 1;
// Round up to the next power of 2, making the FFT size no more than twice
// the impulse response length. This doubles size for values that are
// already powers of 2. This works by filling in 7 bits to right of the
// already powers of 2. This works by filling in all bits to the right of the
// most significant bit. The most significant bit is no greater than
// 1 << 9, and the least significant 2 bits were already set above.
// 1 << 9, and the least significant 8 bits were already set above, so
// there is at most one bit to add.
size |= (size >> 1);
size |= (size >> 2);
size |= (size >> 4);
size++;
MOZ_ASSERT((size & (size - 1)) == 0);

Просмотреть файл

@ -59,11 +59,6 @@ HRTFPanner::HRTFPanner(float sampleRate, already_AddRefed<HRTFDatabaseLoader> da
{
MOZ_ASSERT(m_databaseLoader);
MOZ_COUNT_CTOR(HRTFPanner);
m_tempL1.SetLength(RenderingQuantum);
m_tempR1.SetLength(RenderingQuantum);
m_tempL2.SetLength(RenderingQuantum);
m_tempR2.SetLength(RenderingQuantum);
}
HRTFPanner::~HRTFPanner()
@ -81,10 +76,6 @@ size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) cons
amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf);
amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf);
amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf);
amount += m_tempL1.ShallowSizeOfExcludingThis(aMallocSizeOf);
amount += m_tempL2.ShallowSizeOfExcludingThis(aMallocSizeOf);
amount += m_tempR1.ShallowSizeOfExcludingThis(aMallocSizeOf);
amount += m_tempR2.ShallowSizeOfExcludingThis(aMallocSizeOf);
return amount;
}
@ -256,23 +247,26 @@ void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock*
bool needsCrossfading = m_crossfadeIncr;
// Have the convolvers render directly to the final destination if we're not cross-fading.
float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.Elements() : destinationL;
float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.Elements() : destinationR;
float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.Elements() : destinationL;
float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.Elements() : destinationR;
const float* convolutionDestinationL1;
const float* convolutionDestinationR1;
const float* convolutionDestinationL2;
const float* convolutionDestinationR2;
// Now do the convolutions.
// Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.
if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
m_convolverL1.process(kernelL1->fftFrame(), destinationL, convolutionDestinationL1);
m_convolverR1.process(kernelR1->fftFrame(), destinationR, convolutionDestinationR1);
convolutionDestinationL1 =
m_convolverL1.process(kernelL1->fftFrame(), destinationL);
convolutionDestinationR1 =
m_convolverR1.process(kernelR1->fftFrame(), destinationR);
}
if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
m_convolverL2.process(kernelL2->fftFrame(), destinationL, convolutionDestinationL2);
m_convolverR2.process(kernelR2->fftFrame(), destinationR, convolutionDestinationR2);
convolutionDestinationL2 =
m_convolverL2.process(kernelL2->fftFrame(), destinationL);
convolutionDestinationR2 =
m_convolverR2.process(kernelR2->fftFrame(), destinationR);
}
if (needsCrossfading) {
@ -298,6 +292,18 @@ void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock*
m_crossfadeX = 0;
m_crossfadeIncr = 0;
}
} else {
const float* sourceL;
const float* sourceR;
if (m_crossfadeSelection == CrossfadeSelection1) {
sourceL = convolutionDestinationL1;
sourceR = convolutionDestinationR1;
} else {
sourceL = convolutionDestinationL2;
sourceR = convolutionDestinationR2;
}
PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE);
PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE);
}
}

Просмотреть файл

@ -76,7 +76,7 @@ void ReverbAccumulationBuffer::updateReadIndex(int* readIndex, size_t numberOfFr
*readIndex = (*readIndex + numberOfFrames) % m_buffer.Length();
}
int ReverbAccumulationBuffer::accumulate(float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames)
int ReverbAccumulationBuffer::accumulate(const float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames)
{
size_t bufferLength = m_buffer.Length();

Просмотреть файл

@ -50,7 +50,7 @@ public:
// We need to pass in and update readIndex here, since each ReverbConvolverStage may be running in
// a different thread than the realtime thread calling ReadAndClear() and maintaining m_readIndex
// Returns the writeIndex where the accumulation took place
int accumulate(float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames);
int accumulate(const float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames);
size_t readIndex() const { return m_readIndex; }
void updateReadIndex(int* readIndex, size_t numberOfFrames) const;

Просмотреть файл

@ -54,9 +54,6 @@ ReverbConvolverStage::ReverbConvolverStage(const float* impulseResponse, size_t,
m_fftKernel->PadAndMakeScaledDFT(impulseResponse + stageOffset, stageLength);
m_fftConvolver = new FFTConvolver(fftSize, renderPhase);
m_temporaryBuffer.SetLength(WEBAUDIO_BLOCK_SIZE);
PodZero(m_temporaryBuffer.Elements(), m_temporaryBuffer.Length());
// The convolution stage at offset stageOffset needs to have a corresponding delay to cancel out the offset.
size_t totalDelay = stageOffset + reverbTotalLatency;
@ -80,8 +77,6 @@ size_t ReverbConvolverStage::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSi
amount += m_fftConvolver->sizeOfIncludingThis(aMallocSizeOf);
}
amount += m_temporaryBuffer.ShallowSizeOfExcludingThis(aMallocSizeOf);
return amount;
}
@ -99,15 +94,12 @@ void ReverbConvolverStage::process(const float* source)
if (!source)
return;
float* temporaryBuffer = m_temporaryBuffer.Elements();
// Now, run the convolution (into the delay buffer).
// An expensive FFT will happen every fftSize / 2 frames.
// We process in-place here...
m_fftConvolver->process(m_fftKernel, source, temporaryBuffer);
const float* output = m_fftConvolver->process(m_fftKernel, source);
// Now accumulate into reverb's accumulation buffer.
m_accumulationBuffer->accumulate(temporaryBuffer, WEBAUDIO_BLOCK_SIZE,
m_accumulationBuffer->accumulate(output, WEBAUDIO_BLOCK_SIZE,
&m_accumulationReadIndex,
m_postDelayLength);
}