bug 1221836 return output pointer from FFTConvolver::process() to save a buffer copy r=padenot

--HG-- extra : rebase_source : 99568b7c3095f44a061f783cefd5d6cbeebfa496
2015-11-04 21:23:18 +13:00 · 2015-11-04 21:23:18 +13:00 · de1e2244a1
--- a/dom/media/webaudio/blink/FFTConvolver.cpp
+++ b/dom/media/webaudio/blink/FFTConvolver.cpp
@ -37,6 +37,7 @@ FFTConvolver::FFTConvolver(size_t fftSize, size_t renderPhase)
    : m_frame(fftSize)
    , m_readWriteIndex(renderPhase % (fftSize / 2))
 {
+    MOZ_ASSERT(fftSize >= 2 * WEBAUDIO_BLOCK_SIZE);
  m_inputBuffer.SetLength(fftSize);
  PodZero(m_inputBuffer.Elements(), fftSize);
  m_outputBuffer.SetLength(fftSize);
@ -60,67 +61,47 @@ size_t FFTConvolver::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) co
  return aMallocSizeOf(this) + sizeOfExcludingThis(aMallocSizeOf);
 }

-void FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP, float* destP)
+const float* FFTConvolver::process(FFTBlock* fftKernel, const float* sourceP)
 {
    size_t halfSize = fftSize() / 2;

    // WEBAUDIO_BLOCK_SIZE must be an exact multiple of halfSize,
-    // or halfSize is a multiple of WEBAUDIO_BLOCK_SIZE when halfSize > WEBAUDIO_BLOCK_SIZE.
-    MOZ_ASSERT(!(halfSize % WEBAUDIO_BLOCK_SIZE &&
-                 WEBAUDIO_BLOCK_SIZE % halfSize));
+    // halfSize must be a multiple of WEBAUDIO_BLOCK_SIZE
+    // and >= WEBAUDIO_BLOCK_SIZE.
+    MOZ_ASSERT(halfSize % WEBAUDIO_BLOCK_SIZE == 0 &&
+               WEBAUDIO_BLOCK_SIZE <= halfSize);

-    size_t numberOfDivisions = halfSize <= WEBAUDIO_BLOCK_SIZE ?
-        (WEBAUDIO_BLOCK_SIZE / halfSize) : 1;
-    size_t divisionSize = numberOfDivisions == 1 ?
-        WEBAUDIO_BLOCK_SIZE : halfSize;
+    // Copy samples to input buffer (note constraint above!)
+    float* inputP = m_inputBuffer.Elements();

-    for (size_t i = 0; i < numberOfDivisions; ++i, sourceP += divisionSize, destP += divisionSize) {
-        // Copy samples to input buffer (note contraint above!)
-        float* inputP = m_inputBuffer.Elements();
+    MOZ_ASSERT(sourceP && inputP && m_readWriteIndex + WEBAUDIO_BLOCK_SIZE <= m_inputBuffer.Length());

-        // Sanity check
-        bool isCopyGood1 = sourceP && inputP && m_readWriteIndex + divisionSize <= m_inputBuffer.Length();
-        MOZ_ASSERT(isCopyGood1);
-        if (!isCopyGood1)
-            return;
+    memcpy(inputP + m_readWriteIndex, sourceP, sizeof(float) * WEBAUDIO_BLOCK_SIZE);

-        memcpy(inputP + m_readWriteIndex, sourceP, sizeof(float) * divisionSize);
+    float* outputP = m_outputBuffer.Elements();
+    m_readWriteIndex += WEBAUDIO_BLOCK_SIZE;

-        float* outputP = m_outputBuffer.Elements();
-        m_readWriteIndex += divisionSize;
+    // Check if it's time to perform the next FFT
+    if (m_readWriteIndex == halfSize) {
+        // The input buffer is now filled (get frequency-domain version)
+        m_frame.PerformFFT(m_inputBuffer.Elements());
+        m_frame.Multiply(*fftKernel);
+        m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());

-        // Check if it's time to perform the next FFT
-        if (m_readWriteIndex == halfSize) {
-            // The input buffer is now filled (get frequency-domain version)
-            m_frame.PerformFFT(m_inputBuffer.Elements());
-            m_frame.Multiply(*fftKernel);
-            m_frame.GetInverseWithoutScaling(m_outputBuffer.Elements());
+        // Overlap-add 1st half from previous time
+        AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
+                                m_outputBuffer.Elements(), halfSize);

-            // Overlap-add 1st half from previous time
-            AudioBufferAddWithScale(m_lastOverlapBuffer.Elements(), 1.0f,
-                                    m_outputBuffer.Elements(), halfSize);
+        // Finally, save 2nd half of result
+        MOZ_ASSERT(m_outputBuffer.Length() == 2 * halfSize && m_lastOverlapBuffer.Length() == halfSize);

-            // Finally, save 2nd half of result
-            bool isCopyGood3 = m_outputBuffer.Length() == 2 * halfSize && m_lastOverlapBuffer.Length() == halfSize;
-            MOZ_ASSERT(isCopyGood3);
-            if (!isCopyGood3)
-                return;
+        memcpy(m_lastOverlapBuffer.Elements(), m_outputBuffer.Elements() + halfSize, sizeof(float) * halfSize);

-            memcpy(m_lastOverlapBuffer.Elements(), m_outputBuffer.Elements() + halfSize, sizeof(float) * halfSize);
-
-            // Reset index back to start for next time
-            m_readWriteIndex = 0;
-        }
-
-        // Sanity check
-        bool isCopyGood2 = destP && outputP && m_readWriteIndex + divisionSize <= m_outputBuffer.Length();
-        MOZ_ASSERT(isCopyGood2);
-        if (!isCopyGood2)
-            return;
-
-        // Copy samples from output buffer
-        memcpy(destP, outputP + m_readWriteIndex, sizeof(float) * divisionSize);
+        // Reset index back to start for next time
+        m_readWriteIndex = 0;
    }
+
+    return outputP + m_readWriteIndex;
 }

 void FFTConvolver::reset()
--- a/dom/media/webaudio/blink/FFTConvolver.h
+++ b/dom/media/webaudio/blink/FFTConvolver.h
@ -47,14 +47,13 @@ public:
    // FFTs at the same time.
    explicit FFTConvolver(size_t fftSize, size_t renderPhase = 0);

-    // Process WEBAUDIO_BLOCK_SIZE elements of array |sourceP| to |destP|.
+    // Process WEBAUDIO_BLOCK_SIZE elements of array |sourceP| and return a
+    // pointer to an output array of the same size.
    //
    // |fftKernel| must be pre-scaled for FFTBlock::GetInverseWithoutScaling().
    //
    // FIXME: Later, we can do more sophisticated buffering to relax this requirement...
-    //
-    // Processing in-place is allowed...
-    void process(FFTBlock* fftKernel, const float* sourceP, float* destP);
+    const float* process(FFTBlock* fftKernel, const float* sourceP);

    void reset();

--- a/dom/media/webaudio/blink/HRTFElevation.cpp
+++ b/dom/media/webaudio/blink/HRTFElevation.cpp
@ -76,19 +76,19 @@ size_t HRTFElevation::fftSizeForSampleRate(float sampleRate)
    // This is the size if we were to use all raw response samples.
    unsigned resampledLength =
        floorf(ResponseFrameSize * sampleRate / rawSampleRate);
-    // Keep things semi-sane, with max FFT size of 1024 and minimum of 4.
-    // "size |= 3" ensures a minimum of 4 (with the size++ below) and sets the
-    // 2 least significant bits for rounding up to the next power of 2 below.
+    // Keep things semi-sane, with max FFT size of 1024.
    unsigned size = min(resampledLength, 1023U);
-    size |= 3;
+    // Ensure a minimum of 2 * WEBAUDIO_BLOCK_SIZE (with the size++ below) for
+    // FFTConvolver and set the 8 least significant bits for rounding up to
+    // the next power of 2 below.
+    size |= 2 * WEBAUDIO_BLOCK_SIZE - 1;
    // Round up to the next power of 2, making the FFT size no more than twice
    // the impulse response length.  This doubles size for values that are
-    // already powers of 2.  This works by filling in 7 bits to right of the
+    // already powers of 2.  This works by filling in all bits to right of the
    // most significant bit.  The most significant bit is no greater than
-    // 1 << 9, and the least significant 2 bits were already set above.
+    // 1 << 9, and the least significant 8 bits were already set above, so
+    // there is at most one bit to add.
    size |= (size >> 1);
-    size |= (size >> 2);
-    size |= (size >> 4);
    size++;
    MOZ_ASSERT((size & (size - 1)) == 0);

--- a/dom/media/webaudio/blink/HRTFPanner.cpp
+++ b/dom/media/webaudio/blink/HRTFPanner.cpp
@ -59,11 +59,6 @@ HRTFPanner::HRTFPanner(float sampleRate, already_AddRefed<HRTFDatabaseLoader> da
 {
    MOZ_ASSERT(m_databaseLoader);
    MOZ_COUNT_CTOR(HRTFPanner);
-
-    m_tempL1.SetLength(RenderingQuantum);
-    m_tempR1.SetLength(RenderingQuantum);
-    m_tempL2.SetLength(RenderingQuantum);
-    m_tempR2.SetLength(RenderingQuantum);
 }

 HRTFPanner::~HRTFPanner()
@ -81,10 +76,6 @@ size_t HRTFPanner::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) cons
    amount += m_convolverL2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_convolverR2.sizeOfExcludingThis(aMallocSizeOf);
    amount += m_delayLine.SizeOfExcludingThis(aMallocSizeOf);
-    amount += m_tempL1.ShallowSizeOfExcludingThis(aMallocSizeOf);
-    amount += m_tempL2.ShallowSizeOfExcludingThis(aMallocSizeOf);
-    amount += m_tempR1.ShallowSizeOfExcludingThis(aMallocSizeOf);
-    amount += m_tempR2.ShallowSizeOfExcludingThis(aMallocSizeOf);

    return amount;
 }
@ -256,23 +247,26 @@ void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock*

    bool needsCrossfading = m_crossfadeIncr;

-    // Have the convolvers render directly to the final destination if we're not cross-fading.
-    float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.Elements() : destinationL;
-    float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.Elements() : destinationR;
-    float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.Elements() : destinationL;
-    float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.Elements() : destinationR;
+    const float* convolutionDestinationL1;
+    const float* convolutionDestinationR1;
+    const float* convolutionDestinationL2;
+    const float* convolutionDestinationR2;

    // Now do the convolutions.
    // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.

    if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
-        m_convolverL1.process(kernelL1->fftFrame(), destinationL, convolutionDestinationL1);
-        m_convolverR1.process(kernelR1->fftFrame(), destinationR, convolutionDestinationR1);
+        convolutionDestinationL1 =
+            m_convolverL1.process(kernelL1->fftFrame(), destinationL);
+        convolutionDestinationR1 =
+            m_convolverR1.process(kernelR1->fftFrame(), destinationR);
    }

    if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
-        m_convolverL2.process(kernelL2->fftFrame(), destinationL, convolutionDestinationL2);
-        m_convolverR2.process(kernelR2->fftFrame(), destinationR, convolutionDestinationR2);
+        convolutionDestinationL2 =
+            m_convolverL2.process(kernelL2->fftFrame(), destinationL);
+        convolutionDestinationR2 =
+            m_convolverR2.process(kernelR2->fftFrame(), destinationR);
    }

    if (needsCrossfading) {
@ -298,6 +292,18 @@ void HRTFPanner::pan(double desiredAzimuth, double elevation, const AudioBlock*
            m_crossfadeX = 0;
            m_crossfadeIncr = 0;
        }
+    } else {
+        const float* sourceL;
+        const float* sourceR;
+        if (m_crossfadeSelection == CrossfadeSelection1) {
+            sourceL = convolutionDestinationL1;
+            sourceR = convolutionDestinationR1;
+        } else {
+            sourceL = convolutionDestinationL2;
+            sourceR = convolutionDestinationR2;
+        }
+        PodCopy(destinationL, sourceL, WEBAUDIO_BLOCK_SIZE);
+        PodCopy(destinationR, sourceR, WEBAUDIO_BLOCK_SIZE);
    }
 }

--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.cpp
@ -76,7 +76,7 @@ void ReverbAccumulationBuffer::updateReadIndex(int* readIndex, size_t numberOfFr
    *readIndex = (*readIndex + numberOfFrames) % m_buffer.Length();
 }

-int ReverbAccumulationBuffer::accumulate(float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames)
+int ReverbAccumulationBuffer::accumulate(const float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames)
 {
    size_t bufferLength = m_buffer.Length();

--- a/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
+++ b/dom/media/webaudio/blink/ReverbAccumulationBuffer.h
@ -50,7 +50,7 @@ public:
    // We need to pass in and update readIndex here, since each ReverbConvolverStage may be running in
    // a different thread than the realtime thread calling ReadAndClear() and maintaining m_readIndex
    // Returns the writeIndex where the accumulation took place
-    int accumulate(float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames);
+    int accumulate(const float* source, size_t numberOfFrames, int* readIndex, size_t delayFrames);

    size_t readIndex() const { return m_readIndex; }
    void updateReadIndex(int* readIndex, size_t numberOfFrames) const;
--- a/dom/media/webaudio/blink/ReverbConvolverStage.cpp
+++ b/dom/media/webaudio/blink/ReverbConvolverStage.cpp
@ -54,9 +54,6 @@ ReverbConvolverStage::ReverbConvolverStage(const float* impulseResponse, size_t,
    m_fftKernel->PadAndMakeScaledDFT(impulseResponse + stageOffset, stageLength);
    m_fftConvolver = new FFTConvolver(fftSize, renderPhase);

-    m_temporaryBuffer.SetLength(WEBAUDIO_BLOCK_SIZE);
-    PodZero(m_temporaryBuffer.Elements(), m_temporaryBuffer.Length());
-
    // The convolution stage at offset stageOffset needs to have a corresponding delay to cancel out the offset.
    size_t totalDelay = stageOffset + reverbTotalLatency;

@ -80,8 +77,6 @@ size_t ReverbConvolverStage::sizeOfIncludingThis(mozilla::MallocSizeOf aMallocSi
        amount += m_fftConvolver->sizeOfIncludingThis(aMallocSizeOf);
    }

-    amount += m_temporaryBuffer.ShallowSizeOfExcludingThis(aMallocSizeOf);
-
    return amount;
 }

@ -99,15 +94,12 @@ void ReverbConvolverStage::process(const float* source)
    if (!source)
        return;
    
-    float* temporaryBuffer = m_temporaryBuffer.Elements();
-    
    // Now, run the convolution (into the delay buffer).
    // An expensive FFT will happen every fftSize / 2 frames.
-    // We process in-place here...
-    m_fftConvolver->process(m_fftKernel, source, temporaryBuffer);
+    const float* output = m_fftConvolver->process(m_fftKernel, source);

    // Now accumulate into reverb's accumulation buffer.
-    m_accumulationBuffer->accumulate(temporaryBuffer, WEBAUDIO_BLOCK_SIZE,
+    m_accumulationBuffer->accumulate(output, WEBAUDIO_BLOCK_SIZE,
                                     &m_accumulationReadIndex,
                                     m_postDelayLength);
 }