Bug 926838 - [Part 4] Implement AlignedTArray, because 32-byte alignment is required by OpenMAX DL. Also modify callers. r=ehsan

This commit is contained in:
JW Wang 2013-11-13 11:07:31 +08:00
Родитель 075c139090
Коммит de10d6d328
11 изменённых файлов: 247 добавлений и 65 удалений

Просмотреть файл

@ -0,0 +1,85 @@
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef AlignedTArray_h__
#define AlignedTArray_h__
#include "mozilla/Alignment.h"
#include "nsTArray.h"
/**
 * nsTArray_Impl wrapper whose Elements() pointer is aligned to N bytes.
 *
 * The underlying array is sized sExtra elements larger than requested so
 * that the first element can be shifted forward to the next N-byte boundary.
 *
 * E: element type, must be a POD type.
 * N: required alignment, in bytes, of the first element; must be a positive
 *    power of 2. This template has no default for N; the AlignedTArray and
 *    AlignedFallibleTArray wrappers default it to 32.
 * Alloc: allocator policy forwarded to nsTArray_Impl.
 *
 * NOTE(review): only the members redeclared here compensate for the
 * alignment shift; other inherited nsTArray_Impl members presumably still
 * see the unshifted storage and the padded length -- confirm before use.
 */
template <typename E, int N, typename Alloc>
class AlignedTArray_Impl : public nsTArray_Impl<E, Alloc>
{
  // The bit test alone also accepts N == 0, which would make getAligned()
  // mask every pointer down to null, so non-positive values are rejected.
  static_assert(N > 0 && (N & (N - 1)) == 0,
                "N must be a positive power of 2");
  typedef nsTArray_Impl<E, Alloc> base_type;

public:
  typedef E elem_type;
  typedef typename base_type::size_type size_type;
  typedef typename base_type::index_type index_type;

  AlignedTArray_Impl() {}

  // Forwards capacity plus the alignment slack to the base-class constructor.
  explicit AlignedTArray_Impl(size_type capacity)
    : base_type(capacity + sExtra)
  {}

  // Pointer to the first element, rounded up to an N-byte boundary.
  elem_type* Elements() { return getAligned(base_type::Elements()); }
  const elem_type* Elements() const { return getAligned(base_type::Elements()); }

  // Indexes relative to the aligned Elements() pointer; no bounds check is
  // performed here.
  elem_type& operator[](index_type i) { return Elements()[i]; }
  const elem_type& operator[](index_type i) const { return Elements()[i]; }

  // Resizes the base array to newLen plus the alignment slack and returns
  // whatever the base-class SetLength() returns.
  typename Alloc::ResultType SetLength(size_type newLen) {
    return base_type::SetLength(newLen + sExtra);
  }

  // Number of usable elements: the base length minus the alignment slack,
  // or 0 when the base array is not longer than the slack.
  size_type Length() const {
    return base_type::Length() <= sExtra ? 0 : base_type::Length() - sExtra;
  }

private:
  AlignedTArray_Impl(const AlignedTArray_Impl& other) = delete;
  void operator=(const AlignedTArray_Impl& other) = delete;

  // Worst-case number of bytes the first element has to be shifted by.
  static const size_type sPadding = N <= MOZ_ALIGNOF(E) ? 0 : N - MOZ_ALIGNOF(E);
  // sPadding expressed in whole elements, rounded up.
  static const size_type sExtra = (sPadding + sizeof(E) - 1) / sizeof(E);

  // Rounds p up to the next multiple of N. The mask is built in uintptr_t so
  // that it is as wide as the pointer value it is applied to.
  template <typename U>
  static U* getAligned(U* p)
  {
    const uintptr_t mask = static_cast<uintptr_t>(N) - 1;
    return reinterpret_cast<U*>((reinterpret_cast<uintptr_t>(p) + mask) & ~mask);
  }
};
/**
 * AlignedTArray_Impl bound to nsTArrayInfallibleAllocator.
 *
 * E: element type.
 * N: alignment in bytes of the first element, 32 unless specified.
 */
template <typename E, int N = 32>
class AlignedTArray
  : public AlignedTArray_Impl<E, N, nsTArrayInfallibleAllocator>
{
public:
  using base_type = AlignedTArray_Impl<E, N, nsTArrayInfallibleAllocator>;
  using self_type = AlignedTArray<E, N>;
  using size_type = typename base_type::size_type;

  // Creates an array without passing any capacity to the base class.
  AlignedTArray() {}

  // Hands aCapacity on to the AlignedTArray_Impl constructor.
  explicit AlignedTArray(size_type aCapacity)
    : base_type(aCapacity)
  {}

private:
  // Copy construction and copy assignment are disabled.
  AlignedTArray(const AlignedTArray&) = delete;
  void operator=(const AlignedTArray&) = delete;
};
/**
 * AlignedTArray_Impl bound to nsTArrayFallibleAllocator.
 *
 * E: element type.
 * N: alignment in bytes of the first element, 32 unless specified.
 */
template <typename E, int N = 32>
class AlignedFallibleTArray
  : public AlignedTArray_Impl<E, N, nsTArrayFallibleAllocator>
{
public:
  using base_type = AlignedTArray_Impl<E, N, nsTArrayFallibleAllocator>;
  using self_type = AlignedFallibleTArray<E, N>;
  using size_type = typename base_type::size_type;

  // Creates an array without passing any capacity to the base class.
  AlignedFallibleTArray() {}

  // Hands aCapacity on to the AlignedTArray_Impl constructor.
  explicit AlignedFallibleTArray(size_type aCapacity)
    : base_type(aCapacity)
  {}

private:
  // Copy construction and copy assignment are disabled.
  AlignedFallibleTArray(const AlignedFallibleTArray&) = delete;
  void operator=(const AlignedFallibleTArray&) = delete;
};
#endif // AlignedTArray_h__

Просмотреть файл

@ -251,17 +251,16 @@ bool
AnalyserNode::FFTAnalysis()
{
float* inputBuffer;
bool allocated = false;
AlignedFallibleTArray<float> tmpBuffer;
if (mWriteIndex == 0) {
inputBuffer = mBuffer.Elements();
} else {
inputBuffer = static_cast<float*>(malloc(FftSize() * sizeof(float)));
if (!inputBuffer) {
// SetLength() yields true on success, so give up only when the allocation
// fails; otherwise fall through and copy into the freshly sized buffer.
if (!tmpBuffer.SetLength(FftSize())) {
  return false;
}
inputBuffer = tmpBuffer.Elements();
memcpy(inputBuffer, mBuffer.Elements() + mWriteIndex, sizeof(float) * (FftSize() - mWriteIndex));
memcpy(inputBuffer + FftSize() - mWriteIndex, mBuffer.Elements(), sizeof(float) * mWriteIndex);
allocated = true;
}
ApplyBlackmanWindow(inputBuffer, FftSize());
@ -279,9 +278,6 @@ AnalyserNode::FFTAnalysis()
(1.0 - mSmoothingTimeConstant) * scalarMagnitude;
}
if (allocated) {
free(inputBuffer);
}
return true;
}
@ -305,16 +301,16 @@ AnalyserNode::AllocateBuffer()
{
bool result = true;
if (mBuffer.Length() != FftSize()) {
result = mBuffer.SetLength(FftSize());
if (result) {
memset(mBuffer.Elements(), 0, sizeof(float) * FftSize());
mWriteIndex = 0;
result = mOutputBuffer.SetLength(FrequencyBinCount());
if (result) {
memset(mOutputBuffer.Elements(), 0, sizeof(float) * FrequencyBinCount());
}
// SetLength() yields true on success: report failure only when a resize
// fails, and zero each buffer only after it has been resized successfully.
if (!mBuffer.SetLength(FftSize())) {
  return false;
}
memset(mBuffer.Elements(), 0, sizeof(float) * FftSize());
mWriteIndex = 0;
if (!mOutputBuffer.SetLength(FrequencyBinCount())) {
  return false;
}
memset(mOutputBuffer.Elements(), 0, sizeof(float) * FrequencyBinCount());
}
return result;
}

Просмотреть файл

@ -9,6 +9,7 @@
#include "AudioNode.h"
#include "FFTBlock.h"
#include "AlignedTArray.h"
namespace mozilla {
namespace dom {
@ -77,8 +78,8 @@ private:
double mMaxDecibels;
double mSmoothingTimeConstant;
uint32_t mWriteIndex;
FallibleTArray<float> mBuffer;
FallibleTArray<float> mOutputBuffer;
AlignedFallibleTArray<float> mBuffer;
AlignedFallibleTArray<float> mOutputBuffer;
};
}

Просмотреть файл

@ -44,8 +44,7 @@ FFTBlock* FFTBlock::CreateInterpolatedBlock(const FFTBlock& block0, const FFTBlo
// In the time-domain, the 2nd half of the response must be zero, to avoid circular convolution aliasing...
int fftSize = newBlock->FFTSize();
nsTArray<float> buffer;
buffer.SetLength(fftSize);
AlignedTArray<float> buffer(fftSize);
newBlock->GetInverseWithoutScaling(buffer.Elements());
AudioBufferInPlaceScale(buffer.Elements(), 1.0f / fftSize, fftSize / 2);
PodZero(buffer.Elements() + fftSize / 2, fftSize / 2);
@ -60,10 +59,10 @@ void FFTBlock::InterpolateFrequencyComponents(const FFTBlock& block0, const FFTB
{
// FIXME : with some work, this method could be optimized
kiss_fft_cpx* dft = mOutputBuffer.Elements();
ComplexU* dft = mOutputBuffer.Elements();
const kiss_fft_cpx* dft1 = block0.mOutputBuffer.Elements();
const kiss_fft_cpx* dft2 = block1.mOutputBuffer.Elements();
const ComplexU* dft1 = block0.mOutputBuffer.Elements();
const ComplexU* dft2 = block1.mOutputBuffer.Elements();
MOZ_ASSERT(mFFTSize == block0.FFTSize());
MOZ_ASSERT(mFFTSize == block1.FFTSize());
@ -154,7 +153,7 @@ void FFTBlock::InterpolateFrequencyComponents(const FFTBlock& block0, const FFTB
double FFTBlock::ExtractAverageGroupDelay()
{
kiss_fft_cpx* dft = mOutputBuffer.Elements();
ComplexU* dft = mOutputBuffer.Elements();
double aveSum = 0.0;
double weightSum = 0.0;
@ -205,7 +204,7 @@ void FFTBlock::AddConstantGroupDelay(double sampleFrameDelay)
{
int halfSize = FFTSize() / 2;
kiss_fft_cpx* dft = mOutputBuffer.Elements();
ComplexU* dft = mOutputBuffer.Elements();
const double kSamplePhaseDelay = (2.0 * M_PI) / double(FFTSize());

Просмотреть файл

@ -7,7 +7,13 @@
#ifndef FFTBlock_h_
#define FFTBlock_h_
#include "nsTArray.h"
#ifdef BUILD_ARM_NEON
#include <cmath>
#include "mozilla/arm.h"
#include "dl/sp/api/omxSP.h"
#endif
#include "AlignedTArray.h"
#include "AudioNodeEngine.h"
#include "kiss_fft/kiss_fftr.h"
@ -18,15 +24,26 @@ namespace mozilla {
// Currently it's implemented on top of KissFFT on all platforms.
class FFTBlock final
{
union ComplexU {
kiss_fft_cpx c;
float f[2];
struct {
float r;
float i;
};
};
public:
explicit FFTBlock(uint32_t aFFTSize)
: mFFT(nullptr)
, mIFFT(nullptr)
, mFFTSize(aFFTSize)
: mKissFFT(nullptr)
, mKissIFFT(nullptr)
#ifdef BUILD_ARM_NEON
, mOmxFFT(nullptr)
, mOmxIFFT(nullptr)
#endif
{
MOZ_COUNT_CTOR(FFTBlock);
mOutputBuffer.SetLength(aFFTSize / 2 + 1);
PodZero(mOutputBuffer.Elements(), aFFTSize / 2 + 1);
SetFFTSize(aFFTSize);
}
~FFTBlock()
{
@ -44,10 +61,17 @@ public:
void PerformFFT(const float* aData)
{
EnsureFFT();
kiss_fftr(mFFT, aData, mOutputBuffer.Elements());
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
omxSP_FFTFwd_RToCCS_F32_Sfs(aData, mOutputBuffer.Elements()->f, mOmxFFT);
} else
#endif
{
kiss_fftr(mKissFFT, aData, &(mOutputBuffer.Elements()->c));
}
}
// Inverse-transform internal data and store the resulting FFTSize()
// points in aData.
// points in aDataOut.
void GetInverse(float* aDataOut)
{
GetInverseWithoutScaling(aDataOut);
@ -59,7 +83,17 @@ public:
void GetInverseWithoutScaling(float* aDataOut)
{
EnsureIFFT();
kiss_fftri(mIFFT, mOutputBuffer.Elements(), aDataOut);
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
omxSP_FFTInv_CCSToR_F32_Sfs(mOutputBuffer.Elements()->f, aDataOut, mOmxIFFT);
// There is no function that computes the inverse FFT without scaling, so
// we have to scale back up here. Bug 1158741.
AudioBufferInPlaceScale(aDataOut, mFFTSize, mFFTSize);
} else
#endif
{
kiss_fftri(mKissIFFT, &(mOutputBuffer.Elements()->c), aDataOut);
}
}
// Inverse-transform the FFTSize()/2+1 points of data in each
// of aRealDataIn and aImagDataIn and store the resulting
@ -70,23 +104,30 @@ public:
{
EnsureIFFT();
const uint32_t inputSize = mFFTSize / 2 + 1;
nsTArray<kiss_fft_cpx> inputBuffer;
inputBuffer.SetLength(inputSize);
AlignedTArray<ComplexU> inputBuffer(inputSize);
for (uint32_t i = 0; i < inputSize; ++i) {
inputBuffer[i].r = aRealDataIn[i];
inputBuffer[i].i = aImagDataIn[i];
}
kiss_fftri(mIFFT, inputBuffer.Elements(), aRealDataOut);
for (uint32_t i = 0; i < mFFTSize; ++i) {
aRealDataOut[i] /= mFFTSize;
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
omxSP_FFTInv_CCSToR_F32_Sfs(inputBuffer.Elements()->f,
aRealDataOut, mOmxIFFT);
} else
#endif
{
kiss_fftri(mKissIFFT, &(inputBuffer.Elements()->c), aRealDataOut);
for (uint32_t i = 0; i < mFFTSize; ++i) {
aRealDataOut[i] /= mFFTSize;
}
}
}
void Multiply(const FFTBlock& aFrame)
{
BufferComplexMultiply(reinterpret_cast<const float*>(mOutputBuffer.Elements()),
reinterpret_cast<const float*>(aFrame.mOutputBuffer.Elements()),
reinterpret_cast<float*>(mOutputBuffer.Elements()),
BufferComplexMultiply(mOutputBuffer.Elements()->f,
aFrame.mOutputBuffer.Elements()->f,
mOutputBuffer.Elements()->f,
mFFTSize / 2 + 1);
}
@ -97,7 +138,7 @@ public:
void PadAndMakeScaledDFT(const float* aData, size_t dataSize)
{
MOZ_ASSERT(dataSize <= FFTSize());
nsTArray<float> paddedData;
AlignedTArray<float> paddedData;
paddedData.SetLength(FFTSize());
AudioBufferCopyWithScale(aData, 1.0f / FFTSize(),
paddedData.Elements(), dataSize);
@ -132,8 +173,8 @@ public:
size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const
{
size_t amount = 0;
amount += aMallocSizeOf(mFFT);
amount += aMallocSizeOf(mIFFT);
amount += aMallocSizeOf(mKissFFT);
amount += aMallocSizeOf(mKissIFFT);
amount += mOutputBuffer.SizeOfExcludingThis(aMallocSizeOf);
return amount;
}
@ -149,31 +190,78 @@ private:
void EnsureFFT()
{
if (!mFFT) {
mFFT = kiss_fftr_alloc(mFFTSize, 0, nullptr, nullptr);
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
if (!mOmxFFT) {
mOmxFFT = createOmxFFT(mFFTSize);
}
} else
#endif
{
if (!mKissFFT) {
mKissFFT = kiss_fftr_alloc(mFFTSize, 0, nullptr, nullptr);
}
}
}
void EnsureIFFT()
{
if (!mIFFT) {
mIFFT = kiss_fftr_alloc(mFFTSize, 1, nullptr, nullptr);
#ifdef BUILD_ARM_NEON
if (mozilla::supports_neon()) {
if (!mOmxIFFT) {
mOmxIFFT = createOmxFFT(mFFTSize);
}
} else
#endif
{
if (!mKissIFFT) {
mKissIFFT = kiss_fftr_alloc(mFFTSize, 1, nullptr, nullptr);
}
}
}
#ifdef BUILD_ARM_NEON
// Allocates and initializes an OpenMAX DL real-FFT spec for aFFTSize points.
// aFFTSize is asserted to be a power of two. Returns nullptr when the buffer
// size query, the allocation, or the initialization fails. On success the
// result is a malloc()'d block; Clear() releases it with free().
static OMXFFTSpec_R_F32* createOmxFFT(uint32_t aFFTSize)
{
  MOZ_ASSERT((aFFTSize & (aFFTSize-1)) == 0);
  // Integer log2: a truncated floating-point log()/M_LN2 quotient can land
  // one below the exact exponent.
  OMX_INT order = 0;
  while ((aFFTSize >> order) > 1) {
    ++order;
  }
  MOZ_ASSERT(aFFTSize>>order == 1);

  OMX_INT bufSize;
  if (omxSP_FFTGetBufSize_R_F32(order, &bufSize) != OMX_Sts_NoErr) {
    return nullptr;
  }

  OMXFFTSpec_R_F32* context = static_cast<OMXFFTSpec_R_F32*>(malloc(bufSize));
  if (!context) {
    return nullptr;
  }
  if (omxSP_FFTInit_R_F32(context, order) != OMX_Sts_NoErr) {
    // Do not leak the spec buffer when initialization fails.
    free(context);
    return nullptr;
  }
  return context;
}
#endif
void Clear()
{
free(mFFT);
free(mIFFT);
mFFT = mIFFT = nullptr;
#ifdef BUILD_ARM_NEON
free(mOmxFFT);
free(mOmxIFFT);
mOmxFFT = mOmxIFFT = nullptr;
#endif
free(mKissFFT);
free(mKissIFFT);
mKissFFT = mKissIFFT = nullptr;
}
void AddConstantGroupDelay(double sampleFrameDelay);
void InterpolateFrequencyComponents(const FFTBlock& block0,
const FFTBlock& block1, double interp);
kiss_fftr_cfg mFFT, mIFFT;
nsTArray<kiss_fft_cpx> mOutputBuffer;
kiss_fftr_cfg mKissFFT;
kiss_fftr_cfg mKissIFFT;
#ifdef BUILD_ARM_NEON
OMXFFTSpec_R_F32* mOmxFFT;
OMXFFTSpec_R_F32* mOmxIFFT;
#endif
AlignedTArray<ComplexU> mOutputBuffer;
uint32_t mFFTSize;
};
}
#endif

Просмотреть файл

@ -35,7 +35,7 @@
namespace WebCore {
typedef nsTArray<float> AudioFloatArray;
typedef AlignedTArray<float> AlignedAudioFloatArray;
using mozilla::FFTBlock;
class FFTConvolver {
@ -66,13 +66,13 @@ private:
// Buffer input until we get fftSize / 2 samples then do an FFT
size_t m_readWriteIndex;
AudioFloatArray m_inputBuffer;
AlignedAudioFloatArray m_inputBuffer;
// Stores output which we read a little at a time
AudioFloatArray m_outputBuffer;
AlignedAudioFloatArray m_outputBuffer;
// Saves the 2nd half of the FFT buffer, so we can do an overlap-add with the 1st half of the next one
AudioFloatArray m_lastOverlapBuffer;
AlignedAudioFloatArray m_lastOverlapBuffer;
};
} // namespace WebCore

Просмотреть файл

@ -51,6 +51,14 @@ HRTFKernel::HRTFKernel(float* impulseResponse, size_t length, float sampleRate)
: m_frameDelay(0)
, m_sampleRate(sampleRate)
{
AlignedTArray<float> buffer;
// copy to a 32-byte aligned buffer
if (((uintptr_t)impulseResponse & 31) != 0) {
buffer.SetLength(length);
mozilla::PodCopy(buffer.Elements(), impulseResponse, length);
impulseResponse = buffer.Elements();
}
// Determine the leading delay (average group delay) for the response.
m_frameDelay = extractAverageGroupDelay(impulseResponse, length);
@ -79,18 +87,18 @@ nsReturnRef<HRTFKernel> HRTFKernel::createInterpolatedKernel(HRTFKernel* kernel1
MOZ_ASSERT(kernel1 && kernel2);
if (!kernel1 || !kernel2)
return nsReturnRef<HRTFKernel>();
MOZ_ASSERT(x >= 0.0 && x < 1.0);
x = mozilla::clamped(x, 0.0f, 1.0f);
float sampleRate1 = kernel1->sampleRate();
float sampleRate2 = kernel2->sampleRate();
MOZ_ASSERT(sampleRate1 == sampleRate2);
if (sampleRate1 != sampleRate2)
return nsReturnRef<HRTFKernel>();
float frameDelay = (1 - x) * kernel1->frameDelay() + x * kernel2->frameDelay();
nsAutoPtr<FFTBlock> interpolatedFrame(
FFTBlock::CreateInterpolatedBlock(*kernel1->fftFrame(), *kernel2->fftFrame(), x));
return HRTFKernel::create(interpolatedFrame, frameDelay, sampleRate1);

Просмотреть файл

@ -35,6 +35,8 @@ struct AudioChunk;
namespace WebCore {
typedef nsTArray<float> AudioFloatArray;
class HRTFDatabaseLoader;
using mozilla::AudioChunk;

Просмотреть файл

@ -220,7 +220,7 @@ void PeriodicWave::createBandLimitedTables(const float* realData, const float* i
imagP[halfSize-1] = 0;
// Create the band-limited table.
AudioFloatArray* table = new AudioFloatArray(m_periodicWaveSize);
AlignedAudioFloatArray* table = new AlignedAudioFloatArray(m_periodicWaveSize);
m_bandLimitedTables.AppendElement(table);
// Apply an inverse FFT to generate the time-domain table data.

Просмотреть файл

@ -32,10 +32,12 @@
#include "mozilla/dom/OscillatorNodeBinding.h"
#include <nsAutoPtr.h>
#include <nsTArray.h>
#include "AlignedTArray.h"
#include "mozilla/MemoryReporting.h"
namespace WebCore {
typedef AlignedTArray<float> AlignedAudioFloatArray;
typedef nsTArray<float> AudioFloatArray;
class PeriodicWave {
@ -98,7 +100,7 @@ private:
// Creates tables based on numberOfComponents Fourier coefficients.
void createBandLimitedTables(const float* real, const float* imag, unsigned numberOfComponents);
nsTArray<nsAutoPtr<AudioFloatArray> > m_bandLimitedTables;
nsTArray<nsAutoPtr<AlignedAudioFloatArray> > m_bandLimitedTables;
};
} // namespace WebCore

Просмотреть файл

@ -21,6 +21,7 @@ MOCHITEST_CHROME_MANIFESTS += ['test/chrome.ini']
BROWSER_CHROME_MANIFESTS += ['test/browser.ini']
EXPORTS += [
'AlignedTArray.h',
'AudioContext.h',
'AudioEventTimeline.h',
'AudioNodeEngine.h',