зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1801557 - Use xsimd to implement dom/media/webaudio/AudioNodeEngine r=padenot
This patch contains a generic implementation of the algorithms in AudioNodeEngine.cpp, and this generic implementation is instantiated for SSE2 and NEON. Note that with this approach, supporting AVX would only require a few lines. Differential Revision: https://phabricator.services.mozilla.com/D162494
This commit is contained in:
Родитель
149f737bd0
Коммит
938cca0cc4
|
@ -9,12 +9,11 @@
|
|||
#include "mozilla/AbstractThread.h"
|
||||
#ifdef USE_NEON
|
||||
# include "mozilla/arm.h"
|
||||
# include "AudioNodeEngineNEON.h"
|
||||
# include "AudioNodeEngineGeneric.h"
|
||||
#endif
|
||||
#ifdef USE_SSE2
|
||||
# include "mozilla/SSE.h"
|
||||
# include "AlignmentUtils.h"
|
||||
# include "AudioNodeEngineSSE2.h"
|
||||
# include "AudioNodeEngineGeneric.h"
|
||||
#endif
|
||||
#include "AudioBlock.h"
|
||||
#include "Tracing.h"
|
||||
|
@ -67,39 +66,17 @@ void AudioBufferAddWithScale(const float* aInput, float aScale, float* aOutput,
|
|||
uint32_t aSize) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBufferAddWithScale_NEON(aInput, aScale, aOutput, aSize);
|
||||
Engine<xsimd::neon>::AudioBufferAddWithScale(aInput, aScale, aOutput,
|
||||
aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
if (aScale == 1.0f) {
|
||||
while (aSize && (!IS_ALIGNED16(aInput) || !IS_ALIGNED16(aOutput))) {
|
||||
*aOutput += *aInput;
|
||||
++aOutput;
|
||||
++aInput;
|
||||
--aSize;
|
||||
}
|
||||
} else {
|
||||
while (aSize && (!IS_ALIGNED16(aInput) || !IS_ALIGNED16(aOutput))) {
|
||||
*aOutput += *aInput * aScale;
|
||||
++aOutput;
|
||||
++aInput;
|
||||
--aSize;
|
||||
}
|
||||
}
|
||||
|
||||
// we need to round aSize down to the nearest multiple of 16
|
||||
uint32_t alignedSize = aSize & ~0x0F;
|
||||
if (alignedSize > 0) {
|
||||
AudioBufferAddWithScale_SSE(aInput, aScale, aOutput, alignedSize);
|
||||
|
||||
// adjust parameters for use with scalar operations below
|
||||
aInput += alignedSize;
|
||||
aOutput += alignedSize;
|
||||
aSize -= alignedSize;
|
||||
}
|
||||
Engine<xsimd::sse2>::AudioBufferAddWithScale(aInput, aScale, aOutput,
|
||||
aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -127,14 +104,16 @@ void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
|
|||
} else {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
|
||||
Engine<xsimd::neon>::AudioBlockCopyChannelWithScale(aInput, aScale,
|
||||
aOutput);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
|
||||
Engine<xsimd::sse2>::AudioBlockCopyChannelWithScale(aInput, aScale,
|
||||
aOutput);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -147,9 +126,15 @@ void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
|
|||
|
||||
void BufferComplexMultiply(const float* aInput, const float* aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
Engine<xsimd::neon>::BufferComplexMultiply(aInput, aScale, aOutput, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse()) {
|
||||
BufferComplexMultiply_SSE(aInput, aScale, aOutput, aSize);
|
||||
Engine<xsimd::sse2>::BufferComplexMultiply(aInput, aScale, aOutput, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -182,14 +167,16 @@ void AudioBlockCopyChannelWithScale(const float aInput[WEBAUDIO_BLOCK_SIZE],
|
|||
float aOutput[WEBAUDIO_BLOCK_SIZE]) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBlockCopyChannelWithScale_NEON(aInput, aScale, aOutput);
|
||||
Engine<xsimd::neon>::AudioBlockCopyChannelWithScale(aInput, aScale,
|
||||
aOutput);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
AudioBlockCopyChannelWithScale_SSE(aInput, aScale, aOutput);
|
||||
Engine<xsimd::sse2>::AudioBlockCopyChannelWithScale(aInput, aScale,
|
||||
aOutput);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -214,14 +201,14 @@ void AudioBufferInPlaceScale(float* aBlock, float aScale, uint32_t aSize) {
|
|||
}
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
|
||||
Engine<xsimd::neon>::AudioBufferInPlaceScale(aBlock, aScale, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
AudioBufferInPlaceScale_SSE(aBlock, aScale, aSize);
|
||||
Engine<xsimd::sse2>::AudioBufferInPlaceScale(aBlock, aScale, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -234,14 +221,14 @@ void AudioBufferInPlaceScale(float* aBlock, float aScale, uint32_t aSize) {
|
|||
void AudioBufferInPlaceScale(float* aBlock, float* aScale, uint32_t aSize) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBufferInPlaceScale_NEON(aBlock, aScale, aSize);
|
||||
Engine<xsimd::neon>::AudioBufferInPlaceScale(aBlock, aScale, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
AudioBufferInPlaceScale_SSE(aBlock, aScale, aSize);
|
||||
Engine<xsimd::sse2>::AudioBufferInPlaceScale(aBlock, aScale, aSize);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -275,16 +262,16 @@ void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
|||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBlockPanStereoToStereo_NEON(aInputL, aInputR, aGainL, aGainR,
|
||||
aIsOnTheLeft, aOutputL, aOutputR);
|
||||
Engine<xsimd::neon>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
AudioBlockPanStereoToStereo_SSE(aInputL, aInputR, aGainL, aGainR,
|
||||
aIsOnTheLeft, aOutputL, aOutputR);
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -313,8 +300,16 @@ void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
|||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
AudioBlockPanStereoToStereo_NEON(aInputL, aInputR, aGainL, aGainR,
|
||||
aIsOnTheLeft, aOutputL, aOutputR);
|
||||
Engine<xsimd::neon>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -332,32 +327,19 @@ void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
|||
}
|
||||
|
||||
float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
|
||||
float sum = 0.0f;
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse()) {
|
||||
const float* alignedInput = ALIGNED16(aInput);
|
||||
|
||||
// use scalar operations for any unaligned data at the beginning
|
||||
while (aInput != alignedInput) {
|
||||
if (!aLength) {
|
||||
return sum;
|
||||
}
|
||||
sum += *aInput * *aInput;
|
||||
++aInput;
|
||||
--aLength;
|
||||
}
|
||||
|
||||
uint32_t vLength = (aLength >> 4) << 4;
|
||||
sum += AudioBufferSumOfSquares_SSE(alignedInput, vLength);
|
||||
|
||||
// adjust aInput and aLength to use scalar operations for any
|
||||
// remaining values
|
||||
aInput = alignedInput + vLength;
|
||||
aLength -= vLength;
|
||||
#ifdef USE_NEON
|
||||
if (mozilla::supports_neon()) {
|
||||
return Engine<xsimd::neon>::AudioBufferSumOfSquares(aInput, aLength);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse()) {
|
||||
return Engine<xsimd::sse2>::AudioBufferSumOfSquares(aInput, aLength);
|
||||
}
|
||||
#endif
|
||||
|
||||
float sum = 0.f;
|
||||
while (aLength--) {
|
||||
sum += *aInput * *aInput;
|
||||
++aInput;
|
||||
|
@ -368,7 +350,7 @@ float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
|
|||
void NaNToZeroInPlace(float* aSamples, size_t aCount) {
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
NaNToZeroInPlace_SSE(aSamples, aCount);
|
||||
Engine<xsimd::sse2>::NaNToZeroInPlace(aSamples, aCount);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,335 @@
|
|||
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* this source code form is subject to the terms of the mozilla public
|
||||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef MOZILLA_AUDIONODEENGINEGENERIC_H_
|
||||
#define MOZILLA_AUDIONODEENGINEGENERIC_H_
|
||||
|
||||
#include "AudioNodeEngine.h"
|
||||
#include "AlignmentUtils.h"
|
||||
|
||||
#include "xsimd/xsimd.hpp"
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__ > 7
|
||||
# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
|
||||
# define MOZ_UNROLL(factor) MOZ_PRAGMA(GCC unroll factor)
|
||||
#elif defined(__INTEL_COMPILER) || (defined(__clang__) && __clang_major__ > 3)
|
||||
# define MOZ_PRAGMA(tokens) _Pragma(#tokens)
|
||||
# define MOZ_UNROLL(factor) MOZ_PRAGMA(unroll factor)
|
||||
#else
|
||||
# define MOZ_UNROLL(_)
|
||||
#endif
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
template <class Arch>
|
||||
static bool is_aligned(const void* ptr) {
|
||||
return (reinterpret_cast<uintptr_t>(ptr) &
|
||||
~(static_cast<uintptr_t>(Arch::alignment()) - 1)) ==
|
||||
reinterpret_cast<uintptr_t>(ptr);
|
||||
};
|
||||
|
||||
template <class Arch>
|
||||
struct Engine {
|
||||
static void AudioBufferAddWithScale(const float* aInput, float aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
if constexpr (Arch::requires_alignment()) {
|
||||
if (aScale == 1.0f) {
|
||||
while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
|
||||
if (!aSize) return;
|
||||
*aOutput += *aInput;
|
||||
++aOutput;
|
||||
++aInput;
|
||||
--aSize;
|
||||
}
|
||||
} else {
|
||||
while (!is_aligned<Arch>(aInput) || !is_aligned<Arch>(aOutput)) {
|
||||
if (!aSize) return;
|
||||
*aOutput += *aInput * aScale;
|
||||
++aOutput;
|
||||
++aInput;
|
||||
--aSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
|
||||
|
||||
xsimd::batch<float, Arch> vgain(aScale);
|
||||
|
||||
uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
|
||||
MOZ_UNROLL(4)
|
||||
for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
|
||||
auto vin1 = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
|
||||
auto vin2 = xsimd::batch<float, Arch>::load_aligned(&aOutput[i]);
|
||||
auto vout = xsimd::fma(vin1, vgain, vin2);
|
||||
vout.store_aligned(&aOutput[i]);
|
||||
}
|
||||
|
||||
for (unsigned i = aVSize; i < aSize; ++i) {
|
||||
aOutput[i] += aInput[i] * aScale;
|
||||
}
|
||||
};
|
||||
|
||||
static void AudioBlockCopyChannelWithScale(const float* aInput, float aScale,
|
||||
float* aOutput) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
|
||||
|
||||
MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
|
||||
"requires tail processing");
|
||||
|
||||
xsimd::batch<float, Arch> vgain = (aScale);
|
||||
|
||||
MOZ_UNROLL(4)
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
|
||||
i += xsimd::batch<float, Arch>::size) {
|
||||
auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
|
||||
auto vout = vin * vgain;
|
||||
vout.store_aligned(&aOutput[i]);
|
||||
}
|
||||
};
|
||||
|
||||
static void AudioBlockCopyChannelWithScale(
|
||||
const float aInput[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aScale[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutput[WEBAUDIO_BLOCK_SIZE]) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
|
||||
|
||||
MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
|
||||
"requires tail processing");
|
||||
|
||||
MOZ_UNROLL(4)
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
|
||||
i += xsimd::batch<float, Arch>::size) {
|
||||
auto vscaled = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
|
||||
auto vin = xsimd::batch<float, Arch>::load_aligned(&aInput[i]);
|
||||
auto vout = vin * vscaled;
|
||||
vout.store_aligned(&aOutput[i]);
|
||||
}
|
||||
};
|
||||
|
||||
static void AudioBufferInPlaceScale(float* aBlock, float aScale,
|
||||
uint32_t aSize) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
|
||||
|
||||
xsimd::batch<float, Arch> vgain(aScale);
|
||||
|
||||
uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
|
||||
MOZ_UNROLL(4)
|
||||
for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
|
||||
auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
|
||||
auto vout = vin * vgain;
|
||||
vout.store_aligned(&aBlock[i]);
|
||||
}
|
||||
for (unsigned i = aVSize; i < aSize; ++i) aBlock[i] *= aScale;
|
||||
};
|
||||
|
||||
static void AudioBufferInPlaceScale(float* aBlock, float* aScale,
|
||||
uint32_t aSize) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aBlock), "aBlock is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
|
||||
|
||||
uint32_t aVSize = aSize & ~(xsimd::batch<float, Arch>::size - 1);
|
||||
MOZ_UNROLL(4)
|
||||
for (unsigned i = 0; i < aVSize; i += xsimd::batch<float, Arch>::size) {
|
||||
auto vin = xsimd::batch<float, Arch>::load_aligned(&aBlock[i]);
|
||||
auto vgain = xsimd::batch<float, Arch>::load_aligned(&aScale[i]);
|
||||
auto vout = vin * vgain;
|
||||
vout.store_aligned(&aBlock[i]);
|
||||
}
|
||||
for (uint32_t i = aVSize; i < aSize; ++i) {
|
||||
*aBlock++ *= *aScale++;
|
||||
}
|
||||
};
|
||||
|
||||
static void AudioBlockPanStereoToStereo(
|
||||
const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE], float aGainL, float aGainR,
|
||||
bool aIsOnTheLeft, float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
|
||||
|
||||
MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
|
||||
"requires tail processing");
|
||||
|
||||
xsimd::batch<float, Arch> vgainl(aGainL);
|
||||
xsimd::batch<float, Arch> vgainr(aGainR);
|
||||
|
||||
if (aIsOnTheLeft) {
|
||||
MOZ_UNROLL(2)
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
|
||||
i += xsimd::batch<float, Arch>::size) {
|
||||
auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
|
||||
auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
|
||||
|
||||
/* left channel : aOutputL = aInputL + aInputR * gainL */
|
||||
auto vout = xsimd::fma(vinr, vgainl, vinl);
|
||||
vout.store_aligned(&aOutputL[i]);
|
||||
|
||||
/* right channel : aOutputR = aInputR * gainR */
|
||||
auto vscaled = vinr * vgainr;
|
||||
vscaled.store_aligned(&aOutputR[i]);
|
||||
}
|
||||
} else {
|
||||
MOZ_UNROLL(2)
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE;
|
||||
i += xsimd::batch<float, Arch>::size) {
|
||||
auto vinl = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
|
||||
auto vinr = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
|
||||
|
||||
/* left channel : aInputL * gainL */
|
||||
auto vscaled = vinl * vgainl;
|
||||
vscaled.store_aligned(&aOutputL[i]);
|
||||
|
||||
/* right channel: aOutputR = aInputR + aInputL * gainR */
|
||||
auto vout = xsimd::fma(vinl, vgainr, vinr);
|
||||
vout.store_aligned(&aOutputR[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void BufferComplexMultiply(const float* aInput, const float* aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutput), "aOutput is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aScale), "aScale is aligned");
|
||||
MOZ_ASSERT((aSize % xsimd::batch<float, Arch>::size == 0),
|
||||
"requires tail processing");
|
||||
|
||||
MOZ_UNROLL(2)
|
||||
for (unsigned i = 0; i < aSize * 2;
|
||||
i += 2 * xsimd::batch<std::complex<float>>::size) {
|
||||
auto in1 = xsimd::batch<std::complex<float>>::load_aligned(
|
||||
reinterpret_cast<const std::complex<float>*>(&aInput[i]));
|
||||
auto in2 = xsimd::batch<std::complex<float>>::load_aligned(
|
||||
reinterpret_cast<const std::complex<float>*>(&aScale[i]));
|
||||
auto out = in1 * in2;
|
||||
out.store_aligned(reinterpret_cast<std::complex<float>*>(&aOutput[i]));
|
||||
}
|
||||
};
|
||||
|
||||
static float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
|
||||
float sum = 0.f;
|
||||
|
||||
if constexpr (Arch::requires_alignment()) {
|
||||
while (!is_aligned<Arch>(aInput)) {
|
||||
if (!aLength) {
|
||||
return sum;
|
||||
}
|
||||
sum += *aInput * *aInput;
|
||||
++aInput;
|
||||
--aLength;
|
||||
}
|
||||
}
|
||||
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInput), "aInput is aligned");
|
||||
|
||||
constexpr uint32_t unroll_factor = 4;
|
||||
xsimd::batch<float, Arch> accs[unroll_factor] = {0.f, 0.f, 0.f, 0.f};
|
||||
|
||||
uint32_t vLength =
|
||||
aLength & ~(unroll_factor * xsimd::batch<float, Arch>::size - 1);
|
||||
|
||||
for (uint32_t i = 0; i < vLength;
|
||||
i += unroll_factor * xsimd::batch<float, Arch>::size) {
|
||||
MOZ_UNROLL(4)
|
||||
for (uint32_t j = 0; j < unroll_factor; ++j) {
|
||||
auto in = xsimd::batch<float, Arch>::load_aligned(
|
||||
&aInput[i + xsimd::batch<float, Arch>::size * j]);
|
||||
accs[j] = xsimd::fma(in, in, accs[j]);
|
||||
}
|
||||
}
|
||||
|
||||
sum += reduce_add((accs[0] + accs[1]) + (accs[2] + accs[3]));
|
||||
for (uint32_t i = vLength; i < aLength; ++i) sum += aInput[i] * aInput[i];
|
||||
return sum;
|
||||
};
|
||||
|
||||
static void NaNToZeroInPlace(float* aSamples, size_t aCount) {
|
||||
if constexpr (Arch::requires_alignment()) {
|
||||
while (!is_aligned<Arch>(aSamples)) {
|
||||
if (!aCount) {
|
||||
return;
|
||||
}
|
||||
if (*aSamples != *aSamples) {
|
||||
*aSamples = 0.0;
|
||||
}
|
||||
++aSamples;
|
||||
--aCount;
|
||||
}
|
||||
}
|
||||
|
||||
MOZ_ASSERT(is_aligned<Arch>(aSamples), "aSamples is aligned");
|
||||
|
||||
uint32_t vCount = aCount & ~(xsimd::batch<float, Arch>::size - 1);
|
||||
|
||||
MOZ_UNROLL(4)
|
||||
for (uint32_t i = 0; i < vCount; i += xsimd::batch<float, Arch>::size) {
|
||||
auto vin = xsimd::batch<float, Arch>::load_aligned(&aSamples[i]);
|
||||
auto vout =
|
||||
xsimd::select(xsimd::isnan(vin), xsimd::batch<float, Arch>(0.f), vin);
|
||||
vout.store_aligned(&aSamples[i]);
|
||||
}
|
||||
|
||||
for (uint32_t i = vCount; i < aCount; i++) {
|
||||
if (aSamples[i] != aSamples[i]) {
|
||||
aSamples[i] = 0.0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static void AudioBlockPanStereoToStereo(
|
||||
const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainR[WEBAUDIO_BLOCK_SIZE],
|
||||
const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInputL), "aInputL is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aInputR), "aInputR is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aGainL), "aGainL is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aGainR), "aGainR is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aIsOnTheLeft), "aIsOnTheLeft is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutputL), "aOutputL is aligned");
|
||||
MOZ_ASSERT(is_aligned<Arch>(aOutputR), "aOutputR is aligned");
|
||||
|
||||
MOZ_ASSERT((WEBAUDIO_BLOCK_SIZE % xsimd::batch<float, Arch>::size == 0),
|
||||
"requires tail processing");
|
||||
|
||||
MOZ_UNROLL(2)
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE;
|
||||
i += xsimd::batch<float, Arch>::size) {
|
||||
auto mask =
|
||||
xsimd::batch_bool<float, Arch>::load_aligned(&aIsOnTheLeft[i]);
|
||||
|
||||
auto inputL = xsimd::batch<float, Arch>::load_aligned(&aInputL[i]);
|
||||
auto inputR = xsimd::batch<float, Arch>::load_aligned(&aInputR[i]);
|
||||
auto gainL = xsimd::batch<float, Arch>::load_aligned(&aGainL[i]);
|
||||
auto gainR = xsimd::batch<float, Arch>::load_aligned(&aGainR[i]);
|
||||
|
||||
auto outL_true = xsimd::fma(inputR, gainL, inputL);
|
||||
auto outR_true = inputR * gainR;
|
||||
|
||||
auto outL_false = inputL * gainL;
|
||||
auto outR_false = xsimd::fma(inputL, gainR, inputR);
|
||||
|
||||
auto outL = xsimd::select(mask, outL_true, outL_false);
|
||||
auto outR = xsimd::select(mask, outR_true, outR_false);
|
||||
|
||||
outL.store_aligned(&aOutputL[i]);
|
||||
outR.store_aligned(&aOutputR[i]);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace mozilla
|
||||
|
||||
#endif
|
|
@ -3,350 +3,7 @@
|
|||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "AudioNodeEngineNEON.h"
|
||||
#if defined(_MSC_VER) && defined(_M_ARM64) && !defined(__clang__)
|
||||
# include <arm64_neon.h>
|
||||
#else
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
//#ifdef DEBUG
|
||||
#if 0 // see bug 921099
|
||||
# define ASSERT_ALIGNED(ptr) \
|
||||
MOZ_ASSERT((((uintptr_t)ptr + 15) & ~0x0F) == (uintptr_t)ptr, \
|
||||
#ptr " has to be aligned 16-bytes aligned.");
|
||||
#else
|
||||
# define ASSERT_ALIGNED(ptr)
|
||||
#endif
|
||||
|
||||
#define ADDRESS_OF(array, index) ((float32_t*)&array[index])
|
||||
|
||||
#include "AudioNodeEngineGeneric.h"
|
||||
namespace mozilla {
|
||||
void AudioBufferAddWithScale_NEON(const float* aInput, float aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
ASSERT_ALIGNED(aInput);
|
||||
ASSERT_ALIGNED(aOutput);
|
||||
|
||||
float32x4_t vin0, vin1, vin2, vin3;
|
||||
float32x4_t vout0, vout1, vout2, vout3;
|
||||
float32x4_t vscale = vmovq_n_f32(aScale);
|
||||
|
||||
uint32_t dif = aSize % 16;
|
||||
aSize -= dif;
|
||||
unsigned i = 0;
|
||||
for (; i < aSize; i += 16) {
|
||||
vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
|
||||
vin1 = vld1q_f32(ADDRESS_OF(aInput, i + 4));
|
||||
vin2 = vld1q_f32(ADDRESS_OF(aInput, i + 8));
|
||||
vin3 = vld1q_f32(ADDRESS_OF(aInput, i + 12));
|
||||
|
||||
vout0 = vld1q_f32(ADDRESS_OF(aOutput, i));
|
||||
vout1 = vld1q_f32(ADDRESS_OF(aOutput, i + 4));
|
||||
vout2 = vld1q_f32(ADDRESS_OF(aOutput, i + 8));
|
||||
vout3 = vld1q_f32(ADDRESS_OF(aOutput, i + 12));
|
||||
|
||||
vout0 = vmlaq_f32(vout0, vin0, vscale);
|
||||
vout1 = vmlaq_f32(vout1, vin1, vscale);
|
||||
vout2 = vmlaq_f32(vout2, vin2, vscale);
|
||||
vout3 = vmlaq_f32(vout3, vin3, vscale);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 4), vout1);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 8), vout2);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 12), vout3);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < dif; ++i, ++j) {
|
||||
aOutput[i] += aInput[i] * aScale;
|
||||
}
|
||||
}
|
||||
void AudioBlockCopyChannelWithScale_NEON(const float* aInput, float aScale,
|
||||
float* aOutput) {
|
||||
ASSERT_ALIGNED(aInput);
|
||||
ASSERT_ALIGNED(aOutput);
|
||||
|
||||
float32x4_t vin0, vin1, vin2, vin3;
|
||||
float32x4_t vout0, vout1, vout2, vout3;
|
||||
float32x4_t vscale = vmovq_n_f32(aScale);
|
||||
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 16) {
|
||||
vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
|
||||
vin1 = vld1q_f32(ADDRESS_OF(aInput, i + 4));
|
||||
vin2 = vld1q_f32(ADDRESS_OF(aInput, i + 8));
|
||||
vin3 = vld1q_f32(ADDRESS_OF(aInput, i + 12));
|
||||
|
||||
vout0 = vmulq_f32(vin0, vscale);
|
||||
vout1 = vmulq_f32(vin1, vscale);
|
||||
vout2 = vmulq_f32(vin2, vscale);
|
||||
vout3 = vmulq_f32(vin3, vscale);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 4), vout1);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 8), vout2);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 12), vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockCopyChannelWithScale_NEON(
|
||||
const float aInput[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aScale[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutput[WEBAUDIO_BLOCK_SIZE]) {
|
||||
ASSERT_ALIGNED(aInput);
|
||||
ASSERT_ALIGNED(aScale);
|
||||
ASSERT_ALIGNED(aOutput);
|
||||
|
||||
float32x4_t vin0, vin1, vin2, vin3;
|
||||
float32x4_t vout0, vout1, vout2, vout3;
|
||||
float32x4_t vscale0, vscale1, vscale2, vscale3;
|
||||
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 16) {
|
||||
vin0 = vld1q_f32(ADDRESS_OF(aInput, i));
|
||||
vin1 = vld1q_f32(ADDRESS_OF(aInput, i + 4));
|
||||
vin2 = vld1q_f32(ADDRESS_OF(aInput, i + 8));
|
||||
vin3 = vld1q_f32(ADDRESS_OF(aInput, i + 12));
|
||||
|
||||
vscale0 = vld1q_f32(ADDRESS_OF(aScale, i));
|
||||
vscale1 = vld1q_f32(ADDRESS_OF(aScale, i + 4));
|
||||
vscale2 = vld1q_f32(ADDRESS_OF(aScale, i + 8));
|
||||
vscale3 = vld1q_f32(ADDRESS_OF(aScale, i + 12));
|
||||
|
||||
vout0 = vmulq_f32(vin0, vscale0);
|
||||
vout1 = vmulq_f32(vin1, vscale1);
|
||||
vout2 = vmulq_f32(vin2, vscale2);
|
||||
vout3 = vmulq_f32(vin3, vscale3);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i), vout0);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 4), vout1);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 8), vout2);
|
||||
vst1q_f32(ADDRESS_OF(aOutput, i + 12), vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBufferInPlaceScale_NEON(float* aBlock, float aScale, uint32_t aSize) {
|
||||
ASSERT_ALIGNED(aBlock);
|
||||
|
||||
float32x4_t vin0, vin1, vin2, vin3;
|
||||
float32x4_t vout0, vout1, vout2, vout3;
|
||||
float32x4_t vscale = vmovq_n_f32(aScale);
|
||||
|
||||
uint32_t dif = aSize % 16;
|
||||
uint32_t vectorSize = aSize - dif;
|
||||
uint32_t i = 0;
|
||||
for (; i < vectorSize; i += 16) {
|
||||
vin0 = vld1q_f32(ADDRESS_OF(aBlock, i));
|
||||
vin1 = vld1q_f32(ADDRESS_OF(aBlock, i + 4));
|
||||
vin2 = vld1q_f32(ADDRESS_OF(aBlock, i + 8));
|
||||
vin3 = vld1q_f32(ADDRESS_OF(aBlock, i + 12));
|
||||
|
||||
vout0 = vmulq_f32(vin0, vscale);
|
||||
vout1 = vmulq_f32(vin1, vscale);
|
||||
vout2 = vmulq_f32(vin2, vscale);
|
||||
vout3 = vmulq_f32(vin3, vscale);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i), vout0);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 4), vout1);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 8), vout2);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 12), vout3);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < dif; ++i, ++j) {
|
||||
aBlock[i] *= aScale;
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBufferInPlaceScale_NEON(float* aBlock, float* aScale,
|
||||
uint32_t aSize) {
|
||||
ASSERT_ALIGNED(aBlock);
|
||||
|
||||
float32x4_t vin0, vin1, vin2, vin3;
|
||||
float32x4_t vout0, vout1, vout2, vout3;
|
||||
float32x4_t vscale0, vscale1, vscale2, vscale3;
|
||||
|
||||
uint32_t dif = aSize % 16;
|
||||
uint32_t vectorSize = aSize - dif;
|
||||
uint32_t i = 0;
|
||||
for (; i < vectorSize; i += 16) {
|
||||
vin0 = vld1q_f32(ADDRESS_OF(aBlock, i));
|
||||
vin1 = vld1q_f32(ADDRESS_OF(aBlock, i + 4));
|
||||
vin2 = vld1q_f32(ADDRESS_OF(aBlock, i + 8));
|
||||
vin3 = vld1q_f32(ADDRESS_OF(aBlock, i + 12));
|
||||
|
||||
vscale0 = vld1q_f32(ADDRESS_OF(aScale, i));
|
||||
vscale1 = vld1q_f32(ADDRESS_OF(aScale, i + 4));
|
||||
vscale2 = vld1q_f32(ADDRESS_OF(aScale, i + 8));
|
||||
vscale3 = vld1q_f32(ADDRESS_OF(aScale, i + 12));
|
||||
|
||||
vout0 = vmulq_f32(vin0, vscale0);
|
||||
vout1 = vmulq_f32(vin1, vscale1);
|
||||
vout2 = vmulq_f32(vin2, vscale2);
|
||||
vout3 = vmulq_f32(vin3, vscale3);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i), vout0);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 4), vout1);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 8), vout2);
|
||||
vst1q_f32(ADDRESS_OF(aBlock, i + 12), vout3);
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < dif; ++i, ++j) {
|
||||
aBlock[i] *= aScale[i];
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
float aGainL, float aGainR,
|
||||
bool aIsOnTheLeft,
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
ASSERT_ALIGNED(aInputL);
|
||||
ASSERT_ALIGNED(aInputR);
|
||||
ASSERT_ALIGNED(aOutputL);
|
||||
ASSERT_ALIGNED(aOutputR);
|
||||
|
||||
float32x4_t vinL0, vinL1;
|
||||
float32x4_t vinR0, vinR1;
|
||||
float32x4_t voutL0, voutL1;
|
||||
float32x4_t voutR0, voutR1;
|
||||
float32x4_t vscaleL = vmovq_n_f32(aGainL);
|
||||
float32x4_t vscaleR = vmovq_n_f32(aGainR);
|
||||
|
||||
if (aIsOnTheLeft) {
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
|
||||
vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
|
||||
vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i + 4));
|
||||
|
||||
vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
|
||||
vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i + 4));
|
||||
|
||||
voutL0 = vmlaq_f32(vinL0, vinR0, vscaleL);
|
||||
voutL1 = vmlaq_f32(vinL1, vinR1, vscaleL);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i + 4), voutL1);
|
||||
|
||||
voutR0 = vmulq_f32(vinR0, vscaleR);
|
||||
voutR1 = vmulq_f32(vinR1, vscaleR);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i + 4), voutR1);
|
||||
}
|
||||
} else {
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
|
||||
vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
|
||||
vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i + 4));
|
||||
|
||||
vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
|
||||
vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i + 4));
|
||||
|
||||
voutL0 = vmulq_f32(vinL0, vscaleL);
|
||||
voutL1 = vmulq_f32(vinL1, vscaleL);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i + 4), voutL1);
|
||||
|
||||
voutR0 = vmlaq_f32(vinR0, vinL0, vscaleR);
|
||||
voutR1 = vmlaq_f32(vinR1, vinL1, vscaleR);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i + 4), voutR1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockPanStereoToStereo_NEON(
|
||||
const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainR[WEBAUDIO_BLOCK_SIZE],
|
||||
const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
ASSERT_ALIGNED(aInputL);
|
||||
ASSERT_ALIGNED(aInputR);
|
||||
ASSERT_ALIGNED(aGainL);
|
||||
ASSERT_ALIGNED(aGainR);
|
||||
ASSERT_ALIGNED(aIsOnTheLeft);
|
||||
ASSERT_ALIGNED(aOutputL);
|
||||
ASSERT_ALIGNED(aOutputR);
|
||||
|
||||
float32x4_t vinL0, vinL1;
|
||||
float32x4_t vinR0, vinR1;
|
||||
float32x4_t voutL0, voutL1;
|
||||
float32x4_t voutR0, voutR1;
|
||||
float32x4_t vscaleL0, vscaleL1;
|
||||
float32x4_t vscaleR0, vscaleR1;
|
||||
float32x4_t onleft0, onleft1, notonleft0, notonleft1;
|
||||
|
||||
float32x4_t zero = vmovq_n_f32(0);
|
||||
uint8x8_t isOnTheLeft;
|
||||
|
||||
// Although MSVC throws uninitialized value warning for voutL0 and voutL1,
|
||||
// since we fill all lanes by vsetq_lane_f32, we can ignore it. But to avoid
|
||||
// compiler warning, set zero.
|
||||
voutL0 = zero;
|
||||
voutL1 = zero;
|
||||
|
||||
for (uint32_t i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
|
||||
vinL0 = vld1q_f32(ADDRESS_OF(aInputL, i));
|
||||
vinL1 = vld1q_f32(ADDRESS_OF(aInputL, i + 4));
|
||||
|
||||
vinR0 = vld1q_f32(ADDRESS_OF(aInputR, i));
|
||||
vinR1 = vld1q_f32(ADDRESS_OF(aInputR, i + 4));
|
||||
|
||||
vscaleL0 = vld1q_f32(ADDRESS_OF(aGainL, i));
|
||||
vscaleL1 = vld1q_f32(ADDRESS_OF(aGainL, i + 4));
|
||||
|
||||
vscaleR0 = vld1q_f32(ADDRESS_OF(aGainR, i));
|
||||
vscaleR1 = vld1q_f32(ADDRESS_OF(aGainR, i + 4));
|
||||
|
||||
// Load output with boolean "on the left" values. This assumes that
|
||||
// bools are stored as a single byte.
|
||||
isOnTheLeft = vld1_u8((uint8_t*)&aIsOnTheLeft[i]);
|
||||
voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 0), voutL0, 0);
|
||||
voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 1), voutL0, 1);
|
||||
voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 2), voutL0, 2);
|
||||
voutL0 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 3), voutL0, 3);
|
||||
voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 4), voutL1, 0);
|
||||
voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 5), voutL1, 1);
|
||||
voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 6), voutL1, 2);
|
||||
voutL1 = vsetq_lane_f32(vget_lane_u8(isOnTheLeft, 7), voutL1, 3);
|
||||
|
||||
// Convert the boolean values into masks by setting all bits to 1
|
||||
// if true.
|
||||
voutL0 = (float32x4_t)vcgtq_f32(voutL0, zero);
|
||||
voutL1 = (float32x4_t)vcgtq_f32(voutL1, zero);
|
||||
|
||||
// The right output masks are the same as the left masks
|
||||
voutR0 = voutL0;
|
||||
voutR1 = voutL1;
|
||||
|
||||
// Calculate left channel assuming isOnTheLeft
|
||||
onleft0 = vmlaq_f32(vinL0, vinR0, vscaleL0);
|
||||
onleft1 = vmlaq_f32(vinL1, vinR1, vscaleL1);
|
||||
|
||||
// Calculate left channel assuming not isOnTheLeft
|
||||
notonleft0 = vmulq_f32(vinL0, vscaleL0);
|
||||
notonleft1 = vmulq_f32(vinL1, vscaleL1);
|
||||
|
||||
// Write results using previously stored masks
|
||||
voutL0 = vbslq_f32((uint32x4_t)voutL0, onleft0, notonleft0);
|
||||
voutL1 = vbslq_f32((uint32x4_t)voutL1, onleft1, notonleft1);
|
||||
|
||||
// Calculate right channel assuming isOnTheLeft
|
||||
onleft0 = vmulq_f32(vinR0, vscaleR0);
|
||||
onleft1 = vmulq_f32(vinR1, vscaleR1);
|
||||
|
||||
// Calculate right channel assuming not isOnTheLeft
|
||||
notonleft0 = vmlaq_f32(vinR0, vinL0, vscaleR0);
|
||||
notonleft1 = vmlaq_f32(vinR1, vinL1, vscaleR1);
|
||||
|
||||
// Write results using previously stored masks
|
||||
voutR0 = vbslq_f32((uint32x4_t)voutR0, onleft0, notonleft0);
|
||||
voutR1 = vbslq_f32((uint32x4_t)voutR1, onleft1, notonleft1);
|
||||
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i), voutL0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputL, i + 4), voutL1);
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i), voutR0);
|
||||
vst1q_f32(ADDRESS_OF(aOutputR, i + 4), voutR1);
|
||||
}
|
||||
}
|
||||
template struct Engine<xsimd::neon>;
|
||||
} // namespace mozilla
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* this source code form is subject to the terms of the mozilla public
|
||||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#ifndef MOZILLA_AUDIONODEENGINENEON_H_
|
||||
#define MOZILLA_AUDIONODEENGINENEON_H_
|
||||
|
||||
#include "AudioNodeEngine.h"
|
||||
|
||||
namespace mozilla {
|
||||
void AudioBufferAddWithScale_NEON(const float* aInput, float aScale,
|
||||
float* aOutput, uint32_t aSize);
|
||||
|
||||
void AudioBlockCopyChannelWithScale_NEON(const float* aInput, float aScale,
|
||||
float* aOutput);
|
||||
|
||||
void AudioBlockCopyChannelWithScale_NEON(
|
||||
const float aInput[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aScale[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutput[WEBAUDIO_BLOCK_SIZE]);
|
||||
|
||||
void AudioBufferInPlaceScale_NEON(float* aBlock, float aScale, uint32_t aSize);
|
||||
void AudioBufferInPlaceScale_NEON(float* aBlock, float* aScale, uint32_t aSize);
|
||||
|
||||
void AudioBlockPanStereoToStereo_NEON(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
float aGainL, float aGainR,
|
||||
bool aIsOnTheLeft,
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]);
|
||||
|
||||
void AudioBlockPanStereoToStereo_NEON(
|
||||
const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aGainR[WEBAUDIO_BLOCK_SIZE],
|
||||
const bool aIsOnTheLeft[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE], float aOutputR[WEBAUDIO_BLOCK_SIZE]);
|
||||
} // namespace mozilla
|
||||
|
||||
#endif /* MOZILLA_AUDIONODEENGINENEON_H_ */
|
|
@ -3,361 +3,8 @@
|
|||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "AudioNodeEngineSSE2.h"
|
||||
#include "AlignmentUtils.h"
|
||||
#include <emmintrin.h>
|
||||
#include "AudioNodeEngineGeneric.h"
|
||||
|
||||
namespace mozilla {
|
||||
void AudioBufferAddWithScale_SSE(const float* aInput, float aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
__m128 vin0, vin1, vin2, vin3, vscaled0, vscaled1, vscaled2, vscaled3, vout0,
|
||||
vout1, vout2, vout3, vgain;
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_ALIGNED16(aOutput);
|
||||
ASSERT_MULTIPLE16(aSize);
|
||||
|
||||
vgain = _mm_load1_ps(&aScale);
|
||||
|
||||
for (unsigned i = 0; i < aSize; i += 16) {
|
||||
vin0 = _mm_load_ps(&aInput[i]);
|
||||
vin1 = _mm_load_ps(&aInput[i + 4]);
|
||||
vin2 = _mm_load_ps(&aInput[i + 8]);
|
||||
vin3 = _mm_load_ps(&aInput[i + 12]);
|
||||
|
||||
vscaled0 = _mm_mul_ps(vin0, vgain);
|
||||
vscaled1 = _mm_mul_ps(vin1, vgain);
|
||||
vscaled2 = _mm_mul_ps(vin2, vgain);
|
||||
vscaled3 = _mm_mul_ps(vin3, vgain);
|
||||
|
||||
vin0 = _mm_load_ps(&aOutput[i]);
|
||||
vin1 = _mm_load_ps(&aOutput[i + 4]);
|
||||
vin2 = _mm_load_ps(&aOutput[i + 8]);
|
||||
vin3 = _mm_load_ps(&aOutput[i + 12]);
|
||||
|
||||
vout0 = _mm_add_ps(vin0, vscaled0);
|
||||
vout1 = _mm_add_ps(vin1, vscaled1);
|
||||
vout2 = _mm_add_ps(vin2, vscaled2);
|
||||
vout3 = _mm_add_ps(vin3, vscaled3);
|
||||
|
||||
_mm_store_ps(&aOutput[i], vout0);
|
||||
_mm_store_ps(&aOutput[i + 4], vout1);
|
||||
_mm_store_ps(&aOutput[i + 8], vout2);
|
||||
_mm_store_ps(&aOutput[i + 12], vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockCopyChannelWithScale_SSE(const float* aInput, float aScale,
|
||||
float* aOutput) {
|
||||
__m128 vin0, vin1, vin2, vin3, vout0, vout1, vout2, vout3;
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_ALIGNED16(aOutput);
|
||||
|
||||
__m128 vgain = _mm_load1_ps(&aScale);
|
||||
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 16) {
|
||||
vin0 = _mm_load_ps(&aInput[i]);
|
||||
vin1 = _mm_load_ps(&aInput[i + 4]);
|
||||
vin2 = _mm_load_ps(&aInput[i + 8]);
|
||||
vin3 = _mm_load_ps(&aInput[i + 12]);
|
||||
vout0 = _mm_mul_ps(vin0, vgain);
|
||||
vout1 = _mm_mul_ps(vin1, vgain);
|
||||
vout2 = _mm_mul_ps(vin2, vgain);
|
||||
vout3 = _mm_mul_ps(vin3, vgain);
|
||||
_mm_store_ps(&aOutput[i], vout0);
|
||||
_mm_store_ps(&aOutput[i + 4], vout1);
|
||||
_mm_store_ps(&aOutput[i + 8], vout2);
|
||||
_mm_store_ps(&aOutput[i + 12], vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aScale[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutput[WEBAUDIO_BLOCK_SIZE]) {
|
||||
__m128 vin0, vin1, vin2, vin3, vscaled0, vscaled1, vscaled2, vscaled3, vout0,
|
||||
vout1, vout2, vout3;
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_ALIGNED16(aScale);
|
||||
ASSERT_ALIGNED16(aOutput);
|
||||
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 16) {
|
||||
vscaled0 = _mm_load_ps(&aScale[i]);
|
||||
vscaled1 = _mm_load_ps(&aScale[i + 4]);
|
||||
vscaled2 = _mm_load_ps(&aScale[i + 8]);
|
||||
vscaled3 = _mm_load_ps(&aScale[i + 12]);
|
||||
|
||||
vin0 = _mm_load_ps(&aInput[i]);
|
||||
vin1 = _mm_load_ps(&aInput[i + 4]);
|
||||
vin2 = _mm_load_ps(&aInput[i + 8]);
|
||||
vin3 = _mm_load_ps(&aInput[i + 12]);
|
||||
|
||||
vout0 = _mm_mul_ps(vin0, vscaled0);
|
||||
vout1 = _mm_mul_ps(vin1, vscaled1);
|
||||
vout2 = _mm_mul_ps(vin2, vscaled2);
|
||||
vout3 = _mm_mul_ps(vin3, vscaled3);
|
||||
|
||||
_mm_store_ps(&aOutput[i], vout0);
|
||||
_mm_store_ps(&aOutput[i + 4], vout1);
|
||||
_mm_store_ps(&aOutput[i + 8], vout2);
|
||||
_mm_store_ps(&aOutput[i + 12], vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBufferInPlaceScale_SSE(float* aBlock, float aScale, uint32_t aSize) {
|
||||
__m128 vout0, vout1, vout2, vout3, vin0, vin1, vin2, vin3;
|
||||
|
||||
ASSERT_ALIGNED16(aBlock);
|
||||
ASSERT_MULTIPLE16(aSize);
|
||||
|
||||
__m128 vgain = _mm_load1_ps(&aScale);
|
||||
|
||||
for (unsigned i = 0; i < aSize; i += 16) {
|
||||
vin0 = _mm_load_ps(&aBlock[i]);
|
||||
vin1 = _mm_load_ps(&aBlock[i + 4]);
|
||||
vin2 = _mm_load_ps(&aBlock[i + 8]);
|
||||
vin3 = _mm_load_ps(&aBlock[i + 12]);
|
||||
vout0 = _mm_mul_ps(vin0, vgain);
|
||||
vout1 = _mm_mul_ps(vin1, vgain);
|
||||
vout2 = _mm_mul_ps(vin2, vgain);
|
||||
vout3 = _mm_mul_ps(vin3, vgain);
|
||||
_mm_store_ps(&aBlock[i], vout0);
|
||||
_mm_store_ps(&aBlock[i + 4], vout1);
|
||||
_mm_store_ps(&aBlock[i + 8], vout2);
|
||||
_mm_store_ps(&aBlock[i + 12], vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBufferInPlaceScale_SSE(float* aBlock, float* aScale, uint32_t aSize) {
|
||||
__m128 vout0, vout1, vout2, vout3, vgain0, vgain1, vgain2, vgain3, vin0, vin1,
|
||||
vin2, vin3;
|
||||
|
||||
ASSERT_ALIGNED16(aBlock);
|
||||
ASSERT_MULTIPLE16(aSize);
|
||||
|
||||
for (unsigned i = 0; i < aSize; i += 16) {
|
||||
vin0 = _mm_load_ps(&aBlock[i]);
|
||||
vin1 = _mm_load_ps(&aBlock[i + 4]);
|
||||
vin2 = _mm_load_ps(&aBlock[i + 8]);
|
||||
vin3 = _mm_load_ps(&aBlock[i + 12]);
|
||||
vgain0 = _mm_load_ps(&aScale[i]);
|
||||
vgain1 = _mm_load_ps(&aScale[i + 4]);
|
||||
vgain2 = _mm_load_ps(&aScale[i + 8]);
|
||||
vgain3 = _mm_load_ps(&aScale[i + 12]);
|
||||
vout0 = _mm_mul_ps(vin0, vgain0);
|
||||
vout1 = _mm_mul_ps(vin1, vgain1);
|
||||
vout2 = _mm_mul_ps(vin2, vgain2);
|
||||
vout3 = _mm_mul_ps(vin3, vgain3);
|
||||
_mm_store_ps(&aBlock[i], vout0);
|
||||
_mm_store_ps(&aBlock[i + 4], vout1);
|
||||
_mm_store_ps(&aBlock[i + 8], vout2);
|
||||
_mm_store_ps(&aBlock[i + 12], vout3);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
float aGainL, float aGainR,
|
||||
bool aIsOnTheLeft,
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]) {
|
||||
__m128 vinl0, vinr0, vinl1, vinr1, vout0, vout1, vscaled0, vscaled1, vgainl,
|
||||
vgainr;
|
||||
|
||||
ASSERT_ALIGNED16(aInputL);
|
||||
ASSERT_ALIGNED16(aInputR);
|
||||
ASSERT_ALIGNED16(aOutputL);
|
||||
ASSERT_ALIGNED16(aOutputR);
|
||||
|
||||
vgainl = _mm_load1_ps(&aGainL);
|
||||
vgainr = _mm_load1_ps(&aGainR);
|
||||
|
||||
if (aIsOnTheLeft) {
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
|
||||
vinl0 = _mm_load_ps(&aInputL[i]);
|
||||
vinr0 = _mm_load_ps(&aInputR[i]);
|
||||
vinl1 = _mm_load_ps(&aInputL[i + 4]);
|
||||
vinr1 = _mm_load_ps(&aInputR[i + 4]);
|
||||
|
||||
/* left channel : aOutputL = aInputL + aInputR * gainL */
|
||||
vscaled0 = _mm_mul_ps(vinr0, vgainl);
|
||||
vscaled1 = _mm_mul_ps(vinr1, vgainl);
|
||||
vout0 = _mm_add_ps(vscaled0, vinl0);
|
||||
vout1 = _mm_add_ps(vscaled1, vinl1);
|
||||
_mm_store_ps(&aOutputL[i], vout0);
|
||||
_mm_store_ps(&aOutputL[i + 4], vout1);
|
||||
|
||||
/* right channel : aOutputR = aInputR * gainR */
|
||||
vscaled0 = _mm_mul_ps(vinr0, vgainr);
|
||||
vscaled1 = _mm_mul_ps(vinr1, vgainr);
|
||||
_mm_store_ps(&aOutputR[i], vscaled0);
|
||||
_mm_store_ps(&aOutputR[i + 4], vscaled1);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < WEBAUDIO_BLOCK_SIZE; i += 8) {
|
||||
vinl0 = _mm_load_ps(&aInputL[i]);
|
||||
vinr0 = _mm_load_ps(&aInputR[i]);
|
||||
vinl1 = _mm_load_ps(&aInputL[i + 4]);
|
||||
vinr1 = _mm_load_ps(&aInputR[i + 4]);
|
||||
|
||||
/* left channel : aInputL * gainL */
|
||||
vscaled0 = _mm_mul_ps(vinl0, vgainl);
|
||||
vscaled1 = _mm_mul_ps(vinl1, vgainl);
|
||||
_mm_store_ps(&aOutputL[i], vscaled0);
|
||||
_mm_store_ps(&aOutputL[i + 4], vscaled1);
|
||||
|
||||
/* right channel: aOutputR = aInputR + aInputL * gainR */
|
||||
vscaled0 = _mm_mul_ps(vinl0, vgainr);
|
||||
vscaled1 = _mm_mul_ps(vinl1, vgainr);
|
||||
vout0 = _mm_add_ps(vscaled0, vinr0);
|
||||
vout1 = _mm_add_ps(vscaled1, vinr1);
|
||||
_mm_store_ps(&aOutputR[i], vout0);
|
||||
_mm_store_ps(&aOutputR[i + 4], vout1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BufferComplexMultiply_SSE(const float* aInput, const float* aScale,
|
||||
float* aOutput, uint32_t aSize) {
|
||||
unsigned i;
|
||||
__m128 in0, in1, in2, in3, outreal0, outreal1, outreal2, outreal3, outimag0,
|
||||
outimag1, outimag2, outimag3;
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_ALIGNED16(aScale);
|
||||
ASSERT_ALIGNED16(aOutput);
|
||||
ASSERT_MULTIPLE16(aSize);
|
||||
|
||||
for (i = 0; i < aSize * 2; i += 16) {
|
||||
in0 = _mm_load_ps(&aInput[i]);
|
||||
in1 = _mm_load_ps(&aInput[i + 4]);
|
||||
in2 = _mm_load_ps(&aInput[i + 8]);
|
||||
in3 = _mm_load_ps(&aInput[i + 12]);
|
||||
|
||||
outreal0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
outimag0 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
outreal2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
outimag2 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
|
||||
in0 = _mm_load_ps(&aScale[i]);
|
||||
in1 = _mm_load_ps(&aScale[i + 4]);
|
||||
in2 = _mm_load_ps(&aScale[i + 8]);
|
||||
in3 = _mm_load_ps(&aScale[i + 12]);
|
||||
|
||||
outreal1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
outimag1 = _mm_shuffle_ps(in0, in1, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
outreal3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
outimag3 = _mm_shuffle_ps(in2, in3, _MM_SHUFFLE(3, 1, 3, 1));
|
||||
|
||||
in0 = _mm_sub_ps(_mm_mul_ps(outreal0, outreal1),
|
||||
_mm_mul_ps(outimag0, outimag1));
|
||||
in1 = _mm_add_ps(_mm_mul_ps(outreal0, outimag1),
|
||||
_mm_mul_ps(outimag0, outreal1));
|
||||
in2 = _mm_sub_ps(_mm_mul_ps(outreal2, outreal3),
|
||||
_mm_mul_ps(outimag2, outimag3));
|
||||
in3 = _mm_add_ps(_mm_mul_ps(outreal2, outimag3),
|
||||
_mm_mul_ps(outimag2, outreal3));
|
||||
|
||||
outreal0 = _mm_unpacklo_ps(in0, in1);
|
||||
outreal1 = _mm_unpackhi_ps(in0, in1);
|
||||
outreal2 = _mm_unpacklo_ps(in2, in3);
|
||||
outreal3 = _mm_unpackhi_ps(in2, in3);
|
||||
|
||||
_mm_store_ps(&aOutput[i], outreal0);
|
||||
_mm_store_ps(&aOutput[i + 4], outreal1);
|
||||
_mm_store_ps(&aOutput[i + 8], outreal2);
|
||||
_mm_store_ps(&aOutput[i + 12], outreal3);
|
||||
}
|
||||
}
|
||||
|
||||
float AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength) {
|
||||
unsigned i;
|
||||
__m128 in0, in1, in2, in3, acc0, acc1, acc2, acc3;
|
||||
float out[4];
|
||||
|
||||
ASSERT_ALIGNED16(aInput);
|
||||
ASSERT_MULTIPLE16(aLength);
|
||||
|
||||
acc0 = _mm_setzero_ps();
|
||||
acc1 = _mm_setzero_ps();
|
||||
acc2 = _mm_setzero_ps();
|
||||
acc3 = _mm_setzero_ps();
|
||||
|
||||
for (i = 0; i < aLength; i += 16) {
|
||||
in0 = _mm_load_ps(&aInput[i]);
|
||||
in1 = _mm_load_ps(&aInput[i + 4]);
|
||||
in2 = _mm_load_ps(&aInput[i + 8]);
|
||||
in3 = _mm_load_ps(&aInput[i + 12]);
|
||||
|
||||
in0 = _mm_mul_ps(in0, in0);
|
||||
in1 = _mm_mul_ps(in1, in1);
|
||||
in2 = _mm_mul_ps(in2, in2);
|
||||
in3 = _mm_mul_ps(in3, in3);
|
||||
|
||||
acc0 = _mm_add_ps(acc0, in0);
|
||||
acc1 = _mm_add_ps(acc1, in1);
|
||||
acc2 = _mm_add_ps(acc2, in2);
|
||||
acc3 = _mm_add_ps(acc3, in3);
|
||||
}
|
||||
|
||||
acc0 = _mm_add_ps(acc0, acc1);
|
||||
acc0 = _mm_add_ps(acc0, acc2);
|
||||
acc0 = _mm_add_ps(acc0, acc3);
|
||||
|
||||
_mm_store_ps(out, acc0);
|
||||
|
||||
return out[0] + out[1] + out[2] + out[3];
|
||||
}
|
||||
|
||||
void NaNToZeroInPlace_SSE(float* aSamples, size_t aCount) {
|
||||
__m128 vin0, vin1, vin2, vin3;
|
||||
__m128 vmask0, vmask1, vmask2, vmask3;
|
||||
__m128 vout0, vout1, vout2, vout3;
|
||||
|
||||
float* samplesAligned16 = ALIGNED16(aSamples);
|
||||
size_t leadingElementsScalar =
|
||||
std::min(static_cast<size_t>(samplesAligned16 - aSamples), aCount);
|
||||
size_t remainingElements = aCount - leadingElementsScalar;
|
||||
size_t vectoredEnd = aCount - remainingElements % 16;
|
||||
|
||||
MOZ_ASSERT(!((vectoredEnd - leadingElementsScalar) % 16));
|
||||
|
||||
size_t i = 0;
|
||||
for (; i < leadingElementsScalar; i++) {
|
||||
if (aSamples[i] != aSamples[i]) {
|
||||
aSamples[i] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_ALIGNED16(&aSamples[i]);
|
||||
|
||||
for (; i < vectoredEnd; i += 16) {
|
||||
vin0 = _mm_load_ps(&aSamples[i + 0]);
|
||||
vin1 = _mm_load_ps(&aSamples[i + 4]);
|
||||
vin2 = _mm_load_ps(&aSamples[i + 8]);
|
||||
vin3 = _mm_load_ps(&aSamples[i + 12]);
|
||||
|
||||
vmask0 = _mm_cmpord_ps(vin0, vin0);
|
||||
vmask1 = _mm_cmpord_ps(vin1, vin1);
|
||||
vmask2 = _mm_cmpord_ps(vin2, vin2);
|
||||
vmask3 = _mm_cmpord_ps(vin3, vin3);
|
||||
|
||||
vout0 = _mm_and_ps(vin0, vmask0);
|
||||
vout1 = _mm_and_ps(vin1, vmask1);
|
||||
vout2 = _mm_and_ps(vin2, vmask2);
|
||||
vout3 = _mm_and_ps(vin3, vmask3);
|
||||
|
||||
_mm_store_ps(&aSamples[i + 0], vout0);
|
||||
_mm_store_ps(&aSamples[i + 4], vout1);
|
||||
_mm_store_ps(&aSamples[i + 8], vout2);
|
||||
_mm_store_ps(&aSamples[i + 12], vout3);
|
||||
}
|
||||
for (; i < aCount; i++) {
|
||||
if (aSamples[i] != aSamples[i]) {
|
||||
aSamples[i] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template struct Engine<xsimd::sse2>;
|
||||
} // namespace mozilla
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* this source code form is subject to the terms of the mozilla public
|
||||
* license, v. 2.0. if a copy of the mpl was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "AudioNodeEngine.h"
|
||||
|
||||
namespace mozilla {
|
||||
void AudioBufferAddWithScale_SSE(const float* aInput, float aScale,
|
||||
float* aOutput, uint32_t aSize);
|
||||
|
||||
void AudioBlockCopyChannelWithScale_SSE(const float* aInput, float aScale,
|
||||
float* aOutput);
|
||||
|
||||
void AudioBlockCopyChannelWithScale_SSE(const float aInput[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aScale[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutput[WEBAUDIO_BLOCK_SIZE]);
|
||||
|
||||
void AudioBufferInPlaceScale_SSE(float* aBlock, float aScale, uint32_t aSize);
|
||||
void AudioBufferInPlaceScale_SSE(float* aBlock, float* aScale, uint32_t aSize);
|
||||
|
||||
void AudioBlockPanStereoToStereo_SSE(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
||||
const float aInputR[WEBAUDIO_BLOCK_SIZE],
|
||||
float aGainL, float aGainR,
|
||||
bool aIsOnTheLeft,
|
||||
float aOutputL[WEBAUDIO_BLOCK_SIZE],
|
||||
float aOutputR[WEBAUDIO_BLOCK_SIZE]);
|
||||
|
||||
float AudioBufferSumOfSquares_SSE(const float* aInput, uint32_t aLength);
|
||||
|
||||
void BufferComplexMultiply_SSE(const float* aInput, const float* aScale,
|
||||
float* aOutput, uint32_t aSize);
|
||||
|
||||
void NaNToZeroInPlace_SSE(float* aSamples, size_t aCount);
|
||||
} // namespace mozilla
|
|
@ -127,6 +127,7 @@ UNIFIED_SOURCES += [
|
|||
|
||||
if CONFIG["CPU_ARCH"] == "aarch64" or CONFIG["BUILD_ARM_NEON"]:
|
||||
DEFINES["USE_NEON"] = True
|
||||
LOCAL_INCLUDES += ["/third_party/xsimd/include"]
|
||||
SOURCES += ["AudioNodeEngineNEON.cpp"]
|
||||
SOURCES["AudioNodeEngineNEON.cpp"].flags += CONFIG["NEON_FLAGS"]
|
||||
if CONFIG["BUILD_ARM_NEON"]:
|
||||
|
@ -136,6 +137,7 @@ if CONFIG["CPU_ARCH"] == "aarch64" or CONFIG["BUILD_ARM_NEON"]:
|
|||
if CONFIG["INTEL_ARCHITECTURE"]:
|
||||
SOURCES += ["AudioNodeEngineSSE2.cpp"]
|
||||
DEFINES["USE_SSE2"] = True
|
||||
LOCAL_INCLUDES += ["/third_party/xsimd/include"]
|
||||
SOURCES["AudioNodeEngineSSE2.cpp"].flags += CONFIG["SSE2_FLAGS"]
|
||||
|
||||
include("/ipc/chromium/chromium-config.mozbuild")
|
||||
|
|
Загрузка…
Ссылка в новой задаче