зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1804226 - fma3 support for AudioNodeEngine r=padenot
Update xsimd dependency to integrate an fma portability patch. Add support for fma and sse4.2 detection. Differential Revision: https://phabricator.services.mozilla.com/D163927
This commit is contained in:
Родитель
ee9ecc66a7
Коммит
0aefae5322
|
@ -3091,3 +3091,5 @@ set_config("MMX_FLAGS", ["-mmmx"])
|
|||
set_config("SSE_FLAGS", ["-msse"])
|
||||
set_config("SSE2_FLAGS", ["-msse2"])
|
||||
set_config("SSSE3_FLAGS", ["-mssse3"])
|
||||
set_config("SSE4_2_FLAGS", ["-msse4.2"])
|
||||
set_config("FMA_FLAGS", ["-mfma"])
|
||||
|
|
|
@ -15,6 +15,10 @@
|
|||
# include "mozilla/SSE.h"
|
||||
# include "AudioNodeEngineGeneric.h"
|
||||
#endif
|
||||
#if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
# include "mozilla/SSE.h"
|
||||
# include "AudioNodeEngineGeneric.h"
|
||||
#endif
|
||||
#include "AudioBlock.h"
|
||||
#include "Tracing.h"
|
||||
|
||||
|
@ -93,8 +97,16 @@ void AudioBufferAddWithScale(const float* aInput, float aScale, float* aOutput,
|
|||
// we need to round aSize down to the nearest multiple of 16
|
||||
uint32_t alignedSize = aSize & ~0x0F;
|
||||
if (alignedSize > 0) {
|
||||
Engine<xsimd::sse2>::AudioBufferAddWithScale(aInput, aScale, aOutput,
|
||||
alignedSize);
|
||||
# if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
if (mozilla::supports_fma3() && mozilla::supports_sse4_2()) {
|
||||
Engine<xsimd::fma3<xsimd::sse4_2>>::AudioBufferAddWithScale(
|
||||
aInput, aScale, aOutput, alignedSize);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
Engine<xsimd::sse2>::AudioBufferAddWithScale(aInput, aScale, aOutput,
|
||||
alignedSize);
|
||||
}
|
||||
|
||||
// adjust parameters for use with scalar operations below
|
||||
aInput += alignedSize;
|
||||
|
@ -152,7 +164,16 @@ void BufferComplexMultiply(const float* aInput, const float* aScale,
|
|||
float* aOutput, uint32_t aSize) {
|
||||
#ifdef USE_SSE2
|
||||
// Every path below runs at least Engine<xsimd::sse2>, so gate on SSE2 support
// (not plain SSE), consistent with the other SSE2 dispatchers in this file.
if (mozilla::supports_sse2()) {
|
||||
Engine<xsimd::sse2>::BufferComplexMultiply(aInput, aScale, aOutput, aSize);
|
||||
# if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
if (mozilla::supports_fma3() && mozilla::supports_sse4_2()) {
|
||||
Engine<xsimd::fma3<xsimd::sse4_2>>::BufferComplexMultiply(aInput, aScale,
|
||||
aOutput, aSize);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
Engine<xsimd::sse2>::BufferComplexMultiply(aInput, aScale, aOutput,
|
||||
aSize);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -288,8 +309,16 @@ void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
|||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
# if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
if (mozilla::supports_fma3() && mozilla::supports_sse4_2()) {
|
||||
Engine<xsimd::fma3<xsimd::sse4_2>>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -326,8 +355,16 @@ void AudioBlockPanStereoToStereo(const float aInputL[WEBAUDIO_BLOCK_SIZE],
|
|||
|
||||
#ifdef USE_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
# if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
if (mozilla::supports_fma3() && mozilla::supports_sse4_2()) {
|
||||
// Use the fma3<sse4_2> engine: it matches the runtime check above
// (FMA3 + SSE4.2) and is the architecture that is explicitly instantiated.
Engine<xsimd::fma3<xsimd::sse4_2>>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
Engine<xsimd::sse2>::AudioBlockPanStereoToStereo(
|
||||
aInputL, aInputR, aGainL, aGainR, aIsOnTheLeft, aOutputL, aOutputR);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@ -362,7 +399,16 @@ float AudioBufferSumOfSquares(const float* aInput, uint32_t aLength) {
|
|||
}
|
||||
|
||||
uint32_t vLength = (aLength >> 4) << 4;
|
||||
sum += Engine<xsimd::sse2>::AudioBufferSumOfSquares(alignedInput, vLength);
|
||||
# if defined(USE_SSE42) && defined(USE_FMA3)
|
||||
if (mozilla::supports_fma3() && mozilla::supports_sse4_2()) {
|
||||
sum += Engine<xsimd::fma3<xsimd::sse4_2>>::AudioBufferSumOfSquares(
|
||||
alignedInput, vLength);
|
||||
} else
|
||||
# endif
|
||||
{
|
||||
sum +=
|
||||
Engine<xsimd::sse2>::AudioBufferSumOfSquares(alignedInput, vLength);
|
||||
}
|
||||
|
||||
// adjust aInput and aLength to use scalar operations for any
|
||||
// remaining values
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "AudioNodeEngineGeneric.h"
|
||||
|
||||
namespace mozilla {
|
||||
// Explicit instantiation of the generic audio Engine for the xsimd
// SSE4.2 + FMA3 architecture, emitted from this translation unit.
template struct Engine<xsimd::fma3<xsimd::sse4_2>>;
|
||||
} // namespace mozilla
|
|
@ -133,12 +133,17 @@ if CONFIG["CPU_ARCH"] == "aarch64" or CONFIG["BUILD_ARM_NEON"]:
|
|||
if CONFIG["BUILD_ARM_NEON"]:
|
||||
LOCAL_INCLUDES += ["/media/openmax_dl/dl/api/"]
|
||||
|
||||
# Are we targeting x86 or x64? If so, build SSE2 files.
|
||||
# Are we targeting x86 or x64? If so, build SSEX files.
|
||||
if CONFIG["INTEL_ARCHITECTURE"]:
|
||||
SOURCES += ["AudioNodeEngineSSE2.cpp"]
|
||||
SOURCES += ["AudioNodeEngineSSE2.cpp", "AudioNodeEngineSSE4_2_FMA3.cpp"]
|
||||
DEFINES["USE_SSE2"] = True
|
||||
# The C++ sources guard the FMA3 code paths with
# `defined(USE_SSE42) && defined(USE_FMA3)`, so the macro must be spelled
# USE_SSE42 or those paths are never compiled in.
DEFINES["USE_SSE42"] = True
|
||||
DEFINES["USE_FMA3"] = True
|
||||
LOCAL_INCLUDES += ["/third_party/xsimd/include"]
|
||||
SOURCES["AudioNodeEngineSSE2.cpp"].flags += CONFIG["SSE2_FLAGS"]
|
||||
SOURCES["AudioNodeEngineSSE4_2_FMA3.cpp"].flags += (
|
||||
CONFIG["SSE4_2_FLAGS"] + CONFIG["FMA_FLAGS"]
|
||||
)
|
||||
|
||||
include("/ipc/chromium/chromium-config.mozbuild")
|
||||
|
||||
|
|
|
@ -147,6 +147,10 @@ bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u << 19));
|
|||
bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u << 20));
|
||||
# endif
|
||||
|
||||
# if !defined(MOZILLA_PRESUME_FMA3)
|
||||
bool fma3_enabled = has_cpuid_bits(1u, ecx, (1u << 12));
|
||||
# endif
|
||||
|
||||
# if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
|
||||
static bool has_avx() {
|
||||
# if defined(MOZILLA_PRESUME_AVX)
|
||||
|
|
|
@ -215,6 +215,9 @@ extern bool MFBT_DATA sse4_1_enabled;
|
|||
# if !defined(MOZILLA_PRESUME_SSE4_2)
|
||||
extern bool MFBT_DATA sse4_2_enabled;
|
||||
# endif
|
||||
# if !defined(MOZILLA_PRESUME_FMA3)
|
||||
extern bool MFBT_DATA fma3_enabled;
|
||||
# endif
|
||||
# if !defined(MOZILLA_PRESUME_AVX)
|
||||
extern bool MFBT_DATA avx_enabled;
|
||||
# endif
|
||||
|
@ -317,6 +320,16 @@ inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
|
|||
inline bool supports_sse4_2() { return false; }
|
||||
#endif
|
||||
|
||||
// supports_fma3(): reports whether FMA3 code paths may be used.
//  - MOZILLA_PRESUME_FMA3 defined: always returns true.
//  - MOZILLA_SSE_HAVE_CPUID_DETECTION defined: returns the value of
//    sse_private::fma3_enabled.
//  - otherwise: always returns false.
// MOZILLA_MAY_SUPPORT_FMA3 is defined in the first two cases only.
#if defined(MOZILLA_PRESUME_FMA3)
|
||||
# define MOZILLA_MAY_SUPPORT_FMA3 1
|
||||
inline bool supports_fma3() { return true; }
|
||||
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
||||
# define MOZILLA_MAY_SUPPORT_FMA3 1
|
||||
inline bool supports_fma3() { return sse_private::fma3_enabled; }
|
||||
#else
|
||||
inline bool supports_fma3() { return false; }
|
||||
#endif
|
||||
|
||||
#if defined(MOZILLA_PRESUME_AVX)
|
||||
# define MOZILLA_MAY_SUPPORT_AVX 1
|
||||
inline bool supports_avx() { return true; }
|
||||
|
|
Загрузка…
Ссылка в новой задаче