зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1868949 - Add support for AVX VNNI in cpu detection and IntegerGemm r=marco
This makes it possible to use the recently added changes in gemmology. Differential Revision: https://phabricator.services.mozilla.com/D196425
This commit is contained in:
Родитель
6b0b013d69
Коммит
9779ed6e69
|
@ -3244,3 +3244,8 @@ set_config("SSSE3_FLAGS", ["-mssse3"])
|
|||
set_config("SSE4_2_FLAGS", ["-msse4.2"])
|
||||
set_config("FMA_FLAGS", ["-mfma"])
|
||||
set_config("AVX2_FLAGS", ["-mavx2"])
|
||||
# Export AVXVNNI_FLAGS as ["-mavxvnni"], conditioned on the result of a
# compile probe run with that same flag.
set_config(
    "AVXVNNI_FLAGS",
    ["-mavxvnni"],
    when=try_compile(flags=["-mavxvnni"], check_msg="for -mavxvnni support"),
)
|
||||
|
|
|
@ -20,7 +20,10 @@
|
|||
#include "wasm/WasmInstance.h"
|
||||
#include "wasm/WasmLog.h"
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
#if defined(USE_AVXVNNI)
|
||||
# define SUPPORTED_ARCHS \
|
||||
xsimd::arch_list<xsimd::avxvnni, xsimd::avx2, xsimd::ssse3, xsimd::sse2>
|
||||
#elif defined(USE_AVX2)
|
||||
# define SUPPORTED_ARCHS \
|
||||
xsimd::arch_list<xsimd::avx2, xsimd::ssse3, xsimd::sse2>
|
||||
#elif defined(USE_SSSE3)
|
||||
|
|
|
@ -37,6 +37,12 @@ if CONFIG["INTEL_ARCHITECTURE"]:
|
|||
SOURCES[
|
||||
"/third_party/gemmology/kernels/GemmologyEngineAVX2.cpp"
|
||||
].flags += CONFIG["AVX2_FLAGS"]
|
||||
# Build the AVX-VNNI gemmology kernel only when AVXVNNI_FLAGS is set in the
# build configuration.
if CONFIG["AVXVNNI_FLAGS"]:
    # USE_AVXVNNI is the define tested on the C++ side to add xsimd::avxvnni
    # to the list of supported architectures.
    DEFINES["USE_AVXVNNI"] = True
    SOURCES += ["/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp"]
    # Apply the AVX-VNNI compiler flags to this one source file only.
    SOURCES[
        "/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp"
    ].flags += CONFIG["AVXVNNI_FLAGS"]
|
||||
|
||||
if CONFIG["TARGET_CPU"] == "aarch64":
|
||||
DEFINES["USE_NEON"] = True
|
||||
|
|
|
@ -40,6 +40,20 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
|
|||
return (regs[reg] & bits) == bits;
|
||||
}
|
||||
|
||||
// Returns true when every bit of `bits` is set in register `reg` of CPUID
// leaf `level`, queried with sub-leaf index 1. Returns false when `level` is
// above the maximum leaf reported by __get_cpuid_max() for its range.
static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg,
                              unsigned int bits) {
  // Guard: the requested leaf must not exceed the highest supported one.
  if (__get_cpuid_max(level & 0x80000000u, nullptr) < level) {
    return false;
  }

  unsigned int a, b, c, d;
  __cpuid_count(level, 1, a, b, c, d);

  // Index the four results in the order used by CPUIDRegister
  // (eax, ebx, ecx, edx).
  const unsigned int results[4] = {a, b, c, d};
  return (results[reg] & bits) == bits;
}
|
||||
|
||||
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
|
||||
|
||||
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
|
||||
|
@ -48,12 +62,12 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
|
|||
unsigned int bits) {
|
||||
// Check that the level in question is supported.
|
||||
int regs[4];
|
||||
__cpuid(regs, level & 0x80000000u);
|
||||
__cpuid_ex(regs, level & 0x80000000u, 1);
|
||||
if (unsigned(regs[0]) < level) return false;
|
||||
|
||||
// "The __cpuid intrinsic clears the ECX register before calling the cpuid
|
||||
// instruction."
|
||||
__cpuid(regs, level);
|
||||
__cpuid_ex(regs, level, 1);
|
||||
return (unsigned(regs[reg]) & bits) == bits;
|
||||
}
|
||||
|
||||
|
@ -77,6 +91,20 @@ static void moz_cpuid(int CPUInfo[4], int InfoType) {
|
|||
"D"(CPUInfo) // %edi
|
||||
: "%ecx", "%edx", "%esi");
|
||||
}
|
||||
// Executes CPUID with EAX = InfoType and ECX = 1 (sub-leaf 1) and stores the
// resulting EAX, EBX, ECX and EDX values into CPUInfo[0..3].
//
// NOTE(review): GCC/Clang extended asm expects "%%" before register names;
// this template keeps the single-"%" form used by the sibling moz_cpuid --
// confirm which compiler actually builds this branch.
static void moz_cpuid_ex(int CPUInfo[4], int InfoType) {
  asm("xchg %esi, %ebx\n"  // park EBX in ESI; swapped back before returning
      // "$1" is the immediate 1. Without the "$", AT&T syntax reads this as
      // a load from memory address 1.
      "movl $1, %ecx\n"
      "cpuid\n"
      "movl %eax, (%edi)\n"
      "movl %ebx, 4(%edi)\n"
      "movl %ecx, 8(%edi)\n"
      "movl %edx, 12(%edi)\n"
      "xchg %esi, %ebx\n"
      :
      : "a"(InfoType),  // %eax
        "D"(CPUInfo)    // %edi
      : "%ecx", "%edx", "%esi");
}
|
||||
# else
|
||||
static void moz_cpuid(int CPUInfo[4], int InfoType) {
|
||||
asm("xchg %rsi, %rbx\n"
|
||||
|
@ -92,6 +120,20 @@ static void moz_cpuid(int CPUInfo[4], int InfoType) {
|
|||
"D"(CPUInfo) // %rdi
|
||||
: "%ecx", "%edx", "%rsi");
|
||||
}
|
||||
// Executes CPUID with EAX = InfoType and ECX = 1 (sub-leaf 1) and stores the
// resulting EAX, EBX, ECX and EDX values into CPUInfo[0..3].
//
// NOTE(review): GCC/Clang extended asm expects "%%" before register names;
// this template keeps the single-"%" form used by the sibling moz_cpuid --
// confirm which compiler actually builds this branch.
static void moz_cpuid_ex(int CPUInfo[4], int InfoType) {
  asm("xchg %rsi, %rbx\n"  // park RBX in RSI; swapped back before returning
      // "$1" is the immediate 1. Without the "$", AT&T syntax reads this as
      // a load from memory address 1.
      "movl $1, %ecx\n"
      "cpuid\n"
      "movl %eax, (%rdi)\n"
      "movl %ebx, 4(%rdi)\n"
      "movl %ecx, 8(%rdi)\n"
      "movl %edx, 12(%rdi)\n"
      "xchg %rsi, %rbx\n"
      :
      : "a"(InfoType),  // %eax
        "D"(CPUInfo)    // %rdi
      : "%ecx", "%edx", "%rsi");
}
|
||||
# endif
|
||||
|
||||
static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
|
||||
|
@ -105,6 +147,17 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
|
|||
return (unsigned(regs[reg]) & bits) == bits;
|
||||
}
|
||||
|
||||
// Returns true when every bit of `bits` is set in register `reg` of CPUID
// leaf `level`, as reported by moz_cpuid_ex().
static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg,
                              unsigned int bits) {
  volatile int cpuInfo[4];
  int* const out = const_cast<int*>(cpuInfo);

  // First query the base leaf of the range (level & 0x80000000): if the
  // value it returns in slot 0 is below `level`, the leaf is unsupported.
  moz_cpuid_ex(out, level & 0x80000000u);
  const unsigned int highestLeaf = unsigned(cpuInfo[0]);
  if (highestLeaf < level) {
    return false;
  }

  // Now query the requested leaf and test the requested register.
  moz_cpuid_ex(out, level);
  const unsigned int value = unsigned(cpuInfo[reg]);
  return (value & bits) == bits;
}
|
||||
|
||||
#endif // end CPUID declarations
|
||||
|
||||
} // namespace
|
||||
|
@ -179,6 +232,10 @@ bool avx_enabled = has_avx();
|
|||
bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5));
|
||||
# endif
|
||||
|
||||
# if !defined(MOZILLA_PRESUME_AVXVNNI)
|
||||
// Set from bit 4 of EAX for CPUID leaf 7, as reported by has_cpuid_bits_ex().
// NOTE(review): unlike avx2_enabled, this is not combined with has_avx() --
// confirm that omitting that condition is intentional.
bool avxvnni_enabled = has_cpuid_bits_ex(7u, eax, (1u << 4));
|
||||
# endif
|
||||
|
||||
# if !defined(MOZILLA_PRESUME_AES)
|
||||
bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25));
|
||||
# endif
|
||||
|
|
|
@ -138,6 +138,10 @@
|
|||
// It's ok to use AVX instructions based on the -march option.
|
||||
# define MOZILLA_PRESUME_AVX2 1
|
||||
# endif
|
||||
# ifdef __AVXVNNI__
|
||||
// It's ok to use AVXVNNI instructions based on the -march option.
|
||||
# define MOZILLA_PRESUME_AVXVNNI 1
|
||||
# endif
|
||||
# ifdef __AES__
|
||||
// It's ok to use AES instructions based on the -march option.
|
||||
# define MOZILLA_PRESUME_AES 1
|
||||
|
@ -224,6 +228,9 @@ extern bool MFBT_DATA avx_enabled;
|
|||
# if !defined(MOZILLA_PRESUME_AVX2)
|
||||
extern bool MFBT_DATA avx2_enabled;
|
||||
# endif
|
||||
# if !defined(MOZILLA_PRESUME_AVXVNNI)
|
||||
// Runtime AVX-VNNI detection result; only declared when AVX-VNNI support is
// not presumed at compile time (MOZILLA_PRESUME_AVXVNNI undefined).
extern bool MFBT_DATA avxvnni_enabled;
|
||||
# endif
|
||||
# if !defined(MOZILLA_PRESUME_AES)
|
||||
extern bool MFBT_DATA aes_enabled;
|
||||
# endif
|
||||
|
@ -350,6 +357,16 @@ inline bool supports_avx2() { return sse_private::avx2_enabled; }
|
|||
inline bool supports_avx2() { return false; }
|
||||
#endif
|
||||
|
||||
// MOZILLA_MAY_SUPPORT_AVXVNNI is defined whenever supports_avxvnni() can
// return true: either AVX-VNNI is presumed at compile time, or CPUID-based
// runtime detection is available.
#if defined(MOZILLA_PRESUME_AVXVNNI) || \
    defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
#  define MOZILLA_MAY_SUPPORT_AVXVNNI 1
#endif

// Reports whether AVX-VNNI instructions may be used.
inline bool supports_avxvnni() {
#if defined(MOZILLA_PRESUME_AVXVNNI)
  // Presumed at compile time.
  return true;
#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  // Use the detection result stored in sse_private.
  return sse_private::avxvnni_enabled;
#else
  // No compile-time presumption and no CPUID detection.
  return false;
#endif
}
|
||||
|
||||
#if defined(MOZILLA_PRESUME_AES)
|
||||
# define MOZILLA_MAY_SUPPORT_AES 1
|
||||
inline bool supports_aes() { return true; }
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include <gemmology.h>
|
||||
|
||||
// Explicit instantiations of the gemmology Engine for the xsimd::avxvnni
// architecture, so that this translation unit provides the definitions.
namespace gemmology {
// The engine class itself.
template struct Engine<xsimd::avxvnni>;
// SelectColumnsB for the AVX-VNNI engine.
template void Engine<xsimd::avxvnni>::SelectColumnsB(int8_t const*, int8_t*,
                                                     size_t, uint32_t const*,
                                                     uint32_t const*);
// Shift::Multiply instantiated with the UnquantizeAndAddBiasAndWrite callback.
template void Engine<xsimd::avxvnni>::Shift::Multiply(
    uint8_t const*, int8_t const*, size_t, size_t, size_t,
    gemmology::callbacks::UnquantizeAndAddBiasAndWrite);
// Shift::PrepareBias instantiated with the same callback type.
template void Engine<xsimd::avxvnni>::Shift::PrepareBias(
    int8_t const*, size_t, size_t,
    gemmology::callbacks::UnquantizeAndAddBiasAndWrite);
}  // namespace gemmology
|
Загрузка…
Ссылка в новой задаче