Bug 1488726: Implement CPU detection of LZCNT, BMI1, BMI2; r=sunfish

--HG--
extra : rebase_source : 864eea5108ce66909b7bf4499047d40330643cf3
extra : histedit_source : 09dfc073278306dd7b56887e880029a9ab9a3cdd
This commit is contained in:
Benjamin Bouvier 2018-11-08 13:18:54 +01:00
Родитель edf43f5289
Коммит bcfda3e9d9
3 изменённых файла: 90 добавлений и 51 удаление

Просмотреть файл

@ -237,6 +237,9 @@ CPUInfo::SSEVersion CPUInfo::maxEnabledSSEVersion = UnknownSSE;
// Out-of-class definitions of CPUInfo's static feature flags; all of them
// start out false. CPUInfo::SetSSEVersion() assigns avxPresent, popcntPresent,
// needAmdBugWorkaround, lzcntPresent, bmi1Present and bmi2Present, and only
// reads avxEnabled.
bool CPUInfo::avxPresent = false;
bool CPUInfo::avxEnabled = false;
bool CPUInfo::popcntPresent = false;
bool CPUInfo::bmi1Present = false;
bool CPUInfo::bmi2Present = false;
bool CPUInfo::lzcntPresent = false;
bool CPUInfo::needAmdBugWorkaround = false;
static uintptr_t
@ -263,95 +266,119 @@ ReadXGETBV()
return xcr0EAX;
}
void
CPUInfo::SetSSEVersion()
// Executes the CPUID instruction and hands back the four result registers.
//
// On entry, *flagsEax holds the CPUID leaf to query. On return, *flagsEax,
// *flagsEbx, *flagsEcx and *flagsEdx hold what CPUID left in eax/ebx/ecx/edx.
// The _MSC_VER branch goes through the __cpuid intrinsic; the __GNUC__ branch
// zeroes *flagsEcx first and passes it to the inline asm as an in/out operand
// ("+c").
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped, so removed and added lines are interleaved (e.g. the
// flagsEAX/flagsECX/flagsEDX locals and the hard-coded `__cpuid(cpuinfo, 1)`
// sit next to the pointer-based versions). Confirm against the real file
// before relying on any single line.
static void
ReadCPUInfo(int* flagsEax, int* flagsEbx, int* flagsEcx, int* flagsEdx)
{
int flagsEAX = 0;
int flagsECX = 0;
int flagsEDX = 0;
#ifdef _MSC_VER
int cpuinfo[4];
__cpuid(cpuinfo, 1);
flagsEAX = cpuinfo[0];
flagsECX = cpuinfo[2];
flagsEDX = cpuinfo[3];
// NOTE(review): __cpuid takes no sub-leaf argument, while the __GNUC__ branch
// below explicitly zeroes ecx as an input for BMI detection. Presumably
// __cpuidex(cpuinfo, leaf, 0) is the documented way to pin ecx here — confirm
// against the MSVC intrinsic documentation.
__cpuid(cpuinfo, *flagsEax);
*flagsEax = cpuinfo[0];
*flagsEbx = cpuinfo[1];
*flagsEcx = cpuinfo[2];
*flagsEdx = cpuinfo[3];
#elif defined(__GNUC__)
// Some older 32-bit processors don't fill the ecx register with cpuid, so
// clobber it before calling cpuid, so that there's no risk of picking up
// random bits indicating SSE3/SSE4 are present. Also make sure that it's
// set to 0 as an input for BMI detection on all platforms.
*flagsEcx = 0;
# ifdef JS_CODEGEN_X64
asm (
"movl $0x1, %%eax;"
"cpuid;"
: "=a" (flagsEAX), "=c" (flagsECX), "=d" (flagsEDX)
:
: "%ebx"
: "+a" (*flagsEax), "=b" (*flagsEbx), "+c" (*flagsEcx), "=d" (*flagsEdx)
);
# else
// On x86, preserve ebx. The compiler needs it for PIC mode.
// The mov/xchg pair below parks ebx in edi across cpuid and swaps it back
// afterwards, so cpuid's ebx result is delivered through edi ("=D").
// Some older processors don't fill the ecx register with cpuid, so clobber
// it before calling cpuid, so that there's no risk of picking random bits
// indicating SSE3/SSE4 are present.
asm (
"xor %%ecx, %%ecx;"
"movl $0x1, %%eax;"
"pushl %%ebx;"
"mov %%ebx, %%edi;"
"cpuid;"
"popl %%ebx;"
: "=a" (flagsEAX), "=c" (flagsECX), "=d" (flagsEDX)
:
:
"xchg %%edi, %%ebx;"
: "+a" (*flagsEax), "=D" (*flagsEbx), "+c" (*flagsEcx), "=d" (*flagsEdx)
);
# endif
#else
# error "Unsupported compiler"
#endif
}
// File-scope bit masks for the CPUID feature word read with eax == 1. The same
// six names are also declared as function-local constexpr values inside
// CPUInfo::SetSSEVersion(), where SSEBit and SSE2Bit are tested against edx
// and the remaining four against ecx.
// NOTE(review): these look like the removed side of the diff (superseded by
// the function-local constexpr copies) — confirm against the real file.
static const int SSEBit = 1 << 25;
static const int SSE2Bit = 1 << 26;
static const int SSE3Bit = 1 << 0;
static const int SSSE3Bit = 1 << 9;
static const int SSE41Bit = 1 << 19;
static const int SSE42Bit = 1 << 20;
// Queries CPUID (through ReadCPUInfo) and fills in CPUInfo's static feature
// state: maxSSEVersion, avxPresent, popcntPresent, needAmdBugWorkaround,
// lzcntPresent, bmi1Present and bmi2Present. Release-asserts that the CMOV
// bit is set.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. The lines using flagsEAX/flagsECX/flagsEDX and the `static const`
// declarations appear to be the removed side; their flagsEax/flagsEcx/flagsEdx
// and `static constexpr` twins appear to be the added side. Confirm against
// the real file.
void
CPUInfo::SetSSEVersion()
{
// Leaf 1: feature bits come back in ecx/edx, family/model bits in eax.
int flagsEax = 1;
int flagsEbx = 0;
int flagsEcx = 0;
int flagsEdx = 0;
ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx);
if (flagsECX & SSE42Bit) maxSSEVersion = SSE4_2;
else if (flagsECX & SSE41Bit) maxSSEVersion = SSE4_1;
else if (flagsECX & SSSE3Bit) maxSSEVersion = SSSE3;
else if (flagsECX & SSE3Bit) maxSSEVersion = SSE3;
else if (flagsEDX & SSE2Bit) maxSSEVersion = SSE2;
else if (flagsEDX & SSEBit) maxSSEVersion = SSE;
else maxSSEVersion = NoSSE;
static constexpr int SSEBit = 1 << 25;
static constexpr int SSE2Bit = 1 << 26;
static constexpr int SSE3Bit = 1 << 0;
static constexpr int SSSE3Bit = 1 << 9;
static constexpr int SSE41Bit = 1 << 19;
static constexpr int SSE42Bit = 1 << 20;
// Pick the highest SSE level whose bit is set, checking newest first.
if (flagsEcx & SSE42Bit) {
maxSSEVersion = SSE4_2;
} else if (flagsEcx & SSE41Bit) {
maxSSEVersion = SSE4_1;
} else if (flagsEcx & SSSE3Bit) {
maxSSEVersion = SSSE3;
} else if (flagsEcx & SSE3Bit) {
maxSSEVersion = SSE3;
} else if (flagsEdx & SSE2Bit) {
maxSSEVersion = SSE2;
} else if (flagsEdx & SSEBit) {
maxSSEVersion = SSE;
} else {
maxSSEVersion = NoSSE;
}
// If a cap was configured (maxEnabledSSEVersion is no longer UnknownSSE),
// never report more than that cap.
if (maxEnabledSSEVersion != UnknownSSE) {
maxSSEVersion = Min(maxSSEVersion, maxEnabledSSEVersion);
}
static const int AVXBit = 1 << 28;
static const int XSAVEBit = 1 << 27;
avxPresent = (flagsECX & AVXBit) && (flagsECX & XSAVEBit) && avxEnabled;
static constexpr int AVXBit = 1 << 28;
static constexpr int XSAVEBit = 1 << 27;
avxPresent = (flagsEcx & AVXBit) && (flagsEcx & XSAVEBit) && avxEnabled;
// If the hardware supports AVX, check whether the OS supports it too.
if (avxPresent) {
size_t xcr0EAX = ReadXGETBV();
static const int xcr0SSEBit = 1 << 1;
static const int xcr0AVXBit = 1 << 2;
static constexpr int xcr0SSEBit = 1 << 1;
static constexpr int xcr0AVXBit = 1 << 2;
avxPresent = (xcr0EAX & xcr0SSEBit) && (xcr0EAX & xcr0AVXBit);
}
// CMOV instructions are supposed to be supported by every CPU that has SSE2
// enabled. While this might be true, it is not guaranteed by any
// documentation, neither AMD's nor Intel's.
static const int CMOVBit = 1 << 15;
MOZ_RELEASE_ASSERT(flagsEDX & CMOVBit,
static constexpr int CMOVBit = 1 << 15;
MOZ_RELEASE_ASSERT(flagsEdx & CMOVBit,
"CMOVcc instruction is not recognized by this CPU.");
static const int POPCNTBit = 1 << 23;
popcntPresent = (flagsECX & POPCNTBit);
static constexpr int POPCNTBit = 1 << 23;
popcntPresent = (flagsEcx & POPCNTBit);
// Check if we need to work around an AMD CPU bug (see bug 1281759).
// We check for family 20 models 0-2. Intel doesn't use family 20 at
// this point, so this should only match AMD CPUs.
unsigned family = ((flagsEAX >> 20) & 0xff) + ((flagsEAX >> 8) & 0xf);
unsigned model = (((flagsEAX >> 16) & 0xf) << 4) + ((flagsEAX >> 4) & 0xf);
unsigned family = ((flagsEax >> 20) & 0xff) + ((flagsEax >> 8) & 0xf);
unsigned model = (((flagsEax >> 16) & 0xf) << 4) + ((flagsEax >> 4) & 0xf);
needAmdBugWorkaround = (family == 20 && model <= 2);
// Leaf 0x80000001: ecx bit 5 is taken as the LZCNT indicator.
// NOTE(review): nothing visible here checks that this leaf (or leaf 0x7
// below) is actually supported by the CPU before querying it; presumably the
// maximum basic/extended leaf should be read first — confirm against the
// CPUID documentation.
flagsEax = 0x80000001;
ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx);
static constexpr int LZCNTBit = 1 << 5;
lzcntPresent = (flagsEcx & LZCNTBit);
// Leaf 0x7: ebx bit 3 is taken as BMI1 and ebx bit 8 as BMI2. BMI2 is only
// reported when BMI1 is reported as well.
flagsEax = 0x7;
ReadCPUInfo(&flagsEax, &flagsEbx, &flagsEcx, &flagsEdx);
static constexpr int BMI1Bit = 1 << 3;
static constexpr int BMI2Bit = 1 << 8;
bmi1Present = (flagsEbx & BMI1Bit);
bmi2Present = bmi1Present && (flagsEbx & BMI2Bit);
}
volatile uintptr_t* blackbox = nullptr;

Просмотреть файл

@ -227,6 +227,9 @@ class CPUInfo
// Static CPU-feature state shared by the whole class. popcntPresent,
// bmi1Present, bmi2Present, lzcntPresent and needAmdBugWorkaround are exposed
// through IsPOPCNTPresent(), IsBMI1Present(), IsBMI2Present(),
// IsLZCNTPresent() and NeedAmdBugWorkaround() respectively.
static bool avxPresent;
static bool avxEnabled;
static bool popcntPresent;
static bool bmi1Present;
static bool bmi2Present;
static bool lzcntPresent;
static bool needAmdBugWorkaround;
// Runs the CPUID-based detection that assigns the *Present flags above.
static void SetSSEVersion();
@ -244,6 +247,9 @@ class CPUInfo
// True when GetSSEVersion() reports SSE4_1 or a higher level.
static bool IsSSE41Present() { return GetSSEVersion() >= SSE4_1; }
// True when GetSSEVersion() reports SSE4_2 or a higher level.
static bool IsSSE42Present() { return GetSSEVersion() >= SSE4_2; }
// Returns the popcntPresent flag (CPUID leaf 1, ecx bit 23, as read by
// SetSSEVersion()).
static bool IsPOPCNTPresent() { return popcntPresent; }
// Returns the bmi1Present flag (CPUID leaf 0x7, ebx bit 3, as read by
// SetSSEVersion()).
static bool IsBMI1Present() { return bmi1Present; }
// Returns the bmi2Present flag (CPUID leaf 0x7, ebx bit 8, as read by
// SetSSEVersion(); only set there when bmi1Present is also set).
static bool IsBMI2Present() { return bmi2Present; }
// Returns the lzcntPresent flag (CPUID leaf 0x80000001, ecx bit 5, as read by
// SetSSEVersion()).
static bool IsLZCNTPresent() { return lzcntPresent; }
// Returns the needAmdBugWorkaround flag, which SetSSEVersion() sets for CPU
// family 20, models 0-2.
static bool NeedAmdBugWorkaround() { return needAmdBugWorkaround; }
// Caps the enabled SSE level at SSE2 and clears avxEnabled.
static void SetSSE3Disabled() { maxEnabledSSEVersion = SSE2; avxEnabled = false; }
@ -1109,6 +1115,9 @@ class AssemblerX86Shared : public AssemblerShared
// Forwards to CPUInfo::IsSSE41Present().
static bool HasSSE41() { return CPUInfo::IsSSE41Present(); }
// Forwards to CPUInfo::IsSSE42Present().
static bool HasSSE42() { return CPUInfo::IsSSE42Present(); }
// Forwards to CPUInfo::IsPOPCNTPresent().
static bool HasPOPCNT() { return CPUInfo::IsPOPCNTPresent(); }
// Forwards to CPUInfo::IsBMI1Present().
static bool HasBMI1() { return CPUInfo::IsBMI1Present(); }
// Forwards to CPUInfo::IsBMI2Present().
static bool HasBMI2() { return CPUInfo::IsBMI2Present(); }
// Forwards to CPUInfo::IsLZCNTPresent().
static bool HasLZCNT() { return CPUInfo::IsLZCNTPresent(); }
// Floating point is reported as supported exactly when SSE2 is present.
static bool SupportsFloatingPoint() { return CPUInfo::IsSSE2Present(); }
// Unconditionally reports true.
static bool SupportsUnalignedAccesses() { return true; }
// SIMD is reported as supported exactly when SSE2 is present.
static bool SupportsSimd() { return CPUInfo::IsSSE2Present(); }

Просмотреть файл

@ -177,6 +177,9 @@ CraneliftStaticEnvironment::CraneliftStaticEnvironment()
hasSse42(Assembler::HasSSE42()),
hasPopcnt(Assembler::HasPOPCNT()),
hasAvx(Assembler::HasAVX()),
hasBmi1(Assembler::HasBMI1()),
hasBmi2(Assembler::HasBMI2()),
hasLzcnt(Assembler::HasLZCNT()),
#else
hasSse2(false),
hasSse3(false),
@ -184,10 +187,10 @@ CraneliftStaticEnvironment::CraneliftStaticEnvironment()
hasSse42(false),
hasPopcnt(false),
hasAvx(false),
hasBmi1(false),
hasBmi2(false),
hasLzcnt(false),
#endif
hasBmi1(false), // TODO implement feature detection for bmi1
hasBmi2(false), // TODO implement feature detection for bmi2
hasLzcnt(false), // TODO implement feature detection for lzcnt
staticMemoryBound(
#ifdef WASM_HUGE_MEMORY
// In the huge memory configuration, we always reserve the full 4 GB index