зеркало из https://github.com/mozilla/gecko-dev.git
195 строки
4.9 KiB
C++
195 строки
4.9 KiB
C++
/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
/* compile-time and runtime tests for whether to use SSE instructions */
|
|
|
|
#include "SSE.h"
|
|
|
|
namespace {
|
|
|
|
// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
|
|
// We can't declare these functions in the header file, however, because
|
|
// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
|
|
// include both SSE.h and <windows.h>.
|
|
|
|
#ifdef HAVE_CPUID_H
|
|
|
|
// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
|
|
#include <cpuid.h>
|
|
|
|
// Index of each CPUID output register within the four-element register
// array that has_cpuid_bits() inspects.
enum CPUIDRegister {
    eax = 0,
    ebx = 1,
    ecx = 2,
    edx = 3
};
|
|
|
|
static bool
|
|
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
|
|
{
|
|
unsigned int regs[4];
|
|
unsigned int eax, ebx, ecx, edx;
|
|
unsigned max = __get_cpuid_max(0, NULL);
|
|
if (level > max)
|
|
return false;
|
|
__cpuid_count(level, 0, eax, ebx, ecx, edx);
|
|
regs[0] = eax;
|
|
regs[1] = ebx;
|
|
regs[2] = ecx;
|
|
regs[3] = edx;
|
|
return (regs[reg] & bits) == bits;
|
|
}
|
|
|
|
// Reads the extended control register selected by |xcr| and returns its
// 64-bit value. The instruction takes the register index in ECX and hands
// back the low half in EAX and the high half in EDX.
static uint64_t
xgetbv(uint32_t xcr)
{
    uint32_t lo, hi;

    // The instruction is emitted as raw bytes rather than by mnemonic
    // (presumably so assemblers that lack the mnemonic still accept it).
    __asm__ ( ".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c"(xcr));

    uint64_t value = hi;
    value <<= 32;
    value |= lo;
    return value;
}
|
|
|
|
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
|
|
|
|
// MSVC 2005 or newer on x86-32 or x86-64
|
|
#include <intrin.h>
|
|
|
|
// Index of each CPUID output register within the four-element array that
// the __cpuid intrinsic fills and has_cpuid_bits() inspects.
enum CPUIDRegister {
    eax = 0,
    ebx = 1,
    ecx = 2,
    edx = 3
};
|
|
|
|
static bool
|
|
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
|
|
{
|
|
// Check that the level in question is supported.
|
|
int regs[4];
|
|
__cpuid(regs, level & 0x80000000u);
|
|
if (unsigned(regs[0]) < level)
|
|
return false;
|
|
|
|
// "The __cpuid intrinsic clears the ECX register before calling the cpuid instruction."
|
|
__cpuid(regs, level);
|
|
return (unsigned(regs[reg]) & bits) == bits;
|
|
}
|
|
|
|
// Reads the extended control register selected by |xcr| by forwarding to
// the compiler's _xgetbv intrinsic.
static uint64_t
xgetbv(uint32_t xcr)
{
    return _xgetbv(xcr);
}
|
|
|
|
#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && (defined(__i386) || defined(__x86_64__))
|
|
|
|
// Index of each CPUID output register within the four-element array that
// moz_cpuid() fills and has_cpuid_bits() inspects.
enum CPUIDRegister {
    eax = 0,
    ebx = 1,
    ecx = 2,
    edx = 3
};
|
|
|
|
#ifdef __i386
|
|
// 32-bit variant. Executes CPUID for leaf |InfoType| with ECX (the sub-leaf)
// zeroed, then stores the resulting EAX, EBX, ECX and EDX values into
// CPUInfo[0], CPUInfo[1], CPUInfo[2] and CPUInfo[3] respectively.
//
// EBX is exchanged with ESI before CPUID and exchanged back afterwards, so
// EBX holds its original value again when the asm block ends (presumably
// because EBX may be reserved by the compiler, e.g. for PIC -- TODO confirm).
//
// NOTE(review): the asm writes through %edi but declares no outputs and no
// "memory" clobber, and %eax is overwritten although it is only listed as an
// input. The caller in this file compensates by reading the results through
// a volatile array. Register names are written with a single '%'; confirm
// which compilers actually build this branch before changing the syntax.
static void
|
|
moz_cpuid(int CPUInfo[4], int InfoType)
|
|
{
|
|
asm (
|
|
// Park the caller's EBX in ESI while CPUID overwrites EBX.
"xchg %esi, %ebx\n"
|
|
"xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
|
|
"cpuid\n"
|
|
// Copy the four result registers to CPUInfo[0..3] (4 bytes each).
"movl %eax, (%edi)\n"
|
|
"movl %ebx, 4(%edi)\n"
|
|
"movl %ecx, 8(%edi)\n"
|
|
"movl %edx, 12(%edi)\n"
|
|
// Put the caller's EBX back.
"xchg %esi, %ebx\n"
|
|
// No output operands.
:
|
|
// Input operands.
: "a"(InfoType), // %eax
|
|
"D"(CPUInfo) // %edi
|
|
// Registers the asm overwrites besides its inputs.
: "%ecx", "%edx", "%esi"
|
|
);
|
|
}
|
|
#else
|
|
// 64-bit variant. Executes CPUID for leaf |InfoType| with ECX (the sub-leaf)
// zeroed, then stores the resulting EAX, EBX, ECX and EDX values into
// CPUInfo[0], CPUInfo[1], CPUInfo[2] and CPUInfo[3] respectively.
//
// RBX is exchanged with RSI before CPUID and exchanged back afterwards, so
// RBX holds its original value again when the asm block ends.
//
// NOTE(review): the asm writes through %rdi but declares no outputs and no
// "memory" clobber, and %eax is overwritten although it is only listed as an
// input. The caller in this file compensates by reading the results through
// a volatile array. Register names are written with a single '%'; confirm
// which compilers actually build this branch before changing the syntax.
static void
|
|
moz_cpuid(int CPUInfo[4], int InfoType)
|
|
{
|
|
asm (
|
|
// Park the caller's RBX in RSI while CPUID overwrites EBX.
"xchg %rsi, %rbx\n"
|
|
"xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
|
|
"cpuid\n"
|
|
// Copy the four result registers to CPUInfo[0..3] (4 bytes each).
"movl %eax, (%rdi)\n"
|
|
"movl %ebx, 4(%rdi)\n"
|
|
"movl %ecx, 8(%rdi)\n"
|
|
"movl %edx, 12(%rdi)\n"
|
|
// Put the caller's RBX back.
"xchg %rsi, %rbx\n"
|
|
// No output operands.
:
|
|
// Input operands.
: "a"(InfoType), // %eax
|
|
"D"(CPUInfo) // %rdi
|
|
// Registers the asm overwrites besides its inputs.
: "%ecx", "%edx", "%rsi"
|
|
);
|
|
}
|
|
#endif
|
|
|
|
static bool
|
|
has_cpuid_bits(unsigned int level, CPUIDRegister reg, unsigned int bits)
|
|
{
|
|
// Check that the level in question is supported.
|
|
volatile int regs[4];
|
|
moz_cpuid((int *)regs, level & 0x80000000u);
|
|
if (unsigned(regs[0]) < level)
|
|
return false;
|
|
|
|
moz_cpuid((int *)regs, level);
|
|
return (unsigned(regs[reg]) & bits) == bits;
|
|
}
|
|
|
|
#endif // end CPUID declarations
|
|
|
|
} // namespace
|
|
|
|
namespace mozilla {
|
|
|
|
namespace sse_private {
|
|
|
|
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
|
|
|
|
#if !defined(MOZILLA_PRESUME_MMX)
|
|
bool mmx_enabled = has_cpuid_bits(1u, edx, (1u<<23));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE)
|
|
bool sse_enabled = has_cpuid_bits(1u, edx, (1u<<25));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE2)
|
|
bool sse2_enabled = has_cpuid_bits(1u, edx, (1u<<26));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE3)
|
|
bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u<<0));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSSE3)
|
|
bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u<<9));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE4A)
|
|
bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u<<6));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE4_1)
|
|
bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u<<19));
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_SSE4_2)
|
|
bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u<<20));
|
|
#endif
|
|
|
|
static bool has_avx()
|
|
{
|
|
const unsigned AVX = 1u << 28;
|
|
const unsigned OSXSAVE = 1u << 27;
|
|
const unsigned XSAVE = 1u << 26;
|
|
|
|
const unsigned XMM_STATE = 1u << 1;
|
|
const unsigned YMM_STATE = 1u << 2;
|
|
const unsigned AVX_STATE = XMM_STATE | YMM_STATE;
|
|
|
|
return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) &&
|
|
// ensure the OS supports XSAVE of YMM registers
|
|
(xgetbv(0) & AVX_STATE) == AVX_STATE;
|
|
}
|
|
|
|
#if !defined(MOZILLA_PRESUME_AVX)
|
|
bool avx_enabled = has_avx();
|
|
#endif
|
|
|
|
#if !defined(MOZILLA_PRESUME_AVX2)
|
|
bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u<<5));
|
|
#endif
|
|
|
|
#endif
|
|
|
|
} // namespace sse_private
|
|
} // namespace mozilla
|