/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* compile-time and runtime tests for whether to use SSE instructions */

#include "SSE.h"

#ifdef HAVE_CPUID_H
// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
#  include <cpuid.h>
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
// MSVC 2005 or newer on x86-32 or x86-64
#  include <intrin.h>
#endif

namespace {

// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
// We can't declare these functions in the header file, however, because
// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
// include both SSE.h and <windows.h>.
//
// Each of the three branches below provides the same helper:
//
//   has_cpuid_bits(level, reg, bits)
//
// which returns true only when CPUID leaf `level` is supported by the
// processor and every bit in `bits` is set in output register `reg` of that
// leaf (queried with sub-leaf 0).

#ifdef HAVE_CPUID_H

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

// <cpuid.h> implementation of has_cpuid_bits().
static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
                           unsigned int bits) {
    unsigned int regs[4];
    // NOTE: these locals intentionally shadow the enumerators above; the
    // __cpuid_count macro wants four lvalues to receive the registers.
    unsigned int eax, ebx, ecx, edx;

    // Basic leaves (0x0000_00xx) and extended leaves (0x8000_00xx) have
    // separate maxima.  Ask for the maximum of the range `level` belongs to;
    // asking for the basic maximum unconditionally makes every extended leaf
    // (e.g. 0x80000001, used for SSE4a) look unsupported.  This also matches
    // the other two implementations in this file.
    unsigned max = __get_cpuid_max(level & 0x80000000u, NULL);
    if (level > max) return false;

    __cpuid_count(level, 0, eax, ebx, ecx, edx);
    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;
    return (regs[reg] & bits) == bits;
}

#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

// MSVC intrinsic implementation of has_cpuid_bits().
static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
                           unsigned int bits) {
    // Check that the level in question is supported.  Leaf 0 (or 0x80000000
    // for extended leaves) reports the highest supported leaf in EAX.
    int regs[4];
    __cpuid(regs, level & 0x80000000u);
    if (unsigned(regs[0]) < level) return false;

    // "The __cpuid intrinsic clears the ECX register before calling the cpuid
    // instruction."
    __cpuid(regs, level);
    return (unsigned(regs[reg]) & bits) == bits;
}

#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \
    (defined(__i386) || defined(__x86_64__))

// Index of each CPUID output register inside the regs[] arrays used below.
enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };

// moz_cpuid(CPUInfo, InfoType): executes CPUID with EAX = InfoType and
// ECX = 0 and stores EAX, EBX, ECX, EDX into CPUInfo[0..3].  EBX is swapped
// into ESI/RSI around the instruction and swapped back afterwards, so its
// value on entry is restored on exit.
#  ifdef __i386
static void moz_cpuid(int CPUInfo[4], int InfoType) {
    asm("xchg %esi, %ebx\n"
        "xor %ecx, %ecx\n"  // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %eax, (%edi)\n"
        "movl %ebx, 4(%edi)\n"
        "movl %ecx, 8(%edi)\n"
        "movl %edx, 12(%edi)\n"
        "xchg %esi, %ebx\n"
        :
        : "a"(InfoType),  // %eax
          "D"(CPUInfo)    // %edi
        : "%ecx", "%edx", "%esi");
}
#  else
static void moz_cpuid(int CPUInfo[4], int InfoType) {
    asm("xchg %rsi, %rbx\n"
        "xor %ecx, %ecx\n"  // ecx is the sub-leaf (we only ever need 0)
        "cpuid\n"
        "movl %eax, (%rdi)\n"
        "movl %ebx, 4(%rdi)\n"
        "movl %ecx, 8(%rdi)\n"
        "movl %edx, 12(%rdi)\n"
        "xchg %rsi, %rbx\n"
        :
        : "a"(InfoType),  // %eax
          "D"(CPUInfo)    // %rdi
        : "%ecx", "%edx", "%rsi");
}
#  endif

// Inline-assembly implementation of has_cpuid_bits().
static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
                           unsigned int bits) {
    // Check that the level in question is supported.
    // regs is volatile because moz_cpuid() fills it from inside an asm
    // statement that declares no outputs.
    volatile int regs[4];
    moz_cpuid((int*)regs, level & 0x80000000u);
    if (unsigned(regs[0]) < level) return false;

    moz_cpuid((int*)regs, level);
    return (unsigned(regs[reg]) & bits) == bits;
}

#endif  // end CPUID declarations

}  // namespace

namespace mozilla {

namespace sse_private {

// Runtime feature flags.  Each flag is only defined when the corresponding
// MOZILLA_PRESUME_* macro is not set, and is initialized from CPUID during
// static initialization.
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)

#  if !defined(MOZILLA_PRESUME_MMX)
bool mmx_enabled = has_cpuid_bits(1u, edx, (1u << 23));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE)
bool sse_enabled = has_cpuid_bits(1u, edx, (1u << 25));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE2)
bool sse2_enabled = has_cpuid_bits(1u, edx, (1u << 26));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE3)
bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u << 0));
#  endif

#  if !defined(MOZILLA_PRESUME_SSSE3)
bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u << 9));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE4A)
// SSE4a lives in an extended leaf, so this relies on has_cpuid_bits()
// validating `level` against the extended-leaf maximum.
bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u << 6));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE4_1)
bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u << 19));
#  endif

#  if !defined(MOZILLA_PRESUME_SSE4_2)
bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u << 20));
#  endif

#  if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
// Returns true when AVX may be used: either it is presumed at compile time,
// or CPUID leaf 1 reports AVX, OSXSAVE and XSAVE and XCR0 (read via
// xgetbv(0)) has both the XMM and YMM state bits set.
static bool has_avx() {
#    if defined(MOZILLA_PRESUME_AVX)
    return true;
#    else
    const unsigned AVX = 1u << 28;
    const unsigned OSXSAVE = 1u << 27;
    const unsigned XSAVE = 1u << 26;

    const unsigned XMM_STATE = 1u << 1;
    const unsigned YMM_STATE = 1u << 2;
    const unsigned AVX_STATE = XMM_STATE | YMM_STATE;

    return has_cpuid_bits(1u, ecx, AVX | OSXSAVE | XSAVE) &&
           // ensure the OS supports XSAVE of YMM registers
           (xgetbv(0) & AVX_STATE) == AVX_STATE;
#    endif  // MOZILLA_PRESUME_AVX
}
#  endif  // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2

#  if !defined(MOZILLA_PRESUME_AVX)
bool avx_enabled = has_avx();
#  endif

#  if !defined(MOZILLA_PRESUME_AVX2)
bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5));
#  endif

#  if !defined(MOZILLA_PRESUME_AES)
bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25));
#  endif

#endif

}  // namespace sse_private

#ifdef HAVE_CPUID_H

// Reads extended control register `xcr` and returns its 64-bit value
// (EDX:EAX).  The instruction is emitted as raw bytes (0x0f 0x01 0xd0, the
// XGETBV encoding) rather than by mnemonic.
uint64_t xgetbv(uint32_t xcr) {
    uint32_t eax, edx;
    __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
    return (uint64_t)(edx) << 32 | eax;
}

#endif

}  // namespace mozilla