vpx_ports/: apply clang-format
Change-Id: I9654530a34a3d0691baeca9d62184cd7b9ac3b4c
This commit is contained in:
Parent: cdd4eb0291
Commit: 05ce850890
vpx_ports/arm.h
@@ -8,7 +8,6 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #ifndef VPX_PORTS_ARM_H_
 #define VPX_PORTS_ARM_H_
 #include <stdlib.h>
@@ -19,17 +18,17 @@ extern "C" {
 #endif
 
 /*ARMv5TE "Enhanced DSP" instructions.*/
-#define HAS_EDSP  0x01
+#define HAS_EDSP 0x01
 /*ARMv6 "Parallel" or "Media" instructions.*/
 #define HAS_MEDIA 0x02
 /*ARMv7 optional NEON instructions.*/
-#define HAS_NEON  0x04
+#define HAS_NEON 0x04
 
 int arm_cpu_caps(void);
 
 // Earlier gcc compilers have issues with some neon intrinsics
-#if !defined(__clang__) && defined(__GNUC__) && \
-    __GNUC__ == 4 && __GNUC_MINOR__ <= 6
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 4 && \
+    __GNUC_MINOR__ <= 6
 #define VPX_INCOMPATIBLE_GCC
 #endif
 
@@ -38,4 +37,3 @@ int arm_cpu_caps(void);
 #endif
 
 #endif  // VPX_PORTS_ARM_H_
-
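The HAS_* bits above form the capability word returned by arm_cpu_caps(). A minimal caller sketch (hypothetical function, not part of this commit):

    #include "vpx_ports/arm.h"

    void pick_arm_code_path(void) {
      const int caps = arm_cpu_caps();
      if (caps & HAS_NEON) {
        /* install NEON-optimized function pointers */
      } else if (caps & HAS_MEDIA) {
        /* ARMv6 media-instruction fallback */
      } else {
        /* portable C fallback */
      }
    }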
vpx_ports/arm_cpudetect.c
@@ -71,23 +71,22 @@ int arm_cpu_caps(void) {
     return flags;
   }
   mask = arm_cpu_env_mask();
-  /* MSVC has no inline __asm support for ARM, but it does let you __emit
-   *  instructions via their assembled hex code.
-   * All of these instructions should be essentially nops.
-   */
+  /* MSVC has no inline __asm support for ARM, but it does let you __emit
+   * instructions via their assembled hex code.
+   * All of these instructions should be essentially nops.
+   */
 #if HAVE_MEDIA
-  if (mask & HAS_MEDIA)
-    __try {
+  if (mask & HAS_MEDIA) __try {
       /*SHADD8 r3,r3,r3*/
       __emit(0xE6333F93);
       flags |= HAS_MEDIA;
     } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
-    /*Ignore exception.*/
-  }
+      /*Ignore exception.*/
+    }
 #endif /* HAVE_MEDIA */
 #if HAVE_NEON || HAVE_NEON_ASM
-  if (mask &HAS_NEON) {
+  if (mask & HAS_NEON) {
     __try {
       /*VORR q0,q0,q0*/
       __emit(0xF2200150);
@@ -117,8 +116,7 @@ int arm_cpu_caps(void) {
     flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
 #if HAVE_NEON || HAVE_NEON_ASM
-  if (features & ANDROID_CPU_ARM_FEATURE_NEON)
-    flags |= HAS_NEON;
+  if (features & ANDROID_CPU_ARM_FEATURE_NEON) flags |= HAS_NEON;
 #endif /* HAVE_NEON || HAVE_NEON_ASM */
   return flags & mask;
 }
@@ -169,7 +167,8 @@ int arm_cpu_caps(void) {
   }
   return flags & mask;
 }
 #else /* end __linux__ */
-#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+#error \
+    "--enable-runtime-cpu-detect selected, but no CPU detection method " \
 "available for your platform. Reconfigure with --disable-runtime-cpu-detect."
 #endif
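The MSVC branch above detects features by executing a representative opcode under structured exception handling: if the CPU traps with an illegal-instruction exception, the corresponding HAS_* bit is simply left unset. Condensed from the code in this diff (sketch only):

    int have_neon = 0;
    __try {
      __emit(0xF2200150); /* VORR q0,q0,q0 - a harmless NEON instruction */
      have_neon = 1;
    } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
      /* The CPU lacks NEON; leave have_neon at 0. */
    }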
vpx_ports/bitops.h
@@ -16,10 +16,10 @@
 #include "vpx_ports/msvc.h"
 
 #ifdef _MSC_VER
-# if defined(_M_X64) || defined(_M_IX86)
-# include <intrin.h>
-# define USE_MSC_INTRINSICS
-# endif
+#if defined(_M_X64) || defined(_M_IX86)
+#include <intrin.h>
+#define USE_MSC_INTRINSICS
+#endif
 #endif
 
 #ifdef __cplusplus
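USE_MSC_INTRINSICS marks x86/x64 MSVC builds where the <intrin.h> bit-scan intrinsics are available. A sketch of the kind of helper this header builds on top of it (modeled on its get_msb(); the MSVC path is assumed):

    #if defined(USE_MSC_INTRINSICS)
    #pragma intrinsic(_BitScanReverse)
    /* Index of the most significant set bit; undefined for n == 0. */
    static INLINE int get_msb(unsigned int n) {
      unsigned long first_set_bit;
      _BitScanReverse(&first_set_bit, n);
      return (int)first_set_bit;
    }
    #endif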
vpx_ports/emmintrin_compat.h
@@ -15,40 +15,40 @@
 /* From emmintrin.h (gcc 4.5.3) */
 /* Casts between various SP, DP, INT vector types.  Note that these do no
    conversion of values, they just change the type. */
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castpd_ps(__m128d __A)
-{
-  return (__m128) __A;
+extern __inline __m128
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castpd_ps(__m128d __A) {
+  return (__m128)__A;
 }
 
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castpd_si128(__m128d __A)
-{
-  return (__m128i) __A;
+extern __inline __m128i
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castpd_si128(__m128d __A) {
+  return (__m128i)__A;
 }
 
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castps_pd(__m128 __A)
-{
-  return (__m128d) __A;
+extern __inline __m128d
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castps_pd(__m128 __A) {
+  return (__m128d)__A;
 }
 
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castps_si128(__m128 __A)
-{
-  return (__m128i) __A;
+extern __inline __m128i
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castps_si128(__m128 __A) {
+  return (__m128i)__A;
 }
 
-extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castsi128_ps(__m128i __A)
-{
-  return (__m128) __A;
+extern __inline __m128
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castsi128_ps(__m128i __A) {
+  return (__m128)__A;
 }
 
-extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_castsi128_pd(__m128i __A)
-{
-  return (__m128d) __A;
+extern __inline __m128d
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_castsi128_pd(__m128i __A) {
+  return (__m128d)__A;
 }
 #endif
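These shims are copied from gcc 4.5.3's emmintrin.h for older gcc versions whose headers lack the cast intrinsics; they reinterpret an SSE register's bits without converting any values. Usage sketch (requires <emmintrin.h>):

    __m128i ints = _mm_set1_epi32(7);
    __m128 as_ps = _mm_castsi128_ps(ints);   /* same 128 bits, float view */
    __m128i back = _mm_castps_si128(as_ps);  /* round-trips bit-exactly */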
vpx_ports/mem.h
@@ -8,7 +8,6 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #ifndef VPX_PORTS_MEM_H_
 #define VPX_PORTS_MEM_H_
 
@@ -16,12 +15,12 @@
 #include "vpx/vpx_integer.h"
 
 #if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C)
-#define DECLARE_ALIGNED(n,typ,val)  typ val __attribute__ ((aligned (n)))
+#define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n)))
 #elif defined(_MSC_VER)
-#define DECLARE_ALIGNED(n,typ,val)  __declspec(align(n)) typ val
+#define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val
 #else
 #warning No alignment directives known for this compiler.
-#define DECLARE_ALIGNED(n,typ,val)  typ val
+#define DECLARE_ALIGNED(n, typ, val) typ val
 #endif
 
 /* Indicates that the usage of the specified variable has been audited to assure
@@ -29,7 +28,7 @@
  * warnings on gcc.
  */
 #if defined(__GNUC__) && __GNUC__
-#define UNINITIALIZED_IS_SAFE(x) x=x
+#define UNINITIALIZED_IS_SAFE(x) x = x
 #else
 #define UNINITIALIZED_IS_SAFE(x) x
 #endif
@@ -39,20 +38,19 @@
 #endif
 
 /* Shift down with rounding for use when n >= 0, value >= 0 */
-#define ROUND_POWER_OF_TWO(value, n) \
-    (((value) + (((1 << (n)) >> 1))) >> (n))
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))
 
 /* Shift down with rounding for signed integers, for use when n >= 0 */
-#define ROUND_POWER_OF_TWO_SIGNED(value, n) \
-  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
-   : ROUND_POWER_OF_TWO((value), (n)))
+#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
+  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
+                 : ROUND_POWER_OF_TWO((value), (n)))
 
 #define ALIGN_POWER_OF_TWO(value, n) \
-    (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
+  (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1))
 
-#define CONVERT_TO_SHORTPTR(x) ((uint16_t*)(((uintptr_t)(x)) << 1))
+#define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1))
 #if CONFIG_VP9_HIGHBITDEPTH
-#define CONVERT_TO_BYTEPTR(x) ((uint8_t*)(((uintptr_t)(x)) >> 1))
+#define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1))
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
 #endif  // VPX_PORTS_MEM_H_
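ROUND_POWER_OF_TWO(value, n) computes round(value / 2^n) for non-negative inputs by adding half the divisor before shifting. A quick check, plus the alignment macro in use (illustrative only):

    /* (6 + ((1 << 2) >> 1)) >> 2 == (6 + 2) >> 2 == 2 == round(6 / 4) */
    assert(ROUND_POWER_OF_TWO(6, 2) == 2);
    /* 16-byte-aligned buffer, works on both the gcc and MSVC branches */
    DECLARE_ALIGNED(16, uint8_t, buf[64]);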
vpx_ports/mem_ops.h
@@ -46,36 +46,36 @@
 #undef MEM_VALUE_T_SZ_BITS
 #define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3)
 
-#undef  mem_ops_wrap_symbol
+#undef mem_ops_wrap_symbol
 #define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T)
-#undef  mem_ops_wrap_symbol2
-#define mem_ops_wrap_symbol2(fn,typ) mem_ops_wrap_symbol3(fn,typ)
-#undef  mem_ops_wrap_symbol3
-#define mem_ops_wrap_symbol3(fn,typ) fn##_as_##typ
+#undef mem_ops_wrap_symbol2
+#define mem_ops_wrap_symbol2(fn, typ) mem_ops_wrap_symbol3(fn, typ)
+#undef mem_ops_wrap_symbol3
+#define mem_ops_wrap_symbol3(fn, typ) fn##_as_##typ
 
 /*
  * Include aligned access routines
  */
 #define INCLUDED_BY_MEM_OPS_H
 #include "mem_ops_aligned.h"
-#undef  INCLUDED_BY_MEM_OPS_H
+#undef INCLUDED_BY_MEM_OPS_H
 
-#undef  mem_get_be16
+#undef mem_get_be16
 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
 static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = mem[0] << 8;
   val |= mem[1];
   return val;
 }
 
-#undef  mem_get_be24
+#undef mem_get_be24
 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
 static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = mem[0] << 16;
   val |= mem[1] << 8;
@@ -83,11 +83,11 @@ static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) {
   return val;
 }
 
-#undef  mem_get_be32
+#undef mem_get_be32
 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
 static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = ((unsigned MEM_VALUE_T)mem[0]) << 24;
   val |= mem[1] << 16;
@@ -96,22 +96,22 @@ static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) {
   return val;
 }
 
-#undef  mem_get_le16
+#undef mem_get_le16
 #define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
 static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = mem[1] << 8;
   val |= mem[0];
   return val;
 }
 
-#undef  mem_get_le24
+#undef mem_get_le24
 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
 static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = mem[2] << 16;
   val |= mem[1] << 8;
@@ -119,11 +119,11 @@ static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) {
   return val;
 }
 
-#undef  mem_get_le32
+#undef mem_get_le32
 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
 static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
-  unsigned MEM_VALUE_T  val;
-  const MAU_T          *mem = (const MAU_T *)vmem;
+  unsigned MEM_VALUE_T val;
+  const MAU_T *mem = (const MAU_T *)vmem;
 
   val = ((unsigned MEM_VALUE_T)mem[3]) << 24;
   val |= mem[2] << 16;
@@ -132,13 +132,14 @@ static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) {
   return val;
 }
 
-#define mem_get_s_generic(end,sz) \
-  static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
-    const MAU_T *mem = (const MAU_T*)vmem;\
-    signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
-    return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
-  }
+#define mem_get_s_generic(end, sz)                                            \
+  static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) { \
+    const MAU_T *mem = (const MAU_T *)vmem;                                   \
+    signed MEM_VALUE_T val = mem_get_##end##sz(mem);                          \
+    return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz); \
+  }
 
+/* clang-format off */
 #undef  mem_get_sbe16
 #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16)
 mem_get_s_generic(be, 16)
@@ -222,5 +223,6 @@ static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) {
   mem[2] = (MAU_T)((val >> 16) & 0xff);
   mem[3] = (MAU_T)((val >> 24) & 0xff);
 }
+/* clang-format on */
 
 #endif  // VPX_PORTS_MEM_OPS_H_
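These accessors move multi-byte values one byte at a time, so they are safe for unaligned pointers on strict-alignment targets and independent of host endianness. Round-trip sketch:

    unsigned char buf[4];
    mem_put_le32(buf, 0x01020304);           /* buf = {04, 03, 02, 01} */
    assert(mem_get_le32(buf) == 0x01020304); /* little-endian read */
    assert(mem_get_be32(buf) == 0x04030201); /* same bytes, big-endian view */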
vpx_ports/mem_ops_aligned.h
@@ -27,86 +27,87 @@
 /* Architectures that provide instructions for doing this byte swapping
  * could redefine these macros.
  */
-#define swap_endian_16(val,raw) do {\
-    val = (uint16_t)(((raw>>8) & 0x00ff) \
-                     | ((raw<<8) & 0xff00));\
-  } while(0)
-#define swap_endian_32(val,raw) do {\
-    val = ((raw>>24) & 0x000000ff) \
-          | ((raw>>8) & 0x0000ff00) \
-          | ((raw<<8) & 0x00ff0000) \
-          | ((raw<<24) & 0xff000000); \
-  } while(0)
-#define swap_endian_16_se(val,raw) do {\
-    swap_endian_16(val,raw);\
-    val = ((val << 16) >> 16);\
-  } while(0)
-#define swap_endian_32_se(val,raw) swap_endian_32(val,raw)
+#define swap_endian_16(val, raw)                                     \
+  do {                                                               \
+    val = (uint16_t)(((raw >> 8) & 0x00ff) | ((raw << 8) & 0xff00)); \
+  } while (0)
+#define swap_endian_32(val, raw)                                    \
+  do {                                                              \
+    val = ((raw >> 24) & 0x000000ff) | ((raw >> 8) & 0x0000ff00) |  \
+          ((raw << 8) & 0x00ff0000) | ((raw << 24) & 0xff000000);   \
+  } while (0)
+#define swap_endian_16_se(val, raw) \
+  do {                              \
+    swap_endian_16(val, raw);       \
+    val = ((val << 16) >> 16);      \
+  } while (0)
+#define swap_endian_32_se(val, raw) swap_endian_32(val, raw)
 
-#define mem_get_ne_aligned_generic(end,sz) \
-  static VPX_INLINE unsigned MEM_VALUE_T \
-    mem_get_##end##sz##_aligned(const void *vmem) {\
-    const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
-    return *mem;\
-  }
+#define mem_get_ne_aligned_generic(end, sz)                           \
+  static VPX_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \
+      const void *vmem) {                                             \
+    const uint##sz##_t *mem = (const uint##sz##_t *)vmem;             \
+    return *mem;                                                      \
+  }
 
-#define mem_get_sne_aligned_generic(end,sz) \
-  static VPX_INLINE signed MEM_VALUE_T \
-    mem_get_s##end##sz##_aligned(const void *vmem) {\
-    const int##sz##_t *mem = (const int##sz##_t *)vmem;\
-    return *mem;\
-  }
+#define mem_get_sne_aligned_generic(end, sz)                          \
+  static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(  \
+      const void *vmem) {                                             \
+    const int##sz##_t *mem = (const int##sz##_t *)vmem;               \
+    return *mem;                                                      \
+  }
 
-#define mem_get_se_aligned_generic(end,sz) \
-  static VPX_INLINE unsigned MEM_VALUE_T \
-    mem_get_##end##sz##_aligned(const void *vmem) {\
-    const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
-    unsigned MEM_VALUE_T val, raw = *mem;\
-    swap_endian_##sz(val,raw);\
-    return val;\
-  }
+#define mem_get_se_aligned_generic(end, sz)                           \
+  static VPX_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \
+      const void *vmem) {                                             \
+    const uint##sz##_t *mem = (const uint##sz##_t *)vmem;             \
+    unsigned MEM_VALUE_T val, raw = *mem;                             \
+    swap_endian_##sz(val, raw);                                       \
+    return val;                                                       \
+  }
 
-#define mem_get_sse_aligned_generic(end,sz) \
-  static VPX_INLINE signed MEM_VALUE_T \
-    mem_get_s##end##sz##_aligned(const void *vmem) {\
-    const int##sz##_t *mem = (const int##sz##_t *)vmem;\
-    unsigned MEM_VALUE_T val, raw = *mem;\
-    swap_endian_##sz##_se(val,raw);\
-    return val;\
-  }
+#define mem_get_sse_aligned_generic(end, sz)                          \
+  static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(  \
+      const void *vmem) {                                             \
+    const int##sz##_t *mem = (const int##sz##_t *)vmem;               \
+    unsigned MEM_VALUE_T val, raw = *mem;                             \
+    swap_endian_##sz##_se(val, raw);                                  \
+    return val;                                                       \
+  }
 
-#define mem_put_ne_aligned_generic(end,sz) \
-  static VPX_INLINE void \
-    mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
-    uint##sz##_t *mem = (uint##sz##_t *)vmem;\
-    *mem = (uint##sz##_t)val;\
-  }
+#define mem_put_ne_aligned_generic(end, sz)                             \
+  static VPX_INLINE void mem_put_##end##sz##_aligned(void *vmem,        \
+                                                     MEM_VALUE_T val) { \
+    uint##sz##_t *mem = (uint##sz##_t *)vmem;                           \
+    *mem = (uint##sz##_t)val;                                           \
+  }
 
-#define mem_put_se_aligned_generic(end,sz) \
-  static VPX_INLINE void \
-    mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
-    uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
-    swap_endian_##sz(raw,val);\
-    *mem = (uint##sz##_t)raw;\
-  }
+#define mem_put_se_aligned_generic(end, sz)                             \
+  static VPX_INLINE void mem_put_##end##sz##_aligned(void *vmem,        \
+                                                     MEM_VALUE_T val) { \
+    uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;                      \
+    swap_endian_##sz(raw, val);                                         \
+    *mem = (uint##sz##_t)raw;                                           \
+  }
 
 #include "vpx_config.h"
 #if CONFIG_BIG_ENDIAN
-#define mem_get_be_aligned_generic(sz)  mem_get_ne_aligned_generic(be,sz)
-#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be,sz)
-#define mem_get_le_aligned_generic(sz)  mem_get_se_aligned_generic(le,sz)
-#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le,sz)
-#define mem_put_be_aligned_generic(sz)  mem_put_ne_aligned_generic(be,sz)
-#define mem_put_le_aligned_generic(sz)  mem_put_se_aligned_generic(le,sz)
+#define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be, sz)
+#define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be, sz)
+#define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le, sz)
+#define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le, sz)
+#define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be, sz)
+#define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le, sz)
 #else
-#define mem_get_be_aligned_generic(sz)  mem_get_se_aligned_generic(be,sz)
-#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be,sz)
-#define mem_get_le_aligned_generic(sz)  mem_get_ne_aligned_generic(le,sz)
-#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le,sz)
-#define mem_put_be_aligned_generic(sz)  mem_put_se_aligned_generic(be,sz)
-#define mem_put_le_aligned_generic(sz)  mem_put_ne_aligned_generic(le,sz)
+#define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be, sz)
+#define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be, sz)
+#define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le, sz)
+#define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le, sz)
+#define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be, sz)
+#define mem_put_le_aligned_generic(sz) mem_put_ne_aligned_generic(le, sz)
 #endif
 
+/* clang-format off */
 #undef  mem_get_be16_aligned
 #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned)
 mem_get_be_aligned_generic(16)
@@ -165,5 +166,6 @@ mem_put_le_aligned_generic(32)
 #undef swap_endian_32
 #undef swap_endian_16_se
 #undef swap_endian_32_se
+/* clang-format on */
 
 #endif  // VPX_PORTS_MEM_OPS_ALIGNED_H_
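The swap_endian_* macros are plain shift-and-mask byte swaps; the *_aligned accessors dereference a correctly typed pointer and swap only when the requested byte order differs from the host's (per CONFIG_BIG_ENDIAN). For example:

    uint16_t val, raw = 0x1234;
    swap_endian_16(val, raw); /* val == 0x3412 */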
vpx_ports/msvc.h
@@ -14,9 +14,9 @@
 
 #include "./vpx_config.h"
 
-# if _MSC_VER < 1900  // VS2015 provides snprintf
-# define snprintf _snprintf
-# endif  // _MSC_VER < 1900
+#if _MSC_VER < 1900  // VS2015 provides snprintf
+#define snprintf _snprintf
+#endif  // _MSC_VER < 1900
 
 #if _MSC_VER < 1800  // VS2013 provides round
 #include <math.h>
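The shim maps C99 snprintf onto _snprintf for Visual Studio releases older than VS2015 (_MSC_VER < 1900). One caveat when relying on it: _snprintf, unlike C99 snprintf, does not guarantee NUL-termination on truncation, so portable callers may want to terminate explicitly (illustrative):

    char buf[32];
    int w = 640, h = 480;
    snprintf(buf, sizeof(buf), "%dx%d", w, h);
    buf[sizeof(buf) - 1] = '\0'; /* defensive: _snprintf may not terminate */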
vpx_ports/vpx_once.h
@@ -48,102 +48,92 @@
  * As a static, once_state will be zero-initialized as program start.
  */
 static LONG once_state;
-static void once(void (*func)(void))
-{
-    /* Try to advance once_state from its initial value of 0 to 1.
-     * Only one thread can succeed in doing so.
-     */
-    if (InterlockedCompareExchange(&once_state, 1, 0) == 0) {
-        /* We're the winning thread, having set once_state to 1.
-         * Call our function. */
-        func();
-        /* Now advance once_state to 2, unblocking any other threads. */
-        InterlockedIncrement(&once_state);
-        return;
-    }
-
-    /* We weren't the winning thread, but we want to block on
-     * the state variable so we don't return before func()
-     * has finished executing elsewhere.
-     *
-     * Try to advance once_state from 2 to 2, which is only possible
-     * after the winning thead advances it from 1 to 2.
-     */
-    while (InterlockedCompareExchange(&once_state, 2, 2) != 2) {
-        /* State isn't yet 2. Try again.
-         *
-         * We are used for singleton initialization functions,
-         * which should complete quickly. Contention will likewise
-         * be rare, so it's worthwhile to use a simple but cpu-
-         * intensive busy-wait instead of successive backoff,
-         * waiting on a kernel object, or another heavier-weight scheme.
-         *
-         * We can at least yield our timeslice.
-         */
-        Sleep(0);
-    }
-
-    /* We've seen once_state advance to 2, so we know func()
-     * has been called. And we've left once_state as we found it,
-     * so other threads will have the same experience.
-     *
-     * It's safe to return now.
-     */
-    return;
-}
+static void once(void (*func)(void)) {
+  /* Try to advance once_state from its initial value of 0 to 1.
+   * Only one thread can succeed in doing so.
+   */
+  if (InterlockedCompareExchange(&once_state, 1, 0) == 0) {
+    /* We're the winning thread, having set once_state to 1.
+     * Call our function. */
+    func();
+    /* Now advance once_state to 2, unblocking any other threads. */
+    InterlockedIncrement(&once_state);
+    return;
+  }
+
+  /* We weren't the winning thread, but we want to block on
+   * the state variable so we don't return before func()
+   * has finished executing elsewhere.
+   *
+   * Try to advance once_state from 2 to 2, which is only possible
+   * after the winning thead advances it from 1 to 2.
+   */
+  while (InterlockedCompareExchange(&once_state, 2, 2) != 2) {
+    /* State isn't yet 2. Try again.
+     *
+     * We are used for singleton initialization functions,
+     * which should complete quickly. Contention will likewise
+     * be rare, so it's worthwhile to use a simple but cpu-
+     * intensive busy-wait instead of successive backoff,
+     * waiting on a kernel object, or another heavier-weight scheme.
+     *
+     * We can at least yield our timeslice.
+     */
+    Sleep(0);
+  }
+
+  /* We've seen once_state advance to 2, so we know func()
+   * has been called. And we've left once_state as we found it,
+   * so other threads will have the same experience.
+   *
+   * It's safe to return now.
+   */
+  return;
+}
 
 #elif CONFIG_MULTITHREAD && defined(__OS2__)
 #define INCL_DOS
 #include <os2.h>
-static void once(void (*func)(void))
-{
-    static int done;
+static void once(void (*func)(void)) {
+  static int done;
 
-    /* If the initialization is complete, return early. */
-    if(done)
-        return;
+  /* If the initialization is complete, return early. */
+  if (done) return;
 
-    /* Causes all other threads in the process to block themselves
-     * and give up their time slice.
-     */
-    DosEnterCritSec();
+  /* Causes all other threads in the process to block themselves
+   * and give up their time slice.
+   */
+  DosEnterCritSec();
 
-    if (!done)
-    {
-        func();
-        done = 1;
-    }
+  if (!done) {
+    func();
+    done = 1;
+  }
 
-    /* Restores normal thread dispatching for the current process. */
-    DosExitCritSec();
+  /* Restores normal thread dispatching for the current process. */
+  DosExitCritSec();
 }
 
-
 #elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
 #include <pthread.h>
-static void once(void (*func)(void))
-{
-    static pthread_once_t lock = PTHREAD_ONCE_INIT;
-    pthread_once(&lock, func);
+static void once(void (*func)(void)) {
+  static pthread_once_t lock = PTHREAD_ONCE_INIT;
+  pthread_once(&lock, func);
 }
 
-
 #else
 /* No-op version that performs no synchronization. *_rtcd() is idempotent,
  * so as long as your platform provides atomic loads/stores of pointers
  * no synchronization is strictly necessary.
  */
-
-static void once(void (*func)(void))
-{
-    static int done;
+static void once(void (*func)(void)) {
+  static int done;
 
-    if(!done)
-    {
-        func();
-        done = 1;
-    }
+  if (!done) {
+    func();
+    done = 1;
+  }
 }
 #endif
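All four variants give once() the same contract as pthread_once(): the first caller runs func exactly once, and (outside the unsynchronized fallback) no caller returns before func has completed. Typical use is guarding one-time initialization such as RTCD table setup (hypothetical names):

    static void init_function_tables(void) { /* fill dispatch tables */ }

    void codec_entry_point(void) {
      once(init_function_tables); /* safe from multiple threads */
    }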
vpx_ports/vpx_timer.h
@@ -8,7 +8,6 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #ifndef VPX_PORTS_VPX_TIMER_H_
 #define VPX_PORTS_VPX_TIMER_H_
 
@@ -34,30 +33,27 @@
 
 /* timersub is not provided by msys at this time. */
 #ifndef timersub
-#define timersub(a, b, result) \
-  do { \
-    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
-    (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
-    if ((result)->tv_usec < 0) { \
-      --(result)->tv_sec; \
-      (result)->tv_usec += 1000000; \
-    } \
-  } while (0)
+#define timersub(a, b, result)                       \
+  do {                                               \
+    (result)->tv_sec = (a)->tv_sec - (b)->tv_sec;    \
+    (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+    if ((result)->tv_usec < 0) {                     \
+      --(result)->tv_sec;                            \
+      (result)->tv_usec += 1000000;                  \
+    }                                                \
+  } while (0)
 #endif
 #endif
 
-
 struct vpx_usec_timer {
 #if defined(_WIN32)
-  LARGE_INTEGER  begin, end;
+  LARGE_INTEGER begin, end;
 #else
   struct timeval begin, end;
 #endif
 };
 
-
-static INLINE void
-vpx_usec_timer_start(struct vpx_usec_timer *t) {
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {
 #if defined(_WIN32)
   QueryPerformanceCounter(&t->begin);
 #else
@@ -65,9 +61,7 @@ vpx_usec_timer_start(struct vpx_usec_timer *t) {
 #endif
 }
 
-
-static INLINE void
-vpx_usec_timer_mark(struct vpx_usec_timer *t) {
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {
 #if defined(_WIN32)
   QueryPerformanceCounter(&t->end);
 #else
@@ -75,9 +69,7 @@ vpx_usec_timer_mark(struct vpx_usec_timer *t) {
 #endif
 }
 
-
-static INLINE int64_t
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
+static INLINE int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
 #if defined(_WIN32)
   LARGE_INTEGER freq, diff;
 
@@ -104,16 +96,11 @@ struct vpx_usec_timer {
   void *dummy;
 };
 
-static INLINE void
-vpx_usec_timer_start(struct vpx_usec_timer *t) { }
+static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {}
 
-static INLINE void
-vpx_usec_timer_mark(struct vpx_usec_timer *t) { }
+static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {}
 
-static INLINE int
-vpx_usec_timer_elapsed(struct vpx_usec_timer *t) {
-  return 0;
-}
+static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; }
 
 #endif /* CONFIG_OS_SUPPORT */
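The timer follows a start/mark/elapsed pattern, reporting microseconds (sketch; do_work() is a placeholder, PRId64 needs <inttypes.h>):

    struct vpx_usec_timer timer;
    vpx_usec_timer_start(&timer);
    do_work();
    vpx_usec_timer_mark(&timer);
    printf("%" PRId64 " us\n", vpx_usec_timer_elapsed(&timer));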
vpx_ports/x86.h (198 changed lines)
@@ -8,13 +8,12 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-
 #ifndef VPX_PORTS_X86_H_
 #define VPX_PORTS_X86_H_
 #include <stdlib.h>
 
 #if defined(_MSC_VER)
-#include <intrin.h>  /* For __cpuidex, __rdtsc */
+#include <intrin.h> /* For __cpuidex, __rdtsc */
 #endif
 
 #include "vpx_config.h"
@@ -41,68 +40,71 @@ typedef enum {
   VPX_CPU_VIA,
 
   VPX_CPU_LAST
-}  vpx_cpu_t;
+} vpx_cpu_t;
 
 #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
 #if ARCH_X86_64
-#define cpuid(func, func2, ax, bx, cx, dx)\
-  __asm__ __volatile__ (\
-                        "cpuid           \n\t" \
-                        : "=a" (ax), "=b" (bx), "=c" (cx), "=d" (dx) \
-                        : "a" (func), "c" (func2));
+#define cpuid(func, func2, ax, bx, cx, dx)                      \
+  __asm__ __volatile__("cpuid \n\t"                             \
+                       : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
+                       : "a"(func), "c"(func2));
 #else
-#define cpuid(func, func2, ax, bx, cx, dx)\
-  __asm__ __volatile__ (\
-                        "mov %%ebx, %%edi  \n\t" \
-                        "cpuid             \n\t" \
-                        "xchg %%edi, %%ebx \n\t" \
-                        : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                        : "a" (func), "c" (func2));
+#define cpuid(func, func2, ax, bx, cx, dx)     \
+  __asm__ __volatile__(                        \
+      "mov %%ebx, %%edi \n\t"                  \
+      "cpuid \n\t"                             \
+      "xchg %%edi, %%ebx \n\t"                 \
+      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
+      : "a"(func), "c"(func2));
 #endif
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
+#elif defined(__SUNPRO_C) || \
+    defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
 #if ARCH_X86_64
-#define cpuid(func, func2, ax, bx, cx, dx)\
-  asm volatile (\
-                "xchg %rsi, %rbx \n\t" \
-                "cpuid           \n\t" \
-                "movl %ebx, %edi \n\t" \
-                "xchg %rsi, %rbx \n\t" \
-                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                : "a" (func), "c" (func2));
+#define cpuid(func, func2, ax, bx, cx, dx)     \
+  asm volatile(                                \
+      "xchg %rsi, %rbx \n\t"                   \
+      "cpuid \n\t"                             \
+      "movl %ebx, %edi \n\t"                   \
+      "xchg %rsi, %rbx \n\t"                   \
+      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
+      : "a"(func), "c"(func2));
 #else
-#define cpuid(func, func2, ax, bx, cx, dx)\
-  asm volatile (\
-                "pushl %ebx       \n\t" \
-                "cpuid            \n\t" \
-                "movl %ebx, %edi  \n\t" \
-                "popl %ebx        \n\t" \
-                : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
-                : "a" (func), "c" (func2));
+#define cpuid(func, func2, ax, bx, cx, dx)     \
+  asm volatile(                                \
+      "pushl %ebx \n\t"                        \
+      "cpuid \n\t"                             \
+      "movl %ebx, %edi \n\t"                   \
+      "popl %ebx \n\t"                         \
+      : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \
+      : "a"(func), "c"(func2));
 #endif
 #else /* end __SUNPRO__ */
 #if ARCH_X86_64
 #if defined(_MSC_VER) && _MSC_VER > 1500
-#define cpuid(func, func2, a, b, c, d) do {\
-    int regs[4];\
-    __cpuidex(regs, func, func2); \
-    a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\
-  } while(0)
+#define cpuid(func, func2, a, b, c, d) \
+  do {                                 \
+    int regs[4];                       \
+    __cpuidex(regs, func, func2);      \
+    a = regs[0];                       \
+    b = regs[1];                       \
+    c = regs[2];                       \
+    d = regs[3];                       \
+  } while (0)
 #else
-#define cpuid(func, func2, a, b, c, d) do {\
-    int regs[4];\
-    __cpuid(regs, func); \
-    a = regs[0]; b = regs[1]; c = regs[2]; d = regs[3];\
-  } while(0)
+#define cpuid(func, func2, a, b, c, d) \
+  do {                                 \
+    int regs[4];                       \
+    __cpuid(regs, func);               \
+    a = regs[0];                       \
+    b = regs[1];                       \
+    c = regs[2];                       \
+    d = regs[3];                       \
+  } while (0)
 #endif
 #else
-#define cpuid(func, func2, a, b, c, d)\
-  __asm mov eax, func\
-  __asm mov ecx, func2\
-  __asm cpuid\
-  __asm mov a, eax\
-  __asm mov b, ebx\
-  __asm mov c, ecx\
-  __asm mov d, edx
+#define cpuid(func, func2, a, b, c, d)                              \
+  __asm mov eax, func __asm mov ecx, func2 __asm cpuid __asm mov a, \
+      eax __asm mov b, ebx __asm mov c, ecx __asm mov d, edx
 #endif
 #endif /* end others */
 
@@ -112,13 +114,13 @@ static INLINE uint64_t xgetbv(void) {
   const uint32_t ecx = 0;
   uint32_t eax, edx;
   // Use the raw opcode for xgetbv for compatibility with older toolchains.
-  __asm__ volatile (
-    ".byte 0x0f, 0x01, 0xd0\n"
-    : "=a"(eax), "=d"(edx) : "c" (ecx));
+  __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n"
+                   : "=a"(eax), "=d"(edx)
+                   : "c"(ecx));
   return ((uint64_t)edx << 32) | eax;
 }
-#elif (defined(_M_X64) || defined(_M_IX86)) && \
-  defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
+#elif(defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \
+    _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
 #include <immintrin.h>
 #define xgetbv() _xgetbv(0)
 #elif defined(_MSC_VER) && defined(_M_IX86)
@@ -144,20 +146,19 @@ static INLINE uint64_t xgetbv(void) {
 #endif
 #endif
 
-#define HAS_MMX     0x01
-#define HAS_SSE     0x02
-#define HAS_SSE2    0x04
-#define HAS_SSE3    0x08
-#define HAS_SSSE3   0x10
-#define HAS_SSE4_1  0x20
-#define HAS_AVX     0x40
-#define HAS_AVX2    0x80
+#define HAS_MMX 0x01
+#define HAS_SSE 0x02
+#define HAS_SSE2 0x04
+#define HAS_SSE3 0x08
+#define HAS_SSSE3 0x10
+#define HAS_SSE4_1 0x20
+#define HAS_AVX 0x40
+#define HAS_AVX2 0x80
 #ifndef BIT
-#define BIT(n) (1<<n)
+#define BIT(n) (1 << n)
 #endif
 
-static INLINE int
-x86_simd_caps(void) {
+static INLINE int x86_simd_caps(void) {
   unsigned int flags = 0;
   unsigned int mask = ~0;
   unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx;
@@ -167,19 +168,16 @@ x86_simd_caps(void) {
   /* See if the CPU capabilities are being overridden by the environment */
   env = getenv("VPX_SIMD_CAPS");
 
-  if (env && *env)
-    return (int)strtol(env, NULL, 0);
+  if (env && *env) return (int)strtol(env, NULL, 0);
 
   env = getenv("VPX_SIMD_CAPS_MASK");
 
-  if (env && *env)
-    mask = (unsigned int)strtoul(env, NULL, 0);
+  if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0);
 
   /* Ensure that the CPUID instruction supports extended features */
   cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx);
 
-  if (max_cpuid_val < 1)
-    return 0;
+  if (max_cpuid_val < 1) return 0;
 
   /* Get the standard feature flags */
   cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
@@ -218,27 +216,25 @@ x86_simd_caps(void) {
 // measurement. For large function (CPU time > a couple of seconds), 64-bit
 // counter should be used.
 // 32-bit CPU cycle counter
-static INLINE unsigned int
-x86_readtsc(void) {
+static INLINE unsigned int x86_readtsc(void) {
 #if defined(__GNUC__) && __GNUC__
   unsigned int tsc;
-  __asm__ __volatile__("rdtsc\n\t":"=a"(tsc):);
+  __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :);
   return tsc;
 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
   unsigned int tsc;
-  asm volatile("rdtsc\n\t":"=a"(tsc):);
+  asm volatile("rdtsc\n\t" : "=a"(tsc) :);
   return tsc;
 #else
 #if ARCH_X86_64
   return (unsigned int)__rdtsc();
 #else
-  __asm  rdtsc;
+  __asm rdtsc;
 #endif
 #endif
 }
 // 64-bit CPU cycle counter
-static INLINE uint64_t
-x86_readtsc64(void) {
+static INLINE uint64_t x86_readtsc64(void) {
 #if defined(__GNUC__) && __GNUC__
   uint32_t hi, lo;
   __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
@@ -251,76 +247,64 @@ x86_readtsc64(void) {
 #if ARCH_X86_64
   return (uint64_t)__rdtsc();
 #else
-  __asm  rdtsc;
+  __asm rdtsc;
 #endif
 #endif
 }
 
 #if defined(__GNUC__) && __GNUC__
-#define x86_pause_hint()\
-  __asm__ __volatile__ ("pause \n\t")
+#define x86_pause_hint() __asm__ __volatile__("pause \n\t")
 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#define x86_pause_hint()\
-  asm volatile ("pause \n\t")
+#define x86_pause_hint() asm volatile("pause \n\t")
 #else
 #if ARCH_X86_64
-#define x86_pause_hint()\
-  _mm_pause();
+#define x86_pause_hint() _mm_pause();
 #else
-#define x86_pause_hint()\
-  __asm pause
+#define x86_pause_hint() __asm pause
 #endif
 #endif
 
 #if defined(__GNUC__) && __GNUC__
-static void
-x87_set_control_word(unsigned short mode) {
+static void x87_set_control_word(unsigned short mode) {
   __asm__ __volatile__("fldcw %0" : : "m"(*&mode));
 }
-static unsigned short
-x87_get_control_word(void) {
+static unsigned short x87_get_control_word(void) {
   unsigned short mode;
-  __asm__ __volatile__("fstcw %0\n\t":"=m"(*&mode):);
+  __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :);
   return mode;
 }
 #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-static void
-x87_set_control_word(unsigned short mode) {
+static void x87_set_control_word(unsigned short mode) {
   asm volatile("fldcw %0" : : "m"(*&mode));
 }
-static unsigned short
-x87_get_control_word(void) {
+static unsigned short x87_get_control_word(void) {
   unsigned short mode;
-  asm volatile("fstcw %0\n\t":"=m"(*&mode):);
+  asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
   return mode;
 }
 #elif ARCH_X86_64
 /* No fldcw intrinsics on Windows x64, punt to external asm */
-extern void           vpx_winx64_fldcw(unsigned short mode);
+extern void vpx_winx64_fldcw(unsigned short mode);
 extern unsigned short vpx_winx64_fstcw(void);
 #define x87_set_control_word vpx_winx64_fldcw
 #define x87_get_control_word vpx_winx64_fstcw
 #else
-static void
-x87_set_control_word(unsigned short mode) {
+static void x87_set_control_word(unsigned short mode) {
   __asm { fldcw mode }
 }
-static unsigned short
-x87_get_control_word(void) {
+static unsigned short x87_get_control_word(void) {
   unsigned short mode;
   __asm { fstcw mode }
   return mode;
 }
 #endif
 
-static INLINE unsigned int
-x87_set_double_precision(void) {
+static INLINE unsigned int x87_set_double_precision(void) {
   unsigned int mode = x87_get_control_word();
-  x87_set_control_word((mode&~0x300) | 0x200);
+  x87_set_control_word((mode & ~0x300) | 0x200);
   return mode;
 }
 
-
 extern void vpx_reset_mmx_state(void);
 
 #ifdef __cplusplus
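As the diff shows, x86_simd_caps() honors two environment overrides: VPX_SIMD_CAPS replaces the detected capability word outright, and VPX_SIMD_CAPS_MASK clears bits from it. A hypothetical harness that hides AVX/AVX2 from the library (setenv is POSIX):

    setenv("VPX_SIMD_CAPS_MASK", "0x3f", 1); /* keep MMX..SSE4_1, drop AVX+ */
    if (x86_simd_caps() & HAS_AVX2) {
      /* not reached under the mask above */
    }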