Add newly necessary casts in SIMD compat headers (#14733)
https://github.com/llvm/llvm-project/commit/db7efcab7dd9 updated the wasm_*_extract_lane intrinsics to be functions instead of macros, which means their arguments must now be v128_t rather than any other 128-bit vector type under -fno-lax-vector-conversions. Update the SIMD compat headers with more casts to keep them compiling cleanly.
Parent: 54cfc2829e
Commit: ecf79d7cee
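
For illustration, a minimal caller that runs into the same diagnostic looks roughly like the following hedged sketch (not part of the commit; the function name is invented, and it assumes the headers are used with -msimd128 and -fno-lax-vector-conversions):

    #include <wasm_simd128.h>
    #include <xmmintrin.h>

    float first_lane(__m128 v) {
      // Before the LLVM change, wasm_f32x4_extract_lane was a macro and accepted any
      // 128-bit vector type. Now that it is a function taking v128_t, an __m128 (a
      // float vector type) must be cast explicitly under -fno-lax-vector-conversions.
      return wasm_f32x4_extract_lane((v128_t)v, 0);
    }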
@@ -155,17 +155,23 @@ _mm_maskload_ps(const float *__mem_addr, __m128i __mask)
 static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_maskstore_pd(double *__mem_addr, __m128i __mask, __m128d __a)
 {
-  if ((wasm_i64x2_extract_lane(__mask, 0) & 0x8000000000000000ull) != 0) __mem_addr[0] = wasm_f64x2_extract_lane(__a, 0);
-  if ((wasm_i64x2_extract_lane(__mask, 1) & 0x8000000000000000ull) != 0) __mem_addr[1] = wasm_f64x2_extract_lane(__a, 1);
+  if ((wasm_i64x2_extract_lane(__mask, 0) & 0x8000000000000000ull) != 0)
+    __mem_addr[0] = wasm_f64x2_extract_lane((v128_t)__a, 0);
+  if ((wasm_i64x2_extract_lane(__mask, 1) & 0x8000000000000000ull) != 0)
+    __mem_addr[1] = wasm_f64x2_extract_lane((v128_t)__a, 1);
 }
 
 static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_maskstore_ps(float *__mem_addr, __m128i __mask, __m128 __a)
 {
-  if ((wasm_i32x4_extract_lane(__mask, 0) & 0x80000000ull) != 0) __mem_addr[0] = wasm_f32x4_extract_lane(__a, 0);
-  if ((wasm_i32x4_extract_lane(__mask, 1) & 0x80000000ull) != 0) __mem_addr[1] = wasm_f32x4_extract_lane(__a, 1);
-  if ((wasm_i32x4_extract_lane(__mask, 2) & 0x80000000ull) != 0) __mem_addr[2] = wasm_f32x4_extract_lane(__a, 2);
-  if ((wasm_i32x4_extract_lane(__mask, 3) & 0x80000000ull) != 0) __mem_addr[3] = wasm_f32x4_extract_lane(__a, 3);
+  if ((wasm_i32x4_extract_lane(__mask, 0) & 0x80000000ull) != 0)
+    __mem_addr[0] = wasm_f32x4_extract_lane((v128_t)__a, 0);
+  if ((wasm_i32x4_extract_lane(__mask, 1) & 0x80000000ull) != 0)
+    __mem_addr[1] = wasm_f32x4_extract_lane((v128_t)__a, 1);
+  if ((wasm_i32x4_extract_lane(__mask, 2) & 0x80000000ull) != 0)
+    __mem_addr[2] = wasm_f32x4_extract_lane((v128_t)__a, 2);
+  if ((wasm_i32x4_extract_lane(__mask, 3) & 0x80000000ull) != 0)
+    __mem_addr[3] = wasm_f32x4_extract_lane((v128_t)__a, 3);
 }
 
 #define _mm_permute_pd(__a, __imm) __extension__ ({ \
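
For context, a caller of the masked-store emulation above might look like the following hedged sketch (not from the commit; the include and build flags are assumptions, the values invented). Only lanes whose mask element has its sign bit set are written:

    #include <stdint.h>
    #include <immintrin.h>  // assuming the AVX compat header, e.g. built with -mavx -msimd128

    void store_low_lane_only(double *out, __m128d v) {
      // Sign bit set in lane 0 only: lane 0 of v is stored, out[1] is left untouched.
      __m128i mask = _mm_set_epi64x(0, INT64_MIN);
      _mm_maskstore_pd(out, mask, v);
    }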
@@ -181,18 +187,17 @@ static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
 _mm_permutevar_pd(__m128d __a, __m128d __b)
 {
   return (__m128d)wasm_f64x2_make(
-    ((__f64x2)__a)[(wasm_i64x2_extract_lane(__b, 0) >> 1) & 1],
-    ((__f64x2)__a)[(wasm_i64x2_extract_lane(__b, 1) >> 1) & 1]);
+    ((__f64x2)__a)[(wasm_i64x2_extract_lane((v128_t)__b, 0) >> 1) & 1],
+    ((__f64x2)__a)[(wasm_i64x2_extract_lane((v128_t)__b, 1) >> 1) & 1]);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
 _mm_permutevar_ps(__m128 __a, __m128 __b)
 {
-  return (__m128)wasm_f32x4_make(
-    ((__f32x4)__a)[wasm_i32x4_extract_lane(__b, 0) & 3],
-    ((__f32x4)__a)[wasm_i32x4_extract_lane(__b, 1) & 3],
-    ((__f32x4)__a)[wasm_i32x4_extract_lane(__b, 2) & 3],
-    ((__f32x4)__a)[wasm_i32x4_extract_lane(__b, 3) & 3]);
+  return (__m128)wasm_f32x4_make(((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 0) & 3],
+                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 1) & 3],
+                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 2) & 3],
+                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 3) & 3]);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
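
As a usage sketch for the permute helpers above (hypothetical, not part of the commit; it follows the __m128 control-vector signature shown here, where only bits [1:0] of each 32-bit control element are consulted):

    #include <immintrin.h>  // assuming the AVX compat header

    // Broadcast lane 2 of v into all four lanes.
    __m128 broadcast_lane2(__m128 v) {
      __m128 ctrl = _mm_castsi128_ps(_mm_set1_epi32(2));  // lane index 2 in every element
      return _mm_permutevar_ps(v, ctrl);
    }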
@@ -289,73 +289,73 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comieq_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) == wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) == wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comilt_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) < wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) < wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comile_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) <= wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) <= wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comigt_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) > wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) > wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comige_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) >= wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) >= wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_comineq_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) != wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) != wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomieq_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) == wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) == wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomilt_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) < wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) < wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomile_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) <= wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) <= wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomigt_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) > wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) > wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomige_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) >= wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) >= wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__))
 _mm_ucomineq_sd(__m128d __a, __m128d __b)
 {
-  return wasm_f64x2_extract_lane(__a, 0) != wasm_f64x2_extract_lane(__b, 0);
+  return wasm_f64x2_extract_lane((v128_t)__a, 0) != wasm_f64x2_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
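
These helpers are plain scalar comparisons on lane 0, so typical use is straightforward; a hedged sketch (not part of the commit):

    #include <emmintrin.h>  // SSE2 compat header, e.g. built with -msse2 -msimd128

    int doubles_equal(double x, double y) {
      // _mm_comieq_sd compares only the low lane of each vector.
      return _mm_comieq_sd(_mm_set_sd(x), _mm_set_sd(y));
    }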
@@ -262,8 +262,8 @@ _mm_max_epu32(__m128i __a, __m128i __b)
                                     (((__imm8) & 4) ? 6 : 2), \
                                     (((__imm8) & 8) ? 7 : 3)); })
 
-#define _mm_extract_ps(__a, __imm8) __extension__ ({ \
-  wasm_i32x4_extract_lane((__a), (__imm8) & 3); })
+#define _mm_extract_ps(__a, __imm8) \
+  __extension__({ wasm_i32x4_extract_lane((v128_t)(__a), (__imm8)&3); })
 
 #define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __f32x4 __a = (__f32x4)(X); \
                                     (D) = __a[N]; }))
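
Note that _mm_extract_ps returns the raw 32-bit pattern of the selected lane as an int, while _MM_EXTRACT_FLOAT yields the lane as a float; a hedged usage sketch (not part of the commit):

    #include <smmintrin.h>  // SSE4.1 compat header, e.g. built with -msse4.1 -msimd128

    float third_lane(__m128 v) {
      float f;
      _MM_EXTRACT_FLOAT(f, v, 2);       // lane 2 as a float value
      int bits = _mm_extract_ps(v, 2);  // the same lane as its raw bit pattern
      (void)bits;
      return f;
    }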
@@ -515,73 +515,73 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comieq_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) == wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comige_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) >= wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comigt_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) > wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comile_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) <= wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comilt_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) < wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_comineq_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) != wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomieq_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) == wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomige_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) >= wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomigt_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) > wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomile_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) <= wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomilt_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) < wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
 _mm_ucomineq_ss(__m128 __a, __m128 __b)
 {
-  return wasm_f32x4_extract_lane(__a, 0) != wasm_f32x4_extract_lane(__b, 0);
+  return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0);
 }
 
 static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
@@ -11,5 +11,5 @@
 int main() {
   __m128 a = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);
   __m128 b = _mm_rcp_ps(a);
-  return (int)wasm_f32x4_extract_lane(b, 0);
+  return (int)wasm_f32x4_extract_lane((v128_t)b, 0);
 }