Add newly necessary casts in SIMD compat headers (#14733)

https://github.com/llvm/llvm-project/commit/db7efcab7dd9 updated the
wasm_*_extract_lane intrinsics to be functions instead of macros, which means
their arguments must now be v128_t rather than any other 128-bit vector type
under -fno-lax-vector-conversions. Update the SIMD compat headers with more
casts to keep them compiling cleanly.
This commit is contained in:
Thomas Lively 2021-07-22 14:08:24 -07:00 committed by GitHub
Parent 54cfc2829e
Commit ecf79d7cee
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
5 changed files: 45 additions and 40 deletions

View file

@ -155,17 +155,23 @@ _mm_maskload_ps(const float *__mem_addr, __m128i __mask)
/* Store each f64 lane of __a to __mem_addr only where the sign bit of the
 * corresponding i64 mask lane is set.  The (v128_t) casts are required now
 * that wasm_f64x2_extract_lane is a function (not a macro) and rejects other
 * vector types under -fno-lax-vector-conversions.  Stale pre-cast duplicate
 * stores (diff residue) were removed. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_maskstore_pd(double *__mem_addr, __m128i __mask, __m128d __a)
{
  if ((wasm_i64x2_extract_lane(__mask, 0) & 0x8000000000000000ull) != 0)
    __mem_addr[0] = wasm_f64x2_extract_lane((v128_t)__a, 0);
  if ((wasm_i64x2_extract_lane(__mask, 1) & 0x8000000000000000ull) != 0)
    __mem_addr[1] = wasm_f64x2_extract_lane((v128_t)__a, 1);
}
/* Store each f32 lane of __a to __mem_addr only where the sign bit of the
 * corresponding i32 mask lane is set.  (v128_t) casts are required by the
 * function-form wasm_f32x4_extract_lane; stale pre-cast duplicate stores
 * (diff residue) were removed. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_maskstore_ps(float *__mem_addr, __m128i __mask, __m128 __a)
{
  if ((wasm_i32x4_extract_lane(__mask, 0) & 0x80000000ull) != 0)
    __mem_addr[0] = wasm_f32x4_extract_lane((v128_t)__a, 0);
  if ((wasm_i32x4_extract_lane(__mask, 1) & 0x80000000ull) != 0)
    __mem_addr[1] = wasm_f32x4_extract_lane((v128_t)__a, 1);
  if ((wasm_i32x4_extract_lane(__mask, 2) & 0x80000000ull) != 0)
    __mem_addr[2] = wasm_f32x4_extract_lane((v128_t)__a, 2);
  if ((wasm_i32x4_extract_lane(__mask, 3) & 0x80000000ull) != 0)
    __mem_addr[3] = wasm_f32x4_extract_lane((v128_t)__a, 3);
}
#define _mm_permute_pd(__a, __imm) __extension__ ({ \
@ -181,18 +187,17 @@ static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_permutevar_pd(__m128d __a, __m128d __b)
{
  /* Select each result lane of __a using bit 1 of the corresponding i64
   * control lane in __b (SSE-style VPERMILPD semantics).  The diff residue
   * left wasm_f64x2_make with four arguments (old + new lines); only the
   * (v128_t)-cast pair is kept. */
  return (__m128d)wasm_f64x2_make(
      ((__f64x2)__a)[(wasm_i64x2_extract_lane((v128_t)__b, 0) >> 1) & 1],
      ((__f64x2)__a)[(wasm_i64x2_extract_lane((v128_t)__b, 1) >> 1) & 1]);
}
/* Select each result lane of __a using the low two bits of the corresponding
 * i32 control lane in __b (VPERMILPS semantics).  Stale uncast duplicate
 * return (diff residue) removed; casts required by function-form
 * wasm_i32x4_extract_lane. */
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_permutevar_ps(__m128 __a, __m128 __b)
{
  return (__m128)wasm_f32x4_make(((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 0) & 3],
                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 1) & 3],
                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 2) & 3],
                                 ((__f32x4)__a)[wasm_i32x4_extract_lane((v128_t)__b, 3) & 3]);
}
static __inline__ int __attribute__((__always_inline__, __nodebug__))

View file

@ -289,73 +289,73 @@ _mm_cmpnge_sd(__m128d __a, __m128d __b)
/* Ordered lane-0 equality compare; stale uncast duplicate return (diff
 * residue) removed, keeping the (v128_t)-cast version. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comieq_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) == wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 less-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comilt_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) < wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 less-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comile_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) <= wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 greater-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comigt_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) > wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 greater-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comige_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) >= wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Lane-0 not-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comineq_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) != wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 equality compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomieq_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) == wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 less-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomilt_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) < wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 less-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomile_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) <= wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 greater-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomigt_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) > wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 greater-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomige_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) >= wasm_f64x2_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 not-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomineq_sd(__m128d __a, __m128d __b)
{
  return wasm_f64x2_extract_lane((v128_t)__a, 0) != wasm_f64x2_extract_lane((v128_t)__b, 0);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))

View file

@ -262,8 +262,8 @@ _mm_max_epu32(__m128i __a, __m128i __b)
(((__imm8) & 4) ? 6 : 2), \
(((__imm8) & 8) ? 7 : 3)); })
/* Extract the selected 32-bit lane of __a as an int.  The diff residue left
 * both the old and new #define in place (a redefinition); only the
 * (v128_t)-cast version, required by the function-form
 * wasm_i32x4_extract_lane, is kept. */
#define _mm_extract_ps(__a, __imm8) \
  __extension__({ wasm_i32x4_extract_lane((v128_t)(__a), (__imm8)&3); })
/* Store lane N of the f32x4 vector X into the lvalue D (lane access via the
 * __f32x4 vector type needs no cast, so this macro was unaffected by the
 * extract-lane change). */
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __f32x4 __a = (__f32x4)(X); \
(D) = __a[N]; }))

View file

@ -515,73 +515,73 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
/* Ordered lane-0 equality compare; stale uncast duplicate return (diff
 * residue) removed, keeping the (v128_t)-cast version. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comieq_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 greater-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comige_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 greater-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comigt_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 less-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comile_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Ordered lane-0 less-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comilt_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Lane-0 not-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_comineq_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 equality compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomieq_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 greater-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomige_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 greater-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomigt_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 less-or-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomile_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 less-than compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomilt_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0);
}
/* Unordered lane-0 not-equal compare; stale uncast duplicate return removed. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))
_mm_ucomineq_ss(__m128 __a, __m128 __b)
{
  return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0);
}
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW))

View file

@ -11,5 +11,5 @@
/* Smoke test: build a vector, take its reciprocal estimate, and return the
 * truncated low lane.  The (v128_t) cast is required by the function-form
 * wasm_f32x4_extract_lane; the stale uncast duplicate return (diff residue)
 * was removed. */
int main() {
  __m128 a = _mm_set_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_rcp_ps(a);
  return (int)wasm_f32x4_extract_lane((v128_t)b, 0);
}