зеркало из https://github.com/microsoft/clang-1.git
X86 SSE Intrinsics: update header for sqrt_ss, rsqrt_ss and rcp_ss.
These intrinsics pass through the upper FP values from the input. rdar://12558838 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@166743 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Родитель
50d46caf00
Коммит
f0e97dba82
|
@ -95,7 +95,8 @@ _mm_div_ps(__m128 a, __m128 b)
|
|||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_sqrt_ss(__m128 a)
|
||||
{
|
||||
return __builtin_ia32_sqrtss(a);
|
||||
__m128 c = __builtin_ia32_sqrtss(a);
|
||||
return (__m128) { c[0], a[1], a[2], a[3] };
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -107,7 +108,8 @@ _mm_sqrt_ps(__m128 a)
|
|||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rcp_ss(__m128 a)
|
||||
{
|
||||
return __builtin_ia32_rcpss(a);
|
||||
__m128 c = __builtin_ia32_rcpss(a);
|
||||
return (__m128) { c[0], a[1], a[2], a[3] };
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
|
@ -119,7 +121,8 @@ _mm_rcp_ps(__m128 a)
|
|||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
_mm_rsqrt_ss(__m128 a)
|
||||
{
|
||||
return __builtin_ia32_rsqrtss(a);
|
||||
__m128 c = __builtin_ia32_rsqrtss(a);
|
||||
return (__m128) { c[0], a[1], a[2], a[3] };
|
||||
}
|
||||
|
||||
static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
|
||||
|
|
|
@ -1,8 +1,39 @@
|
|||
// RUN: %clang_cc1 -ffreestanding -triple i386-apple-darwin9 -target-cpu pentium4 -target-feature +sse4.1 -g -emit-llvm %s -o - | FileCheck %s
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
__m128 test_rsqrt_ss(__m128 x) {
|
||||
// CHECK: define {{.*}} @test_rsqrt_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
return _mm_rsqrt_ss(x);
|
||||
}
|
||||
|
||||
__m128 test_rcp_ss(__m128 x) {
|
||||
// CHECK: define {{.*}} @test_rcp_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.rcp.ss
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
return _mm_rcp_ss(x);
|
||||
}
|
||||
|
||||
__m128 test_sqrt_ss(__m128 x) {
|
||||
// CHECK: define {{.*}} @test_sqrt_ss
|
||||
// CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 0
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 1
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 2
|
||||
// CHECK: extractelement <4 x float> {{.*}}, i32 3
|
||||
return _mm_sqrt_ss(x);
|
||||
}
|
||||
|
||||
__m128 test_loadl_pi(__m128 x, void* y) {
|
||||
// CHECK: define {{.*}} @test_loadl_pi
|
||||
// CHECK: load <2 x float>* {{.*}}, align 1{{$}}
|
||||
|
|
Загрузка…
Ссылка в новой задаче