From 01e994d74e4e3937ee1a3efdc048320a1e51f818 Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Tue, 7 Nov 2017 18:17:33 -0800 Subject: [PATCH] Enable SSE2 code without -msse Bug: libyuv:754 Test: CC=clang CXX=clang++ CFLAGS="-m32" CXXFLAGS="-m32 -mno-sse -O2" make -f linux.mk Change-Id: I74bf8d032013694e65ea7637bc38d3253db53ff2 Reviewed-on: https://chromium-review.googlesource.com/758043 Reviewed-by: Frank Barchard --- include/libyuv/compare_row.h | 2 +- include/libyuv/planar_functions.h | 2 +- include/libyuv/rotate_row.h | 2 +- include/libyuv/row.h | 2 +- include/libyuv/scale_row.h | 2 +- linux.mk | 2 +- source/row_gcc.cc | 6 +++--- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h index 9b7013a2..a34f9ad4 100644 --- a/include/libyuv/compare_row.h +++ b/include/libyuv/compare_row.h @@ -14,17 +14,17 @@ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h index d97965cb..c91501a9 100644 --- a/include/libyuv/planar_functions.h +++ b/include/libyuv/planar_functions.h @@ -715,17 +715,17 @@ int I420Interpolate(const uint8* src0_y, int dst_stride_u, uint8* dst_v, int dst_stride_v, int width, int height, int interpolation); #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h index 60ac55ef..973fc152 100644 --- a/include/libyuv/rotate_row.h +++ b/include/libyuv/rotate_row.h @@ -14,17 +14,17 @@ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/include/libyuv/row.h b/include/libyuv/row.h index 5ccf94d9..6f0cd500 100644 --- a/include/libyuv/row.h +++ b/include/libyuv/row.h @@ -26,17 +26,17 @@ extern "C" { uint8* var##_mem = (uint8*)(malloc((size) + 63)); /* NOLINT */ \ uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ #define free_aligned_buffer_64(var) \ free(var##_mem); \ var = 0 #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h index ebafac4f..b97f3e78 100644 --- a/include/libyuv/scale_row.h +++ b/include/libyuv/scale_row.h @@ -15,17 +15,17 @@ #include "libyuv/scale.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ - (defined(__i386__) && !defined(__SSE2__)) + (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif diff --git a/linux.mk b/linux.mk index 1dd527c7..7e9aa5e4 100644 --- a/linux.mk +++ b/linux.mk @@ -75,9 +75,9 @@ psnr: util/psnr.cc # A C test utility that uses libyuv conversion from C. # gcc 4.4 and older require -fno-exceptions to avoid link error on __gxx_personality_v0 # CC=gcc-4.4 CXXFLAGS=-fno-exceptions CXX=g++-4.4 make -f linux.mk cpuid: util/cpuid.c libyuv.a $(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a clean: - /bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr + /bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr diff --git a/source/row_gcc.cc b/source/row_gcc.cc index 3af32045..93399f0b 100644 --- a/source/row_gcc.cc +++ b/source/row_gcc.cc @@ -5474,17 +5474,17 @@ void HalfFloatRow_SSE2(const uint16* src, uint16* dst, float scale, int width) { "psrld $0xd,%%xmm3 \n" "packssdw %%xmm3,%%xmm2 \n" MEMOPMEM(movdqu,xmm2,-0x10,0,1,1) "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 - : "x"(scale * kScaleBias) // %3 + : "mx"(scale * kScaleBias) // %3 : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_AVX2 void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { @@ -5510,17 +5510,17 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) { MEMOPMEM(vmovdqu,ymm2,-0x20,0,1,1) "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 - : "x"(scale * kScaleBias) // %3 + : "mx"(scale * kScaleBias) // %3 : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_F16C void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { @@ -5543,17 +5543,17 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) { MEMOPMEM(vmovdqu,xmm3,0x10,0,1,1) "add $0x20,%0 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 - : "x"(scale) // %3 + : "mx"(scale) // %3 : "memory", "cc", "xmm2", "xmm3", "xmm4" ); } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_HALFFLOATROW_F16C void HalfFloat1Row_F16C(const uint16* src, uint16* dst, float, int width) {