Bug 572034 - Use Linux x86_64 Y'CbCr routines on x86_64 OS X. Convert Linux routines to local asm labels. r=chris.double

--HG--
extra : rebase_source : faa6af7f7529d05ec470796a369e7ea37daf3c69
This commit is contained in:
Matthew Gregan 2010-06-17 18:16:28 +12:00
Родитель ac1cf8c0a4
Коммит f0118e53c6
5 изменённых файлов: 80 добавлений и 7 удалений

Просмотреть файл

@ -30,8 +30,13 @@ CPPSRCS += yuv_row_linux.cpp \
$(NULL)
else
ifeq ($(OS_ARCH),Darwin)
ifeq ($(OS_TEST),x86_64)
CPPSRCS += yuv_row_linux.cpp \
$(NULL)
else
CPPSRCS += yuv_row_mac.cpp \
$(NULL)
endif
else
CPPSRCS += yuv_row_other.cpp \
$(NULL)

Просмотреть файл

@ -19,3 +19,4 @@ export.patch: Fix export for building on comm-central
win64_mac64.patch: Fall back to the C implementation on 64-bit Windows and Mac OS X
yv24.patch: Adds YCbCr 4:4:4 support
row_c_fix.patch: Fix broken C fallback code (See bug 561385).
bug572034_mac_64bit.patch: Fix x86_64 Linux code so it works on OS X.

Просмотреть файл

@ -0,0 +1,66 @@
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
// AMD64 ABI uses register paremters.
void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
const uint8* u_buf, // rsi
const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx
int width) { // r8
asm(
- "jmp convertend\n"
-"convertloop:"
+ "jmp Lconvertend\n"
+"Lconvertloop:"
"movzb (%1),%%r10\n"
"add $0x1,%1\n"
"movzb (%2),%%r11\n"
"add $0x1,%2\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%0),%%r10\n"
"movq 4096(%5,%%r11,8),%%xmm1\n"
"movzb 0x1(%0),%%r11\n"
@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
"movq (%5,%%r11,8),%%xmm3\n"
"paddsw %%xmm0,%%xmm2\n"
"paddsw %%xmm0,%%xmm3\n"
"shufps $0x44,%%xmm3,%%xmm2\n"
"psraw $0x6,%%xmm2\n"
"packuswb %%xmm2,%%xmm2\n"
"movq %%xmm2,0x0(%3)\n"
"add $0x8,%3\n"
-"convertend:"
+"Lconvertend:"
"sub $0x2,%4\n"
- "jns convertloop\n"
+ "jns Lconvertloop\n"
-"convertnext:"
+"Lconvertnext:"
"add $0x1,%4\n"
- "js convertdone\n"
+ "js Lconvertdone\n"
"movzb (%1),%%r10\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%2),%%r10\n"
"movq 4096(%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm1,%%xmm0\n"
"movzb (%0),%%r10\n"
"movq (%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm0,%%xmm1\n"
"psraw $0x6,%%xmm1\n"
"packuswb %%xmm1,%%xmm1\n"
"movd %%xmm1,0x0(%3)\n"
-"convertdone:"
+"Lconvertdone:"
:
: "r"(y_buf), // %0
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"

Просмотреть файл

@ -13,3 +13,4 @@ patch -p3 <export.patch
patch -p3 <win64_mac64.patch
patch -p3 <yv24.patch
patch -p3 <row_c_fix.patch
patch -p3 <bug572034_mac_64bit.patch

Просмотреть файл

@ -255,8 +255,8 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
uint8* rgb_buf, // rcx
int width) { // r8
asm(
-"jmp convertend\n"
-"convertloop:"
+"jmp Lconvertend\n"
+"Lconvertloop:"
"movzb (%1),%%r10\n"
"add $0x1,%1\n"
"movzb (%2),%%r11\n"
@ -276,13 +276,13 @ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"packuswb %%xmm2,%%xmm2\n"
"movq %%xmm2,0x0(%3)\n"
"add $0x8,%3\n"
-"convertend:"
+"Lconvertend:"
"sub $0x2,%4\n"
-"jns convertloop\n"
+"jns Lconvertloop\n"
-"convertnext:"
+"Lconvertnext:"
"add $0x1,%4\n"
-"js convertdone\n"
+"js Lconvertdone\n"
"movzb (%1),%%r10\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
@ -295,7 +295,7 @ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"psraw $0x6,%%xmm1\n"
"packuswb %%xmm1,%%xmm1\n"
"movd %%xmm1,0x0(%3)\n"
-"convertdone:"
+"Lconvertdone:"
:
: "r"(y_buf), // %0
"r"(u_buf), // %1