Bug 583138 - Update to latest Chromium YCbCr to RGB Conversion code - r=roc a=blocking2.0

This commit is contained in:
Chris Double 2010-11-11 12:54:27 +13:00
Родитель f7174220e4
Коммит 87184c8f80
25 изменённых файлов: 3447 добавлений и 4566 удалений

Просмотреть файл

@ -174,7 +174,8 @@ BasicPlanarYCbCrImage::SetData(const Data& aData)
aData.mCbCrStride,
size.width*4,
type,
gfx::ROTATE_0);
gfx::ROTATE_0,
gfx::FILTER_BILINEAR);
}
else {
gfx::ConvertYCbCrToRGB32(aData.mYChannel,

Просмотреть файл

@ -19,6 +19,7 @@ EXPORTS = chromium_types.h \
CPPSRCS = yuv_convert.cpp \
yuv_row_c.cpp \
yuv_row_table.cpp \
$(NULL)
ifdef _MSC_VER
@ -26,25 +27,20 @@ CPPSRCS += yuv_row_win.cpp \
$(NULL)
else
ifeq ($(OS_ARCH),Linux)
CPPSRCS += yuv_row_linux.cpp \
CPPSRCS += yuv_row_posix.cpp \
$(NULL)
else
ifeq ($(OS_ARCH),SunOS)
CPPSRCS += yuv_row_linux.cpp \
CPPSRCS += yuv_row_posix.cpp \
$(NULL)
else
ifeq ($(OS_ARCH),Darwin)
ifeq ($(OS_TEST),x86_64)
CPPSRCS += yuv_row_linux.cpp \
CPPSRCS += yuv_row_posix.cpp \
$(NULL)
else
CPPSRCS += yuv_row_mac.cpp \
$(NULL)
endif
else
CPPSRCS += yuv_row_other.cpp \
$(NULL)
endif # mac
endif # Darwin
endif # SunOS
endif # linux
endif # windows

Просмотреть файл

@ -2,23 +2,14 @@ This color conversion code is from the Chromium open source project available he
http://code.google.com/chromium/
The code comes from svn revision 40876.
The code comes from svn revision 638400 on 2010-10-26.
The code was copied from a Chromium svn checkout using the 'update.sh' script which then applies patches for our build and to add dynamic CPU detection.
convert.patch: Change Chromium code to build using Mozilla build system.
Add runtime CPU detection for MMX
Move default C implementation to work on all platforms.
picture_region.patch: Change Chromium code to allow a picture region.
The YUV conversion will convert within this
picture region only.
remove_scale.patch: Removes Chromium scaling code.
export.patch: Fix export for building on comm-central
win64_mac64.patch: Fallback to C implementation on Windows and Mac OS X 64 bit
yv24.patch: Adds YCbCr 4:4:4 support
row_c_fix.patch: Fix broken C fallback code (See bug 561385).
bug572034_mac_64bit.patch: Fix x86_64 linux code so it works on OS X.
solaris.patch: Adds Solaris support, fallback to C implementation on SPARC
add_scale.patch: re-adds Chromium scaling code
Change Chromium code to allow a picture region.
The YUV conversion will convert within this
picture region only.
Add YCbCr 4:4:4 support

Просмотреть файл

@ -1,953 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index 40ce10f..7d46629 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -82,10 +82,139 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
#ifdef ARCH_CPU_X86_FAMILY
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_mmx)
EMMS();
#endif
}
+// Scale a frame of YUV to 32 bit ARGB.
+NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int height,
+ int scaled_width,
+ int scaled_height,
+ int y_pitch,
+ int uv_pitch,
+ int rgb_pitch,
+ YUVType yuv_type,
+ Rotate view_rotate) {
+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+ bool has_mmx = supports_mmx();
+ // Diagram showing origin and direction of source sampling.
+ // ->0 4<-
+ // 7 3
+ //
+ // 6 5
+ // ->1 2<-
+ // Rotations that start at right side of image.
+ if ((view_rotate == ROTATE_180) ||
+ (view_rotate == ROTATE_270) ||
+ (view_rotate == MIRROR_ROTATE_0) ||
+ (view_rotate == MIRROR_ROTATE_90)) {
+ y_buf += width - 1;
+ u_buf += width / 2 - 1;
+ v_buf += width / 2 - 1;
+ width = -width;
+ }
+ // Rotations that start at bottom of image.
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_180) ||
+ (view_rotate == MIRROR_ROTATE_90) ||
+ (view_rotate == MIRROR_ROTATE_180)) {
+ y_buf += (height - 1) * y_pitch;
+ u_buf += ((height >> y_shift) - 1) * uv_pitch;
+ v_buf += ((height >> y_shift) - 1) * uv_pitch;
+ height = -height;
+ }
+
+ // Handle zero sized destination.
+ if (scaled_width == 0 || scaled_height == 0)
+ return;
+ int scaled_dx = width * 16 / scaled_width;
+ int scaled_dy = height * 16 / scaled_height;
+
+ int scaled_dx_uv = scaled_dx;
+
+ if ((view_rotate == ROTATE_90) ||
+ (view_rotate == ROTATE_270)) {
+ int tmp = scaled_height;
+ scaled_height = scaled_width;
+ scaled_width = tmp;
+ tmp = height;
+ height = width;
+ width = tmp;
+ int original_dx = scaled_dx;
+ int original_dy = scaled_dy;
+ scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
+ scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
+ scaled_dy = original_dx;
+ if (view_rotate == ROTATE_90) {
+ y_pitch = -1;
+ uv_pitch = -1;
+ height = -height;
+ } else {
+ y_pitch = 1;
+ uv_pitch = 1;
+ }
+ }
+
+ for (int y = 0; y < scaled_height; ++y) {
+ uint8* dest_pixel = rgb_buf + y * rgb_pitch;
+ int scaled_y = (y * height / scaled_height);
+ const uint8* y_ptr = y_buf + scaled_y * y_pitch;
+ const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
+ const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
+
+#if defined(_MSC_VER) && defined(_M_IX86)
+ if (scaled_width == (width * 2)) {
+ DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
+ if (scaled_dx_uv == scaled_dx) { // Not rotated.
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+ } else { // Simple scale down. ie half
+ ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx >> 4);
+ }
+ } else {
+ RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width,
+ scaled_dx >> 4, scaled_dx_uv >> 4);
+ }
+#else
+ if (scaled_dx == 16) { // Not scaled
+ if (has_mmx)
+ FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width);
+ else
+ FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, x_shift);
+#endif
+ } else {
+ if (has_mmx)
+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx);
+ else
+ ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
+ dest_pixel, scaled_width, scaled_dx, x_shift);
+
+ }
+ }
+
+ // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_mmx)
+ EMMS();
+}
+
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index c0b678d..a7e5b68 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -15,27 +15,56 @@ namespace gfx {
// Type of YUV surface.
// The value of these enums matter as they are used to shift vertical indices.
enum YUVType {
YV12 = 0, // YV12 is half width and half height chroma channels.
YV16 = 1, // YV16 is half width and full height chroma channels.
YV24 = 2 // YV24 is full width and full height chroma channels.
};
+// Mirror means flip the image horizontally, as in looking in a mirror.
+// Rotate happens after mirroring.
+enum Rotate {
+ ROTATE_0, // Rotation off.
+ ROTATE_90, // Rotate clockwise.
+ ROTATE_180, // Rotate upside down.
+ ROTATE_270, // Rotate counter clockwise.
+ MIRROR_ROTATE_0, // Mirror horizontally.
+ MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
+ MIRROR_ROTATE_180, // Mirror vertically.
+ MIRROR_ROTATE_270 // Transpose.
+};
+
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
int pic_x,
int pic_y,
int pic_width,
int pic_height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type);
+// Scale a frame of YUV to 32 bit ARGB.
+// Supports rotation and mirroring.
+NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* yplane,
+ const uint8* uplane,
+ const uint8* vplane,
+ uint8* rgbframe,
+ int frame_width,
+ int frame_height,
+ int scaled_width,
+ int scaled_height,
+ int ystride,
+ int uvstride,
+ int rgbstride,
+ YUVType yuv_type,
+ Rotate view_rotate);
+
} // namespace gfx
} // namespace mozilla
#endif // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index 8519008..96969ec 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -24,16 +24,64 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
unsigned int x_shift);
+// Can do 1x, half size or any scale down by an integer amount.
+// Step can be negative (mirroring, rotate 180).
+// This is the third fastest of the scalers.
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step);
+
+// Rotate is like Convert, but applies different step to Y versus U and V.
+// This allows rotation by 90 or 270, by stepping by stride.
+// This is the fourth fastest of the scalers.
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep);
+
+// Doubler does 4 pixels at a time. Each pixel is replicated.
+// This is the fastest of the scalers.
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width);
+
+// Handles arbitrary scaling up or down.
+// Mirroring is supported, but not 90 or 270 degree rotation.
+// Chroma is undersampled every 2 pixels for performance.
+// This is the slowest of the scalers.
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift);
+
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
#if defined(ARCH_CPU_X86)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#else
#define EMMS() asm("emms")
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index b5c0018..49eced2 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -172,10 +172,31 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
v = v_buf[x + 1];
}
YuvPixel(y1, u, v, rgb_buf + 4);
}
rgb_buf += 8; // Advance 2 pixels.
}
}
+// 28.4 fixed point is used. A shift by 4 isolates the integer.
+// A shift by 5 is used to further subsample the chrominance channels.
+// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
+// for 1/4 pixel accurate interpolation.
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ unsigned int x_shift) {
+ int scaled_x = 0;
+ for (int x = 0; x < width; ++x) {
+ uint8 u = u_buf[scaled_x >> (4 + x_shift)];
+ uint8 v = v_buf[scaled_x >> (4 + x_shift)];
+ uint8 y0 = y_buf[scaled_x >> 4];
+ YuvPixel(y0, u, v, rgb_buf);
+ rgb_buf += 4;
+ scaled_x += scaled_dx;
+ }
+}
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index 9f7625c..bff02b3 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -365,16 +373,86 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
+
+void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
+ const uint8* u_buf, // rsi
+ const uint8* v_buf, // rdx
+ uint8* rgb_buf, // rcx
+ int width, // r8
+ int scaled_dx) { // r9
+ asm(
+ "xor %%r11,%%r11\n"
+ "sub $0x2,%4\n"
+ "js scalenext\n"
+
+"scaleloop:"
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "lea (%%r11,%6),%%r10\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "lea (%%r10,%6),%%r11\n"
+ "sar $0x4,%%r10\n"
+ "movzb (%0,%%r10,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm2\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "paddsw %%xmm0,%%xmm2\n"
+ "shufps $0x44,%%xmm2,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movq %%xmm1,0x0(%3)\n"
+ "add $0x8,%3\n"
+ "sub $0x2,%4\n"
+ "jns scaleloop\n"
+
+"scalenext:"
+ "add $0x1,%4\n"
+ "js scaledone\n"
+
+ "mov %%r11,%%r10\n"
+ "sar $0x5,%%r10\n"
+ "movzb (%1,%%r10,1),%%rax\n"
+ "movq 2048(%5,%%rax,8),%%xmm0\n"
+ "movzb (%2,%%r10,1),%%rax\n"
+ "movq 4096(%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm1,%%xmm0\n"
+ "sar $0x4,%%r11\n"
+ "movzb (%0,%%r11,1),%%rax\n"
+ "movq (%5,%%rax,8),%%xmm1\n"
+ "paddsw %%xmm0,%%xmm1\n"
+ "psraw $0x6,%%xmm1\n"
+ "packuswb %%xmm1,%%xmm1\n"
+ "movd %%xmm1,0x0(%3)\n"
+
+"scaledone:"
+ :
+ : "r"(y_buf), // %0
+ "r"(u_buf), // %1
+ "r"(v_buf), // %2
+ "r"(rgb_buf), // %3
+ "r"(width), // %4
+ "r" (kCoefficientsRgbY), // %5
+ "r"(static_cast<long>(scaled_dx)) // %6
+ : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
+);
+}
+
#endif // __SUNPRO_CC
#else // ARCH_CPU_X86_64
#ifdef __SUNPRO_CC
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
@@ -493,13 +571,87 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
".previous\n"
);
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx);
+
+ asm(
+ ".global ScaleYUVToRGB32Row\n"
+"ScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x34(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp scaleend\n"
+
+"scaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"scaleend:"
+ "sub $0x2,%ecx\n"
+ "jns scaleloop\n"
+
+ "and $0x1,%ecx\n"
+ "je scaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"scaledone:"
+ "popa\n"
+ "ret\n"
+);
+
#endif // __SUNPRO_CC
#endif // ARCH_CPU_X86_64
#endif // !ARCH_CPU_X86_FAMILY
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index a1d0058..5acf825 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -16,16 +16,24 @@ extern "C" {
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
@@ -313,11 +321,96 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
+extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx,
+ int16 *kCoefficientsRgbY);
+
+ __asm__(
+"_MacScaleYUVToRGB32Row:\n"
+ "pusha\n"
+ "mov 0x24(%esp),%edx\n"
+ "mov 0x28(%esp),%edi\n"
+ "mov 0x2c(%esp),%esi\n"
+ "mov 0x30(%esp),%ebp\n"
+ "mov 0x3c(%esp),%ecx\n"
+ "xor %ebx,%ebx\n"
+ "jmp Lscaleend\n"
+
+"Lscaleloop:"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "mov %ebx,%eax\n"
+ "add 0x38(%esp),%ebx\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm2\n"
+ "paddsw %mm0,%mm1\n"
+ "paddsw %mm0,%mm2\n"
+ "psraw $0x6,%mm1\n"
+ "psraw $0x6,%mm2\n"
+ "packuswb %mm2,%mm1\n"
+ "movntq %mm1,0x0(%ebp)\n"
+ "add $0x8,%ebp\n"
+"Lscaleend:"
+ "sub $0x2,0x34(%esp)\n"
+ "jns Lscaleloop\n"
+
+ "and $0x1,0x34(%esp)\n"
+ "je Lscaledone\n"
+
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%edi,%eax,1),%eax\n"
+ "movq 2048(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x5,%eax\n"
+ "movzbl (%esi,%eax,1),%eax\n"
+ "paddsw 4096(%ecx,%eax,8),%mm0\n"
+ "mov %ebx,%eax\n"
+ "sar $0x4,%eax\n"
+ "movzbl (%edx,%eax,1),%eax\n"
+ "movq 0(%ecx,%eax,8),%mm1\n"
+ "paddsw %mm0,%mm1\n"
+ "psraw $0x6,%mm1\n"
+ "packuswb %mm1,%mm1\n"
+ "movd %mm1,0x0(%ebp)\n"
+
+"Lscaledone:"
+ "popa\n"
+ "ret\n"
+);
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+
+ MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
+ &kCoefficientsRgbY[0][0]);
+}
+
#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index 699ac77..a1700fc 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -11,17 +11,26 @@ extern "C" {
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
-
+
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int scaled_dx) {
+ ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
+}
+
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
@@ -307,11 +316,280 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
movd [ebp], mm1
convertdone :
popad
ret
}
}
+__declspec(naked)
+void ConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int step) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ mov ebx, [esp + 32 + 24] // step
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void RotateConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int ystep,
+ int uvstep) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ mov ebx, [esp + 32 + 28] // uvstep
+ add edi, ebx
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ add esi, ebx
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ mov ebx, [esp + 32 + 24] // ystep
+ add edx, ebx
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ movzx eax, byte ptr [edx]
+ add edx, ebx
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ wend :
+ sub ecx, 2
+ jns wloop
+
+ and ecx, 1 // odd number of pixels?
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+ wdone :
+
+ popad
+ ret
+ }
+}
+
+__declspec(naked)
+void DoubleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ jmp wend
+
+ wloop :
+ movzx eax, byte ptr [edi]
+ add edi, 1
+ movzx ebx, byte ptr [esi]
+ add esi, 1
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [edx]
+ paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ punpckldq mm1, mm1
+ movntq [ebp], mm1
+
+ movzx ebx, byte ptr [edx + 1]
+ add edx, 2
+ paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
+ psraw mm0, 6
+ packuswb mm0, mm0
+ punpckldq mm0, mm0
+ movntq [ebp+8], mm0
+ add ebp, 16
+ wend :
+ sub ecx, 4
+ jns wloop
+
+ add ecx, 4
+ jz wdone
+
+ movzx eax, byte ptr [edi]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ movzx eax, byte ptr [esi]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ movzx eax, byte ptr [edx]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ jmp wend1
+
+ wloop1 :
+ movd [ebp], mm1
+ add ebp, 4
+ wend1 :
+ sub ecx, 1
+ jns wloop1
+ wdone :
+ popad
+ ret
+ }
+}
+
+// This version does general purpose scaling by any amount, up or down.
+// The only thing it cannot do is rotation by 90 or 270.
+// For performance the chroma is undersampled, reducing cost of a 3x
+// 1080p scale from 8.4 ms to 5.4 ms.
+__declspec(naked)
+void ScaleYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width,
+ int dx) {
+ __asm {
+ pushad
+ mov edx, [esp + 32 + 4] // Y
+ mov edi, [esp + 32 + 8] // U
+ mov esi, [esp + 32 + 12] // V
+ mov ebp, [esp + 32 + 16] // rgb
+ mov ecx, [esp + 32 + 20] // width
+ xor ebx, ebx // x
+ jmp scaleend
+
+ scaleloop :
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ mov eax, ebx
+ add ebx, [esp + 32 + 24] // x += dx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm2, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ paddsw mm2, mm0
+ psraw mm1, 6
+ psraw mm2, 6
+ packuswb mm1, mm2
+ movntq [ebp], mm1
+ add ebp, 8
+ scaleend :
+ sub ecx, 2
+ jns scaleloop
+
+ and ecx, 1 // odd number of pixels?
+ jz scaledone
+
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [edi + eax]
+ movq mm0, [kCoefficientsRgbU + 8 * eax]
+ mov eax, ebx
+ sar eax, 5
+ movzx eax, byte ptr [esi + eax]
+ paddsw mm0, [kCoefficientsRgbV + 8 * eax]
+ mov eax, ebx
+ sar eax, 4
+ movzx eax, byte ptr [edx + eax]
+ movq mm1, [kCoefficientsRgbY + 8 * eax]
+ paddsw mm1, mm0
+ psraw mm1, 6
+ packuswb mm1, mm1
+ movd [ebp], mm1
+
+ scaledone :
+ popad
+ ret
+ }
+}
+
#endif // ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -1,144 +0,0 @@
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -250,18 +250,18 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768]
// AMD64 ABI uses register paremters.
void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
const uint8* u_buf, // rsi
const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx
int width) { // r8
asm(
- "jmp convertend\n"
-"convertloop:"
+ "jmp 1f\n"
+"0:"
"movzb (%1),%%r10\n"
"add $0x1,%1\n"
"movzb (%2),%%r11\n"
"add $0x1,%2\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%0),%%r10\n"
"movq 4096(%5,%%r11,8),%%xmm1\n"
"movzb 0x1(%0),%%r11\n"
@@ -271,36 +271,36 @@ void FastConvertYUVToRGB32Row(const uint
"movq (%5,%%r11,8),%%xmm3\n"
"paddsw %%xmm0,%%xmm2\n"
"paddsw %%xmm0,%%xmm3\n"
"shufps $0x44,%%xmm3,%%xmm2\n"
"psraw $0x6,%%xmm2\n"
"packuswb %%xmm2,%%xmm2\n"
"movq %%xmm2,0x0(%3)\n"
"add $0x8,%3\n"
-"convertend:"
+"1:"
"sub $0x2,%4\n"
- "jns convertloop\n"
+ "jns 0b\n"
-"convertnext:"
+"2:"
"add $0x1,%4\n"
- "js convertdone\n"
+ "js 3f\n"
"movzb (%1),%%r10\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzb (%2),%%r10\n"
"movq 4096(%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm1,%%xmm0\n"
"movzb (%0),%%r10\n"
"movq (%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm0,%%xmm1\n"
"psraw $0x6,%%xmm1\n"
"packuswb %%xmm1,%%xmm1\n"
"movd %%xmm1,0x0(%3)\n"
-"convertdone:"
+"3:"
:
: "r"(y_buf), // %0
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
@@ -309,28 +309,35 @@ void FastConvertYUVToRGB32Row(const uint
#else
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
+
+// It's necessary to specify the correct section for the following code,
+// otherwise it will be placed in whatever the current section is as this unit
+// is compiled. Because GCC remembers the last section it emitted, we must
+// also revert to the previous section state at the end of the asm block.
asm(
+ ".section .text\n"
".global FastConvertYUVToRGB32Row\n"
+ ".type FastConvertYUVToRGB32Row, @function\n"
"FastConvertYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
- "jmp convertend\n"
+ "jmp 1f\n"
-"convertloop:"
+"0:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
@@ -339,34 +346,35 @@ void FastConvertYUVToRGB32Row(const uint
"movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
-"convertend:"
+"1:"
"sub $0x2,%ecx\n"
- "jns convertloop\n"
+ "jns 0b\n"
"and $0x1,%ecx\n"
- "je convertdone\n"
+ "je 2f\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
-"convertdone:"
+"2:"
"popa\n"
"ret\n"
+ ".previous\n"
);
#endif
#endif // ARCH_CPU_ARM_FAMILY
} // extern "C"

Просмотреть файл

@ -1,63 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -36,19 +36,21 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
int pic_width,
int pic_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type) {
unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
- // There is no optimized YV24 MMX routine so we check for this and
+ // Test for SSE because the optimized code uses movntq, which is not part of MMX.
+ bool has_sse = supports_mmx() && supports_sse();
+ // There is no optimized YV24 SSE routine so we check for this and
// fall back to the C code.
- bool has_mmx = supports_mmx() && yuv_type != YV24;
+ has_sse &= yuv_type != YV24;
bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
int x_width = odd_pic_x ? pic_width - 1 : pic_width;
for (int y = pic_y; y < pic_height + pic_y; ++y) {
uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
@@ -60,32 +62,32 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const
u_ptr++,
v_ptr++,
rgb_row,
1,
x_shift);
rgb_row += 4;
}
- if (has_mmx)
+ if (has_sse)
FastConvertYUVToRGB32Row(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width);
else
FastConvertYUVToRGB32Row_C(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width,
x_shift);
}
#ifdef ARCH_CPU_X86_FAMILY
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
- if (has_mmx)
+ // SSE used for FastConvertYUVToRGB32Row requires emms instruction.
+ if (has_sse)
EMMS();
#endif
}
} // namespace gfx
} // namespace mozilla

Просмотреть файл

@ -1,26 +0,0 @@
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index ce5ee89..455dd7b 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -18,7 +18,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
#else
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index 3515ada..351466c 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -15,7 +15,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
#else

Просмотреть файл

@ -54,13 +54,14 @@ typedef PRInt16 int16;
#define ARCH_CPU_64_BITS 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#define ARCH_CPU_X86_FAMILY 1
#define ARCH_CPU_X86_32 1
#define ARCH_CPU_X86 1
#define ARCH_CPU_32_BITS 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
#define ARCH_CPU_ARMEL 1
#define ARCH_CPU_32_BITS 1
#elif defined(__ppc__)
#elif defined(__ppc__) || defined(__powerpc) || defined(__PPC__)
#define ARCH_CPU_PPC_FAMILY 1
#define ARCH_CPU_PPC 1
#define ARCH_CPU_32_BITS 1

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -1,43 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index 6735b77..e624168 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -6,6 +6,7 @@
#define MEDIA_BASE_YUV_CONVERT_H_
#include "chromium_types.h"
+#include "gfxCore.h"
namespace mozilla {
@@ -20,18 +21,18 @@ enum YUVType {
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
-void ConvertYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int pic_x,
- int pic_y,
- int pic_width,
- int pic_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type);
+NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
+ const uint8* uplane,
+ const uint8* vplane,
+ uint8* rgbframe,
+ int pic_x,
+ int pic_y,
+ int pic_width,
+ int pic_height,
+ int ystride,
+ int uvstride,
+ int rgbstride,
+ YUVType yuv_type);
} // namespace gfx
} // namespace mozilla

Просмотреть файл

@ -1,99 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index c291d5c..ff7267e 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -25,42 +25,58 @@ namespace mozilla {
namespace gfx {
// Convert a frame of YUV to 32 bit ARGB.
void ConvertYCbCrToRGB32(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width,
- int height,
+ int pic_x,
+ int pic_y,
+ int pic_width,
+ int pic_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type) {
unsigned int y_shift = yuv_type;
bool has_mmx = supports_mmx();
- for (int y = 0; y < height; ++y) {
- uint8* rgb_row = rgb_buf + y * rgb_pitch;
- const uint8* y_ptr = y_buf + y * y_pitch;
- const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
- const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
+ bool odd_pic_x = pic_x % 2 != 0;
+ int x_width = odd_pic_x ? pic_width - 1 : pic_width;
+
+ for (int y = pic_y; y < pic_height + pic_y; ++y) {
+ uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
+ const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
+ const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
+ const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
+
+ if (odd_pic_x) {
+ // Handle the single odd pixel manually and use the
+ // fast routines for the remaining.
+ FastConvertYUVToRGB32Row_C(y_ptr++,
+ u_ptr++,
+ v_ptr++,
+ rgb_row,
+ 1);
+ rgb_row += 4;
+ }
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr,
u_ptr,
v_ptr,
rgb_row,
- width);
+ x_width);
else
FastConvertYUVToRGB32Row_C(y_ptr,
u_ptr,
v_ptr,
rgb_row,
- width);
+ x_width);
}
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_mmx)
EMMS();
}
// Scale a frame of YUV to 32 bit ARGB.
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index 9d148a6..77ca8e6 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -32,18 +32,20 @@ enum Rotate {
};
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
void ConvertYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
- int frame_width,
- int frame_height,
+ int pic_x,
+ int pic_y,
+ int pic_width,
+ int pic_height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type);
// Scale a frame of YUV to 32 bit ARGB.
// Supports rotation and mirroring.
void ScaleYCbCrToRGB32(const uint8* yplane,

Просмотреть файл

@ -1,839 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index eec578d..de91f79 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -81,133 +81,5 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
EMMS();
}
-// Scale a frame of YUV to 32 bit ARGB.
-void ScaleYCbCrToRGB32(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int height,
- int scaled_width,
- int scaled_height,
- int y_pitch,
- int uv_pitch,
- int rgb_pitch,
- YUVType yuv_type,
- Rotate view_rotate) {
- unsigned int y_shift = yuv_type;
- bool has_mmx = supports_mmx();
- // Diagram showing origin and direction of source sampling.
- // ->0 4<-
- // 7 3
- //
- // 6 5
- // ->1 2<-
- // Rotations that start at right side of image.
- if ((view_rotate == ROTATE_180) ||
- (view_rotate == ROTATE_270) ||
- (view_rotate == MIRROR_ROTATE_0) ||
- (view_rotate == MIRROR_ROTATE_90)) {
- y_buf += width - 1;
- u_buf += width / 2 - 1;
- v_buf += width / 2 - 1;
- width = -width;
- }
- // Rotations that start at bottom of image.
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_180) ||
- (view_rotate == MIRROR_ROTATE_90) ||
- (view_rotate == MIRROR_ROTATE_180)) {
- y_buf += (height - 1) * y_pitch;
- u_buf += ((height >> y_shift) - 1) * uv_pitch;
- v_buf += ((height >> y_shift) - 1) * uv_pitch;
- height = -height;
- }
-
- // Handle zero sized destination.
- if (scaled_width == 0 || scaled_height == 0)
- return;
- int scaled_dx = width * 16 / scaled_width;
- int scaled_dy = height * 16 / scaled_height;
-
- int scaled_dx_uv = scaled_dx;
-
- if ((view_rotate == ROTATE_90) ||
- (view_rotate == ROTATE_270)) {
- int tmp = scaled_height;
- scaled_height = scaled_width;
- scaled_width = tmp;
- tmp = height;
- height = width;
- width = tmp;
- int original_dx = scaled_dx;
- int original_dy = scaled_dy;
- scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
- scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
- scaled_dy = original_dx;
- if (view_rotate == ROTATE_90) {
- y_pitch = -1;
- uv_pitch = -1;
- height = -height;
- } else {
- y_pitch = 1;
- uv_pitch = 1;
- }
- }
-
- for (int y = 0; y < scaled_height; ++y) {
- uint8* dest_pixel = rgb_buf + y * rgb_pitch;
- int scaled_y = (y * height / scaled_height);
- const uint8* y_ptr = y_buf + scaled_y * y_pitch;
- const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
- const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
-
-#if defined(_MSC_VER)
- if (scaled_width == (width * 2)) {
- DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- } else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
- if (scaled_dx_uv == scaled_dx) { // Not rotated.
- if (scaled_dx == 16) { // Not scaled
- if (has_mmx)
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- else
- FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- } else { // Simple scale down. ie half
- ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx >> 4);
- }
- } else {
- RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width,
- scaled_dx >> 4, scaled_dx_uv >> 4);
- }
-#else
- if (scaled_dx == 16) { // Not scaled
- if (has_mmx)
- FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
- else
- FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width);
-#endif
- } else {
- if (has_mmx)
- ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx);
- else
- ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
- dest_pixel, scaled_width, scaled_dx);
-
- }
- }
-
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
- if (has_mmx)
- EMMS();
-}
-
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index 7962af7..c9bf7e0 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -18,19 +18,6 @@ enum YUVType {
YV12 = 1 // YV12 is half width and half height chroma channels.
};
-// Mirror means flip the image horizontally, as in looking in a mirror.
-// Rotate happens after mirroring.
-enum Rotate {
- ROTATE_0, // Rotation off.
- ROTATE_90, // Rotate clockwise.
- ROTATE_180, // Rotate upside down.
- ROTATE_270, // Rotate counter clockwise.
- MIRROR_ROTATE_0, // Mirror horizontally.
- MIRROR_ROTATE_90, // Mirror then Rotate clockwise.
- MIRROR_ROTATE_180, // Mirror vertically.
- MIRROR_ROTATE_270 // Transpose.
-};
-
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
void ConvertYCbCrToRGB32(const uint8* yplane,
@@ -48,22 +35,6 @@ void ConvertYCbCrToRGB32(const uint8* yplane,
int rgbstride,
YUVType yuv_type);
-// Scale a frame of YUV to 32 bit ARGB.
-// Supports rotation and mirroring.
-void ScaleYCbCrToRGB32(const uint8* yplane,
- const uint8* uplane,
- const uint8* vplane,
- uint8* rgbframe,
- int frame_width,
- int frame_height,
- int scaled_width,
- int scaled_height,
- int ystride,
- int uvstride,
- int rgbstride,
- YUVType yuv_type,
- Rotate view_rotate);
-
} // namespace gfx
} // namespace mozilla
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index c43f713..2a82972 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -28,53 +28,6 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
int width);
-// Can do 1x, half size or any scale down by an integer amount.
-// Step can be negative (mirroring, rotate 180).
-// This is the third fastest of the scalers.
-void ConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step);
-
-// Rotate is like Convert, but applies different step to Y versus U and V.
-// This allows rotation by 90 or 270, by stepping by stride.
-// This is the forth fastest of the scalers.
-void RotateConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep);
-
-// Doubler does 4 pixels at a time. Each pixel is replicated.
-// This is the fastest of the scalers.
-void DoubleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width);
-
-// Handles arbitrary scaling up or down.
-// Mirroring is supported, but not 90 or 270 degree rotation.
-// Chroma is under sampled every 2 pixels for performance.
-// This is the slowest of the scalers.
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index a81416c..d3bdab4 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -172,25 +172,5 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
}
}
-// 28.4 fixed point is used. A shift by 4 isolates the integer.
-// A shift by 5 is used to further subsample the chrominence channels.
-// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
-// for 1/4 pixel accurate interpolation.
-void ScaleYUVToRGB32Row_C(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- int scaled_x = 0;
- for (int x = 0; x < width; ++x) {
- uint8 u = u_buf[scaled_x >> 5];
- uint8 v = v_buf[scaled_x >> 5];
- uint8 y0 = y_buf[scaled_x >> 4];
- YuvPixel(y0, u, v, rgb_buf);
- rgb_buf += 4;
- scaled_x += scaled_dx;
- }
-}
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index 5fb2bc4..ce5ee89 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -21,14 +21,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
-}
#else
#define RGBY(i) { \
@@ -315,75 +307,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf, // rdi
- const uint8* u_buf, // rsi
- const uint8* v_buf, // rdx
- uint8* rgb_buf, // rcx
- int width, // r8
- int scaled_dx) { // r9
- asm(
- "xor %%r11,%%r11\n"
- "sub $0x2,%4\n"
- "js scalenext\n"
-
-"scaleloop:"
- "mov %%r11,%%r10\n"
- "sar $0x5,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "lea (%%r11,%6),%%r10\n"
- "sar $0x4,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "paddsw %%xmm1,%%xmm0\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "lea (%%r10,%6),%%r11\n"
- "sar $0x4,%%r10\n"
- "movzb (%0,%%r10,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm2\n"
- "paddsw %%xmm0,%%xmm1\n"
- "paddsw %%xmm0,%%xmm2\n"
- "shufps $0x44,%%xmm2,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movq %%xmm1,0x0(%3)\n"
- "add $0x8,%3\n"
- "sub $0x2,%4\n"
- "jns scaleloop\n"
-
-"scalenext:"
- "add $0x1,%4\n"
- "js scaledone\n"
-
- "mov %%r11,%%r10\n"
- "sar $0x5,%%r10\n"
- "movzb (%1,%%r10,1),%%rax\n"
- "movq 2048(%5,%%rax,8),%%xmm0\n"
- "movzb (%2,%%r10,1),%%rax\n"
- "movq 4096(%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm1,%%xmm0\n"
- "sar $0x4,%%r11\n"
- "movzb (%0,%%r11,1),%%rax\n"
- "movq (%5,%%rax,8),%%xmm1\n"
- "paddsw %%xmm0,%%xmm1\n"
- "psraw $0x6,%%xmm1\n"
- "packuswb %%xmm1,%%xmm1\n"
- "movd %%xmm1,0x0(%3)\n"
-
-"scaledone:"
- :
- : "r"(y_buf), // %0
- "r"(u_buf), // %1
- "r"(v_buf), // %2
- "r"(rgb_buf), // %3
- "r"(width), // %4
- "r" (kCoefficientsRgbY), // %5
- "r"(static_cast<long>(scaled_dx)) // %6
- : "memory", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
-);
-}
-
#else
void FastConvertYUVToRGB32Row(const uint8* y_buf,
@@ -443,81 +366,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"ret\n"
);
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx);
-
- asm(
- ".global ScaleYUVToRGB32Row\n"
-"ScaleYUVToRGB32Row:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x34(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp scaleend\n"
-
-"scaleloop:"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"scaleend:"
- "sub $0x2,%ecx\n"
- "jns scaleloop\n"
-
- "and $0x1,%ecx\n"
- "je scaledone\n"
-
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq kCoefficientsRgbY(,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"scaledone:"
- "popa\n"
- "ret\n"
-);
-
#endif
#endif // ARCH_CPU_ARM_FAMILY
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index a7e8243..3515ada 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -18,14 +18,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
}
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
- ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx);
-}
#else
#define RGBY(i) { \
@@ -323,91 +315,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
&kCoefficientsRgbY[0][0]);
}
-extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx,
- int16 *kCoefficientsRgbY);
-
- __asm__(
-"_MacScaleYUVToRGB32Row:\n"
- "pusha\n"
- "mov 0x24(%esp),%edx\n"
- "mov 0x28(%esp),%edi\n"
- "mov 0x2c(%esp),%esi\n"
- "mov 0x30(%esp),%ebp\n"
- "mov 0x3c(%esp),%ecx\n"
- "xor %ebx,%ebx\n"
- "jmp Lscaleend\n"
-
-"Lscaleloop:"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "mov %ebx,%eax\n"
- "add 0x38(%esp),%ebx\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm2\n"
- "paddsw %mm0,%mm1\n"
- "paddsw %mm0,%mm2\n"
- "psraw $0x6,%mm1\n"
- "psraw $0x6,%mm2\n"
- "packuswb %mm2,%mm1\n"
- "movntq %mm1,0x0(%ebp)\n"
- "add $0x8,%ebp\n"
-"Lscaleend:"
- "sub $0x2,0x34(%esp)\n"
- "jns Lscaleloop\n"
-
- "and $0x1,0x34(%esp)\n"
- "je Lscaledone\n"
-
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%edi,%eax,1),%eax\n"
- "movq 2048(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x5,%eax\n"
- "movzbl (%esi,%eax,1),%eax\n"
- "paddsw 4096(%ecx,%eax,8),%mm0\n"
- "mov %ebx,%eax\n"
- "sar $0x4,%eax\n"
- "movzbl (%edx,%eax,1),%eax\n"
- "movq 0(%ecx,%eax,8),%mm1\n"
- "paddsw %mm0,%mm1\n"
- "psraw $0x6,%mm1\n"
- "packuswb %mm1,%mm1\n"
- "movd %mm1,0x0(%ebp)\n"
-
-"Lscaledone:"
- "popa\n"
- "ret\n"
-);
-
-
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int scaled_dx) {
-
- MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
- &kCoefficientsRgbY[0][0]);
-}
#endif // ARCH_CPU_PPC
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index a77a16f..f994931 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -297,273 +297,5 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
}
}
-__declspec(naked)
-void ConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int step) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- mov ebx, [esp + 32 + 24] // step
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void RotateConvertYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int ystep,
- int uvstep) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- mov ebx, [esp + 32 + 28] // uvstep
- add edi, ebx
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- add esi, ebx
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- mov ebx, [esp + 32 + 24] // ystep
- add edx, ebx
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- movzx eax, byte ptr [edx]
- add edx, ebx
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- wend :
- sub ecx, 2
- jns wloop
-
- and ecx, 1 // odd number of pixels?
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
- wdone :
-
- popad
- ret
- }
-}
-
-__declspec(naked)
-void DoubleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- jmp wend
-
- wloop :
- movzx eax, byte ptr [edi]
- add edi, 1
- movzx ebx, byte ptr [esi]
- add esi, 1
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [edx]
- paddsw mm0, [kCoefficientsRgbV + 8 * ebx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- punpckldq mm1, mm1
- movntq [ebp], mm1
-
- movzx ebx, byte ptr [edx + 1]
- add edx, 2
- paddsw mm0, [kCoefficientsRgbY + 8 * ebx]
- psraw mm0, 6
- packuswb mm0, mm0
- punpckldq mm0, mm0
- movntq [ebp+8], mm0
- add ebp, 16
- wend :
- sub ecx, 4
- jns wloop
-
- add ecx, 4
- jz wdone
-
- movzx eax, byte ptr [edi]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- movzx eax, byte ptr [esi]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- movzx eax, byte ptr [edx]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- jmp wend1
-
- wloop1 :
- movd [ebp], mm1
- add ebp, 4
- wend1 :
- sub ecx, 1
- jns wloop1
- wdone :
- popad
- ret
- }
-}
-
-// This version does general purpose scaling by any amount, up or down.
-// The only thing it can not do it rotation by 90 or 270.
-// For performance the chroma is under sampled, reducing cost of a 3x
-// 1080p scale from 8.4 ms to 5.4 ms.
-__declspec(naked)
-void ScaleYUVToRGB32Row(const uint8* y_buf,
- const uint8* u_buf,
- const uint8* v_buf,
- uint8* rgb_buf,
- int width,
- int dx) {
- __asm {
- pushad
- mov edx, [esp + 32 + 4] // Y
- mov edi, [esp + 32 + 8] // U
- mov esi, [esp + 32 + 12] // V
- mov ebp, [esp + 32 + 16] // rgb
- mov ecx, [esp + 32 + 20] // width
- xor ebx, ebx // x
- jmp scaleend
-
- scaleloop :
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += dx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- mov eax, ebx
- add ebx, [esp + 32 + 24] // x += dx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm2, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- paddsw mm2, mm0
- psraw mm1, 6
- psraw mm2, 6
- packuswb mm1, mm2
- movntq [ebp], mm1
- add ebp, 8
- scaleend :
- sub ecx, 2
- jns scaleloop
-
- and ecx, 1 // odd number of pixels?
- jz scaledone
-
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [edi + eax]
- movq mm0, [kCoefficientsRgbU + 8 * eax]
- mov eax, ebx
- sar eax, 5
- movzx eax, byte ptr [esi + eax]
- paddsw mm0, [kCoefficientsRgbV + 8 * eax]
- mov eax, ebx
- sar eax, 4
- movzx eax, byte ptr [edx + eax]
- movq mm1, [kCoefficientsRgbY + 8 * eax]
- paddsw mm1, mm0
- psraw mm1, 6
- packuswb mm1, mm1
- movd [ebp], mm1
-
- scaledone :
- popad
- ret
- }
-}
} // extern "C"

Просмотреть файл

@ -1,23 +0,0 @@
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index 36d9bda..b5c0018 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -142,17 +142,17 @@ static inline void YuvPixel(uint8 y,
uint8* rgb_buf) {
int32 d = static_cast<int32>(u) - 128;
int32 e = static_cast<int32>(v) - 128;
int32 cb = (516 * d + 128);
int32 cg = (- 100 * d - 208 * e + 128);
int32 cr = (409 * e + 128);
- int32 C298a = ((static_cast<int32>(y) - 16) * 298 + 128);
+ int32 C298a = ((static_cast<int32>(y) - 16) * 298);
*reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
(clip(C298a + cg) << 8) |
(clip(C298a + cr) << 16) |
(0xff000000);
}
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,

Просмотреть файл

@ -2,17 +2,8 @@
cp $1/media/base/yuv_convert.h .
cp $1/media/base/yuv_convert.cc yuv_convert.cpp
cp $1/media/base/yuv_row.h .
cp $1/media/base/yuv_row_linux.cc yuv_row_linux.cpp
cp $1/media/base/yuv_row_mac.cc yuv_row_mac.cpp
cp $1/media/base/yuv_row_table.cc yuv_row_table.cpp
cp $1/media/base/yuv_row_posix.cc yuv_row_posix.cpp
cp $1/media/base/yuv_row_win.cc yuv_row_win.cpp
cp $1/media/base/yuv_row_linux.cc yuv_row_c.cpp
cp $1/media/base/yuv_row_posix.cc yuv_row_c.cpp
patch -p3 <convert.patch
patch -p3 <picture_region.patch
patch -p3 <remove_scale.patch
patch -p3 <export.patch
patch -p3 <win64_mac64.patch
patch -p3 <yv24.patch
patch -p3 <row_c_fix.patch
patch -p3 <bug572034_mac_64bit.patch
patch -p3 <bug577645_movntq.patch
patch -p3 <add_scale.patch

Просмотреть файл

@ -1,57 +0,0 @@
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index 351466c..2a679cc 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -8,7 +8,10 @@
extern "C" {
-#if defined(ARCH_CPU_PPC)
+// PPC and 64 Bit builds use the C fallback. Optimized code
+// needs to be fixed for 64 bit builds. PPC has no optimized code
+// option at all.
+#if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
@@ -315,6 +318,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
&kCoefficientsRgbY[0][0]);
}
-#endif // ARCH_CPU_PPC
+#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index f994931..708ef14 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -5,6 +5,21 @@
#include "yuv_row.h"
extern "C" {
+// 64 Bit builds use the C fallback. Optimized code
+// needs to be fixed for 64 bit builds.
+#if defined(ARCH_CPU_64_BITS)
+// PPC implementation uses C fallback
+void FastConvertYUVToRGB32Row(const uint8* y_buf,
+ const uint8* u_buf,
+ const uint8* v_buf,
+ uint8* rgb_buf,
+ int width) {
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+}
+
+#else
+
+
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
@@ -297,5 +312,6 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
}
}
+#endif // ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -1,4 +1,4 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -20,11 +20,18 @@
// Header for low level row functions.
#include "yuv_row.h"
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
#include "mozilla/SSE.h"
namespace mozilla {
namespace gfx {
// 16.16 fixed point arithmetic
const int kFractionBits = 16;
const int kFractionMax = 1 << kFractionBits;
const int kFractionMask = ((1 << kFractionBits) - 1);
// Convert a frame of YUV to 32 bit ARGB.
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
@ -67,45 +74,147 @@ NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* y_buf,
rgb_row += 4;
}
if (has_sse)
if (has_sse) {
FastConvertYUVToRGB32Row(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width);
else
}
else {
FastConvertYUVToRGB32Row_C(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width,
x_shift);
}
}
#ifdef ARCH_CPU_X86_FAMILY
// SSE used for FastConvertYUVToRGB32Row requires emms instruction.
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_sse)
EMMS();
#endif
}
#if defined(MOZILLA_COMPILE_WITH_SSE2)
// FilterRows blends two rows of the image into ybuf by linear interpolation:
// each output byte is
//   (y0 * (256 - source_y_fraction) + y1 * source_y_fraction) >> 8.
// SSE2 version: handles 16 pixels per loop pass. The loop bound is tested
// after each pass, so at least one 16-byte group is always processed and the
// final group is processed whole even when it extends beyond source_width.
// NOTE(review): the result is stored by plain assignment through a __m128i
// pointer while the inputs use the unaligned-load intrinsic - presumably ybuf
// must be 16-byte aligned; confirm at the call site.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                       int source_width, int source_y_fraction) {
  const __m128i kZero = _mm_setzero_si128();
  // 16-bit blend weights replicated across all eight lanes.
  const __m128i weight0 = _mm_set1_epi16(256 - source_y_fraction);
  const __m128i weight1 = _mm_set1_epi16(source_y_fraction);

  const __m128i* row0 = reinterpret_cast<const __m128i*>(y0_ptr);
  const __m128i* row1 = reinterpret_cast<const __m128i*>(y1_ptr);
  __m128i* out = reinterpret_cast<__m128i*>(ybuf);
  __m128i* const out_end = reinterpret_cast<__m128i*>(ybuf + source_width);

  for (;;) {
    const __m128i bytes0 = _mm_loadu_si128(row0++);
    const __m128i bytes1 = _mm_loadu_si128(row1++);

    // Zero-extend the bytes to 16-bit lanes (low and high halves separately).
    const __m128i lo0 = _mm_unpacklo_epi8(bytes0, kZero);
    const __m128i lo1 = _mm_unpacklo_epi8(bytes1, kZero);
    const __m128i hi0 = _mm_unpackhi_epi8(bytes0, kZero);
    const __m128i hi1 = _mm_unpackhi_epi8(bytes1, kZero);

    // Weighted sum of the two rows, then drop the 8 fractional bits.
    const __m128i lo = _mm_srli_epi16(
        _mm_add_epi16(_mm_mullo_epi16(lo0, weight0),
                      _mm_mullo_epi16(lo1, weight1)),
        8);
    const __m128i hi = _mm_srli_epi16(
        _mm_add_epi16(_mm_mullo_epi16(hi0, weight0),
                      _mm_mullo_epi16(hi1, weight1)),
        8);

    // Narrow the sixteen 16-bit results back to bytes and store them.
    *out++ = _mm_packus_epi16(lo, hi);

    if (out >= out_end)
      break;
  }
}
#elif defined(MOZILLA_COMPILE_WITH_MMX)
// MMX version of FilterRows: blends two rows into ybuf, 8 pixels per loop
// pass. Each output byte is
//   (y0 * (256 - source_y_fraction) + y1 * source_y_fraction) >> 8.
// The loop bound is tested after each pass, so at least one 8-byte group is
// always processed and the final group is processed whole even when it
// extends beyond source_width.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                       int source_width, int source_y_fraction) {
  const __m64 kZero = _mm_setzero_si64();
  // 16-bit blend weights replicated across all four lanes.
  const __m64 weight0 = _mm_set1_pi16(256 - source_y_fraction);
  const __m64 weight1 = _mm_set1_pi16(source_y_fraction);

  const __m64* row0 = reinterpret_cast<const __m64*>(y0_ptr);
  const __m64* row1 = reinterpret_cast<const __m64*>(y1_ptr);
  __m64* out = reinterpret_cast<__m64*>(ybuf);
  __m64* const out_end = reinterpret_cast<__m64*>(ybuf + source_width);

  for (;;) {
    const __m64 bytes0 = *row0++;
    const __m64 bytes1 = *row1++;

    // Zero-extend the bytes to 16-bit lanes (low and high halves separately).
    const __m64 lo0 = _mm_unpacklo_pi8(bytes0, kZero);
    const __m64 lo1 = _mm_unpacklo_pi8(bytes1, kZero);
    const __m64 hi0 = _mm_unpackhi_pi8(bytes0, kZero);
    const __m64 hi1 = _mm_unpackhi_pi8(bytes1, kZero);

    // Weighted sum of the two rows, then drop the 8 fractional bits.
    const __m64 lo = _mm_srli_pi16(
        _mm_add_pi16(_mm_mullo_pi16(lo0, weight0),
                     _mm_mullo_pi16(lo1, weight1)),
        8);
    const __m64 hi = _mm_srli_pi16(
        _mm_add_pi16(_mm_mullo_pi16(hi0, weight0),
                     _mm_mullo_pi16(hi1, weight1)),
        8);

    // Narrow the eight 16-bit results back to bytes and store them.
    *out++ = _mm_packs_pu16(lo, hi);

    if (out >= out_end)
      break;
  }
}
#else // no MMX or SSE2
// Portable C version of FilterRows: blends two rows of the image into ybuf
// using linear interpolation.
//
//   ybuf[x] = (y0_ptr[x] * (256 - source_y_fraction) +
//              y1_ptr[x] * source_y_fraction) >> 8      for 0 <= x < source_width
//
// ybuf              destination row; exactly source_width bytes are written.
// y0_ptr, y1_ptr    the two source rows; exactly source_width bytes are read
//                   from each.
// source_width      number of pixels to blend; nothing is done when <= 0.
// source_y_fraction weight of y1_ptr; the weights used are
//                   source_y_fraction and 256 - source_y_fraction, so the
//                   value is presumably in [0, 256] - confirm at the caller.
//
// Full groups of 8 pixels go through an unrolled loop (mimicking the MMX
// code); any remaining pixels are handled one at a time so that no byte
// outside the first source_width bytes of any buffer is touched.
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
                       int source_width, int source_y_fraction) {
  if (source_width <= 0)
    return;

  const int y1_fraction = source_y_fraction;
  const int y0_fraction = 256 - y1_fraction;
  uint8* const end = ybuf + source_width;
  // End of the part of the row that consists of complete 8-pixel groups.
  uint8* const unrolled_end = ybuf + (source_width & ~7);

  while (ybuf < unrolled_end) {
    ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
    ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
    ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
    ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
    ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
    ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
    ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
    ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
    y0_ptr += 8;
    y1_ptr += 8;
    ybuf += 8;
  }

  // Tail: the 0-7 pixels that do not fill a complete group.
  while (ybuf < end) {
    *ybuf = (*y0_ptr * y0_fraction + *y1_ptr * y1_fraction) >> 8;
    ++y0_ptr;
    ++y1_ptr;
    ++ybuf;
  }
}
#endif
// Scale a frame of YUV to 32 bit ARGB.
NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int source_width,
int source_height,
int width,
int height,
int scaled_width,
int scaled_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type,
Rotate view_rotate) {
unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
Rotate view_rotate,
ScaleFilter filter) {
bool has_mmx = supports_mmx();
// 4096 allows 3 buffers to fit in 12k.
// Helps performance on CPU with 16K L1 cache.
// Large enough for 3840x2160 and 30" displays which are 2560x1600.
const int kFilterBufferSize = 4096;
// Disable filtering if the screen is too big (to avoid buffer overflows).
// This should never happen to regular users: they don't have monitors
// wider than 4096 pixels.
// TODO(fbarchard): Allow rotated videos to filter.
if (source_width > kFilterBufferSize || view_rotate)
filter = FILTER_NONE;
unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
// Diagram showing origin and direction of source sampling.
// ->0 4<-
// 7 3
@ -117,103 +226,148 @@ NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* y_buf,
(view_rotate == ROTATE_270) ||
(view_rotate == MIRROR_ROTATE_0) ||
(view_rotate == MIRROR_ROTATE_90)) {
y_buf += width - 1;
u_buf += width / 2 - 1;
v_buf += width / 2 - 1;
width = -width;
y_buf += source_width - 1;
u_buf += source_width / 2 - 1;
v_buf += source_width / 2 - 1;
source_width = -source_width;
}
// Rotations that start at bottom of image.
if ((view_rotate == ROTATE_90) ||
(view_rotate == ROTATE_180) ||
(view_rotate == MIRROR_ROTATE_90) ||
(view_rotate == MIRROR_ROTATE_180)) {
y_buf += (height - 1) * y_pitch;
u_buf += ((height >> y_shift) - 1) * uv_pitch;
v_buf += ((height >> y_shift) - 1) * uv_pitch;
height = -height;
y_buf += (source_height - 1) * y_pitch;
u_buf += ((source_height >> y_shift) - 1) * uv_pitch;
v_buf += ((source_height >> y_shift) - 1) * uv_pitch;
source_height = -source_height;
}
// Handle zero sized destination.
if (scaled_width == 0 || scaled_height == 0)
if (width == 0 || height == 0)
return;
int scaled_dx = width * 16 / scaled_width;
int scaled_dy = height * 16 / scaled_height;
int scaled_dx_uv = scaled_dx;
int source_dx = source_width * kFractionMax / width;
int source_dy = source_height * kFractionMax / height;
int source_dx_uv = source_dx;
if ((view_rotate == ROTATE_90) ||
(view_rotate == ROTATE_270)) {
int tmp = scaled_height;
scaled_height = scaled_width;
scaled_width = tmp;
tmp = height;
int tmp = height;
height = width;
width = tmp;
int original_dx = scaled_dx;
int original_dy = scaled_dy;
scaled_dx = ((original_dy >> 4) * y_pitch) << 4;
scaled_dx_uv = ((original_dy >> 4) * uv_pitch) << 4;
scaled_dy = original_dx;
tmp = source_height;
source_height = source_width;
source_width = tmp;
int original_dx = source_dx;
int original_dy = source_dy;
source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
source_dy = original_dx;
if (view_rotate == ROTATE_90) {
y_pitch = -1;
uv_pitch = -1;
height = -height;
source_height = -source_height;
} else {
y_pitch = 1;
uv_pitch = 1;
}
}
for (int y = 0; y < scaled_height; ++y) {
// Need padding because FilterRows() will write 1 to 16 extra pixels
// after the end for SSE2 version.
uint8 yuvbuf[16 + kFilterBufferSize * 3 + 16];
uint8* ybuf =
reinterpret_cast<uint8*>(reinterpret_cast<PRUptrdiff>(yuvbuf + 15) & ~15);
uint8* ubuf = ybuf + kFilterBufferSize;
uint8* vbuf = ubuf + kFilterBufferSize;
// TODO(fbarchard): Fixed point math is off by 1 on negatives.
int yscale_fixed = (source_height << kFractionBits) / height;
// TODO(fbarchard): Split this into separate function for better efficiency.
for (int y = 0; y < height; ++y) {
uint8* dest_pixel = rgb_buf + y * rgb_pitch;
int scaled_y = (y * height / scaled_height);
const uint8* y_ptr = y_buf + scaled_y * y_pitch;
const uint8* u_ptr = u_buf + (scaled_y >> y_shift) * uv_pitch;
const uint8* v_ptr = v_buf + (scaled_y >> y_shift) * uv_pitch;
int source_y_subpixel = (y * yscale_fixed);
if (yscale_fixed >= (kFractionMax * 2)) {
source_y_subpixel += kFractionMax / 2; // For 1/2 or less, center filter.
}
int source_y = source_y_subpixel >> kFractionBits;
#if defined(_MSC_VER) && defined(_M_IX86)
if (scaled_width == (width * 2)) {
DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
} else if ((scaled_dx & 15) == 0) { // Scaling by integer scale factor.
if (scaled_dx_uv == scaled_dx) { // Not rotated.
if (scaled_dx == 16) { // Not scaled
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
else
FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, x_shift);
} else { // Simple scale down. ie half
ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx >> 4);
}
const uint8* y0_ptr = y_buf + source_y * y_pitch;
const uint8* y1_ptr = y0_ptr + y_pitch;
const uint8* u0_ptr = u_buf + (source_y >> y_shift) * uv_pitch;
const uint8* u1_ptr = u0_ptr + uv_pitch;
const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch;
const uint8* v1_ptr = v0_ptr + uv_pitch;
// vertical scaler uses 16.8 fixed point
int source_y_fraction = (source_y_subpixel & kFractionMask) >> 8;
int source_uv_fraction =
((source_y_subpixel >> y_shift) & kFractionMask) >> 8;
const uint8* y_ptr = y0_ptr;
const uint8* u_ptr = u0_ptr;
const uint8* v_ptr = v0_ptr;
// Apply vertical filtering if necessary.
// TODO(fbarchard): Remove memcpy when not necessary.
if (filter & mozilla::gfx::FILTER_BILINEAR_V) {
if (yscale_fixed != kFractionMax &&
source_y_fraction && ((source_y + 1) < source_height)) {
FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
} else {
RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width,
scaled_dx >> 4, scaled_dx_uv >> 4);
memcpy(ybuf, y0_ptr, source_width);
}
#else
if (scaled_dx == 16) { // Not scaled
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width);
else
FastConvertYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, x_shift);
#endif
y_ptr = ybuf;
ybuf[source_width] = ybuf[source_width-1];
int uv_source_width = (source_width + 1) / 2;
if (yscale_fixed != kFractionMax &&
source_uv_fraction &&
(((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
} else {
memcpy(ubuf, u0_ptr, uv_source_width);
memcpy(vbuf, v0_ptr, uv_source_width);
}
u_ptr = ubuf;
v_ptr = vbuf;
ubuf[uv_source_width] = ubuf[uv_source_width - 1];
vbuf[uv_source_width] = vbuf[uv_source_width - 1];
}
if (source_dx == kFractionMax) { // Not scaled
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width);
} else {
if (has_mmx)
if (filter & FILTER_BILINEAR_H) {
LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width, source_dx);
} else {
// Specialized scalers and rotation.
#if defined(_MSC_VER) && defined(_M_IX86)
if (width == (source_width * 2)) {
DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width);
} else if ((source_dx & kFractionMask) == 0) {
// Scaling by integer scale factor. ie half.
ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width,
source_dx >> kFractionBits);
} else if (source_dx_uv == source_dx) { // Not rotated.
ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width, source_dx);
} else {
RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, width,
source_dx >> kFractionBits,
source_dx_uv >> kFractionBits);
}
#else
ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx);
else
ScaleYUVToRGB32Row_C(y_ptr, u_ptr, v_ptr,
dest_pixel, scaled_width, scaled_dx, x_shift);
}
dest_pixel, width, source_dx);
#endif
}
}
}
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
// MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
if (has_mmx)
EMMS();
}

Просмотреть файл

@ -1,4 +1,4 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -7,11 +7,11 @@
#include "chromium_types.h"
#include "gfxCore.h"
namespace mozilla {
namespace gfx {
// Type of YUV surface.
// The value of these enums matter as they are used to shift vertical indices.
enum YUVType {
@ -33,6 +33,14 @@ enum Rotate {
MIRROR_ROTATE_270 // Transpose.
};
// Filter affects how scaling looks.
// The horizontal and vertical filters are independent bit flags, so callers
// can test them individually with a bitwise AND; FILTER_BILINEAR is simply
// both flags combined.
enum ScaleFilter {
  FILTER_NONE = 0,                 // No filter (point sampled).
  FILTER_BILINEAR_H = 1 << 0,      // Bilinear horizontal filter.
  FILTER_BILINEAR_V = 1 << 1,      // Bilinear vertical filter.
  FILTER_BILINEAR = FILTER_BILINEAR_H | FILTER_BILINEAR_V  // Both directions.
};
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
@ -54,17 +62,18 @@ NS_GFX_(void) ScaleYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
int frame_width,
int frame_height,
int scaled_width,
int scaled_height,
int source_width,
int source_height,
int width,
int height,
int ystride,
int uvstride,
int rgbstride,
YUVType yuv_type,
Rotate view_rotate);
Rotate view_rotate,
ScaleFilter filter);
} // namespace gfx
} // namespace mozilla
#endif // MEDIA_BASE_YUV_CONVERT_H_

Просмотреть файл

@ -1,4 +1,4 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -28,6 +28,11 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
int width,
unsigned int x_shift);
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// Can do 1x, half size or any scale down by an integer amount.
// Step can be negative (mirroring, rotate 180).
@ -61,28 +66,67 @@ void DoubleYUVToRGB32Row(const uint8* y_buf,
// Handles arbitrary scaling up or down.
// Mirroring is supported, but not 90 or 270 degree rotation.
// Chroma is under sampled every 2 pixels for performance.
// This is the slowest of the scalers.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx);
int source_dx);
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
void ScaleYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
unsigned int x_shift);
int source_dx);
} // extern "C"
// Handles arbitrary scaling up or down with bilinear filtering.
// Mirroring is supported, but not 90 or 270 degree rotation.
// Chroma is under sampled every 2 pixels for performance.
// This is the slowest of the scalers.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
#if defined(_MSC_VER)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
extern SIMD_ALIGNED(int16 kCoefficientsRgbY[768][4]);
// x64 uses MMX2 (SSE) so emms is not required.
#if defined(ARCH_CPU_X86)
// Warning C4799: function has no EMMS instruction.
// EMMS() is slow and should be called by the calling function once per image.
#if !defined(ARCH_CPU_X86_64)
#if defined(_MSC_VER)
#define EMMS() __asm emms
#pragma warning(disable: 4799)
#else
#define EMMS() asm("emms")
#endif
@ -90,4 +134,6 @@ void ScaleYUVToRGB32Row_C(const uint8* y_buf,
#define EMMS()
#endif
} // extern "C"
#endif // MEDIA_BASE_YUV_ROW_H_

Просмотреть файл

@ -1,4 +1,4 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@ -6,160 +6,50 @@
#define DCHECK(a)
// TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
// TODO(fbarchard): Do 64 bit version.
extern "C" {
// Reference version of YUV converter.
static const int kClipTableSize = 256;
static const int kClipOverflow = 288; // Cb max is 535.
static uint8 kRgbClipTable[kClipOverflow +
kClipTableSize +
kClipOverflow] = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 288 underflow values
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // clipped to 0.
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // Unclipped values.
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 288 overflow values
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // clipped to 255.
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
// Clamp one rgb channel to the 0..255 range via kRgbClipTable.
// The input is signed 8.8 fixed point; only its integer part (value >> 8)
// selects the table entry. The table has kClipOverflow (288) entries of
// padding on each side of the 256 unclipped values, so the integer part may
// range from -288 to 543; anything outside that indexes past the table.
// Returns the clamped channel as an unsigned 0..255 value.
static inline uint32 clip(int32 value) {
  const int32 table_index = (value >> 8) + kClipOverflow;
  DCHECK(table_index >= 0);
  DCHECK(table_index < (kClipOverflow + kClipTableSize + kClipOverflow));
  return static_cast<uint32>(kRgbClipTable[table_index]);
}
// C reference code that mimics the YUV assembly.
// The two macros are named after the x86 instructions they stand in for.
// Both expand their arguments more than once, so only pass expressions
// without side effects.
// packuswb(x): clamp x to the unsigned byte range 0..255.
#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
// paddsw(x, y): x + y, saturated to the signed 16 bit range -32768..32767.
#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
(((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
static inline void YuvPixel(uint8 y,
uint8 u,
uint8 v,
uint8* rgb_buf) {
int32 d = static_cast<int32>(u) - 128;
int32 e = static_cast<int32>(v) - 128;
int32 cb = (516 * d + 128);
int32 cg = (- 100 * d - 208 * e + 128);
int32 cr = (409 * e + 128);
int b = kCoefficientsRgbY[256+u][0];
int g = kCoefficientsRgbY[256+u][1];
int r = kCoefficientsRgbY[256+u][2];
int a = kCoefficientsRgbY[256+u][3];
int32 C298a = ((static_cast<int32>(y) - 16) * 298);
*reinterpret_cast<uint32*>(rgb_buf) = (clip(C298a + cb)) |
(clip(C298a + cg) << 8) |
(clip(C298a + cr) << 16) |
(0xff000000);
b = paddsw(b, kCoefficientsRgbY[512+v][0]);
g = paddsw(g, kCoefficientsRgbY[512+v][1]);
r = paddsw(r, kCoefficientsRgbY[512+v][2]);
a = paddsw(a, kCoefficientsRgbY[512+v][3]);
b = paddsw(b, kCoefficientsRgbY[y][0]);
g = paddsw(g, kCoefficientsRgbY[y][1]);
r = paddsw(r, kCoefficientsRgbY[y][2]);
a = paddsw(a, kCoefficientsRgbY[y][3]);
b >>= 6;
g >>= 6;
r >>= 6;
a >>= 6;
*reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
(packuswb(g) << 8) |
(packuswb(r) << 16) |
(packuswb(a) << 24);
}
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
unsigned int x_shift) {
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
unsigned int x_shift) {
for (int x = 0; x < width; x += 2) {
uint8 u = u_buf[x >> x_shift];
uint8 v = v_buf[x >> x_shift];
@ -177,26 +67,67 @@ void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
}
}
// 28.4 fixed point is used. A shift by 4 isolates the integer.
// A shift by 5 is used to further subsample the chrominence channels.
// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
// for 1/4 pixel accurate interpolation.
// 16.16 fixed point is used. A shift by 16 isolates the integer.
// A shift by 17 is used to further subsample the chrominance channels.
// & 0xffff isolates the fixed point fraction, which is used at its full
// 16 bits for 1/65536 pixel accurate interpolation.
void ScaleYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
unsigned int x_shift) {
int scaled_x = 0;
for (int x = 0; x < width; ++x) {
uint8 u = u_buf[scaled_x >> (4 + x_shift)];
uint8 v = v_buf[scaled_x >> (4 + x_shift)];
uint8 y0 = y_buf[scaled_x >> 4];
YuvPixel(y0, u, v, rgb_buf);
rgb_buf += 4;
scaled_x += scaled_dx;
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
int x = 0;
for (int i = 0; i < width; i += 2) {
int y = y_buf[x >> 16];
int u = u_buf[(x >> 17)];
int v = v_buf[(x >> 17)];
YuvPixel(y, u, v, rgb_buf);
x += source_dx;
if ((i + 1) < width) {
y = y_buf[x >> 16];
YuvPixel(y, u, v, rgb_buf+4);
x += source_dx;
}
rgb_buf += 8;
}
}
// C reference implementation of the horizontally filtered row scaler.
// Walks the source row in 16.16 fixed point, advancing by source_dx per
// output pixel, and writes `width` 32 bit pixels to rgb_buf via YuvPixel.
// Luma is interpolated for every output pixel; chroma is interpolated once
// per pair of output pixels, at half the luma position (x >> 17).
// When shrinking by 2x or more (source_dx >= 0x20000) sampling starts half a
// source pixel in (32768).
// Every sample reads index and index + 1, so each source row must have one
// readable byte after the last sample that is addressed.
void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
                                const uint8* u_buf,
                                const uint8* v_buf,
                                uint8* rgb_buf,
                                int width,
                                int source_dx) {
  const int kFractionLimit = 65535;
  int pos = (source_dx >= 0x20000) ? 32768 : 0;
  int done = 0;
  while (done < width) {
    // Chroma shared by this pair of output pixels.
    const int chroma_index = pos >> 17;
    const int chroma_frac = (pos >> 1) & kFractionLimit;
    const int chroma_inv = chroma_frac ^ kFractionLimit;
    const int u = (chroma_frac * u_buf[chroma_index + 1] +
                   chroma_inv * u_buf[chroma_index]) >> 16;
    const int v = (chroma_frac * v_buf[chroma_index + 1] +
                   chroma_inv * v_buf[chroma_index]) >> 16;

    // First pixel of the pair.
    int luma_index = pos >> 16;
    int luma_frac = pos & kFractionLimit;
    int y = (luma_frac * y_buf[luma_index + 1] +
             (luma_frac ^ kFractionLimit) * y_buf[luma_index]) >> 16;
    YuvPixel(y, u, v, rgb_buf);
    pos += source_dx;

    // Second pixel of the pair, skipped when width is odd and this is the end.
    if (done + 1 < width) {
      luma_index = pos >> 16;
      luma_frac = pos & kFractionLimit;
      y = (luma_frac * y_buf[luma_index + 1] +
           (luma_frac ^ kFractionLimit) * y_buf[luma_index]) >> 16;
      YuvPixel(y, u, v, rgb_buf + 4);
      pos += source_dx;
    }

    rgb_buf += 8;
    done += 2;
  }
}
} // extern "C"

Просмотреть файл

@ -1,657 +0,0 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "yuv_row.h"
#define DCHECK(a)
// TODO(fbarchard): Move this to yuv_row_posix.cc to share with Mac.
// TODO(fbarchard): Do 64 bit version.
extern "C" {
#ifndef ARCH_CPU_X86_FAMILY
// non-x86 implementation uses C fallback
// Unscaled row conversion for builds where ARCH_CPU_X86_FAMILY is not
// defined: forwards straight to the portable C implementation.
// The trailing 1 is the x_shift argument of FastConvertYUVToRGB32Row_C,
// which indexes the chroma rows with x >> x_shift, i.e. one U/V sample is
// shared by two horizontally adjacent pixels.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Scaled row conversion for builds where ARCH_CPU_X86_FAMILY is not defined:
// forwards to the portable C implementation with the same arguments.
// scaled_dx is the fixed point source step per output pixel; the trailing 1
// is the x_shift argument, which the C version adds to its shift when
// indexing the chroma rows.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
}
#else
// Row builders for the kCoefficientsRgbY lookup table.
// Each macro expands to one row of four int16 values. The converters read a
// row as { blue, green, red, alpha }, add the Y, U and V rows for a pixel
// with saturation, and shift the sums right by 6, so every coefficient here
// is pre-multiplied by 64. The + 0.5 rounds before the cast truncates.
// The parameter is parenthesized so that any expression, not only a literal,
// can be passed without operator precedence changing the result.
//
// RGBY(i): luma contribution, 1.164 * (i - 16), identical for B, G and R.
#define RGBY(i) { \
  static_cast<int16>(1.164 * 64 * ((i) - 16) + 0.5), \
  static_cast<int16>(1.164 * 64 * ((i) - 16) + 0.5), \
  static_cast<int16>(1.164 * 64 * ((i) - 16) + 0.5), \
  0 \
}
// RGBU(i): U chroma contribution to blue and green. The alpha slot carries
// 256 * 64 - 1, which becomes 255 after the final shift by 6.
#define RGBU(i) { \
  static_cast<int16>(2.018 * 64 * ((i) - 128) + 0.5), \
  static_cast<int16>(-0.391 * 64 * ((i) - 128) + 0.5), \
  0, \
  static_cast<int16>(256 * 64 - 1) \
}
// RGBV(i): V chroma contribution to green and red.
#define RGBV(i) { \
  0, \
  static_cast<int16>(-0.813 * 64 * ((i) - 128) + 0.5), \
  static_cast<int16>(1.596 * 64 * ((i) - 128) + 0.5), \
  0 \
}
#define MMX_ALIGNED(var) var __attribute__((aligned(16)))
MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
// Chroma U table.
RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
// Chroma V table.
RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
};
#ifdef __SUNPRO_CC
#pragma align 16 (kCoefficientsRgbY)
#endif
#if defined(ARCH_CPU_X86_64)
#ifdef __SUNPRO_CC
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (Sun Studio build, x86-64, SSE2 registers).
//   y_buf   - Y samples; one byte is consumed per output pixel
//   u_buf   - U samples; one byte is consumed per two output pixels
//   v_buf   - V samples; one byte is consumed per two output pixels
//   rgb_buf - destination; 4 bytes are written per pixel
//   width   - number of pixels to convert
// Every sample indexes an 8-byte row of kCoefficientsRgbY; byte offsets 2048
// and 4096 select the rows used for the u and v samples. The three rows are
// added with signed saturation, shifted right by 6 and packed to unsigned
// bytes.
// NOTE(review): the asm writes to operands %0-%4 although they are declared
// as inputs - confirm that this is acceptable for the Sun Studio compiler.
// AMD64 ABI uses register parameters.
void FastConvertYUVToRGB32Row(const uint8* y_buf, // rdi
const uint8* u_buf, // rsi
const uint8* v_buf, // rdx
uint8* rgb_buf, // rcx
int width) { // r8
asm(
"jmp convertend\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"convertloop:"
"movzbq (%1),%%r10\n"
"add $0x1,%1\n"
"movzbq (%2),%%r11\n"
"add $0x1,%2\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzbq (%0),%%r10\n"
"movq 4096(%5,%%r11,8),%%xmm1\n"
"movzbq 0x1(%0),%%r11\n"
"paddsw %%xmm1,%%xmm0\n"
"movq (%5,%%r10,8),%%xmm2\n"
"add $0x2,%0\n"
"movq (%5,%%r11,8),%%xmm3\n"
"paddsw %%xmm0,%%xmm2\n"
"paddsw %%xmm0,%%xmm3\n"
// Merge both pixels into one register, scale down and store 8 bytes.
"shufps $0x44,%%xmm3,%%xmm2\n"
"psraw $0x6,%%xmm2\n"
"packuswb %%xmm2,%%xmm2\n"
"movq %%xmm2,0x0(%3)\n"
"add $0x8,%3\n"
// Loop test: continue while at least two pixels remain.
"convertend:"
"sub $0x2,%4\n"
"jns convertloop\n"
// Tail: convert the single remaining pixel when width was odd.
"convertnext:"
"add $0x1,%4\n"
"js convertdone\n"
"movzbq (%1),%%r10\n"
"movq 2048(%5,%%r10,8),%%xmm0\n"
"movzbq (%2),%%r10\n"
"movq 4096(%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm1,%%xmm0\n"
"movzbq (%0),%%r10\n"
"movq (%5,%%r10,8),%%xmm1\n"
"paddsw %%xmm0,%%xmm1\n"
"psraw $0x6,%%xmm1\n"
"packuswb %%xmm1,%%xmm1\n"
"movd %%xmm1,0x0(%3)\n"
"convertdone:"
:
: "r"(y_buf), // %0
"r"(u_buf), // %1
"r"(v_buf), // %2
"r"(rgb_buf), // %3
"r"(width), // %4
"r" (&kCoefficientsRgbY) // %5
: "memory", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
);
}
#else // __SUNPRO_CC
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (GCC, x86-64, SSE2 registers).
//   y_buf   - Y samples; one byte is consumed per output pixel
//   u_buf   - U samples; one byte is consumed per two output pixels
//   v_buf   - V samples; one byte is consumed per two output pixels
//   rgb_buf - destination; 4 bytes are written per pixel
//   width   - number of pixels to convert
// Every sample indexes an 8-byte row of kCoefficientsRgbY; byte offsets 2048
// and 4096 select the rows used for the u and v samples. The three rows are
// added with signed saturation, shifted right by 6 and packed to unsigned
// bytes.
//
// The pointer and width operands are advanced inside the asm, so they are
// declared as read-write ("+r") operands: GCC assumes that input-only
// operands still hold their original value after the statement. The asm is
// marked volatile because its register outputs are never read afterwards and
// its real effect is the store to rgb_buf.
// AMD64 ABI uses register parameters.
void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                              const uint8* u_buf,  // rsi
                              const uint8* v_buf,  // rdx
                              uint8* rgb_buf,      // rcx
                              int width) {         // r8
  asm volatile(
  "jmp 1f\n"
  // Main loop: two pixels per iteration sharing one u/v pair.
  "0:"
  "movzb (%1),%%r10\n"
  "add $0x1,%1\n"
  "movzb (%2),%%r11\n"
  "add $0x1,%2\n"
  "movq 2048(%5,%%r10,8),%%xmm0\n"
  "movzb (%0),%%r10\n"
  "movq 4096(%5,%%r11,8),%%xmm1\n"
  "movzb 0x1(%0),%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq (%5,%%r10,8),%%xmm2\n"
  "add $0x2,%0\n"
  "movq (%5,%%r11,8),%%xmm3\n"
  "paddsw %%xmm0,%%xmm2\n"
  "paddsw %%xmm0,%%xmm3\n"
  // Merge both pixels into one register, scale down and store 8 bytes.
  "shufps $0x44,%%xmm3,%%xmm2\n"
  "psraw $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movq %%xmm2,0x0(%3)\n"
  "add $0x8,%3\n"
  // Loop test: continue while at least two pixels remain.
  "1:"
  "sub $0x2,%4\n"
  "jns 0b\n"
  // Tail: convert the single remaining pixel when width was odd.
  "2:"
  "add $0x1,%4\n"
  "js 3f\n"
  "movzb (%1),%%r10\n"
  "movq 2048(%5,%%r10,8),%%xmm0\n"
  "movzb (%2),%%r10\n"
  "movq 4096(%5,%%r10,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movzb (%0),%%r10\n"
  "movq (%5,%%r10,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%3)\n"
  "3:"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (kCoefficientsRgbY)  // %5
  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
  );
}
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment (GCC, x86-64, SSE2 registers).
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   scaled_dx           - amount added to the source position x per output
//                         pixel; the Y index is x >> 4 and the u/v index is
//                         x >> 5
// Samples are picked without interpolation. Each sample indexes an 8-byte
// row of kCoefficientsRgbY (byte offsets 2048 / 4096 for u / v); the rows are
// added with signed saturation, shifted right by 6 and packed to bytes.
//
// rgb_buf and width are updated inside the asm, so the first five operands
// are declared read-write ("+r") instead of input-only, and the statement is
// volatile because those outputs are not read afterwards. Numeric local
// labels are used so the asm can be emitted more than once (e.g. after
// inlining) without duplicate-symbol errors.
void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
                        const uint8* u_buf,  // rsi
                        const uint8* v_buf,  // rdx
                        uint8* rgb_buf,      // rcx
                        int width,           // r8
                        int scaled_dx) {     // r9
  asm volatile(
  "xor %%r11,%%r11\n"  // x = 0
  "sub $0x2,%4\n"
  "js 1f\n"
  // Main loop: two pixels per iteration sharing one u/v pair.
  "0:"
  "mov %%r11,%%r10\n"
  "sar $0x5,%%r10\n"
  "movzb (%1,%%r10,1),%%rax\n"
  "movq 2048(%5,%%rax,8),%%xmm0\n"
  "movzb (%2,%%r10,1),%%rax\n"
  "movq 4096(%5,%%rax,8),%%xmm1\n"
  "lea (%%r11,%6),%%r10\n"
  "sar $0x4,%%r11\n"
  "movzb (%0,%%r11,1),%%rax\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq (%5,%%rax,8),%%xmm1\n"
  "lea (%%r10,%6),%%r11\n"
  "sar $0x4,%%r10\n"
  "movzb (%0,%%r10,1),%%rax\n"
  "movq (%5,%%rax,8),%%xmm2\n"
  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq %%xmm1,0x0(%3)\n"
  "add $0x8,%3\n"
  "sub $0x2,%4\n"
  "jns 0b\n"
  // Tail: convert the single remaining pixel when width was odd.
  "1:"
  "add $0x1,%4\n"
  "js 2f\n"
  "mov %%r11,%%r10\n"
  "sar $0x5,%%r10\n"
  "movzb (%1,%%r10,1),%%rax\n"
  "movq 2048(%5,%%rax,8),%%xmm0\n"
  "movzb (%2,%%r10,1),%%rax\n"
  "movq 4096(%5,%%rax,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "sar $0x4,%%r11\n"
  "movzb (%0,%%r11,1),%%rax\n"
  "movq (%5,%%rax,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%3)\n"
  "2:"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (kCoefficientsRgbY),  // %5
    "r"(static_cast<long>(scaled_dx))  // %6
  : "memory", "cc", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
  );
}
#endif // __SUNPRO_CC
#else // ARCH_CPU_X86_64
#ifdef __SUNPRO_CC
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (Sun Studio build, x86-32, MMX registers).
//   y_buf   - Y samples, passed in edx; one byte per output pixel
//   u_buf   - U samples, passed in edi; one byte per two output pixels
//   v_buf   - V samples, passed in esi; one byte per two output pixels
//   rgb_buf - destination, passed in eax and moved to ebp; 4 bytes per pixel
//   width   - number of pixels, passed in ecx
// All general registers are saved with pusha and restored with popa.
// kCoefficientsRgbY is addressed by symbol; byte offsets 2048 and 4096
// select the rows used for the u and v samples.
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
asm(
"pusha\n"
"mov %eax,%ebp\n"
"jmp convertend\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"convertloop:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"convertend:"
"sub $0x2,%ecx\n"
"jns convertloop\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,%ecx\n"
"je convertdone\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"convertdone:"
"popa\n"
:
: "d"(y_buf), // %edx
"D"(u_buf), // %edi
"S"(v_buf), // %esi
"a"(rgb_buf), // %eax
"c"(width) // %ecx
: "memory"
);
}
#else // __SUNPRO_CC
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (GCC, x86-32, MMX registers). The function body is written entirely in
// file-scope assembly; this declaration only provides the C prototype.
//   y_buf   - Y samples; one byte per output pixel
//   u_buf   - U samples; one byte per two output pixels
//   v_buf   - V samples; one byte per two output pixels
//   rgb_buf - destination; 4 bytes are written per pixel
//   width   - number of pixels to convert
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
// It's necessary to specify the correct section for the following code,
// otherwise it will be placed in whatever the current section is as this unit
// is compiled. Because GCC remembers the last section it emitted, we must
// also revert to the previous section state at the end of the asm block.
asm(
".section .text\n"
".global FastConvertYUVToRGB32Row\n"
".type FastConvertYUVToRGB32Row, @function\n"
"FastConvertYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"0:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"1:"
"sub $0x2,%ecx\n"
"jns 0b\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,%ecx\n"
"je 2f\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
".previous\n"
);
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment (GCC, x86-32, MMX registers). The
// function body is written entirely in file-scope assembly; this declaration
// only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   scaled_dx           - amount added to the source position x (kept in
//                         ebx) per output pixel; the Y index is x >> 4 and
//                         the u/v index is x >> 5
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void ScaleYUVToRGB32Row(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
                        int width,
                        int scaled_dx);
// As for any file-scope asm, the section must be selected explicitly,
// otherwise the code lands in whatever section the compiler happened to be
// emitting, and the previous section must be restored afterwards.
asm(
".section .text\n"
".global ScaleYUVToRGB32Row\n"
".type ScaleYUVToRGB32Row, @function\n"
"ScaleYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp scaleend\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"scaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"scaleend:"
"sub $0x2,%ecx\n"
"jns scaleloop\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,%ecx\n"
"je scaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"scaledone:"
"popa\n"
"ret\n"
".previous\n"
);
#endif // __SUNPRO_CC
#endif // ARCH_CPU_X86_64
#endif // !ARCH_CPU_X86_FAMILY
} // extern "C"

Просмотреть файл

@ -12,4 +12,23 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Portable implementation of the scaling row converter: forwards all
// arguments unchanged to ScaleYUVToRGB32Row_C.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
// Portable implementation of the linearly filtered scaling row converter:
// forwards all arguments unchanged to LinearScaleYUVToRGB32Row_C.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
}

849
gfx/ycbcr/yuv_row_posix.cpp Normal file
Просмотреть файл

@ -0,0 +1,849 @@
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "yuv_row.h"
#define DCHECK(a)
extern "C" {
#if defined(ARCH_CPU_X86_64)
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (x86-64, SSE2 registers).
//   y_buf   - Y samples; one byte is consumed per output pixel
//   u_buf   - U samples; one byte is consumed per two output pixels
//   v_buf   - V samples; one byte is consumed per two output pixels
//   rgb_buf - destination; 4 bytes are written per pixel
//   width   - number of pixels to convert
// Every sample indexes an 8-byte row of kCoefficientsRgbY; byte offsets 2048
// and 4096 select the rows used for the u and v samples. The three rows are
// added with signed saturation, shifted right by 6 and packed to unsigned
// bytes.
//
// The pointer and width operands are advanced inside the asm, so they are
// declared as read-write ("+r") operands: GCC assumes that input-only
// operands still hold their original value after the statement. The asm is
// marked volatile because its register outputs are never read afterwards and
// its real effect is the store to rgb_buf.
// AMD64 ABI uses register parameters.
void FastConvertYUVToRGB32Row(const uint8* y_buf,  // rdi
                              const uint8* u_buf,  // rsi
                              const uint8* v_buf,  // rdx
                              uint8* rgb_buf,      // rcx
                              int width) {         // r8
  asm volatile(
  "jmp 1f\n"
  // Main loop: two pixels per iteration sharing one u/v pair.
  "0:"
  "movzb (%1),%%r10\n"
  "add $0x1,%1\n"
  "movzb (%2),%%r11\n"
  "add $0x1,%2\n"
  "movq 2048(%5,%%r10,8),%%xmm0\n"
  "movzb (%0),%%r10\n"
  "movq 4096(%5,%%r11,8),%%xmm1\n"
  "movzb 0x1(%0),%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq (%5,%%r10,8),%%xmm2\n"
  "add $0x2,%0\n"
  "movq (%5,%%r11,8),%%xmm3\n"
  "paddsw %%xmm0,%%xmm2\n"
  "paddsw %%xmm0,%%xmm3\n"
  // Merge both pixels into one register, scale down and store 8 bytes.
  "shufps $0x44,%%xmm3,%%xmm2\n"
  "psraw $0x6,%%xmm2\n"
  "packuswb %%xmm2,%%xmm2\n"
  "movq %%xmm2,0x0(%3)\n"
  "add $0x8,%3\n"
  // Loop test: continue while at least two pixels remain.
  "1:"
  "sub $0x2,%4\n"
  "jns 0b\n"
  // Tail: convert the single remaining pixel when width was odd.
  "2:"
  "add $0x1,%4\n"
  "js 3f\n"
  "movzb (%1),%%r10\n"
  "movq 2048(%5,%%r10,8),%%xmm0\n"
  "movzb (%2),%%r10\n"
  "movq 4096(%5,%%r10,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movzb (%0),%%r10\n"
  "movq (%5,%%r10,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%3)\n"
  "3:"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (kCoefficientsRgbY)  // %5
  : "memory", "cc", "r10", "r11", "xmm0", "xmm1", "xmm2", "xmm3"
  );
}
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment (x86-64, SSE2 registers).
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to the source position x per output
//                         pixel; the Y index is x >> 16 and the u/v index is
//                         x >> 17
// Samples are picked without interpolation. Each sample indexes an 8-byte
// row of kCoefficientsRgbY (byte offsets 2048 / 4096 for u / v); the rows are
// added with signed saturation, shifted right by 6 and packed to bytes.
//
// rgb_buf and width are updated inside the asm, so the first five operands
// are declared read-write ("+r") instead of input-only (GCC assumes inputs
// are left unmodified), and the statement is volatile because those outputs
// are not read afterwards.
void ScaleYUVToRGB32Row(const uint8* y_buf,  // rdi
                        const uint8* u_buf,  // rsi
                        const uint8* v_buf,  // rdx
                        uint8* rgb_buf,      // rcx
                        int width,           // r8
                        int source_dx) {     // r9
  asm volatile(
  "xor %%r11,%%r11\n"  // x = 0
  "sub $0x2,%4\n"
  "js 1f\n"
  // Main loop: two pixels per iteration sharing one u/v pair.
  "0:"
  "mov %%r11,%%r10\n"
  "sar $0x11,%%r10\n"
  "movzb (%1,%%r10,1),%%rax\n"
  "movq 2048(%5,%%rax,8),%%xmm0\n"
  "movzb (%2,%%r10,1),%%rax\n"
  "movq 4096(%5,%%rax,8),%%xmm1\n"
  "lea (%%r11,%6),%%r10\n"
  "sar $0x10,%%r11\n"
  "movzb (%0,%%r11,1),%%rax\n"
  "paddsw %%xmm1,%%xmm0\n"
  "movq (%5,%%rax,8),%%xmm1\n"
  "lea (%%r10,%6),%%r11\n"
  "sar $0x10,%%r10\n"
  "movzb (%0,%%r10,1),%%rax\n"
  "movq (%5,%%rax,8),%%xmm2\n"
  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq %%xmm1,0x0(%3)\n"
  "add $0x8,%3\n"
  "sub $0x2,%4\n"
  "jns 0b\n"
  // Tail: convert the single remaining pixel when width was odd.
  "1:"
  "add $0x1,%4\n"
  "js 2f\n"
  "mov %%r11,%%r10\n"
  "sar $0x11,%%r10\n"
  "movzb (%1,%%r10,1),%%rax\n"
  "movq 2048(%5,%%rax,8),%%xmm0\n"
  "movzb (%2,%%r10,1),%%rax\n"
  "movq 4096(%5,%%rax,8),%%xmm1\n"
  "paddsw %%xmm1,%%xmm0\n"
  "sar $0x10,%%r11\n"
  "movzb (%0,%%r11,1),%%rax\n"
  "movq (%5,%%rax,8),%%xmm1\n"
  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%3)\n"
  "2:"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (kCoefficientsRgbY),  // %5
    "r"(static_cast<long>(source_dx))  // %6
  : "memory", "cc", "r10", "r11", "rax", "xmm0", "xmm1", "xmm2"
  );
}
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment and linearly blending each sample
// with its right-hand neighbour (x86-64, SSE2 registers).
//   y_buf, u_buf, v_buf - source sample rows; the byte after the selected
//                         sample is read as well
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to the source position x per output
//                         pixel; the Y index is x >> 16 and the u/v index is
//                         x >> 17
// For Y the blend weight is x & 0xffff (result shifted right by 16); for u/v
// it is x & 0x1fffe (result shifted right by 17). When source_dx is at least
// 0x20000 and width is at least 2, x starts at 0x8000 instead of 0. The
// left-over pixel of an odd width is converted without blending.
//
// rgb_buf and width are updated inside the asm, so the first five operands
// are declared read-write ("+r") instead of input-only (GCC assumes inputs
// are left unmodified), and the statement is volatile because those outputs
// are not read afterwards.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
                              const uint8* u_buf,
                              const uint8* v_buf,
                              uint8* rgb_buf,
                              int width,
                              int source_dx) {
  asm volatile(
  "xor %%r11,%%r11\n"   // x = 0
  "sub $0x2,%4\n"
  "js 2f\n"
  "cmp $0x20000,%6\n"   // if source_dx >= 2.0
  "jl 0f\n"
  "mov $0x8000,%%r11\n" // x = 0.5 for 1/2 or less
  "0:"

  // Main loop: two pixels per iteration sharing one blended u/v pair.
  "1:"
  "mov %%r11,%%r10\n"
  "sar $0x11,%%r10\n"

  // Blend u[i] and u[i + 1].
  "movzb (%1, %%r10, 1), %%r13 \n"
  "movzb 1(%1, %%r10, 1), %%r14 \n"
  "mov %%r11, %%rax \n"
  "and $0x1fffe, %%rax \n"
  "imul %%rax, %%r14 \n"
  "xor $0x1fffe, %%rax \n"
  "imul %%rax, %%r13 \n"
  "add %%r14, %%r13 \n"
  "shr $17, %%r13 \n"
  "movq 2048(%5,%%r13,8), %%xmm0\n"

  // Blend v[i] and v[i + 1].
  "movzb (%2, %%r10, 1), %%r13 \n"
  "movzb 1(%2, %%r10, 1), %%r14 \n"
  "mov %%r11, %%rax \n"
  "and $0x1fffe, %%rax \n"
  "imul %%rax, %%r14 \n"
  "xor $0x1fffe, %%rax \n"
  "imul %%rax, %%r13 \n"
  "add %%r14, %%r13 \n"
  "shr $17, %%r13 \n"
  "movq 4096(%5,%%r13,8), %%xmm1\n"

  // First Y sample of the pair; x advances by source_dx.
  "mov %%r11, %%rax \n"
  "lea (%%r11,%6),%%r10\n"
  "sar $0x10,%%r11\n"
  "paddsw %%xmm1,%%xmm0\n"

  "movzb (%0, %%r11, 1), %%r13 \n"
  "movzb 1(%0, %%r11, 1), %%r14 \n"
  "and $0xffff, %%rax \n"
  "imul %%rax, %%r14 \n"
  "xor $0xffff, %%rax \n"
  "imul %%rax, %%r13 \n"
  "add %%r14, %%r13 \n"
  "shr $16, %%r13 \n"
  "movq (%5,%%r13,8),%%xmm1\n"

  // Second Y sample of the pair; x advances by source_dx again.
  "mov %%r10, %%rax \n"
  "lea (%%r10,%6),%%r11\n"
  "sar $0x10,%%r10\n"

  "movzb (%0,%%r10,1), %%r13 \n"
  "movzb 1(%0,%%r10,1), %%r14 \n"
  "and $0xffff, %%rax \n"
  "imul %%rax, %%r14 \n"
  "xor $0xffff, %%rax \n"
  "imul %%rax, %%r13 \n"
  "add %%r14, %%r13 \n"
  "shr $16, %%r13 \n"
  "movq (%5,%%r13,8),%%xmm2\n"

  // Combine, scale down by 6 bits, pack and store both pixels.
  "paddsw %%xmm0,%%xmm1\n"
  "paddsw %%xmm0,%%xmm2\n"
  "shufps $0x44,%%xmm2,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movq %%xmm1,0x0(%3)\n"
  "add $0x8,%3\n"
  "sub $0x2,%4\n"
  "jns 1b\n"

  // Tail: convert the single remaining pixel (no blending) when width was
  // odd.
  "2:"
  "add $0x1,%4\n"
  "js 3f\n"

  "mov %%r11,%%r10\n"
  "sar $0x11,%%r10\n"

  "movzb (%1,%%r10,1), %%r13 \n"
  "movq 2048(%5,%%r13,8),%%xmm0\n"

  "movzb (%2,%%r10,1), %%r13 \n"
  "movq 4096(%5,%%r13,8),%%xmm1\n"

  "paddsw %%xmm1,%%xmm0\n"
  "sar $0x10,%%r11\n"

  "movzb (%0,%%r11,1), %%r13 \n"
  "movq (%5,%%r13,8),%%xmm1\n"

  "paddsw %%xmm0,%%xmm1\n"
  "psraw $0x6,%%xmm1\n"
  "packuswb %%xmm1,%%xmm1\n"
  "movd %%xmm1,0x0(%3)\n"

  "3:"
  : "+r"(y_buf),    // %0
    "+r"(u_buf),    // %1
    "+r"(v_buf),    // %2
    "+r"(rgb_buf),  // %3
    "+r"(width)     // %4
  : "r" (kCoefficientsRgbY),  // %5
    "r"(static_cast<long>(source_dx))  // %6
  : "memory", "cc", "r10", "r11", "r13", "r14", "rax",
    "xmm0", "xmm1", "xmm2"
  );
}
#elif defined(ARCH_CPU_X86_32) && !defined(__PIC__)
// PIC version is slower because less registers are available, so
// non-PIC is used on platforms where it is possible.
// Converts one row of YUV samples to 32-bit RGB pixels without scaling
// (x86-32, non-PIC, MMX registers). The function body is written entirely in
// file-scope assembly and addresses kCoefficientsRgbY by symbol; this
// declaration only provides the C prototype.
//   y_buf   - Y samples; one byte per output pixel
//   u_buf   - U samples; one byte per two output pixels
//   v_buf   - V samples; one byte per two output pixels
//   rgb_buf - destination; 4 bytes are written per pixel
//   width   - number of pixels to convert
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
asm(
".text\n"
".global FastConvertYUVToRGB32Row\n"
".type FastConvertYUVToRGB32Row, @function\n"
"FastConvertYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"0:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"1:"
"sub $0x2,%ecx\n"
"jns 0b\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,%ecx\n"
"je 2f\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment (x86-32, non-PIC, MMX registers).
// The function body is written entirely in file-scope assembly; this
// declaration only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to the source position x (kept in
//                         ebx) per output pixel; the Y index is x >> 16 and
//                         the u/v index is x >> 17
// Samples are picked without interpolation.
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
asm(
".text\n"
".global ScaleYUVToRGB32Row\n"
".type ScaleYUVToRGB32Row, @function\n"
"ScaleYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"0:"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
// 0x38(%esp) is source_dx.
"add 0x38(%esp),%ebx\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"1:"
"sub $0x2,%ecx\n"
"jns 0b\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,%ecx\n"
"je 2f\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbY+2048(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbY+4096(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
// Converts one row of YUV samples to 32-bit RGB pixels while stepping through
// the source by a fixed-point increment and linearly blending each sample
// with its right-hand neighbour (x86-32, non-PIC, MMX registers). The
// function body is written entirely in file-scope assembly; this declaration
// only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows; the byte after the selected
//                         sample is read as well
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to the source position x (kept in
//                         ebx) per output pixel
// The stack slot holding width (0x34(%esp)) is overwritten with
// width * source_dx and used as the end position for x. esi serves as a
// scratch register, so v_buf is re-read from the stack in every iteration.
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx);
asm(
".text\n"
".global LinearScaleYUVToRGB32Row\n"
".type LinearScaleYUVToRGB32Row, @function\n"
"LinearScaleYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x30(%esp),%ebp\n"
// source_width = width * source_dx + ebx
"mov 0x34(%esp), %ecx\n"
"imull 0x38(%esp), %ecx\n"
"mov %ecx, 0x34(%esp)\n"
"mov 0x38(%esp), %ecx\n"
"xor %ebx,%ebx\n" // x = 0
"cmp $0x20000,%ecx\n" // if source_dx >= 2.0
"jl 1f\n"
"mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one blended u/v pair.
"0:"
// Blend u[i] and u[i + 1] with weight x & 0x1fffe, where i = x >> 17.
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%edi,%eax,1),%ecx\n"
"movzbl 1(%edi,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"andl $0x1fffe, %eax \n"
"imul %eax, %esi \n"
"xorl $0x1fffe, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $17, %ecx \n"
"movq kCoefficientsRgbY+2048(,%ecx,8),%mm0\n"
// Reload v_buf and blend v[i] and v[i + 1] the same way.
"mov 0x2c(%esp),%esi\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%ecx\n"
"movzbl 1(%esi,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"andl $0x1fffe, %eax \n"
"imul %eax, %esi \n"
"xorl $0x1fffe, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $17, %ecx \n"
"paddsw kCoefficientsRgbY+4096(,%ecx,8),%mm0\n"
// First Y sample: blend y[x >> 16] and its neighbour with weight x & 0xffff.
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%ecx\n"
"movzbl 1(%edx,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"andl $0xffff, %eax \n"
"imul %eax, %esi \n"
"xorl $0xffff, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $16, %ecx \n"
"movq kCoefficientsRgbY(,%ecx,8),%mm1\n"
// If x reached the end position only this one pixel is left: finish at 2.
"cmp 0x34(%esp), %ebx\n"
"jge 2f\n"
// Second Y sample of the pair.
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%ecx\n"
"movzbl 1(%edx,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"andl $0xffff, %eax \n"
"imul %eax, %esi \n"
"xorl $0xffff, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $16, %ecx \n"
"movq kCoefficientsRgbY(,%ecx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while x is below the end position.
"1:"
"cmp 0x34(%esp), %ebx\n"
"jl 0b\n"
"popa\n"
"ret\n"
// Store the single trailing pixel and return.
"2:"
"paddsw %mm0, %mm1\n"
"psraw $6, %mm1\n"
"packuswb %mm1, %mm1\n"
"movd %mm1, (%ebp)\n"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
#elif defined(ARCH_CPU_X86_32) && defined(__PIC__)
// Position-independent variant of the unscaled row converter (x86-32, MMX
// registers). Instead of addressing the coefficient table by symbol, the
// table address is passed as the last argument and kept in ecx. Because ecx
// is occupied, the pixel counter is decremented in place in its stack slot
// (0x34(%esp)). The function body is written entirely in file-scope
// assembly; this declaration only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of pixels to convert
//   kCoefficientsRgbY   - base address of the coefficient table
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void PICConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int16 *kCoefficientsRgbY);
asm(
".text\n"
#if defined(XP_MACOSX)
"_PICConvertYUVToRGB32Row:\n"
#else
"PICConvertYUVToRGB32Row:\n"
#endif
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x38(%esp),%ecx\n"
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"0:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw 4096(%ecx,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq 0(%ecx,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"1:"
"subl $0x2,0x34(%esp)\n"
"jns 0b\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"andl $0x1,0x34(%esp)\n"
"je 2f\n"
"movzbl (%edi),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
// PIC build of the unscaled row converter: forwards its arguments to
// PICConvertYUVToRGB32Row together with the address of the first element of
// kCoefficientsRgbY.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
PICConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
// Position-independent variant of the scaling row converter (x86-32, MMX
// registers). The coefficient table address is passed as the last argument
// and kept in ecx; the pixel counter is decremented in place in its stack
// slot (0x34(%esp)). The function body is written entirely in file-scope
// assembly; this declaration only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to the source position x (kept in
//                         ebx) per output pixel; the Y index is x >> 16 and
//                         the u/v index is x >> 17
//   kCoefficientsRgbY   - base address of the coefficient table
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void PICScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx,
int16 *kCoefficientsRgbY);
asm(
".text\n"
#if defined(XP_MACOSX)
"_PICScaleYUVToRGB32Row:\n"
#else
"PICScaleYUVToRGB32Row:\n"
#endif
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x3c(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"0:"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
// 0x38(%esp) is source_dx.
"add 0x38(%esp),%ebx\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"1:"
"subl $0x2,0x34(%esp)\n"
"jns 0b\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"andl $0x1,0x34(%esp)\n"
"je 2f\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"2:"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
// PIC build of the scaling row converter: forwards its arguments to
// PICScaleYUVToRGB32Row together with the address of the first element of
// kCoefficientsRgbY.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
PICScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
&kCoefficientsRgbY[0][0]);
}
// Position-independent variant of the linearly filtered scaling row
// converter (x86-32, MMX registers). The coefficient table address is passed
// as the last argument and kept in edi; esi is a scratch register, so u_buf
// and v_buf are re-read from the stack in every iteration. The stack slot
// holding width (0x34(%esp)) is overwritten with width * source_dx and used
// as the end position for x (kept in ebx). The function body is written
// entirely in file-scope assembly; this declaration only provides the C
// prototype.
//   y_buf, u_buf, v_buf - source sample rows; the byte after the selected
//                         sample is read as well
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   source_dx           - amount added to x per output pixel
//   kCoefficientsRgbY   - base address of the coefficient table
// NOTE(review): the prologue loads 0x34(%esp) into ecx twice and clears ebx
// twice; the first occurrence of each appears to be redundant.
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
void PICLinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx,
int16 *kCoefficientsRgbY);
asm(
".text\n"
#if defined(XP_MACOSX)
"_PICLinearScaleYUVToRGB32Row:\n"
#else
"PICLinearScaleYUVToRGB32Row:\n"
#endif
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"mov 0x3c(%esp),%edi\n"
"xor %ebx,%ebx\n"
// source_width = width * source_dx + ebx
"mov 0x34(%esp), %ecx\n"
"imull 0x38(%esp), %ecx\n"
"mov %ecx, 0x34(%esp)\n"
"mov 0x38(%esp), %ecx\n"
"xor %ebx,%ebx\n" // x = 0
"cmp $0x20000,%ecx\n" // if source_dx >= 2.0
"jl 1f\n"
"mov $0x8000,%ebx\n" // x = 0.5 for 1/2 or less
"jmp 1f\n"
// Main loop: two pixels per iteration sharing one blended u/v pair.
"0:"
// Reload u_buf and blend u[i] and u[i + 1] with weight x & 0x1fffe,
// where i = x >> 17.
"mov 0x28(%esp),%esi\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%ecx\n"
"movzbl 1(%esi,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"andl $0x1fffe, %eax \n"
"imul %eax, %esi \n"
"xorl $0x1fffe, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $17, %ecx \n"
"movq 2048(%edi,%ecx,8),%mm0\n"
// Reload v_buf and blend v[i] and v[i + 1] the same way.
"mov 0x2c(%esp),%esi\n"
"mov %ebx,%eax\n"
"sar $0x11,%eax\n"
"movzbl (%esi,%eax,1),%ecx\n"
"movzbl 1(%esi,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"andl $0x1fffe, %eax \n"
"imul %eax, %esi \n"
"xorl $0x1fffe, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $17, %ecx \n"
"paddsw 4096(%edi,%ecx,8),%mm0\n"
// First Y sample: blend y[x >> 16] and its neighbour with weight x & 0xffff.
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%ecx\n"
"movzbl 1(%edx,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"andl $0xffff, %eax \n"
"imul %eax, %esi \n"
"xorl $0xffff, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $16, %ecx \n"
"movq (%edi,%ecx,8),%mm1\n"
// If x reached the end position only this one pixel is left: finish at 2.
"cmp 0x34(%esp), %ebx\n"
"jge 2f\n"
// Second Y sample of the pair.
"mov %ebx,%eax\n"
"sar $0x10,%eax\n"
"movzbl (%edx,%eax,1),%ecx\n"
"movzbl 1(%edx,%eax,1),%esi\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"andl $0xffff, %eax \n"
"imul %eax, %esi \n"
"xorl $0xffff, %eax \n"
"imul %eax, %ecx \n"
"addl %esi, %ecx \n"
"shrl $16, %ecx \n"
"movq (%edi,%ecx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while the end position is greater than x.
"1:"
"cmp %ebx, 0x34(%esp)\n"
"jg 0b\n"
"popa\n"
"ret\n"
// Store the single trailing pixel and return.
"2:"
"paddsw %mm0, %mm1\n"
"psraw $6, %mm1\n"
"packuswb %mm1, %mm1\n"
"movd %mm1, (%ebp)\n"
"popa\n"
"ret\n"
#if !defined(XP_MACOSX)
".previous\n"
#endif
);
// PIC build of the linearly filtered scaling row converter: forwards its
// arguments to PICLinearScaleYUVToRGB32Row together with the address of the
// first element of kCoefficientsRgbY.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
PICLinearScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, source_dx,
&kCoefficientsRgbY[0][0]);
}
#else
// Fallback used when none of the x86 branches above applies: forwards to
// FastConvertYUVToRGB32Row_C, passing 1 as its final argument.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// Fallback used when none of the x86 branches above applies: forwards all
// arguments unchanged to ScaleYUVToRGB32Row_C.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
// Fallback used when none of the x86 branches above applies: forwards all
// arguments unchanged to LinearScaleYUVToRGB32Row_C.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
#endif
} // extern "C"

Просмотреть файл

@ -1,36 +1,11 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "yuv_row.h"
// TODO(fbarchard): Do 64 bit version.
extern "C" {
// PPC and 64 Bit builds use the C fallback. Optimized code
// needs to be fixed for 64 bit builds. PPC has no optimized code
// option at all.
#if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
// PPC implementation uses C fallback
// C fallback selected when ARCH_CPU_PPC or ARCH_CPU_64_BITS is defined:
// forwards to FastConvertYUVToRGB32Row_C, passing 1 as its final argument.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// C fallback selected when ARCH_CPU_PPC or ARCH_CPU_64_BITS is defined:
// forwards to ScaleYUVToRGB32Row_C, passing 1 as an extra final argument.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
@ -52,10 +27,7 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
0 \
}
// Declares `var` with 16-byte alignment and places it in the
// "__TEXT,__text" section.
#define MMX_ALIGNED(var) \
var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16)))
MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
SIMD_ALIGNED(int16 kCoefficientsRgbY[256 * 3][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
@ -257,160 +229,5 @@ MMX_ALIGNED(int16 kCoefficientsRgbY[768][4]) = {
#undef RGBY
#undef RGBU
#undef RGBV
#undef MMX_ALIGNED
// Unscaled row converter for 32-bit Mac builds (MMX registers). The
// coefficient table address is passed as the last argument and kept in ecx;
// the pixel counter is decremented in place in its stack slot
// (0x34(%esp)). The function body is written entirely in file-scope
// assembly; this declaration only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of pixels to convert
//   kCoefficientsRgbY   - base address of the coefficient table
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
extern void MacConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int16 *kCoefficientsRgbY);
__asm__(
"_MacConvertYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x38(%esp),%ecx\n"
"jmp Lconvertend\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"Lconvertloop:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw 4096(%ecx,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq 0(%ecx,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"Lconvertend:"
"sub $0x2,0x34(%esp)\n"
"jns Lconvertloop\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,0x34(%esp)\n"
"je Lconvertdone\n"
"movzbl (%edi),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lconvertdone:\n"
"popa\n"
"ret\n"
);
// Mac build of the unscaled row converter: forwards its arguments to
// MacConvertYUVToRGB32Row together with the address of the first element of
// kCoefficientsRgbY.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
}
// Scaling row converter for 32-bit Mac builds (MMX registers). The
// coefficient table address is passed as the last argument and kept in ecx;
// the pixel counter is decremented in place in its stack slot
// (0x34(%esp)). The function body is written entirely in file-scope
// assembly; this declaration only provides the C prototype.
//   y_buf, u_buf, v_buf - source sample rows
//   rgb_buf             - destination; 4 bytes are written per pixel
//   width               - number of output pixels
//   scaled_dx           - amount added to the source position x (kept in
//                         ebx) per output pixel; the Y index is x >> 4 and
//                         the u/v index is x >> 5
//   kCoefficientsRgbY   - base address of the coefficient table
// NOTE(review): no emms instruction is issued after the MMX code -
// presumably the caller takes care of that; confirm.
extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
int16 *kCoefficientsRgbY);
__asm__(
"_MacScaleYUVToRGB32Row:\n"
// pusha stores 32 bytes, so the first stack argument is at 0x24(%esp).
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x3c(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp Lscaleend\n"
// Main loop: two pixels per iteration sharing one u/v pair.
"Lscaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
// 0x38(%esp) is scaled_dx.
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop test: continue while at least two pixels remain.
"Lscaleend:"
"sub $0x2,0x34(%esp)\n"
"jns Lscaleloop\n"
// Bit 0 of the (now negative) counter is set when one pixel is left over.
"and $0x1,0x34(%esp)\n"
"je Lscaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lscaledone:"
"popa\n"
"ret\n"
);
// Mac build of the scaling row converter: forwards its arguments to
// MacScaleYUVToRGB32Row together with the address of the first element of
// kCoefficientsRgbY.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
&kCoefficientsRgbY[0][0]);
}
#endif // ARCH_CPU_PPC || ARCH_CPU_64_BITS
} // extern "C"

Просмотреть файл

@ -1,268 +1,18 @@
// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "yuv_row.h"
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_SSE2
#define MOZILLA_SSE_INCLUDE_HEADER_FOR_MMX
#include "mozilla/SSE.h"
// Aliases for the U and V parts of the coefficient table, expressed as
// offsets from kCoefficientsRgbY.
// NOTE(review): 2048 and 4096 match the byte offsets of rows 256 and 512
// (8 bytes per row), so these are presumably meant for use inside inline
// assembly address expressions rather than in C++ pointer arithmetic -
// confirm. The expansions are not parenthesized.
#define kCoefficientsRgbU kCoefficientsRgbY + 2048
#define kCoefficientsRgbV kCoefficientsRgbY + 4096
extern "C" {
// 64 Bit builds use the C fallback. Optimized code
// needs to be fixed for 64 bit builds.
#if defined(ARCH_CPU_64_BITS)
// 64-bit implementation uses the C fallback: forwards to
// FastConvertYUVToRGB32Row_C, passing 1 as its final argument.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// 64-bit implementation uses the C fallback: forwards to
// ScaleYUVToRGB32Row_C, passing 1 as an extra final argument.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx, 1);
}
#else
// Table-entry generators for the YUV -> RGB lookup tables below.
//
// Each macro expands to one brace-enclosed entry of four int16 values.
// Every coefficient is multiplied by 64, i.e. stored as fixed point with
// six fractional bits; the assembly in this file adds the Y, U and V
// entries together and then shifts right by 6 (psraw ..., 6) before
// packing to bytes. The "+ 0.5" is applied before the truncating
// static_cast to int16.
// NOTE(review): the four columns presumably correspond to B, G, R, A of a
// little-endian BGRA pixel -- confirm against the row converters.

// RGBY(i): contribution of luma value i. The same value,
// 1.164 * 64 * (i - 16), is written to the first three columns; the fourth
// column is 0.
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
0 \
}
// RGBU(i): contribution of chroma U value i (centred on 128) to the first
// two columns. The fourth column is the constant 256 * 64 - 1, which is
// 255 after the final shift right by 6 (presumably the opaque alpha).
#define RGBU(i) { \
static_cast<int16>(2.018 * 64 * (i - 128) + 0.5), \
static_cast<int16>(-0.391 * 64 * (i - 128) + 0.5), \
0, \
static_cast<int16>(256 * 64 - 1) \
}
// RGBV(i): contribution of chroma V value i (centred on 128) to the second
// and third columns; the first and fourth columns are 0.
#define RGBV(i) { \
0, \
static_cast<int16>(-0.813 * 64 * (i - 128) + 0.5), \
static_cast<int16>(1.596 * 64 * (i - 128) + 0.5), \
0 \
}
// Wraps a variable declaration so that it is declared with 16-byte
// alignment (MSVC __declspec(align(16))).
#define MMX_ALIGNED(var) __declspec(align(16)) var
// Luma lookup table: one RGBY(i) entry for every 8-bit Y value 0x00..0xFF.
// Each entry is four int16 (8 bytes), which is why the assembly in this
// file addresses it as [kCoefficientsRgbY + 8 * value].
MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
RGBY(0x04), RGBY(0x05), RGBY(0x06), RGBY(0x07),
RGBY(0x08), RGBY(0x09), RGBY(0x0A), RGBY(0x0B),
RGBY(0x0C), RGBY(0x0D), RGBY(0x0E), RGBY(0x0F),
RGBY(0x10), RGBY(0x11), RGBY(0x12), RGBY(0x13),
RGBY(0x14), RGBY(0x15), RGBY(0x16), RGBY(0x17),
RGBY(0x18), RGBY(0x19), RGBY(0x1A), RGBY(0x1B),
RGBY(0x1C), RGBY(0x1D), RGBY(0x1E), RGBY(0x1F),
RGBY(0x20), RGBY(0x21), RGBY(0x22), RGBY(0x23),
RGBY(0x24), RGBY(0x25), RGBY(0x26), RGBY(0x27),
RGBY(0x28), RGBY(0x29), RGBY(0x2A), RGBY(0x2B),
RGBY(0x2C), RGBY(0x2D), RGBY(0x2E), RGBY(0x2F),
RGBY(0x30), RGBY(0x31), RGBY(0x32), RGBY(0x33),
RGBY(0x34), RGBY(0x35), RGBY(0x36), RGBY(0x37),
RGBY(0x38), RGBY(0x39), RGBY(0x3A), RGBY(0x3B),
RGBY(0x3C), RGBY(0x3D), RGBY(0x3E), RGBY(0x3F),
RGBY(0x40), RGBY(0x41), RGBY(0x42), RGBY(0x43),
RGBY(0x44), RGBY(0x45), RGBY(0x46), RGBY(0x47),
RGBY(0x48), RGBY(0x49), RGBY(0x4A), RGBY(0x4B),
RGBY(0x4C), RGBY(0x4D), RGBY(0x4E), RGBY(0x4F),
RGBY(0x50), RGBY(0x51), RGBY(0x52), RGBY(0x53),
RGBY(0x54), RGBY(0x55), RGBY(0x56), RGBY(0x57),
RGBY(0x58), RGBY(0x59), RGBY(0x5A), RGBY(0x5B),
RGBY(0x5C), RGBY(0x5D), RGBY(0x5E), RGBY(0x5F),
RGBY(0x60), RGBY(0x61), RGBY(0x62), RGBY(0x63),
RGBY(0x64), RGBY(0x65), RGBY(0x66), RGBY(0x67),
RGBY(0x68), RGBY(0x69), RGBY(0x6A), RGBY(0x6B),
RGBY(0x6C), RGBY(0x6D), RGBY(0x6E), RGBY(0x6F),
RGBY(0x70), RGBY(0x71), RGBY(0x72), RGBY(0x73),
RGBY(0x74), RGBY(0x75), RGBY(0x76), RGBY(0x77),
RGBY(0x78), RGBY(0x79), RGBY(0x7A), RGBY(0x7B),
RGBY(0x7C), RGBY(0x7D), RGBY(0x7E), RGBY(0x7F),
RGBY(0x80), RGBY(0x81), RGBY(0x82), RGBY(0x83),
RGBY(0x84), RGBY(0x85), RGBY(0x86), RGBY(0x87),
RGBY(0x88), RGBY(0x89), RGBY(0x8A), RGBY(0x8B),
RGBY(0x8C), RGBY(0x8D), RGBY(0x8E), RGBY(0x8F),
RGBY(0x90), RGBY(0x91), RGBY(0x92), RGBY(0x93),
RGBY(0x94), RGBY(0x95), RGBY(0x96), RGBY(0x97),
RGBY(0x98), RGBY(0x99), RGBY(0x9A), RGBY(0x9B),
RGBY(0x9C), RGBY(0x9D), RGBY(0x9E), RGBY(0x9F),
RGBY(0xA0), RGBY(0xA1), RGBY(0xA2), RGBY(0xA3),
RGBY(0xA4), RGBY(0xA5), RGBY(0xA6), RGBY(0xA7),
RGBY(0xA8), RGBY(0xA9), RGBY(0xAA), RGBY(0xAB),
RGBY(0xAC), RGBY(0xAD), RGBY(0xAE), RGBY(0xAF),
RGBY(0xB0), RGBY(0xB1), RGBY(0xB2), RGBY(0xB3),
RGBY(0xB4), RGBY(0xB5), RGBY(0xB6), RGBY(0xB7),
RGBY(0xB8), RGBY(0xB9), RGBY(0xBA), RGBY(0xBB),
RGBY(0xBC), RGBY(0xBD), RGBY(0xBE), RGBY(0xBF),
RGBY(0xC0), RGBY(0xC1), RGBY(0xC2), RGBY(0xC3),
RGBY(0xC4), RGBY(0xC5), RGBY(0xC6), RGBY(0xC7),
RGBY(0xC8), RGBY(0xC9), RGBY(0xCA), RGBY(0xCB),
RGBY(0xCC), RGBY(0xCD), RGBY(0xCE), RGBY(0xCF),
RGBY(0xD0), RGBY(0xD1), RGBY(0xD2), RGBY(0xD3),
RGBY(0xD4), RGBY(0xD5), RGBY(0xD6), RGBY(0xD7),
RGBY(0xD8), RGBY(0xD9), RGBY(0xDA), RGBY(0xDB),
RGBY(0xDC), RGBY(0xDD), RGBY(0xDE), RGBY(0xDF),
RGBY(0xE0), RGBY(0xE1), RGBY(0xE2), RGBY(0xE3),
RGBY(0xE4), RGBY(0xE5), RGBY(0xE6), RGBY(0xE7),
RGBY(0xE8), RGBY(0xE9), RGBY(0xEA), RGBY(0xEB),
RGBY(0xEC), RGBY(0xED), RGBY(0xEE), RGBY(0xEF),
RGBY(0xF0), RGBY(0xF1), RGBY(0xF2), RGBY(0xF3),
RGBY(0xF4), RGBY(0xF5), RGBY(0xF6), RGBY(0xF7),
RGBY(0xF8), RGBY(0xF9), RGBY(0xFA), RGBY(0xFB),
RGBY(0xFC), RGBY(0xFD), RGBY(0xFE), RGBY(0xFF),
};
// Chroma-U lookup table: one RGBU(i) entry for every 8-bit U value
// 0x00..0xFF. Each entry is four int16 (8 bytes), which is why the
// assembly in this file addresses it as [kCoefficientsRgbU + 8 * value].
MMX_ALIGNED(int16 kCoefficientsRgbU[256][4]) = {
RGBU(0x00), RGBU(0x01), RGBU(0x02), RGBU(0x03),
RGBU(0x04), RGBU(0x05), RGBU(0x06), RGBU(0x07),
RGBU(0x08), RGBU(0x09), RGBU(0x0A), RGBU(0x0B),
RGBU(0x0C), RGBU(0x0D), RGBU(0x0E), RGBU(0x0F),
RGBU(0x10), RGBU(0x11), RGBU(0x12), RGBU(0x13),
RGBU(0x14), RGBU(0x15), RGBU(0x16), RGBU(0x17),
RGBU(0x18), RGBU(0x19), RGBU(0x1A), RGBU(0x1B),
RGBU(0x1C), RGBU(0x1D), RGBU(0x1E), RGBU(0x1F),
RGBU(0x20), RGBU(0x21), RGBU(0x22), RGBU(0x23),
RGBU(0x24), RGBU(0x25), RGBU(0x26), RGBU(0x27),
RGBU(0x28), RGBU(0x29), RGBU(0x2A), RGBU(0x2B),
RGBU(0x2C), RGBU(0x2D), RGBU(0x2E), RGBU(0x2F),
RGBU(0x30), RGBU(0x31), RGBU(0x32), RGBU(0x33),
RGBU(0x34), RGBU(0x35), RGBU(0x36), RGBU(0x37),
RGBU(0x38), RGBU(0x39), RGBU(0x3A), RGBU(0x3B),
RGBU(0x3C), RGBU(0x3D), RGBU(0x3E), RGBU(0x3F),
RGBU(0x40), RGBU(0x41), RGBU(0x42), RGBU(0x43),
RGBU(0x44), RGBU(0x45), RGBU(0x46), RGBU(0x47),
RGBU(0x48), RGBU(0x49), RGBU(0x4A), RGBU(0x4B),
RGBU(0x4C), RGBU(0x4D), RGBU(0x4E), RGBU(0x4F),
RGBU(0x50), RGBU(0x51), RGBU(0x52), RGBU(0x53),
RGBU(0x54), RGBU(0x55), RGBU(0x56), RGBU(0x57),
RGBU(0x58), RGBU(0x59), RGBU(0x5A), RGBU(0x5B),
RGBU(0x5C), RGBU(0x5D), RGBU(0x5E), RGBU(0x5F),
RGBU(0x60), RGBU(0x61), RGBU(0x62), RGBU(0x63),
RGBU(0x64), RGBU(0x65), RGBU(0x66), RGBU(0x67),
RGBU(0x68), RGBU(0x69), RGBU(0x6A), RGBU(0x6B),
RGBU(0x6C), RGBU(0x6D), RGBU(0x6E), RGBU(0x6F),
RGBU(0x70), RGBU(0x71), RGBU(0x72), RGBU(0x73),
RGBU(0x74), RGBU(0x75), RGBU(0x76), RGBU(0x77),
RGBU(0x78), RGBU(0x79), RGBU(0x7A), RGBU(0x7B),
RGBU(0x7C), RGBU(0x7D), RGBU(0x7E), RGBU(0x7F),
RGBU(0x80), RGBU(0x81), RGBU(0x82), RGBU(0x83),
RGBU(0x84), RGBU(0x85), RGBU(0x86), RGBU(0x87),
RGBU(0x88), RGBU(0x89), RGBU(0x8A), RGBU(0x8B),
RGBU(0x8C), RGBU(0x8D), RGBU(0x8E), RGBU(0x8F),
RGBU(0x90), RGBU(0x91), RGBU(0x92), RGBU(0x93),
RGBU(0x94), RGBU(0x95), RGBU(0x96), RGBU(0x97),
RGBU(0x98), RGBU(0x99), RGBU(0x9A), RGBU(0x9B),
RGBU(0x9C), RGBU(0x9D), RGBU(0x9E), RGBU(0x9F),
RGBU(0xA0), RGBU(0xA1), RGBU(0xA2), RGBU(0xA3),
RGBU(0xA4), RGBU(0xA5), RGBU(0xA6), RGBU(0xA7),
RGBU(0xA8), RGBU(0xA9), RGBU(0xAA), RGBU(0xAB),
RGBU(0xAC), RGBU(0xAD), RGBU(0xAE), RGBU(0xAF),
RGBU(0xB0), RGBU(0xB1), RGBU(0xB2), RGBU(0xB3),
RGBU(0xB4), RGBU(0xB5), RGBU(0xB6), RGBU(0xB7),
RGBU(0xB8), RGBU(0xB9), RGBU(0xBA), RGBU(0xBB),
RGBU(0xBC), RGBU(0xBD), RGBU(0xBE), RGBU(0xBF),
RGBU(0xC0), RGBU(0xC1), RGBU(0xC2), RGBU(0xC3),
RGBU(0xC4), RGBU(0xC5), RGBU(0xC6), RGBU(0xC7),
RGBU(0xC8), RGBU(0xC9), RGBU(0xCA), RGBU(0xCB),
RGBU(0xCC), RGBU(0xCD), RGBU(0xCE), RGBU(0xCF),
RGBU(0xD0), RGBU(0xD1), RGBU(0xD2), RGBU(0xD3),
RGBU(0xD4), RGBU(0xD5), RGBU(0xD6), RGBU(0xD7),
RGBU(0xD8), RGBU(0xD9), RGBU(0xDA), RGBU(0xDB),
RGBU(0xDC), RGBU(0xDD), RGBU(0xDE), RGBU(0xDF),
RGBU(0xE0), RGBU(0xE1), RGBU(0xE2), RGBU(0xE3),
RGBU(0xE4), RGBU(0xE5), RGBU(0xE6), RGBU(0xE7),
RGBU(0xE8), RGBU(0xE9), RGBU(0xEA), RGBU(0xEB),
RGBU(0xEC), RGBU(0xED), RGBU(0xEE), RGBU(0xEF),
RGBU(0xF0), RGBU(0xF1), RGBU(0xF2), RGBU(0xF3),
RGBU(0xF4), RGBU(0xF5), RGBU(0xF6), RGBU(0xF7),
RGBU(0xF8), RGBU(0xF9), RGBU(0xFA), RGBU(0xFB),
RGBU(0xFC), RGBU(0xFD), RGBU(0xFE), RGBU(0xFF),
};
// Chroma-V lookup table: one RGBV(i) entry for every 8-bit V value
// 0x00..0xFF. Each entry is four int16 (8 bytes), which is why the
// assembly in this file addresses it as [kCoefficientsRgbV + 8 * value].
MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = {
RGBV(0x00), RGBV(0x01), RGBV(0x02), RGBV(0x03),
RGBV(0x04), RGBV(0x05), RGBV(0x06), RGBV(0x07),
RGBV(0x08), RGBV(0x09), RGBV(0x0A), RGBV(0x0B),
RGBV(0x0C), RGBV(0x0D), RGBV(0x0E), RGBV(0x0F),
RGBV(0x10), RGBV(0x11), RGBV(0x12), RGBV(0x13),
RGBV(0x14), RGBV(0x15), RGBV(0x16), RGBV(0x17),
RGBV(0x18), RGBV(0x19), RGBV(0x1A), RGBV(0x1B),
RGBV(0x1C), RGBV(0x1D), RGBV(0x1E), RGBV(0x1F),
RGBV(0x20), RGBV(0x21), RGBV(0x22), RGBV(0x23),
RGBV(0x24), RGBV(0x25), RGBV(0x26), RGBV(0x27),
RGBV(0x28), RGBV(0x29), RGBV(0x2A), RGBV(0x2B),
RGBV(0x2C), RGBV(0x2D), RGBV(0x2E), RGBV(0x2F),
RGBV(0x30), RGBV(0x31), RGBV(0x32), RGBV(0x33),
RGBV(0x34), RGBV(0x35), RGBV(0x36), RGBV(0x37),
RGBV(0x38), RGBV(0x39), RGBV(0x3A), RGBV(0x3B),
RGBV(0x3C), RGBV(0x3D), RGBV(0x3E), RGBV(0x3F),
RGBV(0x40), RGBV(0x41), RGBV(0x42), RGBV(0x43),
RGBV(0x44), RGBV(0x45), RGBV(0x46), RGBV(0x47),
RGBV(0x48), RGBV(0x49), RGBV(0x4A), RGBV(0x4B),
RGBV(0x4C), RGBV(0x4D), RGBV(0x4E), RGBV(0x4F),
RGBV(0x50), RGBV(0x51), RGBV(0x52), RGBV(0x53),
RGBV(0x54), RGBV(0x55), RGBV(0x56), RGBV(0x57),
RGBV(0x58), RGBV(0x59), RGBV(0x5A), RGBV(0x5B),
RGBV(0x5C), RGBV(0x5D), RGBV(0x5E), RGBV(0x5F),
RGBV(0x60), RGBV(0x61), RGBV(0x62), RGBV(0x63),
RGBV(0x64), RGBV(0x65), RGBV(0x66), RGBV(0x67),
RGBV(0x68), RGBV(0x69), RGBV(0x6A), RGBV(0x6B),
RGBV(0x6C), RGBV(0x6D), RGBV(0x6E), RGBV(0x6F),
RGBV(0x70), RGBV(0x71), RGBV(0x72), RGBV(0x73),
RGBV(0x74), RGBV(0x75), RGBV(0x76), RGBV(0x77),
RGBV(0x78), RGBV(0x79), RGBV(0x7A), RGBV(0x7B),
RGBV(0x7C), RGBV(0x7D), RGBV(0x7E), RGBV(0x7F),
RGBV(0x80), RGBV(0x81), RGBV(0x82), RGBV(0x83),
RGBV(0x84), RGBV(0x85), RGBV(0x86), RGBV(0x87),
RGBV(0x88), RGBV(0x89), RGBV(0x8A), RGBV(0x8B),
RGBV(0x8C), RGBV(0x8D), RGBV(0x8E), RGBV(0x8F),
RGBV(0x90), RGBV(0x91), RGBV(0x92), RGBV(0x93),
RGBV(0x94), RGBV(0x95), RGBV(0x96), RGBV(0x97),
RGBV(0x98), RGBV(0x99), RGBV(0x9A), RGBV(0x9B),
RGBV(0x9C), RGBV(0x9D), RGBV(0x9E), RGBV(0x9F),
RGBV(0xA0), RGBV(0xA1), RGBV(0xA2), RGBV(0xA3),
RGBV(0xA4), RGBV(0xA5), RGBV(0xA6), RGBV(0xA7),
RGBV(0xA8), RGBV(0xA9), RGBV(0xAA), RGBV(0xAB),
RGBV(0xAC), RGBV(0xAD), RGBV(0xAE), RGBV(0xAF),
RGBV(0xB0), RGBV(0xB1), RGBV(0xB2), RGBV(0xB3),
RGBV(0xB4), RGBV(0xB5), RGBV(0xB6), RGBV(0xB7),
RGBV(0xB8), RGBV(0xB9), RGBV(0xBA), RGBV(0xBB),
RGBV(0xBC), RGBV(0xBD), RGBV(0xBE), RGBV(0xBF),
RGBV(0xC0), RGBV(0xC1), RGBV(0xC2), RGBV(0xC3),
RGBV(0xC4), RGBV(0xC5), RGBV(0xC6), RGBV(0xC7),
RGBV(0xC8), RGBV(0xC9), RGBV(0xCA), RGBV(0xCB),
RGBV(0xCC), RGBV(0xCD), RGBV(0xCE), RGBV(0xCF),
RGBV(0xD0), RGBV(0xD1), RGBV(0xD2), RGBV(0xD3),
RGBV(0xD4), RGBV(0xD5), RGBV(0xD6), RGBV(0xD7),
RGBV(0xD8), RGBV(0xD9), RGBV(0xDA), RGBV(0xDB),
RGBV(0xDC), RGBV(0xDD), RGBV(0xDE), RGBV(0xDF),
RGBV(0xE0), RGBV(0xE1), RGBV(0xE2), RGBV(0xE3),
RGBV(0xE4), RGBV(0xE5), RGBV(0xE6), RGBV(0xE7),
RGBV(0xE8), RGBV(0xE9), RGBV(0xEA), RGBV(0xEB),
RGBV(0xEC), RGBV(0xED), RGBV(0xEE), RGBV(0xEF),
RGBV(0xF0), RGBV(0xF1), RGBV(0xF2), RGBV(0xF3),
RGBV(0xF4), RGBV(0xF5), RGBV(0xF6), RGBV(0xF7),
RGBV(0xF8), RGBV(0xF9), RGBV(0xFA), RGBV(0xFB),
RGBV(0xFC), RGBV(0xFD), RGBV(0xFE), RGBV(0xFF),
};
// The table-generator macros are only needed while the tables are being
// initialised; drop them so they cannot leak into the rest of the file.
// NOTE(review): RGBHY is not defined anywhere in the visible part of this
// file, so its #undef looks like a leftover no-op -- confirm no included
// header defines it before removing.
#undef RGBHY
#undef RGBY
#undef RGBU
#undef RGBV
#undef MMX_ALIGNED
// Warning C4799: function has no EMMS instruction.
// EMMS() is slow and should be called by the calling function once per image.
#pragma warning(disable: 4799)
#if defined(MOZILLA_COMPILE_WITH_SSE2)
__declspec(naked)
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
@ -523,7 +273,7 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int dx) {
int source_dx) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
@ -536,21 +286,21 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
scaleloop :
mov eax, ebx
sar eax, 5
sar eax, 17
movzx eax, byte ptr [edi + eax]
movq mm0, [kCoefficientsRgbU + 8 * eax]
mov eax, ebx
sar eax, 5
sar eax, 17
movzx eax, byte ptr [esi + eax]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
mov eax, ebx
add ebx, [esp + 32 + 24] // x += dx
sar eax, 4
add ebx, [esp + 32 + 24] // x += source_dx
sar eax, 16
movzx eax, byte ptr [edx + eax]
movq mm1, [kCoefficientsRgbY + 8 * eax]
mov eax, ebx
add ebx, [esp + 32 + 24] // x += dx
sar eax, 4
add ebx, [esp + 32 + 24] // x += source_dx
sar eax, 16
movzx eax, byte ptr [edx + eax]
movq mm2, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
@ -568,15 +318,15 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
jz scaledone
mov eax, ebx
sar eax, 5
sar eax, 17
movzx eax, byte ptr [edi + eax]
movq mm0, [kCoefficientsRgbU + 8 * eax]
mov eax, ebx
sar eax, 5
sar eax, 17
movzx eax, byte ptr [esi + eax]
paddsw mm0, [kCoefficientsRgbV + 8 * eax]
mov eax, ebx
sar eax, 4
sar eax, 16
movzx eax, byte ptr [edx + eax]
movq mm1, [kCoefficientsRgbY + 8 * eax]
paddsw mm1, mm0
@ -590,6 +340,138 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
}
}
#endif // ARCH_CPU_64_BITS
// Converts one row of YUV to 32-bit RGB while scaling horizontally with
// linear interpolation between neighbouring source samples (MMX, MSVC
// inline assembly).
//
// The function is naked, so it reads its arguments straight from the stack:
// after pushad (eight 32-bit registers = 32 bytes) the return address is at
// [esp + 32] and the arguments follow at [esp + 32 + 4], + 8, ...
//
// Register use inside the loop:
//   edx = y_buf, edi = u_buf, ebp = current rgb_buf position,
//   ebx = x, the source position in 16.16 fixed point,
//   eax / ecx / esi = scratch (esi is clobbered, so the V pointer is
//   reloaded from the stack on every iteration).
// The stack slot of `width` is overwritten with width * source_dx and used
// as the loop limit for x.
//
// Luma is indexed with x >> 16 and blended with the 16-bit fraction
// x & 0xffff; chroma is indexed with x >> 17 and blended with
// x & 0x1fffe. Two output pixels share one interpolated U/V pair and are
// written per iteration with a non-temporal 8-byte store; a trailing odd
// pixel is written with a 4-byte store at lscalelastpixel.
//
// No EMMS is executed here; see the C4799 note earlier in this file, which
// leaves that to the caller.
// NOTE(review): every interpolation also reads the byte at [index + 1] of
// the plane; presumably callers guarantee that byte is readable even for
// the last sample of a row -- confirm.
__declspec(naked)
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
__asm {
pushad
mov edx, [esp + 32 + 4] // Y
mov edi, [esp + 32 + 8] // U
// [esp + 32 + 12] // V
mov ebp, [esp + 32 + 16] // rgb
mov ecx, [esp + 32 + 20] // width
imul ecx, [esp + 32 + 24] // source_dx
mov [esp + 32 + 20], ecx // source_width = width * source_dx
mov ecx, [esp + 32 + 24] // source_dx
xor ebx, ebx // x = 0
// Start at x = 0 unless source_dx >= 0x20000 (2.0 in 16.16).
cmp ecx, 0x20000
jl lscaleend
mov ebx, 0x8000 // x = 0.5 for 1/2 or less
jmp lscaleend
lscaleloop:
// --- U: blend u_buf[x >> 17] and u_buf[(x >> 17) + 1] ---
mov eax, ebx
sar eax, 0x11
movzx ecx, byte ptr [edi + eax]
movzx esi, byte ptr [edi + eax + 1]
mov eax, ebx
// eax = fraction; after the xor it becomes the complementary weight.
and eax, 0x1fffe
imul esi, eax
xor eax, 0x1fffe
imul ecx, eax
add ecx, esi
shr ecx, 17
movq mm0, [kCoefficientsRgbU + 8 * ecx]
// --- V: same blend on v_buf; the pointer is reloaded because esi was
// used as scratch above ---
mov esi, [esp + 32 + 12]
mov eax, ebx
sar eax, 0x11
movzx ecx, byte ptr [esi + eax]
movzx esi, byte ptr [esi + eax + 1]
mov eax, ebx
and eax, 0x1fffe
imul esi, eax
xor eax, 0x1fffe
imul ecx, eax
add ecx, esi
shr ecx, 17
// mm0 = U entry + V entry (saturating add of four int16).
paddsw mm0, [kCoefficientsRgbV + 8 * ecx]
// --- first Y: blend y_buf[x >> 16] and y_buf[(x >> 16) + 1] ---
mov eax, ebx
sar eax, 0x10
movzx ecx, byte ptr [edx + eax]
movzx esi, byte ptr [1 + edx + eax]
mov eax, ebx
// x += source_dx (eax still holds the old x for the fraction).
add ebx, [esp + 32 + 24]
and eax, 0xffff
imul esi, eax
xor eax, 0xffff
imul ecx, eax
add ecx, esi
shr ecx, 16
movq mm1, [kCoefficientsRgbY + 8 * ecx]
// If x has reached source_width only one pixel is left to emit.
cmp ebx, [esp + 32 + 20]
jge lscalelastpixel
// --- second Y: same blend at the advanced x ---
mov eax, ebx
sar eax, 0x10
movzx ecx, byte ptr [edx + eax]
movzx esi, byte ptr [edx + eax + 1]
mov eax, ebx
// x += source_dx
add ebx, [esp + 32 + 24]
and eax, 0xffff
imul esi, eax
xor eax, 0xffff
imul ecx, eax
add ecx, esi
shr ecx, 16
movq mm2, [kCoefficientsRgbY + 8 * ecx]
// Add the shared chroma term to both luma terms, drop the six fractional
// bits, saturate to bytes and store both pixels (8 bytes) at once.
paddsw mm1, mm0
paddsw mm2, mm0
psraw mm1, 0x6
psraw mm2, 0x6
packuswb mm1, mm2
movntq [ebp], mm1
add ebp, 0x8
lscaleend:
// Loop while x < source_width.
cmp ebx, [esp + 32 + 20]
jl lscaleloop
popad
ret
lscalelastpixel:
// Odd trailing pixel: finish the single pending pixel and store 4 bytes.
paddsw mm1, mm0
psraw mm1, 6
packuswb mm1, mm1
movd [ebp], mm1
popad
ret
};
}
#else // MOZILLA_COMPILE_WITH_SSE2
// C fallback used when MOZILLA_COMPILE_WITH_SSE2 is not defined (this is
// the #else branch of that check).
//
// Converts one row of YUV to 32-bit RGB without scaling by forwarding to
// FastConvertYUVToRGB32Row_C. The trailing literal 1 is the extra argument
// of the _C routine; presumably it is the horizontal chroma shift
// (x_shift) for half-width chroma -- TODO confirm against yuv_row.h.
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
// C fallback used when MOZILLA_COMPILE_WITH_SSE2 is not defined.
//
// Converts one row of YUV to 32-bit RGB with horizontal scaling by
// forwarding all six arguments, unchanged and in order, to
// ScaleYUVToRGB32Row_C.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
ScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
// C fallback used when MOZILLA_COMPILE_WITH_SSE2 is not defined.
//
// Converts one row of YUV to 32-bit RGB with linearly filtered horizontal
// scaling by forwarding all six arguments, unchanged and in order, to
// LinearScaleYUVToRGB32Row_C.
void LinearScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int source_dx) {
LinearScaleYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
#endif
} // extern "C"

Просмотреть файл

@ -1,241 +0,0 @@
diff --git a/gfx/ycbcr/yuv_convert.cpp b/gfx/ycbcr/yuv_convert.cpp
index b22e778..cdbb040 100644
--- a/gfx/ycbcr/yuv_convert.cpp
+++ b/gfx/ycbcr/yuv_convert.cpp
@@ -6,16 +6,17 @@
// http://www.fourcc.org/yuv.php
// The actual conversion is best described here
// http://en.wikipedia.org/wiki/YUV
// An article on optimizing YUV conversion using tables instead of multiplies
// http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf
//
// YV12 is a full plane of Y and a half height, half width chroma planes
// YV16 is a full plane of Y and a full height, half width chroma planes
+// YV24 is a full plane of Y and a full height, full width chroma planes
//
// ARGB pixel format is output, which on little endian is stored as BGRA.
// The alpha is set to 255, allowing the application to use RGBA or RGB32.
#include "yuv_convert.h"
// Header for low level row functions.
#include "yuv_row.h"
@@ -33,50 +34,55 @@ void ConvertYCbCrToRGB32(const uint8* y_buf,
int pic_x,
int pic_y,
int pic_width,
int pic_height,
int y_pitch,
int uv_pitch,
int rgb_pitch,
YUVType yuv_type) {
- unsigned int y_shift = yuv_type;
- bool has_mmx = supports_mmx();
- bool odd_pic_x = pic_x % 2 != 0;
+ unsigned int y_shift = yuv_type == YV12 ? 1 : 0;
+ unsigned int x_shift = yuv_type == YV24 ? 0 : 1;
+ // There is no optimized YV24 MMX routine so we check for this and
+ // fall back to the C code.
+ bool has_mmx = supports_mmx() && yuv_type != YV24;
+ bool odd_pic_x = yuv_type != YV24 && pic_x % 2 != 0;
int x_width = odd_pic_x ? pic_width - 1 : pic_width;
for (int y = pic_y; y < pic_height + pic_y; ++y) {
uint8* rgb_row = rgb_buf + (y - pic_y) * rgb_pitch;
const uint8* y_ptr = y_buf + y * y_pitch + pic_x;
- const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
- const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> 1);
+ const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
+ const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch + (pic_x >> x_shift);
if (odd_pic_x) {
// Handle the single odd pixel manually and use the
// fast routines for the remaining.
FastConvertYUVToRGB32Row_C(y_ptr++,
u_ptr++,
v_ptr++,
rgb_row,
- 1);
+ 1,
+ x_shift);
rgb_row += 4;
}
if (has_mmx)
FastConvertYUVToRGB32Row(y_ptr,
u_ptr,
v_ptr,
rgb_row,
x_width);
else
FastConvertYUVToRGB32Row_C(y_ptr,
u_ptr,
v_ptr,
rgb_row,
- x_width);
+ x_width,
+ x_shift);
}
// MMX used for FastConvertYUVToRGB32Row requires emms instruction.
if (has_mmx)
EMMS();
}
} // namespace gfx
diff --git a/gfx/ycbcr/yuv_convert.h b/gfx/ycbcr/yuv_convert.h
index e624168..c0b678d 100644
--- a/gfx/ycbcr/yuv_convert.h
+++ b/gfx/ycbcr/yuv_convert.h
@@ -10,18 +10,19 @@
namespace mozilla {
namespace gfx {
// Type of YUV surface.
// The value of these enums matter as they are used to shift vertical indices.
enum YUVType {
- YV16 = 0, // YV16 is half width and full height chroma channels.
- YV12 = 1 // YV12 is half width and half height chroma channels.
+ YV12 = 0, // YV12 is half width and half height chroma channels.
+ YV16 = 1, // YV16 is half width and full height chroma channels.
+ YV24 = 2 // YV24 is full width and full height chroma channels.
};
// Convert a frame of YUV to 32 bit ARGB.
// Pass in YV16/YV12 depending on source format
NS_GFX_(void) ConvertYCbCrToRGB32(const uint8* yplane,
const uint8* uplane,
const uint8* vplane,
uint8* rgbframe,
diff --git a/gfx/ycbcr/yuv_row.h b/gfx/ycbcr/yuv_row.h
index 2a82972..d776dac 100644
--- a/gfx/ycbcr/yuv_row.h
+++ b/gfx/ycbcr/yuv_row.h
@@ -20,17 +20,18 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width);
+ int width,
+ unsigned int x_shift);
} // extern "C"
// x64 uses MMX2 (SSE) so emms is not required.
#if !defined(ARCH_CPU_X86_64) && !defined(ARCH_CPU_PPC)
#if defined(_MSC_VER)
#define EMMS() __asm emms
diff --git a/gfx/ycbcr/yuv_row_c.cpp b/gfx/ycbcr/yuv_row_c.cpp
index d3bdab4..36d9bda 100644
--- a/gfx/ycbcr/yuv_row_c.cpp
+++ b/gfx/ycbcr/yuv_row_c.cpp
@@ -153,24 +153,29 @@ static inline void YuvPixel(uint8 y,
(clip(C298a + cr) << 16) |
(0xff000000);
}
void FastConvertYUVToRGB32Row_C(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
- int width) {
+ int width,
+ unsigned int x_shift) {
for (int x = 0; x < width; x += 2) {
- uint8 u = u_buf[x >> 1];
- uint8 v = v_buf[x >> 1];
+ uint8 u = u_buf[x >> x_shift];
+ uint8 v = v_buf[x >> x_shift];
uint8 y0 = y_buf[x];
YuvPixel(y0, u, v, rgb_buf);
if ((x + 1) < width) {
uint8 y1 = y_buf[x + 1];
+ if (x_shift == 0) {
+ u = u_buf[x + 1];
+ v = v_buf[x + 1];
+ }
YuvPixel(y1, u, v, rgb_buf + 4);
}
rgb_buf += 8; // Advance 2 pixels.
}
}
} // extern "C"
diff --git a/gfx/ycbcr/yuv_row_linux.cpp b/gfx/ycbcr/yuv_row_linux.cpp
index ce5ee89..455dd7b 100644
--- a/gfx/ycbcr/yuv_row_linux.cpp
+++ b/gfx/ycbcr/yuv_row_linux.cpp
@@ -13,17 +13,17 @@ extern "C" {
#if defined(ARCH_CPU_ARM_FAMILY)
// ARM implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
diff --git a/gfx/ycbcr/yuv_row_mac.cpp b/gfx/ycbcr/yuv_row_mac.cpp
index 34ecdc1..2a679cc 100644
--- a/gfx/ycbcr/yuv_row_mac.cpp
+++ b/gfx/ycbcr/yuv_row_mac.cpp
@@ -13,17 +13,17 @@ extern "C" {
// option at all.
#if defined(ARCH_CPU_PPC) || defined(ARCH_CPU_64_BITS)
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
diff --git a/gfx/ycbcr/yuv_row_win.cpp b/gfx/ycbcr/yuv_row_win.cpp
index d2b82c4..708ef14 100644
--- a/gfx/ycbcr/yuv_row_win.cpp
+++ b/gfx/ycbcr/yuv_row_win.cpp
@@ -9,17 +9,17 @@ extern "C" {
// needs to be fixed for 64 bit builds.
#if defined(ARCH_CPU_64_BITS)
// PPC implementation uses C fallback
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
- FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width);
+ FastConvertYUVToRGB32Row_C(y_buf, u_buf, v_buf, rgb_buf, width, 1);
}
#else
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \