Bug 530550. pixman: Fix arm gcc build.

pixman-arm-simd-asm.c was mistakenly just a copy of pixman-arm-simd.c instead
of the file it was supposed to be.
This commit is contained in:
Jeff Muizelaar 2010-01-21 11:30:07 -05:00
Родитель 1d8e820964
Коммит 2a8a87ae64
1 изменённых файлов: 270 добавлений и 59 удалений

Просмотреть файл

@ -28,8 +28,9 @@
#endif #endif
#include "pixman-private.h" #include "pixman-private.h"
#include "pixman-arm-simd-asm.h"
static void void
arm_composite_add_8000_8000 (pixman_implementation_t * impl, arm_composite_add_8000_8000 (pixman_implementation_t * impl,
pixman_op_t op, pixman_op_t op,
pixman_image_t * src_image, pixman_image_t * src_image,
@ -101,7 +102,7 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
} }
static void void
arm_composite_over_8888_8888 (pixman_implementation_t * impl, arm_composite_over_8888_8888 (pixman_implementation_t * impl,
pixman_op_t op, pixman_op_t op,
pixman_image_t * src_image, pixman_image_t * src_image,
@ -194,7 +195,7 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
} }
} }
static void void
arm_composite_over_8888_n_8888 (pixman_implementation_t * impl, arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
pixman_op_t op, pixman_op_t op,
pixman_image_t * src_image, pixman_image_t * src_image,
@ -303,7 +304,7 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
} }
} }
static void void
arm_composite_over_n_8_8888 (pixman_implementation_t * impl, arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
pixman_op_t op, pixman_op_t op,
pixman_image_t * src_image, pixman_image_t * src_image,
@ -419,67 +420,277 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
} }
} }
/*
 * NOTE(review): this span looks like a side-by-side diff whose two columns
 * were merged line by line. The removed fast-path table and
 * arm_simd_composite() are interleaved with the added converter function
 * below. The text is kept exactly as found; only comments were added.
 */
static const pixman_fast_path_t arm_simd_fast_path_array[] = /**
* Conversion x8r8g8b8 -> r5g6b5
*
* Converts h rows of w pixels, reading 32-bit pixels from src and writing
* 16-bit pixels to dst. After each row both pointers are advanced by
* (stride - w) elements of their own type, so the strides are counted in
* pixels of the respective buffer, not in bytes.
*
* Per pixel: (x >> 3) & 0x1F001F keeps the top five bits of the low byte
* and of the third byte, x & 0xFC00 keeps the top six bits of the second
* byte, and the shifts by 5 move those fields into bits 11-15 and 5-10 of
* the 16-bit result.
*
* TODO: optimize more, eliminate stalls, try to use burst writes (4 words aligned
* at 16 byte boundary)
*/
static inline void fbComposite_x8r8g8b8_src_r5g6b5_internal_mixed_armv6_c(
uint16_t *dst, uint32_t *src, int w, int dst_stride,
int src_stride, int h)
{ {
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, arm_composite_over_8888_8888 }, uint32_t a, x, y, c1F001F = 0x1F001F;
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, arm_composite_over_8888_8888 }, int backup_w = w;
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, arm_composite_over_8888_8888 }, while (h--)
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, arm_composite_over_8888_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid, PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, arm_composite_add_8000_8000 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, arm_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, arm_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, arm_composite_over_n_8_8888 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, arm_composite_over_n_8_8888 },
{ PIXMAN_OP_NONE },
};
const pixman_fast_path_t *const arm_simd_fast_paths = arm_simd_fast_path_array;
static void
arm_simd_composite (pixman_implementation_t *imp,
pixman_op_t op,
pixman_image_t * src,
pixman_image_t * mask,
pixman_image_t * dest,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
{
if (_pixman_run_fast_path (arm_simd_fast_paths, imp,
op, src, mask, dest,
src_x, src_y,
mask_x, mask_y,
dest_x, dest_y,
width, height))
{ {
return; w = backup_w;
/*
 * Head: when dst is only 2-byte aligned, convert a single pixel first so
 * that the 32-bit stores in the asm loop below hit a 4-byte boundary.
 */
if (w > 0 && (uintptr_t)dst & 2)
{
x = *src++;
a = (x >> 3) & c1F001F;
x &= 0xFC00;
a |= a >> 5;
a |= x >> 5;
*dst++ = a;
w--;
}
/*
 * Body: two source pixels per iteration. Each is converted as in the C
 * code above, then pkhbt packs the first result into the low halfword
 * and the second into the high halfword, and one str writes both.
 * The loop is skipped entirely when fewer than two pixels remain.
 */
asm volatile(
"subs %[w], %[w], #2\n"
"blt 2f\n"
"1:\n"
"ldr %[x], [%[src]], #4\n"
"ldr %[y], [%[src]], #4\n"
"subs %[w], %[w], #2\n"
"and %[a], %[c1F001F], %[x], lsr #3\n"
"and %[x], %[x], #0xFC00\n\n"
"orr %[a], %[a], %[a], lsr #5\n"
"orr %[x], %[a], %[x], lsr #5\n"
"and %[a], %[c1F001F], %[y], lsr #3\n"
"and %[y], %[y], #0xFC00\n\n"
"orr %[a], %[a], %[a], lsr #5\n"
"orr %[y], %[a], %[y], lsr #5\n"
"pkhbt %[x], %[x], %[y], lsl #16\n"
"str %[x], [%[dst]], #4\n"
"bge 1b\n"
"2:\n"
/*
 * NOTE(review): only output operands are listed. The code changes the
 * condition flags (subs) and accesses memory through src and dst, yet no
 * cc or memory clobber is declared - confirm against the GCC extended
 * asm rules whether they should be added.
 */
: [c1F001F] "+&r" (c1F001F), [src] "+&r" (src), [dst] "+&r" (dst), [a] "=&r" (a),
[x] "=&r" (x), [y] "=&r" (y), [w] "+&r" (w)
);
/*
 * Tail: the asm leaves w at -2 or -1; bit 0 is set exactly when one
 * unconverted pixel is left in the row.
 */
if (w & 1)
{
x = *src++;
a = (x >> 3) & c1F001F;
x = x & 0xFC00;
a |= a >> 5;
a |= x >> 5;
*dst++ = a;
}
/* Both pointers moved backup_w elements; step over the rest of the row. */
src += src_stride - backup_w;
dst += dst_stride - backup_w;
} }
_pixman_implementation_composite (imp->delegate, op,
src, mask, dest,
src_x, src_y,
mask_x, mask_y,
dest_x, dest_y,
width, height);
} }
pixman_implementation_t * /**
_pixman_implementation_create_arm_simd (void) * Conversion x8r8g8b8 -> r5g6b5
*
* Note: 'w' must be >= 7
*
* The three alignment steps at the start of every row consume 1, 2 and 4
* pixels without checking W first, which is where the minimum of 7 comes
* from.
*
* The function is naked: the asm body saves and restores registers itself
* and reads its arguments straight from r0-r3 and the stack, as set up by
* the aliases and stack loads below.
*
* NOTE(review): lines in this span that carry C statements in front of the
* asm text (the pixman_implementation_t and imp->composite fragments)
* look like residue from the removed column of a side-by-side diff. They
* are kept exactly as found.
*/
static void __attribute__((naked)) fbComposite_x8r8g8b8_src_r5g6b5_internal_armv6(
uint16_t *dst, uint32_t *src, int w, int dst_stride,
int src_stride, int h)
{ {
pixman_implementation_t *general = _pixman_implementation_create_fast_path (); asm volatile(
pixman_implementation_t *imp = _pixman_implementation_create (general); /* define supplementary macros */
/* cvt8888to565 converts PIX in place; it needs C1F001F loaded and uses A as scratch */
".macro cvt8888to565 PIX\n"
"and A, C1F001F, \\PIX, lsr #3\n"
"and \\PIX, \\PIX, #0xFC00\n\n"
"orr A, A, A, lsr #5\n"
"orr \\PIX, A, \\PIX, lsr #5\n"
".endm\n"
imp->composite = arm_simd_composite; ".macro combine_pixels_pair PIX1, PIX2\n"
"pkhbt \\PIX1, \\PIX1, \\PIX2, lsl #16\n" /* Note: assume little endian byte order */
".endm\n"
/* function entry, save all registers (10 words) to stack */
"stmdb sp!, {r4-r11, ip, lr}\n"
/* define some aliases */
"DST .req r0\n"
"SRC .req r1\n"
"W .req r2\n"
"H .req r3\n"
"TMP1 .req r4\n"
"TMP2 .req r5\n"
"TMP3 .req r6\n"
"TMP4 .req r7\n"
"TMP5 .req r8\n"
"TMP6 .req r9\n"
"TMP7 .req r10\n"
"TMP8 .req r11\n"
"C1F001F .req ip\n"
"A .req lr\n"
/*
 * Ten words were pushed, so offset 10*4 is the first word above them.
 * The two stack argument slots are reused to hold the per-row pointer
 * adjustments, and r3 (holding dst_stride on entry, per the comments
 * below) is reloaded with h so it can serve as the row counter H.
 */
"ldr TMP1, [sp, #(10*4+0)]\n" /* load src_stride */
"ldr C1F001F, =0x1F001F\n"
"sub r3, r3, W\n"
"str r3, [sp, #(10*4+0)]\n" /* store (dst_stride-w) */
"ldr r3, [sp, #(10*4+4)]\n" /* load h */
"sub TMP1, TMP1, W\n"
"str TMP1, [sp, #(10*4+4)]\n" /* store (src_stride-w) */
/* the row width is parked in the slot of the pushed ip and reloaded per row */
"str W, [sp, #(8*4)]\n" /* saved ip = W */
/* leave immediately when there are no rows to process */
"0:\n"
"subs H, H, #1\n"
"blt 6f\n"
"1:\n"
/* align DST at 4 byte boundary */
"tst DST, #2\n"
"beq 2f\n"
"ldr TMP1, [SRC], #4\n"
"sub W, W, #1\n"
"cvt8888to565 TMP1\n"
"strh TMP1, [DST], #2\n"
"2:"
/* align DST at 8 byte boundary */
"tst DST, #4\n"
"beq 2f\n"
"ldmia SRC!, {TMP1, TMP2}\n"
"sub W, W, #2\n"
"cvt8888to565 TMP1\n"
"cvt8888to565 TMP2\n"
"combine_pixels_pair TMP1, TMP2\n"
"str TMP1, [DST], #4\n"
"2:"
/* align DST at 16 byte boundary */
"tst DST, #8\n"
"beq 2f\n"
"ldmia SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
"sub W, W, #4\n"
"cvt8888to565 TMP1\n"
"cvt8888to565 TMP2\n"
"cvt8888to565 TMP3\n"
"cvt8888to565 TMP4\n"
"combine_pixels_pair TMP1, TMP2\n"
"combine_pixels_pair TMP3, TMP4\n"
"stmia DST!, {TMP1, TMP3}\n"
"2:"
/* inner loop, process 8 pixels per iteration */
"subs W, W, #8\n"
"blt 4f\n"
"3:\n"
"ldmia SRC!, {TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8}\n"
"subs W, W, #8\n"
"cvt8888to565 TMP1\n"
"cvt8888to565 TMP2\n"
"cvt8888to565 TMP3\n"
"cvt8888to565 TMP4\n"
"cvt8888to565 TMP5\n"
"cvt8888to565 TMP6\n"
"cvt8888to565 TMP7\n"
"cvt8888to565 TMP8\n"
"combine_pixels_pair TMP1, TMP2\n"
"combine_pixels_pair TMP3, TMP4\n"
"combine_pixels_pair TMP5, TMP6\n"
"combine_pixels_pair TMP7, TMP8\n"
"stmia DST!, {TMP1, TMP3, TMP5, TMP7}\n"
"bge 3b\n"
"4:\n"
/* process the remaining pixels */
/*
 * W is negative here (between -8 and -1), but its low three bits still
 * equal the number of pixels left, so bits 4, 2 and 1 select the tails.
 */
"tst W, #4\n"
"beq 4f\n"
"ldmia SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
"cvt8888to565 TMP1\n"
"cvt8888to565 TMP2\n"
"cvt8888to565 TMP3\n"
"cvt8888to565 TMP4\n"
"combine_pixels_pair TMP1, TMP2\n"
"combine_pixels_pair TMP3, TMP4\n"
"stmia DST!, {TMP1, TMP3}\n"
"4:\n"
"tst W, #2\n"
"beq 4f\n"
"ldmia SRC!, {TMP1, TMP2}\n"
"cvt8888to565 TMP1\n"
"cvt8888to565 TMP2\n"
"combine_pixels_pair TMP1, TMP2\n"
"str TMP1, [DST], #4\n"
"4:\n"
"tst W, #1\n"
"beq 4f\n"
"ldr TMP1, [SRC], #4\n"
"cvt8888to565 TMP1\n"
"strh TMP1, [DST], #2\n"
"4:\n"
/*
 * Next row: reload the width, then skip the unused part of each row.
 * The deltas are in pixels, hence the scaling by 2 bytes for DST and
 * 4 bytes for SRC.
 */
"ldr TMP1, [sp, #(10*4+0)]\n" /* (dst_stride-w) */
"ldr TMP2, [sp, #(10*4+4)]\n" /* (src_stride-w) */
"ldr W, [sp, #(8*4)]\n"
"subs H, H, #1\n"
"add DST, DST, TMP1, lsl #1\n"
"add SRC, SRC, TMP2, lsl #2\n"
"bge 1b\n"
"6:\n"
"ldmia sp!, {r4-r11, ip, pc}\n" /* restore all registers and return */
/* emit the literal pool used by the ldr = pseudo-instruction above */
".ltorg\n"
/* drop the aliases and macros again so they do not leak into later asm */
".unreq DST\n"
".unreq SRC\n"
".unreq W\n"
".unreq H\n"
".unreq TMP1\n"
".unreq TMP2\n"
".unreq TMP3\n"
".unreq TMP4\n"
".unreq TMP5\n"
".unreq TMP6\n"
".unreq TMP7\n"
".unreq TMP8\n"
".unreq C1F001F\n"
".unreq A\n"
".purgem cvt8888to565\n"
".purgem combine_pixels_pair\n"
);
}
/*
 * Composite entry point that hands a rectangle of 32-bit source pixels to
 * one of the two x8r8g8b8 -> r5g6b5 converters, writing 16-bit pixels to
 * the destination image.
 *
 * Only src_image, dst_image, src_x, src_y, dest_x, dest_y, width and
 * height are used in the visible body; impl, op, mask_image, mask_x and
 * mask_y are not referenced.
 *
 * NOTE(review): the trailing return statement and the doubled closing
 * braces at the end of this span look like residue from the removed
 * column of a side-by-side diff. They are kept exactly as found.
 */
void
arm_composite_src_8888_0565 (pixman_implementation_t * impl,
pixman_op_t op,
pixman_image_t * src_image,
pixman_image_t * mask_image,
pixman_image_t * dst_image,
int32_t src_x,
int32_t src_y,
int32_t mask_x,
int32_t mask_y,
int32_t dest_x,
int32_t dest_y,
int32_t width,
int32_t height)
{
uint16_t *dst_line, *dst;
uint32_t *src_line, *src;
int dst_stride, src_stride;
/*
 * NOTE(review): width and height arrive as int32_t but are stored in
 * 16-bit variables below, so values above 65535 would be truncated -
 * confirm that callers never pass larger sizes.
 */
uint16_t w, h;
/*
 * Presumably these macros set the stride and first-line pointer for the
 * given image and origin - their definition is not visible here, verify.
 */
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
dst = dst_line;
src = src_line;
h = height;
w = width;
/*
 * The naked asm converter is documented as needing at least 7 pixels per
 * row, so narrower rectangles go through the mixed C/asm version.
 */
if (w < 7)
fbComposite_x8r8g8b8_src_r5g6b5_internal_mixed_armv6_c(dst, src, w, dst_stride, src_stride, h);
else
fbComposite_x8r8g8b8_src_r5g6b5_internal_armv6(dst, src, w, dst_stride, src_stride, h);
return imp;
} }