Bug 530550. pixman: Fix arm gcc build.

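pixman-arm-simd-asm.c had been checked in as a verbatim copy of pixman-arm-simd.c instead of with its intended contents. This patch replaces it with the real file: the composite functions become non-static, the duplicated fast-path table and implementation constructor are removed, and the ARMv6 x8r8g8b8 -> r5g6b5 conversion routines are added.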
2010-01-21 11:30:07 -05:00 · 2010-01-21 11:30:07 -05:00 · 2a8a87ae64
--- a/gfx/cairo/libpixman/src/pixman-arm-simd-asm.c
+++ b/gfx/cairo/libpixman/src/pixman-arm-simd-asm.c
@ -28,8 +28,9 @@
 #endif
 #include "pixman-private.h"
 #include "pixman-arm-simd-asm.h"
-static void
+void
 arm_composite_add_8000_8000 (pixman_implementation_t * impl,
 			     pixman_op_t               op,
 			     pixman_image_t *          src_image,
@ -101,7 +102,7 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
 }
-static void
+void
 arm_composite_over_8888_8888 (pixman_implementation_t * impl,
 			      pixman_op_t               op,
 			      pixman_image_t *          src_image,
@ -194,7 +195,7 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
    }
 }
-static void
+void
 arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
 				pixman_op_t               op,
 				pixman_image_t *          src_image,
@ -303,7 +304,7 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
    }
 }
-static void
+void
 arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
 			     pixman_op_t               op,
 			     pixman_image_t *          src_image,
@ -419,67 +420,277 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
    }
 }
-static const pixman_fast_path_t arm_simd_fast_path_array[] =
+/**
 * Conversion x8r8g8b8 -> r5g6b5
 *
 * Converts a rectangle of w x h pixels from 32-bit source pixels to 16-bit
 * destination pixels, one row per iteration of the outer loop.  After each
 * row src and dst are advanced by (stride - w) elements, so dst_stride and
 * src_stride are counted in uint16_t / uint32_t elements respectively.
 *
 * Per row: one pixel is converted in C when dst is not 4-byte aligned, the
 * inline-asm loop then converts two pixels per iteration and stores them as
 * one packed 32-bit word, and a last odd pixel (if any) is converted in C.
 *
 * NOTE(review): the fast-path table rows and the arm_simd_composite() /
 * _pixman_implementation_composite() lines that appear inside this hunk
 * carry no '+' marker and do not belong to this function; presumably they
 * are removed lines of the old file whose '-' markers were lost -- confirm
 * against the original patch.
 *
 * TODO: optimize more, eliminate stalls, try to use burst writes (4 words aligned 
 * at 16 byte boundary)
 */
 static inline void fbComposite_x8r8g8b8_src_r5g6b5_internal_mixed_armv6_c(
    uint16_t *dst, uint32_t *src, int w, int dst_stride,
    int src_stride, int h)
 {
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, arm_composite_over_8888_8888    },
+    uint32_t a, x, y, c1F001F = 0x1F001F;
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, arm_composite_over_8888_8888    },
+    int backup_w = w;
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, arm_composite_over_8888_8888    },
+    while (h--)
    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, arm_composite_over_8888_8888    },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888  },
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_solid,    PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888  },
    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       arm_composite_add_8000_8000     },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, arm_composite_over_n_8_8888     },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, arm_composite_over_n_8_8888     },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, arm_composite_over_n_8_8888     },
    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, arm_composite_over_n_8_8888     },
    { PIXMAN_OP_NONE },
 };
 const pixman_fast_path_t *const arm_simd_fast_paths = arm_simd_fast_path_array;
 static void
 arm_simd_composite (pixman_implementation_t *imp,
                    pixman_op_t              op,
                    pixman_image_t *         src,
                    pixman_image_t *         mask,
                    pixman_image_t *         dest,
                    int32_t                  src_x,
                    int32_t                  src_y,
                    int32_t                  mask_x,
                    int32_t                  mask_y,
                    int32_t                  dest_x,
                    int32_t                  dest_y,
                    int32_t                  width,
                    int32_t                  height)
 {
    if (_pixman_run_fast_path (arm_simd_fast_paths, imp,
                               op, src, mask, dest,
                               src_x, src_y,
                               mask_x, mask_y,
                               dest_x, dest_y,
                               width, height))
    {
-	return;
+        w = backup_w;
        /*
         * dst is not on a 4-byte boundary: convert one pixel in C so that
         * the 32-bit stores in the asm loop below hit an aligned address
         * (assuming dst is at least 2-byte aligned).
         *
         * Conversion: (x >> 3) & 0x1F001F keeps the top 5 bits of the two
         * outer 8-bit channels at bits 20..16 and 4..0; x & 0xFC00 keeps the
         * top 6 bits of the middle channel.  a |= a >> 5 copies the upper
         * field down to bits 15..11 and a |= x >> 5 drops the middle field
         * into bits 10..5.  Only the low 16 bits are stored.
         */
        if (w > 0 && (uintptr_t)dst & 2)
        {
            x = *src++;
            a = (x >> 3) & c1F001F;
            x &= 0xFC00;
            a |= a >> 5;
            a |= x >> 5;
            *dst++ = a;
            w--;
        }
        /*
         * Two pixels per iteration, same arithmetic as above; pkhbt packs
         * the two 16-bit results into one word that is written with str.
         * w is decremented by 2 before the loop and once per iteration, and
         * the loop runs while the result is >= 0, so on exit w is -2 when no
         * pixel is left and -1 when exactly one pixel is left.
         *
         * NOTE(review): the statement has no clobber list although it
         * changes the condition flags (subs) and writes memory (str), and
         * c1F001F is declared read-write ("+&r") while the asm only reads
         * it -- confirm whether "cc"/"memory" clobbers are required here.
         */
        asm volatile(
            "subs  %[w], %[w], #2\n"
            "blt   2f\n"
        "1:\n"
            "ldr   %[x], [%[src]], #4\n"
            "ldr   %[y], [%[src]], #4\n"
            "subs  %[w], %[w], #2\n"
            "and   %[a], %[c1F001F], %[x], lsr #3\n"
            "and   %[x], %[x], #0xFC00\n\n"
            "orr   %[a], %[a], %[a], lsr #5\n"
            "orr   %[x], %[a], %[x], lsr #5\n"
            "and   %[a], %[c1F001F], %[y], lsr #3\n"
            "and   %[y], %[y], #0xFC00\n\n"
            "orr   %[a], %[a], %[a], lsr #5\n"
            "orr   %[y], %[a], %[y], lsr #5\n"
            "pkhbt %[x], %[x], %[y], lsl #16\n"
            "str   %[x], [%[dst]], #4\n"
            "bge   1b\n"
        "2:\n"
        : [c1F001F] "+&r" (c1F001F), [src] "+&r" (src), [dst] "+&r" (dst), [a] "=&r" (a), 
          [x] "=&r" (x), [y] "=&r" (y), [w] "+&r" (w)
        );
        /* w is -1 here (bit 0 set) exactly when one trailing pixel remains */
        if (w & 1)
        {
            x = *src++;
            a = (x >> 3) & c1F001F;
            x = x & 0xFC00;
            a |= a >> 5;
            a |= x >> 5;
            *dst++ = a;
        }
        /* both pointers advanced by backup_w pixels; skip the rest of the row */
        src += src_stride - backup_w;
        dst += dst_stride - backup_w;
    }
    _pixman_implementation_composite (imp->delegate, op,
                                      src, mask, dest,
                                      src_x, src_y,
                                      mask_x, mask_y,
                                      dest_x, dest_y,
                                      width, height);
 }
-pixman_implementation_t *
+/**
-_pixman_implementation_create_arm_simd (void)
+ * Conversion x8r8g8b8 -> r5g6b5
 *
 * Hand-written ARMv6 assembly version.  The function is declared naked, so
 * the asm block itself saves the registers on entry and restores them (and
 * returns) on exit.  Each row is processed as: align DST to 4, 8 and 16
 * bytes, convert 8 pixels per iteration in the main loop, then handle the
 * 4 / 2 / 1 leftover pixels.
 *
 * Note: 'w' must be >= 7 -- the three alignment steps may consume up to
 * 1 + 2 + 4 pixels and decrement W without checking it first.
 */
 static void __attribute__((naked)) fbComposite_x8r8g8b8_src_r5g6b5_internal_armv6(
    uint16_t *dst, uint32_t *src, int w, int dst_stride,
    int src_stride, int h)
 {
-    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
+    asm volatile(
-    pixman_implementation_t *imp = _pixman_implementation_create (general);
+        /* define supplementary macros */
        /* cvt8888to565: convert the pixel in PIX in place; uses A as scratch */
        ".macro cvt8888to565 PIX\n"
            "and   A, C1F001F, \\PIX, lsr #3\n"
            "and   \\PIX, \\PIX, #0xFC00\n\n"
            "orr   A, A, A, lsr #5\n"
            "orr   \\PIX, A, \\PIX, lsr #5\n"
        ".endm\n"
-    imp->composite = arm_simd_composite;
+        ".macro combine_pixels_pair PIX1, PIX2\n"
            "pkhbt \\PIX1, \\PIX1, \\PIX2, lsl #16\n" /* Note: assume little endian byte order */
        ".endm\n"
        /* function entry, save all registers (10 words) to stack */
        "stmdb   sp!, {r4-r11, ip, lr}\n"
        /* define some aliases */
        "DST     .req  r0\n"
        "SRC     .req  r1\n"
        "W       .req  r2\n"
        "H       .req  r3\n"
        "TMP1    .req  r4\n"
        "TMP2    .req  r5\n"
        "TMP3    .req  r6\n"
        "TMP4    .req  r7\n"
        "TMP5    .req  r8\n"
        "TMP6    .req  r9\n"
        "TMP7    .req  r10\n"
        "TMP8    .req  r11\n"
        "C1F001F .req  ip\n"
        "A       .req  lr\n"
        /*
         * After the 10-word push, the two stack-passed arguments are at
         * sp+40 (src_stride) and sp+44 (h); r3 still holds dst_stride.
         * Both slots are reused as scratch: sp+40 <- dst_stride - w and
         * sp+44 <- src_stride - w, while r3 is reloaded with h (alias H).
         * The saved-ip slot at sp+32 keeps the row width so that W can be
         * reloaded at the end of every row.
         */
        "ldr     TMP1, [sp, #(10*4+0)]\n" /* load src_stride */
        "ldr     C1F001F, =0x1F001F\n"
        "sub     r3, r3, W\n"
        "str     r3, [sp, #(10*4+0)]\n" /* store (dst_stride-w) */
        "ldr     r3, [sp, #(10*4+4)]\n" /* load h */
        "sub     TMP1, TMP1, W\n"
        "str     TMP1, [sp, #(10*4+4)]\n" /* store (src_stride-w) */
        "str     W, [sp, #(8*4)]\n" /* saved ip = W */
    "0:\n"
        /* H is decremented before each row; leave once it goes negative */
        "subs    H, H, #1\n"
        "blt     6f\n"
    "1:\n"
        /* align DST at 4 byte boundary */
        "tst     DST, #2\n"
        "beq     2f\n"
        "ldr     TMP1, [SRC], #4\n"
        "sub     W, W, #1\n"
        "cvt8888to565 TMP1\n"
        "strh    TMP1, [DST], #2\n"
    /* the "2:" labels have no trailing \n and share a line with the next instruction */
    "2:"
        /* align DST at 8 byte boundary */
        "tst     DST, #4\n"
        "beq     2f\n"
        "ldmia   SRC!, {TMP1, TMP2}\n"
        "sub     W, W, #2\n"
        "cvt8888to565 TMP1\n"
        "cvt8888to565 TMP2\n"
        "combine_pixels_pair TMP1, TMP2\n"
        "str     TMP1, [DST], #4\n"
    "2:"
        /* align DST at 16 byte boundary */
        "tst     DST, #8\n"
        "beq     2f\n"
        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
        "sub     W, W, #4\n"
        "cvt8888to565 TMP1\n"
        "cvt8888to565 TMP2\n"
        "cvt8888to565 TMP3\n"
        "cvt8888to565 TMP4\n"
        "combine_pixels_pair TMP1, TMP2\n"
        "combine_pixels_pair TMP3, TMP4\n"
        "stmia DST!, {TMP1, TMP3}\n"
    "2:"
        /* inner loop, process 8 pixels per iteration */
        /* W is biased by -8 from here on; only its low three bits are tested below */
        "subs    W, W, #8\n"
        "blt     4f\n"
    "3:\n"
        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4, TMP5, TMP6, TMP7, TMP8}\n"
        "subs    W, W, #8\n"
        "cvt8888to565 TMP1\n"
        "cvt8888to565 TMP2\n"
        "cvt8888to565 TMP3\n"
        "cvt8888to565 TMP4\n"
        "cvt8888to565 TMP5\n"
        "cvt8888to565 TMP6\n"
        "cvt8888to565 TMP7\n"
        "cvt8888to565 TMP8\n"
        "combine_pixels_pair TMP1, TMP2\n"
        "combine_pixels_pair TMP3, TMP4\n"
        "combine_pixels_pair TMP5, TMP6\n"
        "combine_pixels_pair TMP7, TMP8\n"
        "stmia   DST!, {TMP1, TMP3, TMP5, TMP7}\n"
        "bge     3b\n"
    "4:\n"
        /* process the remaining pixels */
        "tst     W, #4\n"
        "beq     4f\n"
        "ldmia   SRC!, {TMP1, TMP2, TMP3, TMP4}\n"
        "cvt8888to565 TMP1\n"
        "cvt8888to565 TMP2\n"
        "cvt8888to565 TMP3\n"
        "cvt8888to565 TMP4\n"
        "combine_pixels_pair TMP1, TMP2\n"
        "combine_pixels_pair TMP3, TMP4\n"
        "stmia   DST!, {TMP1, TMP3}\n"
    "4:\n"
        "tst     W, #2\n"
        "beq     4f\n"
        "ldmia   SRC!, {TMP1, TMP2}\n"
        "cvt8888to565 TMP1\n"
        "cvt8888to565 TMP2\n"
        "combine_pixels_pair TMP1, TMP2\n"
        "str     TMP1, [DST], #4\n"
    "4:\n"
        "tst     W, #1\n"
        "beq     4f\n"
        "ldr     TMP1, [SRC], #4\n"
        "cvt8888to565 TMP1\n"
        "strh    TMP1, [DST], #2\n"
    "4:\n"
        /* end of row: reload the per-row constants and step to the next row */
        "ldr     TMP1, [sp, #(10*4+0)]\n" /* (dst_stride-w) */
        "ldr     TMP2, [sp, #(10*4+4)]\n" /* (src_stride-w) */
        "ldr     W, [sp, #(8*4)]\n"
        "subs    H, H, #1\n"
        /* the stored differences are in pixels: scale by 2 bytes (dst) and 4 bytes (src) */
        "add     DST, DST, TMP1, lsl #1\n"
        "add     SRC, SRC, TMP2, lsl #2\n"
        "bge     1b\n"
    "6:\n"
        /*
         * NOTE(review): ip is loaded with the row width stored in its slot
         * above, not with its value on entry -- presumably harmless because
         * ip is a scratch register in the ARM procedure call standard;
         * confirm.
         */
        "ldmia   sp!, {r4-r11, ip, pc}\n" /* restore all registers and return */
        /* emit the literal pool used by the "ldr C1F001F, =0x1F001F" above */
        ".ltorg\n"
        /* drop the aliases and macros again so they do not leak into other asm in this file */
        ".unreq   DST\n"
        ".unreq   SRC\n"
        ".unreq   W\n"
        ".unreq   H\n"
        ".unreq   TMP1\n"
        ".unreq   TMP2\n"
        ".unreq   TMP3\n"
        ".unreq   TMP4\n"
        ".unreq   TMP5\n"
        ".unreq   TMP6\n"
        ".unreq   TMP7\n"
        ".unreq   TMP8\n"
        ".unreq   C1F001F\n"
        ".unreq   A\n"
        ".purgem  cvt8888to565\n"
        ".purgem  combine_pixels_pair\n"
    );
 }
 /**
  * Composite entry point that converts a width x height rectangle of 32-bit
  * source pixels into 16-bit destination pixels by dispatching to one of the
  * two fbComposite_x8r8g8b8_src_r5g6b5_* helpers.
  *
  * impl, op, mask_image, mask_x and mask_y are not used by the visible body.
  * src_x/src_y and dest_x/dest_y are only passed to PIXMAN_IMAGE_GET_LINE.
  *
  * NOTE(review): PIXMAN_IMAGE_GET_LINE is defined elsewhere; presumably it
  * sets *_line to the first pixel of the rectangle and *_stride to the row
  * pitch in units of the given element type -- confirm in pixman-private.h.
  */
 void
 arm_composite_src_8888_0565 (pixman_implementation_t * impl,
 			     pixman_op_t               op,
 			     pixman_image_t *          src_image,
 			     pixman_image_t *          mask_image,
 			     pixman_image_t *          dst_image,
 			     int32_t                   src_x,
 			     int32_t                   src_y,
 			     int32_t                   mask_x,
 			     int32_t                   mask_y,
 			     int32_t                   dest_x,
 			     int32_t                   dest_y,
 			     int32_t                   width,
 			     int32_t                   height)
 {
    uint16_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    /*
     * NOTE(review): width and height are int32_t but are stored in 16-bit
     * unsigned variables below, so values above 65535 (or negative values)
     * would be truncated -- confirm callers never pass such sizes.
     */
    uint16_t w, h;
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    dst = dst_line;
    src = src_line;
    h = height;
    w = width;
    /* the pure-asm helper is documented as requiring w >= 7; narrower rows use the mixed C/asm helper */
    if (w < 7)
        fbComposite_x8r8g8b8_src_r5g6b5_internal_mixed_armv6_c(dst, src, w, dst_stride, src_stride, h);
    else
        fbComposite_x8r8g8b8_src_r5g6b5_internal_armv6(dst, src, w, dst_stride, src_stride, h);
    /*
     * NOTE(review): this function returns void and has no 'imp' in scope
     * (the parameter is 'impl'); the line below is presumably a removed line
     * of the old file whose '-' marker was lost -- confirm against the
     * original patch.
     */
    return imp;
 }