b=482676; add pixman NEON optimizations; r=jeff

This commit is contained in:
Vladimir Vukicevic 2009-04-07 12:12:31 -07:00
Родитель 608319aa16
Коммит 842b17b4d0
9 изменённых файлов: 3570 добавлений и 14 удалений

Просмотреть файл

@ -654,4 +654,5 @@ MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@
MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@
MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@
HAVE_ARM_SIMD= @HAVE_ARM_SIMD@
HAVE_ARM_SIMD = @HAVE_ARM_SIMD@
HAVE_ARM_NEON = @HAVE_ARM_NEON@

Просмотреть файл

@ -3280,7 +3280,7 @@ AC_SUBST(XLIBS)
AC_SUBST(XEXT_LIBS)
AC_SUBST(XT_LIBS)
AC_MSG_CHECKING(for ARM SIMD support)
AC_MSG_CHECKING(for ARM SIMD support in compiler)
AC_TRY_COMPILE([],
[asm("uqadd8 r1, r1, r2");],
result="yes", result="no")
@ -3291,6 +3291,23 @@ if test "$result" = "yes"; then
fi
AC_SUBST(HAVE_ARM_SIMD)
AC_MSG_CHECKING(for ARM NEON support in compiler)
_SAVE_CFLAGS="$CFLAGS"
if test "$GNU_CC"; then
# gcc needs -mfpu=neon to recognize NEON instructions
CFLAGS="$CFLAGS -mfpu=neon"
fi
AC_TRY_COMPILE([],
[asm("vadd.i8 d0, d0, d0");],
result="yes", result="no")
AC_MSG_RESULT("$result")
if test "$result" = "yes"; then
AC_DEFINE(HAVE_ARM_NEON)
HAVE_ARM_NEON=1
fi
CFLAGS="$_SAVE_CFLAGS"
AC_SUBST(HAVE_ARM_NEON)
dnl ========================================================
dnl = pthread support
dnl = Start by checking whether the system supports pthreads

Просмотреть файл

@ -50,6 +50,8 @@ zero-sized.patch: deal with zero sized surface in ways less likely to crash.
==== pixman patches ====
pixman-neon.patch: add ARM NEON optimized compositing functions
endian.patch: include cairo-platform.h for endian macros
==== disable printing patch ====

Просмотреть файл

@ -90,7 +90,10 @@ endif
endif
ifeq (arm,$(findstring arm,$(OS_TEST)))
ifdef HAVE_ARM_SIMD
USE_ARM_SIMD=1
USE_ARM_SIMD_GCC=1
endif
ifdef HAVE_ARM_NEON
USE_ARM_NEON_GCC=1
endif
endif
@ -134,11 +137,17 @@ CSRCS += pixman-vmx.c
DEFINES += -DUSE_VMX
endif
ifdef USE_ARM_SIMD
ifdef USE_ARM_SIMD_GCC
CSRCS += pixman-arm-simd.c
DEFINES += -DUSE_ARM_SIMD
endif
ifdef USE_ARM_NEON_GCC
CSRCS += pixman-arm-neon.c
DEFINES += -DUSE_ARM_NEON
ARM_NEON_CFLAGS = -mfloat-abi=softfp -mfpu=neon
endif
ifdef USE_ARM_SIMD_MSVC
ASFILES += pixman-arm-detect-win32.asm pixman-wce-arm-simd.asm
DEFINES += -DUSE_ARM_SIMD
@ -157,6 +166,7 @@ include $(topsrcdir)/config/rules.mk
CFLAGS += -DPACKAGE="mozpixman" -D_USE_MATH_DEFINES
# special rule for pixman-mmx to get the right cflags
pixman-mmx.$(OBJ_SUFFIX): pixman-mmx.c Makefile Makefile.in
$(REPORT_BUILD)
@ -167,3 +177,8 @@ pixman-sse2.$(OBJ_SUFFIX): pixman-sse2.c Makefile Makefile.in
$(REPORT_BUILD)
@$(MAKE_DEPS_AUTO_CC)
$(ELOG) $(CC) $(OUTOPTION)$@ -c $(COMPILE_CFLAGS) $(MMX_CFLAGS) $(_VPATH_SRCS)
pixman-arm-neon.$(OBJ_SUFFIX): pixman-arm-neon.c Makefile Makefile.in
$(REPORT_BUILD)
@$(MAKE_DEPS_AUTO_CC)
$(ELOG) $(CC) $(OUTOPTION)$@ -c $(COMPILE_CFLAGS) $(ARM_NEON_CFLAGS) $(_VPATH_SRCS)

Просмотреть файл

@ -20,10 +20,22 @@ $FuncName
$PrologName
MEND
export pixman_msvc_try_armv6_op
export pixman_msvc_try_arm_simd_op
FUNC_HEADER pixman_msvc_try_armv6_op
uqadd8 r0,r0,r1
FUNC_HEADER pixman_msvc_try_arm_simd_op
;; I don't think the msvc arm asm knows how to do SIMD insns
;; uqadd8 r3,r3,r3
DCD 0xe6633f93
mov pc,lr
ENTRY_END
endp
export pixman_msvc_try_arm_neon_op
FUNC_HEADER pixman_msvc_try_arm_neon_op
;; I don't think the msvc arm asm knows how to do NEON insns
;; veor d0,d0,d0
DCD 0xf3000110
mov pc,lr
ENTRY_END
endp

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,137 @@
/*
* Copyright © 2009 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Ian Rickards (ian.rickards@arm.com)
*
*/
#include "pixman-private.h"
#ifdef USE_ARM_NEON
pixman_bool_t pixman_have_arm_neon(void);
#else
#define pixman_have_arm_neon() FALSE
#endif
#ifdef USE_ARM_NEON
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for the entry
 * { PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8 }.
 * The parameter list is shared by every NEON fast path declared in this
 * header.
 */
void
fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for PIXMAN_OP_OVER with a PIXMAN_null mask, for the
 * source/destination pairs a8r8g8b8 -> a8r8g8b8, a8r8g8b8 -> x8r8g8b8,
 * a8b8g8r8 -> a8b8g8r8 and a8b8g8r8 -> x8b8g8r8.
 */
void
fbCompositeSrc_8888x8888neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for PIXMAN_OP_OVER with a PIXMAN_a8r8g8b8 source, a
 * PIXMAN_a8 mask and a PIXMAN_a8r8g8b8 or PIXMAN_x8r8g8b8 destination; both
 * entries carry the NEED_SOLID_MASK flag.
 */
void
fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for PIXMAN_OP_OVER with a PIXMAN_solid source, a
 * PIXMAN_a8 mask and a PIXMAN_r5g6b5 or PIXMAN_b5g6r5 destination.
 */
void
fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for PIXMAN_OP_OVER with a PIXMAN_solid source, a
 * PIXMAN_a8 mask and a PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8 or
 * PIXMAN_x8b8g8r8 destination.
 */
void
fbCompositeSolidMask_nx8x8888neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for PIXMAN_OP_SRC with a PIXMAN_null mask, for
 * a8r8g8b8/x8r8g8b8 sources with a PIXMAN_r5g6b5 destination and for
 * a8b8g8r8/x8b8g8r8 sources with a PIXMAN_b5g6r5 destination.
 */
void
fbCompositeSrc_x888x0565neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
/*
 * NEON compositing routine.  In pixman.c it is registered in
 * arm_neon_fast_paths for the entry
 * { PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8 }.
 */
void
fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
pixman_image_t * pSrc,
pixman_image_t * pMask,
pixman_image_t * pDst,
int16_t xSrc,
int16_t ySrc,
int16_t xMask,
int16_t yMask,
int16_t xDst,
int16_t yDst,
uint16_t width,
uint16_t height);
#endif /* USE_ARM_NEON */

Просмотреть файл

@ -34,6 +34,7 @@
#include "pixman-mmx.h"
#include "pixman-vmx.h"
#include "pixman-sse2.h"
#include "pixman-arm-neon.h"
#include "pixman-arm-simd.h"
#include "pixman-combine32.h"
@ -1610,6 +1611,31 @@ static const FastPathInfo vmx_fast_paths[] =
};
#endif
#ifdef USE_ARM_NEON
/*
 * Compositing fast paths implemented with ARM NEON.
 *
 * pixman_image_composite() passes this table to get_fast_path() only when no
 * earlier table produced a match and pixman_have_arm_neon() returns true.
 *
 * NOTE(review): each row appears to be { operator, source format, mask
 * format, destination format, compositing function, flags }, and the final
 * { PIXMAN_OP_NONE } row presumably terminates the search -- confirm against
 * the FastPathInfo definition and get_fast_path().
 */
static const FastPathInfo arm_neon_fast_paths[] =
{
{ PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8neon, 0 },
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000neon, 0 },
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
/* The two masked 8888 rows are the only ones flagged NEED_SOLID_MASK. */
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
{ PIXMAN_OP_NONE },
};
#endif
#ifdef USE_ARM_SIMD
static const FastPathInfo arm_simd_fast_paths[] =
{
@ -2005,6 +2031,11 @@ pixman_image_composite (pixman_op_t op,
info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
#ifdef USE_ARM_NEON
if (!info && pixman_have_arm_neon())
info = get_fast_path (arm_neon_fast_paths, op, pSrc, pMask, pDst, pixbuf);
#endif
#ifdef USE_ARM_SIMD
if (!info && pixman_have_arm_simd())
info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf);
@ -2182,17 +2213,22 @@ pixman_bool_t pixman_have_vmx (void) {
#endif /* __APPLE__ */
#endif /* USE_VMX */
#ifdef USE_ARM_SIMD
#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON)
#if defined(_MSC_VER)
extern int pixman_msvc_try_arm_simd_op();
extern int pixman_msvc_try_arm_neon_op();
pixman_bool_t
pixman_have_arm_simd (void)
{
#ifdef _MSC_VER
static pixman_bool_t initialized = FALSE;
static pixman_bool_t have_arm_simd = FALSE;
if (!initialized) {
__try {
pixman_msvc_try_armv6_op();
pixman_msvc_try_arm_simd_op();
have_arm_simd = TRUE;
} __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
have_arm_simd = FALSE;
@ -2201,11 +2237,107 @@ pixman_have_arm_simd (void)
}
return have_arm_simd;
#else
return TRUE;
#endif
}
#endif
/*
 * MSVC build: report whether the CPU can execute NEON instructions.
 *
 * The first call runs the hand-encoded NEON probe routine inside a
 * structured-exception guard; an illegal-instruction exception means NEON is
 * unavailable.  The outcome is cached in function-local statics, so later
 * calls return the stored answer without probing again.
 */
pixman_bool_t
pixman_have_arm_neon (void)
{
    static pixman_bool_t probed = FALSE;
    static pixman_bool_t neon_ok = FALSE;

    if (probed)
        return neon_ok;

    __try
    {
        pixman_msvc_try_arm_neon_op();
        /* Only reached when the probe instruction did not fault. */
        neon_ok = TRUE;
    }
    __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
    {
        neon_ok = FALSE;
    }

    probed = TRUE;
    return neon_ok;
}
#else /* linux ELF */
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <string.h>
#include <elf.h>
/*
 * Cached CPU-feature flags for the Linux/ELF detection path.  They are all
 * filled in by pixman_arm_read_auxv(), which the pixman_have_arm_*()
 * accessors invoke while arm_tests_initialized is still FALSE.
 */
static pixman_bool_t arm_has_v7 = FALSE; /* AT_PLATFORM string starts with "v7l" */
static pixman_bool_t arm_has_v6 = FALSE; /* AT_PLATFORM string starts with "v6l" or "v7l" */
static pixman_bool_t arm_has_vfp = FALSE; /* AT_HWCAP bit 64 */
static pixman_bool_t arm_has_neon = FALSE; /* AT_HWCAP bit 4096, or forced on for v7 unless ARM_TRUST_HWCAP is set */
static pixman_bool_t arm_has_iwmmxt = FALSE; /* AT_HWCAP bit 512 */
static pixman_bool_t arm_tests_initialized = FALSE; /* set once pixman_arm_read_auxv() has run */
/*
 * Detect ARM CPU features for the current process by parsing the ELF
 * auxiliary vector in /proc/self/auxv, and store the results in the
 * file-scope arm_has_* flags.
 *
 * Environment overrides, honoured only while the matching auxv entry is
 * being processed:
 *   ARM_FORCE_HWCAP    - replaces the AT_HWCAP bitmask (parsed by strtoul,
 *                        base auto-detected).
 *   ARM_FORCE_PLATFORM - replaces the AT_PLATFORM string.
 *   ARM_TRUST_HWCAP    - when set, NEON is reported only if the hwcap bit
 *                        says so; when unset, any v7 platform is assumed to
 *                        have NEON.
 *
 * arm_tests_initialized is set on every path, including when the auxv file
 * cannot be opened, so the probe runs at most once per process.
 */
static void
pixman_arm_read_auxv (void)
{
    /*
     * Hard-coded to avoid depending on specific versions of the hwcap
     * header, e.g. HWCAP_NEON.
     */
    enum {
        PIXMAN_ARM_HWCAP_VFP    = 64,
        PIXMAN_ARM_HWCAP_IWMMXT = 512,
        PIXMAN_ARM_HWCAP_NEON   = 4096 /* only present on kernel 2.6.29 */
    };
    int fd;
    Elf32_auxv_t aux;

    fd = open("/proc/self/auxv", O_RDONLY);
    /*
     * open() signals failure with -1; descriptor 0 is a legitimate result
     * when stdin has been closed, so it must not be treated as an error.
     */
    if (fd >= 0) {
        while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) {
            if (aux.a_type == AT_HWCAP) {
                const char *forced_hwcap = getenv("ARM_FORCE_HWCAP");
                uint32_t hwcap = aux.a_un.a_val;

                if (forced_hwcap)
                    hwcap = strtoul(forced_hwcap, NULL, 0);

                arm_has_vfp = (hwcap & PIXMAN_ARM_HWCAP_VFP) != 0;
                arm_has_iwmmxt = (hwcap & PIXMAN_ARM_HWCAP_IWMMXT) != 0;
                arm_has_neon = (hwcap & PIXMAN_ARM_HWCAP_NEON) != 0;
            } else if (aux.a_type == AT_PLATFORM) {
                const char *forced_plat = getenv("ARM_FORCE_PLATFORM");
                /* For AT_PLATFORM the value is the address of a C string. */
                const char *plat = (const char*) aux.a_un.a_val;

                if (forced_plat)
                    plat = forced_plat;

                /* Guard against a null platform pointer before comparing. */
                if (plat != NULL) {
                    if (strncmp(plat, "v7l", 3) == 0) {
                        arm_has_v7 = TRUE;
                        arm_has_v6 = TRUE;
                    } else if (strncmp(plat, "v6l", 3) == 0) {
                        arm_has_v6 = TRUE;
                    }
                }
            }
        }
        close (fd);

        /*
         * if we don't have 2.6.29, we have to do this hack; set
         * the env var to trust HWCAP.
         */
        if (!getenv("ARM_TRUST_HWCAP") && arm_has_v7)
            arm_has_neon = TRUE;
    }

    arm_tests_initialized = TRUE;
}
/*
 * Linux build: report whether the ARMv6 SIMD fast paths may be used.
 * Triggers the one-time auxv probe if it has not run yet, then returns the
 * cached arm_has_v6 flag.
 */
pixman_bool_t
pixman_have_arm_simd (void)
{
    if (arm_tests_initialized)
        return arm_has_v6;

    pixman_arm_read_auxv();
    return arm_has_v6;
}
/*
 * Linux build: report whether the NEON fast paths may be used.
 * Triggers the one-time auxv probe if it has not run yet, then returns the
 * cached arm_has_neon flag.
 */
pixman_bool_t
pixman_have_arm_neon (void)
{
    if (arm_tests_initialized)
        return arm_has_neon;

    pixman_arm_read_auxv();
    return arm_has_neon;
}
#endif /* linux */
#endif /* USE_ARM_SIMD || USE_ARM_NEON */
#ifdef USE_MMX
/* The CPU detection code needs to be in a file not compiled with

1853
gfx/cairo/pixman-neon.patch Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу