зеркало из https://github.com/mozilla/pjs.git
b=482676; add pixman NEON optimizations; r=jeff
This commit is contained in:
Родитель
608319aa16
Коммит
842b17b4d0
|
@ -654,4 +654,5 @@ MOZ_EMBEDDING_LEVEL_DEFAULT = @MOZ_EMBEDDING_LEVEL_DEFAULT@
|
|||
MOZ_EMBEDDING_LEVEL_BASIC = @MOZ_EMBEDDING_LEVEL_BASIC@
|
||||
MOZ_EMBEDDING_LEVEL_MINIMAL = @MOZ_EMBEDDING_LEVEL_MINIMAL@
|
||||
|
||||
HAVE_ARM_SIMD= @HAVE_ARM_SIMD@
|
||||
HAVE_ARM_SIMD = @HAVE_ARM_SIMD@
|
||||
HAVE_ARM_NEON = @HAVE_ARM_NEON@
|
||||
|
|
19
configure.in
19
configure.in
|
@ -3280,7 +3280,7 @@ AC_SUBST(XLIBS)
|
|||
AC_SUBST(XEXT_LIBS)
|
||||
AC_SUBST(XT_LIBS)
|
||||
|
||||
AC_MSG_CHECKING(for ARM SIMD support)
|
||||
AC_MSG_CHECKING(for ARM SIMD support in compiler)
|
||||
AC_TRY_COMPILE([],
|
||||
[asm("uqadd8 r1, r1, r2");],
|
||||
result="yes", result="no")
|
||||
|
@ -3291,6 +3291,23 @@ if test "$result" = "yes"; then
|
|||
fi
|
||||
AC_SUBST(HAVE_ARM_SIMD)
|
||||
|
||||
AC_MSG_CHECKING(for ARM NEON support in compiler)
|
||||
_SAVE_CFLAGS="$CFLAGS"
|
||||
if test "$GNU_CC"; then
|
||||
# gcc needs -mfpu=neon to recognize NEON instructions
|
||||
CFLAGS="$CFLAGS -mfpu=neon"
|
||||
fi
|
||||
AC_TRY_COMPILE([],
|
||||
[asm("vadd.i8 d0, d0, d0");],
|
||||
result="yes", result="no")
|
||||
AC_MSG_RESULT("$result")
|
||||
if test "$result" = "yes"; then
|
||||
AC_DEFINE(HAVE_ARM_NEON)
|
||||
HAVE_ARM_NEON=1
|
||||
fi
|
||||
CFLAGS="$_SAVE_CFLAGS"
|
||||
AC_SUBST(HAVE_ARM_NEON)
|
||||
|
||||
dnl ========================================================
|
||||
dnl = pthread support
|
||||
dnl = Start by checking whether the system supports pthreads
|
||||
|
|
|
@ -50,6 +50,8 @@ zero-sized.patch: deal with zero sized surface in ways less likely to crash.
|
|||
|
||||
==== pixman patches ====
|
||||
|
||||
pixman-neon.patch: add ARM NEON optimized compositing functions
|
||||
|
||||
endian.patch: include cairo-platform.h for endian macros
|
||||
|
||||
==== disable printing patch ====
|
||||
|
|
|
@ -90,7 +90,10 @@ endif
|
|||
endif
|
||||
ifeq (arm,$(findstring arm,$(OS_TEST)))
|
||||
ifdef HAVE_ARM_SIMD
|
||||
USE_ARM_SIMD=1
|
||||
USE_ARM_SIMD_GCC=1
|
||||
endif
|
||||
ifdef HAVE_ARM_NEON
|
||||
USE_ARM_NEON_GCC=1
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -134,11 +137,17 @@ CSRCS += pixman-vmx.c
|
|||
DEFINES += -DUSE_VMX
|
||||
endif
|
||||
|
||||
ifdef USE_ARM_SIMD
|
||||
ifdef USE_ARM_SIMD_GCC
|
||||
CSRCS += pixman-arm-simd.c
|
||||
DEFINES += -DUSE_ARM_SIMD
|
||||
endif
|
||||
|
||||
ifdef USE_ARM_NEON_GCC
|
||||
CSRCS += pixman-arm-neon.c
|
||||
DEFINES += -DUSE_ARM_NEON
|
||||
ARM_NEON_CFLAGS = -mfloat-abi=softfp -mfpu=neon
|
||||
endif
|
||||
|
||||
ifdef USE_ARM_SIMD_MSVC
|
||||
ASFILES += pixman-arm-detect-win32.asm pixman-wce-arm-simd.asm
|
||||
DEFINES += -DUSE_ARM_SIMD
|
||||
|
@ -157,6 +166,7 @@ include $(topsrcdir)/config/rules.mk
|
|||
|
||||
CFLAGS += -DPACKAGE="mozpixman" -D_USE_MATH_DEFINES
|
||||
|
||||
|
||||
# special rule for pixman-mmx to get the right cflags
|
||||
pixman-mmx.$(OBJ_SUFFIX): pixman-mmx.c Makefile Makefile.in
|
||||
$(REPORT_BUILD)
|
||||
|
@ -167,3 +177,8 @@ pixman-sse2.$(OBJ_SUFFIX): pixman-sse2.c Makefile Makefile.in
|
|||
$(REPORT_BUILD)
|
||||
@$(MAKE_DEPS_AUTO_CC)
|
||||
$(ELOG) $(CC) $(OUTOPTION)$@ -c $(COMPILE_CFLAGS) $(MMX_CFLAGS) $(_VPATH_SRCS)
|
||||
|
||||
pixman-arm-neon.$(OBJ_SUFFIX): pixman-arm-neon.c Makefile Makefile.in
|
||||
$(REPORT_BUILD)
|
||||
@$(MAKE_DEPS_AUTO_CC)
|
||||
$(ELOG) $(CC) $(OUTOPTION)$@ -c $(COMPILE_CFLAGS) $(ARM_NEON_CFLAGS) $(_VPATH_SRCS)
|
||||
|
|
|
@ -20,10 +20,22 @@ $FuncName
|
|||
$PrologName
|
||||
MEND
|
||||
|
||||
export pixman_msvc_try_armv6_op
|
||||
export pixman_msvc_try_arm_simd_op
|
||||
|
||||
FUNC_HEADER pixman_msvc_try_armv6_op
|
||||
uqadd8 r0,r0,r1
|
||||
FUNC_HEADER pixman_msvc_try_arm_simd_op
|
||||
;; I don't think the msvc arm asm knows how to do SIMD insns
|
||||
;; uqadd8 r3,r3,r3
|
||||
DCD 0xe6633f93
|
||||
mov pc,lr
|
||||
ENTRY_END
|
||||
endp
|
||||
|
||||
export pixman_msvc_try_arm_neon_op
|
||||
|
||||
FUNC_HEADER pixman_msvc_try_arm_neon_op
|
||||
;; I don't think the msvc arm asm knows how to do NEON insns
|
||||
;; veor d0,d0,d0
|
||||
DCD 0xf3000110
|
||||
mov pc,lr
|
||||
ENTRY_END
|
||||
endp
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,137 @@
|
|||
/*
|
||||
* Copyright © 2009 Mozilla Corporation
|
||||
*
|
||||
* Permission to use, copy, modify, distribute, and sell this software and its
|
||||
* documentation for any purpose is hereby granted without fee, provided that
|
||||
* the above copyright notice appear in all copies and that both that
|
||||
* copyright notice and this permission notice appear in supporting
|
||||
* documentation, and that the name of Mozilla Corporation not be used in
|
||||
* advertising or publicity pertaining to distribution of the software without
|
||||
* specific, written prior permission. Mozilla Corporation makes no
|
||||
* representations about the suitability of this software for any purpose. It
|
||||
* is provided "as is" without express or implied warranty.
|
||||
*
|
||||
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
|
||||
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
||||
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Author: Ian Rickards (ian.rickards@arm.com)
|
||||
*
|
||||
*/
|
||||
|
||||
#include "pixman-private.h"
|
||||
|
||||
#ifdef USE_ARM_NEON
|
||||
|
||||
pixman_bool_t pixman_have_arm_neon(void);
|
||||
|
||||
#else
|
||||
#define pixman_have_arm_neon() FALSE
|
||||
#endif
|
||||
|
||||
#ifdef USE_ARM_NEON
|
||||
|
||||
void
|
||||
fbCompositeSrcAdd_8000x8000neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSrc_8888x8888neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSrc_8888x8x8888neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSolidMask_nx8x0565neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSolidMask_nx8x8888neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSrc_x888x0565neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
void
|
||||
fbCompositeSrcAdd_8888x8x8neon (pixman_op_t op,
|
||||
pixman_image_t * pSrc,
|
||||
pixman_image_t * pMask,
|
||||
pixman_image_t * pDst,
|
||||
int16_t xSrc,
|
||||
int16_t ySrc,
|
||||
int16_t xMask,
|
||||
int16_t yMask,
|
||||
int16_t xDst,
|
||||
int16_t yDst,
|
||||
uint16_t width,
|
||||
uint16_t height);
|
||||
|
||||
#endif /* USE_ARM_NEON */
|
|
@ -34,6 +34,7 @@
|
|||
#include "pixman-mmx.h"
|
||||
#include "pixman-vmx.h"
|
||||
#include "pixman-sse2.h"
|
||||
#include "pixman-arm-neon.h"
|
||||
#include "pixman-arm-simd.h"
|
||||
#include "pixman-combine32.h"
|
||||
|
||||
|
@ -1610,6 +1611,31 @@ static const FastPathInfo vmx_fast_paths[] =
|
|||
};
|
||||
#endif
|
||||
|
||||
#ifdef USE_ARM_NEON
|
||||
/*
 * Fast-path table for the ARM NEON compositing routines.
 *
 * pixman_image_composite() passes this table to get_fast_path() only when
 * pixman_have_arm_neon() reports support, and only if no earlier table
 * produced a match.
 *
 * NOTE(review): each row appears to be { operator, source format, mask
 * format, destination format, compositing function, flags } -- confirm
 * against the FastPathInfo definition. The final { PIXMAN_OP_NONE } row is
 * presumably the end-of-table sentinel.
 */
static const FastPathInfo arm_neon_fast_paths[] =
|
||||
{
|
||||
{ PIXMAN_OP_ADD, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeSrcAdd_8888x8x8neon, 0 },
|
||||
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000neon, 0 },
|
||||
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
|
||||
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeSrc_x888x0565neon, 0 },
|
||||
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
|
||||
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeSrc_x888x0565neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeSrc_8888x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8x8888neon, NEED_SOLID_MASK },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_r5g6b5, fbCompositeSolidMask_nx8x0565neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_b5g6r5, fbCompositeSolidMask_nx8x0565neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8r8g8b8, fbCompositeSolidMask_nx8x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
|
||||
{ PIXMAN_OP_OVER, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMask_nx8x8888neon, 0 },
|
||||
{ PIXMAN_OP_NONE },
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef USE_ARM_SIMD
|
||||
static const FastPathInfo arm_simd_fast_paths[] =
|
||||
{
|
||||
|
@ -2005,6 +2031,11 @@ pixman_image_composite (pixman_op_t op,
|
|||
info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
|
||||
#endif
|
||||
|
||||
#ifdef USE_ARM_NEON
|
||||
if (!info && pixman_have_arm_neon())
|
||||
info = get_fast_path (arm_neon_fast_paths, op, pSrc, pMask, pDst, pixbuf);
|
||||
#endif
|
||||
|
||||
#ifdef USE_ARM_SIMD
|
||||
if (!info && pixman_have_arm_simd())
|
||||
info = get_fast_path (arm_simd_fast_paths, op, pSrc, pMask, pDst, pixbuf);
|
||||
|
@ -2182,17 +2213,22 @@ pixman_bool_t pixman_have_vmx (void) {
|
|||
#endif /* __APPLE__ */
|
||||
#endif /* USE_VMX */
|
||||
|
||||
#ifdef USE_ARM_SIMD
|
||||
#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON)
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
extern int pixman_msvc_try_arm_simd_op();
|
||||
extern int pixman_msvc_try_arm_neon_op();
|
||||
|
||||
pixman_bool_t
|
||||
pixman_have_arm_simd (void)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
static pixman_bool_t initialized = FALSE;
|
||||
static pixman_bool_t have_arm_simd = FALSE;
|
||||
|
||||
if (!initialized) {
|
||||
__try {
|
||||
pixman_msvc_try_armv6_op();
|
||||
pixman_msvc_try_arm_simd_op();
|
||||
have_arm_simd = TRUE;
|
||||
} __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
|
||||
have_arm_simd = FALSE;
|
||||
|
@ -2201,11 +2237,107 @@ pixman_have_arm_simd (void)
|
|||
}
|
||||
|
||||
return have_arm_simd;
|
||||
#else
|
||||
return TRUE;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
pixman_bool_t
|
||||
pixman_have_arm_neon (void)
|
||||
{
|
||||
static pixman_bool_t initialized = FALSE;
|
||||
static pixman_bool_t have_arm_neon = FALSE;
|
||||
|
||||
if (!initialized) {
|
||||
__try {
|
||||
pixman_msvc_try_arm_neon_op();
|
||||
have_arm_neon = TRUE;
|
||||
} __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
|
||||
have_arm_neon = FALSE;
|
||||
}
|
||||
initialized = TRUE;
|
||||
}
|
||||
|
||||
return have_arm_neon;
|
||||
}
|
||||
|
||||
#else /* linux ELF */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <elf.h>
|
||||
|
||||
static pixman_bool_t arm_has_v7 = FALSE;
|
||||
static pixman_bool_t arm_has_v6 = FALSE;
|
||||
static pixman_bool_t arm_has_vfp = FALSE;
|
||||
static pixman_bool_t arm_has_neon = FALSE;
|
||||
static pixman_bool_t arm_has_iwmmxt = FALSE;
|
||||
static pixman_bool_t arm_tests_initialized = FALSE;
|
||||
|
||||
static void
|
||||
pixman_arm_read_auxv() {
|
||||
int fd;
|
||||
Elf32_auxv_t aux;
|
||||
|
||||
fd = open("/proc/self/auxv", O_RDONLY);
|
||||
if (fd > 0) {
|
||||
while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) {
|
||||
if (aux.a_type == AT_HWCAP) {
|
||||
uint32_t hwcap = aux.a_un.a_val;
|
||||
if (getenv("ARM_FORCE_HWCAP"))
|
||||
hwcap = strtoul(getenv("ARM_FORCE_HWCAP"), NULL, 0);
|
||||
// hardcode these values to avoid depending on specific versions
|
||||
// of the hwcap header, e.g. HWCAP_NEON
|
||||
arm_has_vfp = (hwcap & 64) != 0;
|
||||
arm_has_iwmmxt = (hwcap & 512) != 0;
|
||||
// this flag is only present on kernel 2.6.29
|
||||
arm_has_neon = (hwcap & 4096) != 0;
|
||||
} else if (aux.a_type == AT_PLATFORM) {
|
||||
const char *plat = (const char*) aux.a_un.a_val;
|
||||
if (getenv("ARM_FORCE_PLATFORM"))
|
||||
plat = getenv("ARM_FORCE_PLATFORM");
|
||||
if (strncmp(plat, "v7l", 3) == 0) {
|
||||
arm_has_v7 = TRUE;
|
||||
arm_has_v6 = TRUE;
|
||||
} else if (strncmp(plat, "v6l", 3) == 0) {
|
||||
arm_has_v6 = TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
close (fd);
|
||||
|
||||
// if we don't have 2.6.29, we have to do this hack; set
|
||||
// the env var to trust HWCAP.
|
||||
if (!getenv("ARM_TRUST_HWCAP") && arm_has_v7)
|
||||
arm_has_neon = TRUE;
|
||||
}
|
||||
|
||||
arm_tests_initialized = TRUE;
|
||||
}
|
||||
|
||||
pixman_bool_t
|
||||
pixman_have_arm_simd (void)
|
||||
{
|
||||
if (!arm_tests_initialized)
|
||||
pixman_arm_read_auxv();
|
||||
|
||||
return arm_has_v6;
|
||||
}
|
||||
|
||||
pixman_bool_t
|
||||
pixman_have_arm_neon (void)
|
||||
{
|
||||
if (!arm_tests_initialized)
|
||||
pixman_arm_read_auxv();
|
||||
|
||||
return arm_has_neon;
|
||||
}
|
||||
|
||||
#endif /* linux */
|
||||
|
||||
#endif /* USE_ARM_SIMD || USE_ARM_NEON */
|
||||
|
||||
#ifdef USE_MMX
|
||||
/* The CPU detection code needs to be in a file not compiled with
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче