зеркало из https://github.com/mozilla/pjs.git
Bug 693057 - Upgrade libvpx decoder to v0.9.7-p1, r=cpearce,khuey
--HG-- rename : media/libvpx/vp8/common/filter_c.c => media/libvpx/vp8/common/filter.c rename : media/libvpx/vpx_scale/generic/scalesystemdependant.c => media/libvpx/vpx_scale/generic/scalesystemdependent.c
This commit is contained in:
Родитель
f0c60879a9
Коммит
7a9faf2d18
31
configure.in
31
configure.in
|
@ -5356,16 +5356,41 @@ if test -n "$MOZ_WEBM"; then
|
|||
fi
|
||||
CFLAGS="-I${LIBVPX_DIR}/include $CFLAGS"
|
||||
LDFLAGS="-L${LIBVPX_DIR}/lib $LDFLAGS"
|
||||
MOZ_NATIVE_LIBVPX_DEC_TEST=
|
||||
MOZ_CHECK_HEADER(vpx/vpx_decoder.h,
|
||||
[if test ! -f "${LIBVPX_DIR}/include/vpx/vpx_decoder.h"; then
|
||||
AC_MSG_ERROR([vpx/vpx_decoder.h found, but is not in ${LIBVPX_DIR}/include])
|
||||
fi],
|
||||
AC_MSG_ERROR([--with-system-libvpx requested but vpx/vpx_decoder.h not found]))
|
||||
AC_CHECK_LIB(vpx, vpx_codec_dec_init_ver,
|
||||
[MOZ_NATIVE_LIBVPX=1
|
||||
MOZ_LIBVPX_INCLUDES="-I${LIBVPX_DIR}/include"
|
||||
MOZ_LIBVPX_LIBS="-L${LIBVPX_DIR}/lib -lvpx"],
|
||||
[MOZ_NATIVE_LIBVPX_DEC_TEST=1],
|
||||
dnl Abort configure if the system libvpx does not export the decoder entry point.
AC_MSG_ERROR([--with-system-libvpx requested but symbol vpx_codec_dec_init_ver not found]))
|
||||
if test -n "$MOZ_NATIVE_LIBVPX_DEC_TEST" ; then
|
||||
AC_MSG_CHECKING([for libvpx version >= v0.9.7])
|
||||
dnl We need at least v0.9.7 to fix several crash bugs (for which we
|
||||
dnl had local patches prior to v0.9.7).
|
||||
dnl
|
||||
dnl This is a terrible test for the library version, but we don't
|
||||
dnl have a good one. There is no version number in a public header,
|
||||
dnl and testing the headers still doesn't guarantee we link against
|
||||
dnl the right version. While we could call vpx_codec_version() at
|
||||
dnl run-time, that would break cross-compiling. There are no
|
||||
dnl additional exported symbols between the v0.9.7 release and the
|
||||
dnl v0.9.6 one to check for.
|
||||
AC_TRY_COMPILE([
|
||||
#include <vpx/vpx_decoder.h>
|
||||
#if !defined(VPX_CODEC_USE_INPUT_PARTITION)
|
||||
#error "test failed."
|
||||
#endif
|
||||
],
|
||||
[return 0;],
|
||||
[AC_MSG_RESULT([yes])
|
||||
MOZ_NATIVE_LIBVPX=1
|
||||
MOZ_LIBVPX_INCLUDES="-I${LIBVPX_DIR}/include"
|
||||
MOZ_LIBVPX_LIBS="-L${LIBVPX_DIR}/lib -lvpx"],
|
||||
[AC_MSG_RESULT([no])
|
||||
AC_MSG_ERROR([--with-system-libvpx requested but it is not v0.9.7 or later])])
|
||||
fi
|
||||
CFLAGS=$_SAVE_CFLAGS
|
||||
LDFLAGS=$_SAVE_LDFLAGS
|
||||
LIBS=$_SAVE_LIBS
|
||||
|
|
|
@ -130,12 +130,13 @@ CSRCS += \
|
|||
alloccommon.c \
|
||||
blockd.c \
|
||||
debugmodes.c \
|
||||
defaultcoefcounts.c \
|
||||
dsystemdependent.c \
|
||||
entropy.c \
|
||||
entropymode.c \
|
||||
entropymv.c \
|
||||
extend.c \
|
||||
filter_c.c \
|
||||
filter.c \
|
||||
findnearmv.c \
|
||||
idctllm.c \
|
||||
invtrans.c \
|
||||
|
@ -145,7 +146,6 @@ CSRCS += \
|
|||
modecont.c \
|
||||
modecontext.c \
|
||||
postproc.c \
|
||||
predictdc.c \
|
||||
quant_common.c \
|
||||
recon.c \
|
||||
reconinter.c \
|
||||
|
@ -173,7 +173,7 @@ CSRCS += \
|
|||
vpx_mem.c \
|
||||
gen_scalers.c \
|
||||
vpxscale.c \
|
||||
scalesystemdependant.c \
|
||||
scalesystemdependent.c \
|
||||
yv12config.c \
|
||||
yv12extend.c \
|
||||
$(NULL)
|
||||
|
@ -186,6 +186,7 @@ CSRCS += \
|
|||
idct_blk_mmx.c \
|
||||
idct_blk_sse2.c \
|
||||
loopfilter_x86.c \
|
||||
recon_wrapper_sse2.c \
|
||||
vp8_asm_stubs.c \
|
||||
x86_systemdependent.c \
|
||||
x86_dsystemdependent.c \
|
||||
|
@ -284,11 +285,6 @@ VPX_ASFILES = \
|
|||
dequantizeb_neon.asm \
|
||||
$(NULL)
|
||||
|
||||
# The ARM asm needs to extract the offsets of various C struct members.
|
||||
# We need a program that runs on the host to pull them out of a .o file.
|
||||
HOST_CSRCS = obj_int_extract.c
|
||||
HOST_PROGRAM = host_obj_int_extract$(HOST_BIN_SUFFIX)
|
||||
|
||||
ifdef VPX_AS_CONVERSION
|
||||
# The ARM asm is written in ARM RVCT syntax, but we actually build it with
|
||||
# gas using GNU syntax. Add some rules to perform the conversion.
|
||||
|
@ -303,8 +299,6 @@ else
|
|||
ASFILES += $(VPX_ASFILES)
|
||||
endif
|
||||
|
||||
GARBAGE += vpx_asm_offsets.$(OBJ_SUFFIX) vpx_asm_offsets.asm
|
||||
|
||||
endif
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
@ -318,8 +312,3 @@ filter_c.o: filter_c.c Makefile.in
|
|||
$(CC) -o $@ -c $(patsubst -xO[45],-xO3,$(COMPILE_CFLAGS)) $<
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef VPX_ARM_ASM
|
||||
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
|
||||
./$(HOST_PROGRAM) rvds $< $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
|
||||
endif
|
||||
|
|
|
@ -21,8 +21,14 @@ print "@ This file was created from a .asm file\n";
|
|||
print "@ using the ads2gas.pl script.\n";
|
||||
print "\t.equ DO1STROUNDING, 0\n";
|
||||
|
||||
# Stack of procedure names.
|
||||
@proc_stack = ();
|
||||
|
||||
while (<STDIN>)
|
||||
{
|
||||
# Load and store alignment
|
||||
s/@/,:/g;
|
||||
|
||||
# Comment character
|
||||
s/;/@/g;
|
||||
|
||||
|
@ -117,8 +123,8 @@ while (<STDIN>)
|
|||
# put the colon at the end of the line in the macro
|
||||
s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;
|
||||
|
||||
# Strip ALIGN
|
||||
s/\sALIGN/@ ALIGN/g;
|
||||
# ALIGN directive
|
||||
s/ALIGN/.balign/g;
|
||||
|
||||
# Strip ARM
|
||||
s/\sARM/@ ARM/g;
|
||||
|
@ -130,9 +136,23 @@ while (<STDIN>)
|
|||
# Strip PRESERVE8
|
||||
s/\sPRESERVE8/@ PRESERVE8/g;
|
||||
|
||||
# Strip PROC and ENDPROC
|
||||
s/\sPROC/@/g;
|
||||
s/\sENDP/@/g;
|
||||
# Use PROC and ENDP to give the symbols a .size directive.
|
||||
# This makes them show up properly in debugging tools like gdb and valgrind.
|
||||
if (/\bPROC\b/)
|
||||
{
|
||||
my $proc;
|
||||
/^_([\.0-9A-Z_a-z]\w+)\b/;
|
||||
$proc = $1;
|
||||
push(@proc_stack, $proc) if ($proc);
|
||||
s/\bPROC\b/@ $&/;
|
||||
}
|
||||
if (/\bENDP\b/)
|
||||
{
|
||||
my $proc;
|
||||
s/\bENDP\b/@ $&/;
|
||||
$proc = pop(@proc_stack);
|
||||
$_ = "\t.size $proc, .-$proc".$_ if ($proc);
|
||||
}
|
||||
|
||||
# EQU directive
|
||||
s/(.*)EQU(.*)/.equ $1, $2/;
|
||||
|
@ -151,3 +171,6 @@ while (<STDIN>)
|
|||
next if /^\s*END\s*$/;
|
||||
print;
|
||||
}
|
||||
|
||||
# Mark that this object doesn't need an executable stack.
|
||||
printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n");
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,39 @@
|
|||
diff --git a/media/libvpx/vpx/vp8.h b/media/libvpx/vpx/vp8.h
|
||||
--- a/media/libvpx/vpx/vp8.h
|
||||
+++ b/media/libvpx/vpx/vp8.h
|
||||
@@ -41,33 +41,33 @@ enum vp8_com_control_id
|
||||
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
|
||||
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
|
||||
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
|
||||
VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
|
||||
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
|
||||
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
|
||||
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
|
||||
VP8_COMMON_CTRL_ID_MAX,
|
||||
- VP8_DECODER_CTRL_ID_START = 256,
|
||||
+ VP8_DECODER_CTRL_ID_START = 256
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
*
|
||||
* The set of macros define VP8 decoder post processing flags
|
||||
*/
|
||||
enum vp8_postproc_level
|
||||
{
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1<<0,
|
||||
VP8_DEMACROBLOCK = 1<<1,
|
||||
VP8_ADDNOISE = 1<<2,
|
||||
VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
|
||||
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
|
||||
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
|
||||
- VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
|
||||
+ VP8_DEBUG_TXT_RATE_INFO = 1<<6 /**< print video rate info (encoder only) */
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
*
|
||||
* This define a structure that describe the post processing settings. For
|
||||
* the best objective measure (using the PSNR metric) set post_proc_flag
|
||||
* to VP8_DEBLOCK and deblocking_level to 1.
|
||||
*/
|
|
@ -16,69 +16,6 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
|
|||
static __inline signed char vp8_signed_char_clamp(int t)
|
||||
{
|
||||
t = (t < -128 ? -128 : t);
|
||||
diff --git a/media/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/vpx/internal/vpx_codec_internal.h
|
||||
--- a/media/libvpx/vpx/internal/vpx_codec_internal.h
|
||||
+++ b/media/libvpx/vpx/internal/vpx_codec_internal.h
|
||||
@@ -316,17 +316,17 @@ struct vpx_codec_iface
|
||||
|
||||
/*!\brief Callback function pointer / user data pair storage */
|
||||
typedef struct vpx_codec_priv_cb_pair
|
||||
{
|
||||
union
|
||||
{
|
||||
vpx_codec_put_frame_cb_fn_t put_frame;
|
||||
vpx_codec_put_slice_cb_fn_t put_slice;
|
||||
- };
|
||||
+ } fn;
|
||||
void *user_priv;
|
||||
} vpx_codec_priv_cb_pair_t;
|
||||
|
||||
|
||||
/*!\brief Instance private storage
|
||||
*
|
||||
* This structure is allocated by the algorithm's init function. It can be
|
||||
* extended in one of two ways. First, a second, algorithm specific structure
|
||||
diff --git a/media/libvpx/vpx/src/vpx_decoder.c b/media/libvpx/vpx/src/vpx_decoder.c
|
||||
--- a/media/libvpx/vpx/src/vpx_decoder.c
|
||||
+++ b/media/libvpx/vpx/src/vpx_decoder.c
|
||||
@@ -165,17 +165,17 @@ vpx_codec_err_t vpx_codec_register_put_f
|
||||
|
||||
if (!ctx || !cb)
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv
|
||||
|| !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
|
||||
res = VPX_CODEC_ERROR;
|
||||
else
|
||||
{
|
||||
- ctx->priv->dec.put_frame_cb.put_frame = cb;
|
||||
+ ctx->priv->dec.put_frame_cb.fn.put_frame = cb;
|
||||
ctx->priv->dec.put_frame_cb.user_priv = user_priv;
|
||||
res = VPX_CODEC_OK;
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
|
||||
@@ -187,17 +187,17 @@ vpx_codec_err_t vpx_codec_register_put_s
|
||||
|
||||
if (!ctx || !cb)
|
||||
res = VPX_CODEC_INVALID_PARAM;
|
||||
else if (!ctx->iface || !ctx->priv
|
||||
|| !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
|
||||
res = VPX_CODEC_ERROR;
|
||||
else
|
||||
{
|
||||
- ctx->priv->dec.put_slice_cb.put_slice = cb;
|
||||
+ ctx->priv->dec.put_slice_cb.fn.put_slice = cb;
|
||||
ctx->priv->dec.put_slice_cb.user_priv = user_priv;
|
||||
res = VPX_CODEC_OK;
|
||||
}
|
||||
|
||||
return SAVE_STATUS(ctx, res);
|
||||
}
|
||||
|
||||
|
||||
diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h
|
||||
--- a/media/libvpx/vpx_ports/mem.h
|
||||
+++ b/media/libvpx/vpx_ports/mem.h
|
||||
|
@ -172,8 +109,8 @@ diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h
|
|||
+ asm volatile ("pause \n\t")
|
||||
#else
|
||||
#if ARCH_X86_64
|
||||
/* No pause intrinsic for windows x64 */
|
||||
#define x86_pause_hint()
|
||||
#define x86_pause_hint()\
|
||||
_mm_pause();
|
||||
#else
|
||||
#define x86_pause_hint()\
|
||||
__asm pause
|
||||
|
|
|
@ -51,16 +51,17 @@ fi
|
|||
|
||||
# These are relative to SDK source dir.
|
||||
commonFiles=(
|
||||
vp8/vp8_dx_iface.c
|
||||
vp8/common/alloccommon.c
|
||||
vp8/common/blockd.c
|
||||
vp8/common/debugmodes.c
|
||||
vp8/common/defaultcoefcounts.c
|
||||
vp8/common/entropy.c
|
||||
vp8/common/entropymode.c
|
||||
vp8/common/entropymv.c
|
||||
vp8/common/extend.c
|
||||
vp8/common/filter_c.c
|
||||
vp8/common/filter.c
|
||||
vp8/common/findnearmv.c
|
||||
vp8/common/generic/systemdependent.c
|
||||
vp8/common/idctllm.c
|
||||
vp8/common/invtrans.c
|
||||
vp8/common/loopfilter.c
|
||||
|
@ -69,7 +70,6 @@ commonFiles=(
|
|||
vp8/common/modecont.c
|
||||
vp8/common/modecontext.c
|
||||
vp8/common/postproc.c
|
||||
vp8/common/predictdc.c
|
||||
vp8/common/quant_common.c
|
||||
vp8/common/recon.c
|
||||
vp8/common/reconinter.c
|
||||
|
@ -84,9 +84,10 @@ commonFiles=(
|
|||
vp8/common/arm/filter_arm.c
|
||||
vp8/common/arm/loopfilter_arm.c
|
||||
vp8/common/arm/reconintra_arm.c
|
||||
vp8/common/arm/vpx_asm_offsets.c
|
||||
vp8/common/arm/neon/recon_neon.c
|
||||
vp8/common/generic/systemdependent.c
|
||||
vp8/common/x86/loopfilter_x86.c
|
||||
vp8/common/x86/recon_wrapper_sse2.c
|
||||
vp8/common/x86/vp8_asm_stubs.c
|
||||
vp8/common/x86/x86_systemdependent.c
|
||||
vp8/decoder/dboolhuff.c
|
||||
|
@ -94,19 +95,18 @@ commonFiles=(
|
|||
vp8/decoder/decodframe.c
|
||||
vp8/decoder/dequantize.c
|
||||
vp8/decoder/detokenize.c
|
||||
vp8/decoder/reconintra_mt.c
|
||||
vp8/decoder/generic/dsystemdependent.c
|
||||
vp8/decoder/idct_blk.c
|
||||
vp8/decoder/onyxd_if.c
|
||||
vp8/decoder/reconintra_mt.c
|
||||
vp8/decoder/threading.c
|
||||
vp8/decoder/arm/arm_dsystemdependent.c
|
||||
vp8/decoder/arm/dequantize_arm.c
|
||||
vp8/decoder/arm/armv6/idct_blk_v6.c
|
||||
vp8/decoder/arm/neon/idct_blk_neon.c
|
||||
vp8/decoder/generic/dsystemdependent.c
|
||||
vp8/decoder/x86/idct_blk_mmx.c
|
||||
vp8/decoder/x86/idct_blk_sse2.c
|
||||
vp8/decoder/x86/x86_dsystemdependent.c
|
||||
vp8/vp8_dx_iface.c
|
||||
vpx/src/vpx_codec.c
|
||||
vpx/src/vpx_decoder.c
|
||||
vpx/src/vpx_decoder_compat.c
|
||||
|
@ -114,7 +114,7 @@ commonFiles=(
|
|||
vpx/src/vpx_image.c
|
||||
vpx_mem/vpx_mem.c
|
||||
vpx_scale/generic/gen_scalers.c
|
||||
vpx_scale/generic/scalesystemdependant.c
|
||||
vpx_scale/generic/scalesystemdependent.c
|
||||
vpx_scale/generic/vpxscale.c
|
||||
vpx_scale/generic/yv12config.c
|
||||
vpx_scale/generic/yv12extend.c
|
||||
|
@ -128,6 +128,7 @@ commonFiles=(
|
|||
vp8/common/entropymode.h
|
||||
vp8/common/entropymv.h
|
||||
vp8/common/extend.h
|
||||
vp8/common/filter.h
|
||||
vp8/common/findnearmv.h
|
||||
vp8/common/g_common.h
|
||||
vp8/common/header.h
|
||||
|
@ -142,8 +143,6 @@ commonFiles=(
|
|||
vp8/common/postproc.h
|
||||
vp8/common/ppflags.h
|
||||
vp8/common/pragmas.h
|
||||
vp8/common/predictdc.h
|
||||
vp8/common/preproc.h
|
||||
vp8/common/quant_common.h
|
||||
vp8/common/recon.h
|
||||
vp8/common/reconinter.h
|
||||
|
@ -156,7 +155,7 @@ commonFiles=(
|
|||
vp8/common/threading.h
|
||||
vp8/common/treecoder.h
|
||||
vp8/common/type_aliases.h
|
||||
vp8/common/vpxerrors.h
|
||||
vp8/common/arm/bilinearfilter_arm.h
|
||||
vp8/common/arm/idct_arm.h
|
||||
vp8/common/arm/loopfilter_arm.h
|
||||
vp8/common/arm/recon_arm.h
|
||||
|
@ -174,9 +173,7 @@ commonFiles=(
|
|||
vp8/decoder/onyxd_int.h
|
||||
vp8/decoder/reconintra_mt.h
|
||||
vp8/decoder/treereader.h
|
||||
vp8/decoder/arm/dboolhuff_arm.h
|
||||
vp8/decoder/arm/dequantize_arm.h
|
||||
vp8/decoder/arm/detokenize_arm.h
|
||||
vp8/decoder/x86/dequantize_x86.h
|
||||
vpx/internal/vpx_codec_internal.h
|
||||
vpx/vp8cx.h
|
||||
|
@ -240,7 +237,6 @@ commonFiles=(
|
|||
vp8/common/arm/neon/recon16x16mb_neon.asm
|
||||
vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
|
||||
vp8/common/arm/neon/save_neon_reg.asm
|
||||
vp8/decoder/arm/detokenize.asm
|
||||
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
|
||||
vp8/decoder/arm/armv6/dequant_idct_v6.asm
|
||||
vp8/decoder/arm/armv6/dequantize_v6.asm
|
||||
|
@ -318,17 +314,5 @@ done
|
|||
# Patch to compile with Sun Studio on Solaris
|
||||
patch -p3 < solaris.patch
|
||||
|
||||
# Patch to fix link with xcode4
|
||||
patch -p1 < xcode4.patch
|
||||
|
||||
# Patch to fix data race on global function pointers
|
||||
patch -p3 < bug640935.patch
|
||||
|
||||
# Patch to avoid text relocations on ARM
|
||||
patch -p3 < bug646815.patch
|
||||
|
||||
# Patch to fix alignment problems with using ARM asm in Thumb mode.
|
||||
patch -p3 < bug666931.patch
|
||||
|
||||
# Patch to make chroma planes 16-byte aligned.
|
||||
patch -p3 < bug671818.patch
|
||||
# Patch to fix errors including C headers in C++
|
||||
patch -p3 < compile_errors.patch
|
||||
|
|
|
@ -16,18 +16,20 @@
|
|||
#include "findnearmv.h"
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
#include "vpxerrors.h"
|
||||
|
||||
|
||||
extern void vp8_init_scan_order_mask();
|
||||
|
||||
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
|
||||
{
|
||||
int i;
|
||||
vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
|
||||
|
||||
for (i = 0; i < rows; i++)
|
||||
{
|
||||
/* TODO(holmer): Bug? This updates the last element of each row
|
||||
* rather than the border element!
|
||||
*/
|
||||
vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
|
||||
}
|
||||
}
|
||||
|
@ -44,9 +46,11 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
|
|||
|
||||
vpx_free(oci->above_context);
|
||||
vpx_free(oci->mip);
|
||||
vpx_free(oci->prev_mip);
|
||||
|
||||
oci->above_context = 0;
|
||||
oci->mip = 0;
|
||||
oci->prev_mip = 0;
|
||||
|
||||
}
|
||||
|
||||
|
@ -66,12 +70,12 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
|
||||
for (i = 0; i < NUM_YV12_BUFFERS; i++)
|
||||
{
|
||||
oci->fb_idx_ref_cnt[0] = 0;
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
oci->fb_idx_ref_cnt[i] = 0;
|
||||
oci->yv12_fb[i].flags = 0;
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,13 +92,13 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
oci->mb_rows = height >> 4;
|
||||
|
@ -106,21 +110,39 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
if (!oci->mip)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
oci->mi = oci->mip + oci->mode_info_stride + 1;
|
||||
|
||||
/* allocate memory for last frame MODE_INFO array */
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
|
||||
|
||||
if (!oci->prev_mip)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return 1;
|
||||
}
|
||||
|
||||
oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
|
||||
#else
|
||||
oci->prev_mip = NULL;
|
||||
oci->prev_mi = NULL;
|
||||
#endif
|
||||
|
||||
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
|
||||
|
||||
if (!oci->above_context)
|
||||
{
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
return ALLOC_FAILURE;
|
||||
return 1;
|
||||
}
|
||||
|
||||
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
|
||||
#if CONFIG_ERROR_CONCEALMENT
|
||||
update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -130,32 +152,32 @@ void vp8_setup_version(VP8_COMMON *cm)
|
|||
{
|
||||
case 0:
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 1:
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 2:
|
||||
cm->no_lpf = 1;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
case 3:
|
||||
cm->no_lpf = 1;
|
||||
cm->simpler_lpf = 1;
|
||||
cm->filter_type = SIMPLE_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 1;
|
||||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->filter_type = NORMAL_LOOPFILTER;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
cm->full_pixel = 0;
|
||||
break;
|
||||
|
@ -170,7 +192,7 @@ void vp8_create_common(VP8_COMMON *oci)
|
|||
|
||||
oci->mb_no_coeff_skip = 1;
|
||||
oci->no_lpf = 0;
|
||||
oci->simpler_lpf = 0;
|
||||
oci->filter_type = NORMAL_LOOPFILTER;
|
||||
oci->use_bilinear_mc_filter = 0;
|
||||
oci->full_pixel = 0;
|
||||
oci->multi_token_partition = ONE_PARTITION;
|
||||
|
|
|
@ -11,27 +11,30 @@
|
|||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
#include "g_common.h"
|
||||
#include "pragmas.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/pragmas.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
|
||||
int flags = arm_cpu_caps();
|
||||
int has_edsp = flags & HAS_EDSP;
|
||||
int has_media = flags & HAS_MEDIA;
|
||||
int has_neon = flags & HAS_NEON;
|
||||
rtcd->flags = flags;
|
||||
|
||||
/* Override default functions with fastest ones for this CPU. */
|
||||
#if HAVE_ARMV5TE
|
||||
if (flags & HAS_EDSP)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
if (has_media)
|
||||
if (flags & HAS_MEDIA)
|
||||
{
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
|
||||
|
@ -51,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_v =
|
||||
vp8_loop_filter_simple_vertical_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
|
||||
rtcd->loopfilter.simple_mb_h =
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
|
||||
|
||||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
|
||||
|
@ -66,7 +71,7 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
|
|||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
if (has_neon)
|
||||
if (flags & HAS_NEON)
|
||||
{
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
|
||||
|
|
|
@ -15,33 +15,33 @@
|
|||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is stored transposed in the output array to make second-pass filtering easier.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
pld [r0]
|
||||
;;ENDIF
|
||||
add r7, r2, r4 ; preload next row
|
||||
pld [r0, r7]
|
||||
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; output_height*2
|
||||
mov r3, r3, lsl #1 ; height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-byte aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save output_ptr for each row
|
||||
mov r11, r1 ; save dst_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
|
@ -96,9 +96,8 @@
|
|||
add r0, r0, r2 ; move to next input row
|
||||
subs r12, r12, #1
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
pld [r0]
|
||||
;;ENDIF
|
||||
add r9, r2, r4, lsl #1 ; adding back block width
|
||||
pld [r0, r9] ; preload next row
|
||||
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
@ -140,17 +139,17 @@
|
|||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 int output_pitch,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
ldr r4, [sp, #36] ; width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
|
|
|
@ -22,9 +22,7 @@
|
|||
;push {r4-r7}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
ands r4, r0, #15
|
||||
beq copy_mem16x16_fast
|
||||
|
@ -90,6 +88,8 @@ copy_mem16x16_1_loop
|
|||
ldrneb r6, [r0, #2]
|
||||
ldrneb r7, [r0, #3]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
|
@ -121,6 +121,8 @@ copy_mem16x16_4_loop
|
|||
ldrne r6, [r0, #8]
|
||||
ldrne r7, [r0, #12]
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
|
||||
bne copy_mem16x16_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
|
@ -148,6 +150,7 @@ copy_mem16x16_8_loop
|
|||
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_8_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
|
@ -171,6 +174,7 @@ copy_mem16x16_fast_loop
|
|||
;stm r2, {r4-r7}
|
||||
add r2, r2, r3
|
||||
|
||||
pld [r0, #31] ; preload for next 16x16 block
|
||||
bne copy_mem16x16_fast_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
|
||||
EXPORT |vp8_filter_block2d_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter4_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
|
||||
|
@ -40,11 +42,6 @@
|
|||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, #-2]
|
||||
;;pld [r0, #30]
|
||||
;;ENDIF
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
@ -101,15 +98,10 @@
|
|||
|
||||
bne width_loop_1st_6
|
||||
|
||||
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, r2]
|
||||
;;pld [r0, r9]
|
||||
;;ENDIF
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
|
@ -120,6 +112,192 @@
|
|||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 16x16 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #18 ; preload next row
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_16_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_16_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_16_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #34 ; adding back block width(=16)
|
||||
pld [r0, r11] ; preload next row
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_16_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; --------------------------
|
||||
; 8x8 version
|
||||
; -----------------------------
|
||||
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
add r4, r2, #10 ; preload next row
|
||||
pld [r0, r4]
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_8_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_8_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_8_6
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
|
||||
add r11, r2, #18 ; adding back block width(=8)
|
||||
pld [r0, r11] ; preload next row
|
||||
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_8_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
|
@ -262,6 +440,10 @@
|
|||
|vp8_filter_block2d_first_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
add r7, r2, r3 ; preload next row
|
||||
add r7, r7, #2
|
||||
pld [r0, r7]
|
||||
|
||||
ldr r4, [sp, #36] ; output pitch
|
||||
ldr r11, [sp, #40] ; HFilter address
|
||||
sub sp, sp, #8
|
||||
|
@ -330,16 +512,15 @@
|
|||
|
||||
bne width_loop_1st_only_6
|
||||
|
||||
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, r2]
|
||||
;;pld [r0, r9]
|
||||
;;ENDIF
|
||||
|
||||
ldr lr, [sp] ; load back output pitch
|
||||
ldr r12, [sp, #4] ; load back source pitch
|
||||
subs r7, r7, #1
|
||||
add r0, r0, r12 ; update src for next loop
|
||||
|
||||
add r11, r12, r3 ; preload next row
|
||||
add r11, r11, #2
|
||||
pld [r0, r11]
|
||||
|
||||
add r1, r1, lr ; update dst for next loop
|
||||
|
||||
bne height_loop_1st_only_6
|
||||
|
|
|
@ -53,14 +53,11 @@ count RN r5
|
|||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r2 const char *blimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|
@ -72,14 +69,18 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Hnext8|
|
||||
; vp8_filter_mask() function
|
||||
|
@ -253,12 +254,6 @@ count RN r5
|
|||
|
||||
subs count, count, #1
|
||||
|
||||
;pld [src]
|
||||
;pld [src, pstep]
|
||||
;pld [src, pstep, lsl #1]
|
||||
;pld [src, pstep, lsl #2]
|
||||
;pld [src, pstep, lsl #3]
|
||||
|
||||
ldrne r9, [src], pstep ; p3
|
||||
ldrne r10, [src], pstep ; p2
|
||||
ldrne r11, [src], pstep ; p1
|
||||
|
@ -281,14 +276,18 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r9, [src], pstep ; p3
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r10, [src], pstep ; p2
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r11, [src], pstep ; p1
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r6], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r6] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBHnext8|
|
||||
|
||||
|
@ -590,15 +589,19 @@ count RN r5
|
|||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|Vnext8|
|
||||
|
||||
|
@ -857,18 +860,26 @@ count RN r5
|
|||
sub src, src, #4 ; move src pointer down by 4
|
||||
ldr count, [sp, #40] ; count for 8-in-parallel
|
||||
ldr r12, [sp, #36] ; load thresh address
|
||||
pld [src, #23] ; preload for next block
|
||||
sub sp, sp, #16 ; create temp buffer
|
||||
|
||||
ldr r6, [src], pstep ; load source data
|
||||
ldr r4, [r2], #4 ; flimit
|
||||
ldrb r4, [r2] ; blimit
|
||||
pld [src, #23]
|
||||
ldr r7, [src], pstep
|
||||
ldr r2, [r3], #4 ; limit
|
||||
ldrb r2, [r3] ; limit
|
||||
pld [src, #23]
|
||||
ldr r8, [src], pstep
|
||||
uadd8 r4, r4, r4 ; flimit * 2
|
||||
ldr r3, [r12], #4 ; thresh
|
||||
orr r4, r4, r4, lsl #8
|
||||
ldrb r3, [r12] ; thresh
|
||||
orr r2, r2, r2, lsl #8
|
||||
pld [src, #23]
|
||||
ldr lr, [src], pstep
|
||||
mov count, count, lsl #1 ; 4-in-parallel
|
||||
uadd8 r4, r4, r2 ; flimit * 2 + limit
|
||||
orr r4, r4, r4, lsl #16
|
||||
orr r3, r3, r3, lsl #8
|
||||
orr r2, r2, r2, lsl #16
|
||||
orr r3, r3, r3, lsl #16
|
||||
|
||||
|MBVnext8|
|
||||
; vp8_filter_mask() function
|
||||
|
@ -908,6 +919,7 @@ count RN r5
|
|||
str lr, [sp, #8]
|
||||
ldr lr, [src], pstep
|
||||
|
||||
|
||||
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
|
||||
|
||||
ldr lr, [sp, #8] ; load back (f)limit accumulator
|
||||
|
@ -956,6 +968,7 @@ count RN r5
|
|||
beq mbvskip_filter ; skip filtering
|
||||
|
||||
|
||||
|
||||
;vp8_hevmask() function
|
||||
;calculate high edge variance
|
||||
|
||||
|
@ -1123,6 +1136,7 @@ count RN r5
|
|||
smlabb r8, r6, lr, r7
|
||||
smlatb r6, r6, lr, r7
|
||||
smlabb r9, r10, lr, r7
|
||||
|
||||
smlatb r10, r10, lr, r7
|
||||
ssat r8, #8, r8, asr #7
|
||||
ssat r6, #8, r6, asr #7
|
||||
|
@ -1242,9 +1256,13 @@ count RN r5
|
|||
sub src, src, #4
|
||||
subs count, count, #1
|
||||
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrne r6, [src], pstep ; load source data
|
||||
pld [src, #23]
|
||||
ldrne r7, [src], pstep
|
||||
pld [src, #23]
|
||||
ldrne r8, [src], pstep
|
||||
pld [src, #23]
|
||||
ldrne lr, [src], pstep
|
||||
|
||||
bne MBVnext8
|
||||
|
|
|
@ -45,35 +45,28 @@
|
|||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
; All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
; for flimit. Same applies to limit. thresh is not used in simple looopfilter
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r3] ; limit
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
ldr r7, [r2] ; flimit
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r9, [sp, #40] ; count for 8-in-parallel
|
||||
uadd8 r7, r7, r7 ; flimit * 2
|
||||
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
|
||||
uadd8 r12, r7, r12 ; flimit * 2 + limit
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; loop count fixed at 4. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
|
@ -148,30 +141,32 @@ pstep RN r1
|
|||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r2] ; r12: flimit
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r7, [r3] ; limit
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
uadd8 r12, r12, r12 ; flimit * 2
|
||||
orr r12, r12, r12, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
uadd8 r12, r12, r7 ; flimit * 2 + limit
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, r11, lsl #1 ; 4-in-parallel
|
||||
mov r11, #4 ; loop count fixed at 4. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
|
@ -259,19 +254,23 @@ pstep RN r1
|
|||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
bne simple_vnext8
|
||||
|
|
|
@ -35,6 +35,9 @@
|
|||
adr r12, filter8_coeff
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
add r3, r1, #10 ; preload next row
|
||||
pld [r0, r3]
|
||||
|
||||
add r2, r12, r2, lsl #4 ;calculate filter location
|
||||
add r0, r0, #3 ;adjust src only for loading convenience
|
||||
|
||||
|
@ -110,6 +113,9 @@
|
|||
|
||||
add r0, r0, r1 ; move to next input line
|
||||
|
||||
add r11, r1, #18 ; preload next row. adding back block width(=8), which was subtracted earlier
|
||||
pld [r0, r11]
|
||||
|
||||
bne first_pass_hloop_v6
|
||||
|
||||
;second pass filter
|
||||
|
@ -243,8 +249,6 @@ skip_secondpass_hloop
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter8_coeff
|
||||
|
|
|
@ -10,128 +10,29 @@
|
|||
|
||||
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
static const short bilinear_filters[8][2] =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if 0
|
||||
void vp8_filter_block2d_bil_first_pass_6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_bil_second_pass_6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i,j;
|
||||
int Temp;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[output_width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
/*src_ptr += src_pixels_per_line - output_width;*/
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
void vp8_filter_block2d_bil_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
|
||||
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
/* pixel_step = 1; */
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
|
||||
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
|
||||
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
|
@ -148,8 +49,8 @@ void vp8_bilinear_predict4x4_armv6
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
@ -167,8 +68,8 @@ void vp8_bilinear_predict8x8_armv6
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
@ -186,8 +87,8 @@ void vp8_bilinear_predict8x4_armv6
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
@ -205,8 +106,8 @@ void vp8_bilinear_predict16x16_armv6
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BILINEARFILTER_ARM_H
|
||||
#define BILINEARFILTER_ARM_H
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#endif /* BILINEARFILTER_ARM_H */
|
|
@ -11,26 +11,10 @@
|
|||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
|
||||
{
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
|
@ -41,6 +25,28 @@ extern void vp8_filter_block2d_first_pass_armv6
|
|||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 8x8
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 16x16
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
|
@ -93,11 +99,11 @@ void vp8_sixtap_predict_armv6
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
|
@ -129,47 +135,6 @@ void vp8_sixtap_predict_armv6
|
|||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void vp8_sixtap_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
|
||||
/*if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
|
||||
}*/
|
||||
/* Hfilter is null. Second pass only */
|
||||
/*else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
|
||||
else*/
|
||||
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
|
||||
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
|
||||
/*}*/
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
|
@ -182,10 +147,10 @@ void vp8_sixtap_predict8x8_armv6
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
|
@ -200,12 +165,12 @@ void vp8_sixtap_predict8x8_armv6
|
|||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
}
|
||||
|
@ -224,10 +189,10 @@ void vp8_sixtap_predict16x16_armv6
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
|
@ -242,12 +207,12 @@ void vp8_sixtap_predict16x16_armv6
|
|||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,135 +9,107 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
#endif
|
||||
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
|
||||
#if HAVE_ARMV7
|
||||
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||
unsigned char *v);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
|
||||
|
||||
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/*ARMV6 loopfilter functions*/
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -145,93 +117,60 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
|
|||
/* NEON loopfilter functions */
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
unsigned char mblim = *lfi->mblim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
unsigned char blim = *lfi->blim;
|
||||
unsigned char lim = *lfi->lim;
|
||||
unsigned char hev_thr = *lfi->hev_thr;
|
||||
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
|
||||
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -12,15 +12,17 @@
|
|||
#ifndef LOOPFILTER_ARM_H
|
||||
#define LOOPFILTER_ARM_H
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_lf_normal_mb_v
|
||||
|
@ -36,28 +38,29 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
|
|||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
|
||||
#endif
|
||||
#endif
|
||||
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_mbvs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_mbhs_neon);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_lf_normal_mb_v
|
||||
|
@ -83,7 +86,8 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
|
|||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
|
||||
#endif
|
||||
#endif
|
||||
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
|
||||
|
||||
#endif
|
||||
#endif /* HAVE_ARMV7 */
|
||||
|
||||
#endif /* LOOPFILTER_ARM_H */
|
||||
|
|
|
@ -350,10 +350,7 @@ filt_blk2d_spo16x16_loop_neon
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
bifilter16_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
|
|
|
@ -123,10 +123,7 @@ skip_secondpass_filter
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
bifilter4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
|
|
|
@ -128,10 +128,7 @@ skip_secondpass_filter
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
bifilter8x4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
|
|
|
@ -176,10 +176,7 @@ skip_secondpass_filter
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
bifilter8_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
|
|
|
@ -20,19 +20,16 @@
|
|||
|vp8_short_inv_walsh4x4_neon| PROC
|
||||
|
||||
; read in all four lines of values: d0->d3
|
||||
vldm.64 r0, {q0, q1}
|
||||
vld1.i16 {q0-q1}, [r0@128]
|
||||
|
||||
; first for loop
|
||||
|
||||
vadd.s16 d4, d0, d3 ;a = [0] + [12]
|
||||
vadd.s16 d5, d1, d2 ;b = [4] + [8]
|
||||
vsub.s16 d6, d1, d2 ;c = [4] - [8]
|
||||
vsub.s16 d7, d0, d3 ;d = [0] - [12]
|
||||
vadd.s16 d6, d1, d2 ;b = [4] + [8]
|
||||
vsub.s16 d5, d0, d3 ;d = [0] - [12]
|
||||
vsub.s16 d7, d1, d2 ;c = [4] - [8]
|
||||
|
||||
vadd.s16 d0, d4, d5 ;a + b
|
||||
vadd.s16 d1, d6, d7 ;c + d
|
||||
vsub.s16 d2, d4, d5 ;a - b
|
||||
vsub.s16 d3, d7, d6 ;d - c
|
||||
vadd.s16 q0, q2, q3 ; a+b d+c
|
||||
vsub.s16 q1, q2, q3 ; a-b d-c
|
||||
|
||||
vtrn.32 d0, d2 ;d0: 0 1 8 9
|
||||
;d2: 2 3 10 11
|
||||
|
@ -47,29 +44,22 @@
|
|||
; second for loop
|
||||
|
||||
vadd.s16 d4, d0, d3 ;a = [0] + [3]
|
||||
vadd.s16 d5, d1, d2 ;b = [1] + [2]
|
||||
vsub.s16 d6, d1, d2 ;c = [1] - [2]
|
||||
vsub.s16 d7, d0, d3 ;d = [0] - [3]
|
||||
vadd.s16 d6, d1, d2 ;b = [1] + [2]
|
||||
vsub.s16 d5, d0, d3 ;d = [0] - [3]
|
||||
vsub.s16 d7, d1, d2 ;c = [1] - [2]
|
||||
|
||||
vadd.s16 d0, d4, d5 ;e = a + b
|
||||
vadd.s16 d1, d6, d7 ;f = c + d
|
||||
vsub.s16 d2, d4, d5 ;g = a - b
|
||||
vsub.s16 d3, d7, d6 ;h = d - c
|
||||
vmov.i16 q8, #3
|
||||
|
||||
vmov.i16 q2, #3
|
||||
vadd.i16 q0, q0, q2 ;e/f += 3
|
||||
vadd.i16 q1, q1, q2 ;g/h += 3
|
||||
vadd.s16 q0, q2, q3 ; a+b d+c
|
||||
vsub.s16 q1, q2, q3 ; a-b d-c
|
||||
|
||||
vadd.i16 q0, q0, q8 ;e/f += 3
|
||||
vadd.i16 q1, q1, q8 ;g/h += 3
|
||||
|
||||
vshr.s16 q0, q0, #3 ;e/f >> 3
|
||||
vshr.s16 q1, q1, #3 ;g/h >> 3
|
||||
|
||||
vtrn.32 d0, d2
|
||||
vtrn.32 d1, d3
|
||||
vtrn.16 d0, d1
|
||||
vtrn.16 d2, d3
|
||||
|
||||
vstmia.16 r1!, {q0}
|
||||
vstmia.16 r1!, {q1}
|
||||
vst4.i16 {d0,d1,d2,d3}, [r1@128]
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_short_inv_walsh4x4_neon|
|
||||
|
@ -77,19 +67,13 @@
|
|||
|
||||
;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
|
||||
|vp8_short_inv_walsh4x4_1_neon| PROC
|
||||
; load a full line into a neon register
|
||||
vld1.16 {q0}, [r0]
|
||||
; extract first element and replicate
|
||||
vdup.16 q1, d0[0]
|
||||
; add 3 to all values
|
||||
vmov.i16 q2, #3
|
||||
vadd.i16 q3, q1, q2
|
||||
; right shift
|
||||
vshr.s16 q3, q3, #3
|
||||
; write it back
|
||||
vstmia.16 r1!, {q3}
|
||||
vstmia.16 r1!, {q3}
|
||||
|
||||
ldrsh r2, [r0] ; load input[0]
|
||||
add r3, r2, #3 ; add 3
|
||||
add r2, r1, #16 ; base for last 8 output
|
||||
asr r0, r3, #3 ; right shift 3
|
||||
vdup.16 q0, r0 ; load and duplicate
|
||||
vst1.16 {q0}, [r1@128] ; write back 8
|
||||
vst1.16 {q0}, [r2@128] ; write back last 8
|
||||
bx lr
|
||||
ENDP ; |vp8_short_inv_walsh4x4_1_neon|
|
||||
|
||||
|
|
|
@ -14,109 +14,97 @@
|
|||
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src
|
||||
; r1 int pitch
|
||||
; r2 const signed char *flimit
|
||||
; r3 const signed char *limit
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_loop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r3, [sp, #4] ; load thresh
|
||||
add r12, r2, r1
|
||||
add r1, r1, r1
|
||||
|
||||
vld1.u8 {q3}, [r2], r1 ; p3
|
||||
vld1.u8 {q4}, [r2], r1 ; p2
|
||||
vld1.u8 {q5}, [r2], r1 ; p1
|
||||
vld1.u8 {q6}, [r2], r1 ; p0
|
||||
vld1.u8 {q7}, [r2], r1 ; q0
|
||||
vld1.u8 {q8}, [r2], r1 ; q1
|
||||
vld1.u8 {q9}, [r2], r1 ; q2
|
||||
vld1.u8 {q10}, [r2] ; q3
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
sub r0, r0, r1, lsl #1
|
||||
vdup.u8 q2, r3 ; duplicate thresh
|
||||
|
||||
vld1.u8 {q3}, [r2@128], r1 ; p3
|
||||
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||
vld1.u8 {q5}, [r2@128], r1 ; p1
|
||||
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||
vld1.u8 {q7}, [r2@128], r1 ; q0
|
||||
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||
vld1.u8 {q9}, [r2@128] ; q2
|
||||
vld1.u8 {q10}, [r12@128] ; q3
|
||||
|
||||
sub r2, r2, r1, lsl #1
|
||||
sub r12, r12, r1, lsl #1
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
vst1.u8 {q5}, [r0], r1 ; store op1
|
||||
vst1.u8 {q6}, [r0], r1 ; store op0
|
||||
vst1.u8 {q7}, [r0], r1 ; store oq0
|
||||
vst1.u8 {q8}, [r0], r1 ; store oq1
|
||||
vst1.u8 {q5}, [r2@128], r1 ; store op1
|
||||
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||
vst1.u8 {q7}, [r2@128], r1 ; store oq0
|
||||
vst1.u8 {q8}, [r12@128], r1 ; store oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
vdup.u8 q2, r12 ; duplicate thresh
|
||||
|
||||
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vld1.u8 {d6}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r3] ; q3
|
||||
|
||||
ldr r3, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
vld1.u8 {d7}, [r12], r1 ; p3
|
||||
vld1.u8 {d9}, [r12], r1 ; p2
|
||||
vld1.u8 {d11}, [r12], r1 ; p1
|
||||
vld1.u8 {d13}, [r12], r1 ; p0
|
||||
vld1.u8 {d15}, [r12], r1 ; q0
|
||||
vld1.u8 {d17}, [r12], r1 ; q1
|
||||
vld1.u8 {d19}, [r12], r1 ; q2
|
||||
vld1.u8 {d21}, [r12] ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r3] ; thresh
|
||||
vld1.u8 {d6}, [r3@64], r1 ; p3
|
||||
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||
vld1.u8 {d8}, [r3@64], r1 ; p2
|
||||
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||
vld1.u8 {d10}, [r3@64], r1 ; p1
|
||||
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||
vld1.u8 {d12}, [r3@64], r1 ; p0
|
||||
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||
vld1.u8 {d14}, [r3@64], r1 ; q0
|
||||
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||
vld1.u8 {d16}, [r3@64], r1 ; q1
|
||||
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||
vld1.u8 {d18}, [r3@64], r1 ; q2
|
||||
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||
vld1.u8 {d20}, [r3@64] ; q3
|
||||
vld1.u8 {d21}, [r12@64] ; q3
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r2, r2, r1, lsl #1
|
||||
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r2], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r2], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r2], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0] ; store u oq1
|
||||
vst1.u8 {d17}, [r2] ; store v oq1
|
||||
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r2@64], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r2@64], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r2@64], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0@64] ; store u oq1
|
||||
vst1.u8 {d17}, [r2@64] ; store v oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
|
@ -124,39 +112,38 @@
|
|||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r0 unsigned char *src
|
||||
; r1 int pitch
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|
||||
|vp8_loop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||
sub r0, r0, #2 ; dst ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||
add r1, r1, r1
|
||||
ldr r3, [sp, #4] ; load thresh
|
||||
add r12, r2, r1, asr #1
|
||||
|
||||
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
|
||||
vld1.u8 {d8}, [r2], r1
|
||||
vld1.u8 {d6}, [r2], r1
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d10}, [r2], r1
|
||||
vld1.u8 {d12}, [r2], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d14}, [r2], r1
|
||||
vld1.u8 {d16}, [r2], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d18}, [r2], r1
|
||||
vld1.u8 {d20}, [r2], r1
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {d20}, [r12], r1
|
||||
|
||||
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r2], r1
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d11}, [r2], r1
|
||||
vld1.u8 {d13}, [r2], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d15}, [r2], r1
|
||||
vld1.u8 {d17}, [r2], r1
|
||||
vld1.u8 {d19}, [r2], r1
|
||||
vld1.u8 {d21}, [r2]
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d19}, [r2]
|
||||
vld1.u8 {d21}, [r12]
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
|
@ -164,6 +151,8 @@
|
|||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vdup.u8 q2, r3 ; duplicate thresh
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
|
@ -178,28 +167,34 @@
|
|||
|
||||
vswp d12, d11
|
||||
vswp d16, d13
|
||||
|
||||
sub r0, r0, #2 ; dst ptr
|
||||
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
add r12, r0, r1, asr #1
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
|
||||
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
|
||||
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
|
||||
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
|
||||
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
|
||||
|
@ -209,38 +204,36 @@
|
|||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_loop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
|
||||
push {lr}
|
||||
vdup.u8 q0, r2 ; duplicate blimit
|
||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
|
||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d10}, [r12], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d14}, [r12], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d18}, [r12], r1
|
||||
vld1.u8 {d20}, [r12]
|
||||
|
||||
vdup.u8 q1, r3 ; duplicate limit
|
||||
sub r3, r2, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d10}, [r12], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d14}, [r12], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d18}, [r12], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d20}, [r12]
|
||||
vld1.u8 {d21}, [r3]
|
||||
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
|
@ -248,6 +241,8 @@
|
|||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vdup.u8 q2, r12 ; duplicate thresh
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
|
@ -258,18 +253,16 @@
|
|||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, #2
|
||||
sub r2, r2, #2
|
||||
|
||||
vswp d12, d11
|
||||
vswp d16, d13
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
sub r0, r0, #2
|
||||
sub r2, r2, #2
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
|
||||
|
@ -288,7 +281,7 @@
|
|||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_neon();
|
||||
|
@ -308,7 +301,6 @@
|
|||
; q9 q2
|
||||
; q10 q3
|
||||
|vp8_loop_filter_neon| PROC
|
||||
adr r12, lf_coeff
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
|
@ -317,42 +309,44 @@
|
|||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
|
||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q4
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
|
||||
vmax.u8 q15, q15, q3
|
||||
|
||||
vadd.u8 q0, q0, q0 ; flimit * 2
|
||||
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15
|
||||
vmov.u8 q10, #0x80 ; 0x80
|
||||
|
||||
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vld1.u8 {q0}, [r12]!
|
||||
vcge.u8 q15, q1, q15
|
||||
|
||||
; vp8_filter() function
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
veor q8, q8, q0 ; qs1
|
||||
veor q7, q7, q10 ; qs0
|
||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||
veor q6, q6, q10 ; ps0
|
||||
|
||||
vld1.u8 {q10}, [r12]!
|
||||
veor q5, q5, q10 ; ps1
|
||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
|
||||
veor q8, q8, q10 ; qs1
|
||||
|
||||
vmov.u8 q10, #3 ; #3
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q11, d15, d13
|
||||
|
||||
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vmovl.u8 q4, d20
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
|
@ -367,7 +361,7 @@
|
|||
vaddw.s8 q2, q2, d2
|
||||
vaddw.s8 q11, q11, d3
|
||||
|
||||
vld1.u8 {q9}, [r12]!
|
||||
vmov.u8 q9, #4 ; #4
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
|
@ -379,29 +373,25 @@
|
|||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
|
||||
|
||||
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
|
||||
|
||||
; outer tap adjustments: ++vp8_filter >> 1
|
||||
vrshr.s8 q1, q1, #1
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
|
||||
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
|
||||
|
||||
veor q5, q13, q0 ; *op1 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q5, q13, q0 ; *op1 = u^0x80
|
||||
veor q8, q12, q0 ; *oq1 = u^0x80
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
AREA loopfilter_dat, DATA, READONLY
|
||||
lf_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
|
||||
;-----------------
|
||||
|
||||
END
|
||||
|
|
|
@ -9,108 +9,109 @@
|
|||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
EXPORT |vp8_loop_filter_bhs_neon|
|
||||
EXPORT |vp8_loop_filter_mbhs_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
; r0 unsigned char *s, PRESERVE
|
||||
; r1 int p, PRESERVE
|
||||
; q1 limit, PRESERVE
|
||||
|
||||
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
adr r12, lfhy_coeff
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q10}, [r12]! ; 0x03
|
||||
vld1.u8 {q8}, [r0] ; q1
|
||||
sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||
vld1.u8 {q5}, [r3@128], r1 ; p0
|
||||
vld1.u8 {q8}, [r0@128] ; q1
|
||||
vld1.u8 {q6}, [r3@128] ; p1
|
||||
|
||||
;vp8_filter_mask() function
|
||||
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
|
||||
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vmov.s16 q13, #3
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
;vp8_filter() function
|
||||
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
|
||||
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
||||
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
|
||||
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q3, d15, d13
|
||||
|
||||
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q12, q3, q3
|
||||
vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
|
||||
vmul.s16 q3, q3, q13
|
||||
|
||||
vld1.u8 {q9}, [r12]! ; 0x04
|
||||
|
||||
vadd.s16 q2, q2, q11
|
||||
vadd.s16 q3, q3, q12
|
||||
vmov.u8 q10, #0x03 ; 0x03
|
||||
vmov.u8 q9, #0x04 ; 0x04
|
||||
|
||||
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q3, q3, d9
|
||||
|
||||
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d9, q3
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
vand q4, q4, q15 ; vp8_filter &= mask
|
||||
vand q14, q4, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q4, q3, #3 ; Filter1 >>= 3
|
||||
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r0, r0, r1
|
||||
|
||||
;calculate output
|
||||
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
|
||||
add r3, r0, r1
|
||||
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
|
||||
vst1.u8 {q6}, [r0] ; store op0
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
vst1.u8 {q6}, [r3@128] ; store op0
|
||||
vst1.u8 {q7}, [r0@128] ; store oq0
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
lfhy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
; vp8_loop_filter_bhs_neon: run the simple horizontal-edge filter three
; times, on the rows 4, 8 and 12 lines below y.
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit (only the first byte is read)
|
||||
|
||||
|vp8_loop_filter_bhs_neon| PROC
|
||||
push {r4, lr} ; lr is overwritten by the bl calls below; r4 is not used in this routine (presumably pushed only to keep sp 8-byte aligned - confirm)
|
||||
ldrb r3, [r2] ; load blim from mem
|
||||
vdup.s8 q1, r3 ; duplicate blim into every byte lane of q1
|
||||
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
|
||||
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||
; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 8 * y_stride
|
||||
bl vp8_loop_filter_simple_horizontal_edge_neon
|
||||
add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
|
||||
pop {r4, lr} ; restore lr so the last filter call returns straight to our caller
|
||||
b vp8_loop_filter_simple_horizontal_edge_neon ; tail call for the third edge
|
||||
ENDP ;|vp8_loop_filter_bhs_neon|
|
||||
|
||||
; vp8_loop_filter_mbhs_neon: run the simple horizontal-edge filter once,
; at y itself, using the limit byte read through r2.
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit (only the first byte is read)
|
||||
|
||||
|vp8_loop_filter_mbhs_neon| PROC
|
||||
ldrb r3, [r2] ; load mblim from mem
|
||||
vdup.s8 q1, r3 ; duplicate mblim into every byte lane of q1
|
||||
b vp8_loop_filter_simple_horizontal_edge_neon ; tail call: lr is untouched, so the filter returns to our caller
|
||||
ENDP ;|vp8_loop_filter_mbhs_neon|
|
||||
|
||||
END
|
||||
|
|
|
@ -9,60 +9,54 @@
|
|||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
EXPORT |vp8_loop_filter_bvs_neon|
|
||||
EXPORT |vp8_loop_filter_mbvs_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; //stack(r5) int count --unused
|
||||
|
||||
; r0 unsigned char *s, PRESERVE
|
||||
; r1 int p, PRESERVE
|
||||
; q1 limit, PRESERVE
|
||||
|
||||
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
||||
sub r0, r0, #2 ; move src pointer down by 2 columns
|
||||
add r12, r1, r1
|
||||
add r3, r0, r1
|
||||
|
||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
|
||||
adr r12, vlfy_coeff
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
|
||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
|
||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
|
||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
|
||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12
|
||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12
|
||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12
|
||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12
|
||||
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vld1.u8 {q11}, [r12]! ; 0x03
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vld1.u8 {q12}, [r12]! ; 0x04
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12
|
||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12
|
||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3]
|
||||
|
||||
vswp d7, d10
|
||||
vswp d12, d9
|
||||
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
|
||||
|
||||
;vp8_filter_mask() function
|
||||
;vp8_hevmask() function
|
||||
sub r0, r0, r1, lsl #4
|
||||
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
|
||||
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
vmov.s16 q11, #3
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
|
||||
|
@ -70,88 +64,91 @@
|
|||
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
|
||||
|
||||
;vp8_filter() function
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
|
||||
vsubl.s8 q13, d9, d11
|
||||
|
||||
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
vqsub.s8 q14, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q14, q13, q13
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q14
|
||||
vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||
vmul.s16 q13, q13, q11
|
||||
|
||||
;vqadd.s8 q1, q1, q2
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
vmov.u8 q11, #0x03 ; 0x03
|
||||
vmov.u8 q12, #0x04 ; 0x04
|
||||
|
||||
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d3, q13
|
||||
vaddw.s8 q2, q2, d28 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d29
|
||||
|
||||
vqmovn.s16 d28, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d29, q13
|
||||
|
||||
add r0, r0, #1
|
||||
add r2, r0, r1
|
||||
;;;;;;;;;;;
|
||||
add r3, r0, r1
|
||||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
vand q14, q14, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vqadd.s8 q2, q14, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q3, q14, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q14, q3, #3 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q4, q14 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
|
||||
add r3, r2, r1
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
add r12, r1, r1
|
||||
vswp d13, d14
|
||||
add r12, r3, r1
|
||||
|
||||
;store op1, op0, oq0, oq1
|
||||
vst2.8 {d12[0], d13[0]}, [r0]
|
||||
vst2.8 {d12[1], d13[1]}, [r2]
|
||||
vst2.8 {d12[2], d13[2]}, [r3]
|
||||
vst2.8 {d12[3], d13[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d12[4], d13[4]}, [r12]
|
||||
vst2.8 {d12[5], d13[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d12[6], d13[6]}, [r0]
|
||||
vst2.8 {d12[7], d13[7]}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst2.8 {d14[0], d15[0]}, [r2]
|
||||
vst2.8 {d14[1], d15[1]}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst2.8 {d14[2], d15[2]}, [r3]
|
||||
vst2.8 {d14[3], d15[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d14[4], d15[4]}, [r12]
|
||||
vst2.8 {d14[5], d15[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d14[6], d15[6]}, [r0]
|
||||
vst2.8 {d14[7], d15[7]}, [r2]
|
||||
vst2.8 {d12[0], d13[0]}, [r0], r12
|
||||
vst2.8 {d12[1], d13[1]}, [r3], r12
|
||||
vst2.8 {d12[2], d13[2]}, [r0], r12
|
||||
vst2.8 {d12[3], d13[3]}, [r3], r12
|
||||
vst2.8 {d12[4], d13[4]}, [r0], r12
|
||||
vst2.8 {d12[5], d13[5]}, [r3], r12
|
||||
vst2.8 {d12[6], d13[6]}, [r0], r12
|
||||
vst2.8 {d12[7], d13[7]}, [r3], r12
|
||||
vst2.8 {d14[0], d15[0]}, [r0], r12
|
||||
vst2.8 {d14[1], d15[1]}, [r3], r12
|
||||
vst2.8 {d14[2], d15[2]}, [r0], r12
|
||||
vst2.8 {d14[3], d15[3]}, [r3], r12
|
||||
vst2.8 {d14[4], d15[4]}, [r0], r12
|
||||
vst2.8 {d14[5], d15[5]}, [r3], r12
|
||||
vst2.8 {d14[6], d15[6]}, [r0], r12
|
||||
vst2.8 {d14[7], d15[7]}, [r3]
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
vlfy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
; vp8_loop_filter_bvs_neon: run the simple vertical-edge filter three
; times, on the columns 4, 8 and 12 bytes to the right of y.
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit (only the first byte is read)
|
||||
|
||||
|vp8_loop_filter_bvs_neon| PROC
|
||||
push {r4, lr} ; r4 is used below to hold y; lr is overwritten by the bl calls
|
||||
ldrb r3, [r2] ; load blim from mem
|
||||
mov r4, r0 ; keep y: the note below lists only r1 and q1 as preserved by the callee
|
||||
add r0, r0, #4 ; src = y_ptr + 4
|
||||
vdup.s8 q1, r3 ; duplicate blim
|
||||
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||
; vp8_loop_filter_simple_vertical_edge_neon preserves r1 and q1
|
||||
add r0, r4, #8 ; src = y_ptr + 8
|
||||
bl vp8_loop_filter_simple_vertical_edge_neon
|
||||
add r0, r4, #12 ; src = y_ptr + 12
|
||||
pop {r4, lr} ; restore r4 and lr so the last filter call returns straight to our caller
|
||||
b vp8_loop_filter_simple_vertical_edge_neon ; tail call for the third edge
|
||||
ENDP ;|vp8_loop_filter_bvs_neon|
|
||||
|
||||
; vp8_loop_filter_mbvs_neon: run the simple vertical-edge filter once,
; at y itself, using the limit byte read through r2.
; r0 unsigned char *y
|
||||
; r1 int ystride
|
||||
; r2 const unsigned char *blimit (only the first byte is read)
|
||||
|
||||
|vp8_loop_filter_mbvs_neon| PROC
|
||||
ldrb r3, [r2] ; load mblim from mem
|
||||
vdup.s8 q1, r3 ; duplicate mblim into every byte lane of q1
|
||||
b vp8_loop_filter_simple_vertical_edge_neon ; tail call: lr is untouched, so the filter returns to our caller
|
||||
ENDP ;|vp8_loop_filter_mbvs_neon|
|
||||
END
|
||||
|
|
|
@ -14,155 +14,143 @@
|
|||
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
push {lr}
|
||||
add r1, r1, r1 ; double stride
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ; p3
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
vld1.u8 {q4}, [r0], r1 ; p2
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q8}, [r0], r1 ; q1
|
||||
vld1.u8 {q9}, [r0], r1 ; q2
|
||||
vld1.u8 {q10}, [r0], r1 ; q3
|
||||
vld1.u8 {q3}, [r0@128], r1 ; p3
|
||||
vld1.u8 {q4}, [r12@128], r1 ; p2
|
||||
vld1.u8 {q5}, [r0@128], r1 ; p1
|
||||
vld1.u8 {q6}, [r12@128], r1 ; p0
|
||||
vld1.u8 {q7}, [r0@128], r1 ; q0
|
||||
vld1.u8 {q8}, [r12@128], r1 ; q1
|
||||
vld1.u8 {q9}, [r0@128], r1 ; q2
|
||||
vld1.u8 {q10}, [r12@128], r1 ; q3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
add r0, r0, r1
|
||||
add r2, r0, r1
|
||||
add r3, r2, r1
|
||||
sub r12, r12, r1, lsl #2
|
||||
add r0, r12, r1, lsr #1
|
||||
|
||||
vst1.u8 {q4}, [r0] ; store op2
|
||||
vst1.u8 {q5}, [r2] ; store op1
|
||||
vst1.u8 {q6}, [r3], r1 ; store op0
|
||||
add r12, r3, r1
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
vst1.u8 {q8}, [r12], r1 ; store oq1
|
||||
vst1.u8 {q9}, [r12] ; store oq2
|
||||
vst1.u8 {q4}, [r12@128],r1 ; store op2
|
||||
vst1.u8 {q5}, [r0@128],r1 ; store op1
|
||||
vst1.u8 {q6}, [r12@128], r1 ; store op0
|
||||
vst1.u8 {q7}, [r0@128],r1 ; store oq0
|
||||
vst1.u8 {q8}, [r12@128] ; store oq1
|
||||
vst1.u8 {q9}, [r0@128] ; store oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|
||||
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
ldr r12, [sp, #8] ; load v ptr
|
||||
sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; p3
|
||||
vld1.u8 {d7}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r0], r1 ; p2
|
||||
vld1.u8 {d9}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r0], r1 ; p1
|
||||
vld1.u8 {d11}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r0], r1 ; p0
|
||||
vld1.u8 {d13}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r0], r1 ; q0
|
||||
vld1.u8 {d15}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r0], r1 ; q1
|
||||
vld1.u8 {d17}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r0], r1 ; q2
|
||||
vld1.u8 {d19}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r0], r1 ; q3
|
||||
vld1.u8 {d21}, [r3], r1 ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {d6}, [r0@64], r1 ; p3
|
||||
vld1.u8 {d7}, [r12@64], r1 ; p3
|
||||
vld1.u8 {d8}, [r0@64], r1 ; p2
|
||||
vld1.u8 {d9}, [r12@64], r1 ; p2
|
||||
vld1.u8 {d10}, [r0@64], r1 ; p1
|
||||
vld1.u8 {d11}, [r12@64], r1 ; p1
|
||||
vld1.u8 {d12}, [r0@64], r1 ; p0
|
||||
vld1.u8 {d13}, [r12@64], r1 ; p0
|
||||
vld1.u8 {d14}, [r0@64], r1 ; q0
|
||||
vld1.u8 {d15}, [r12@64], r1 ; q0
|
||||
vld1.u8 {d16}, [r0@64], r1 ; q1
|
||||
vld1.u8 {d17}, [r12@64], r1 ; q1
|
||||
vld1.u8 {d18}, [r0@64], r1 ; q2
|
||||
vld1.u8 {d19}, [r12@64], r1 ; q2
|
||||
vld1.u8 {d20}, [r0@64], r1 ; q3
|
||||
vld1.u8 {d21}, [r12@64], r1 ; q3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
sub r3, r3, r1, lsl #3
|
||||
sub r12, r12, r1, lsl #3
|
||||
|
||||
add r0, r0, r1
|
||||
add r3, r3, r1
|
||||
add r12, r12, r1
|
||||
|
||||
vst1.u8 {d8}, [r0], r1 ; store u op2
|
||||
vst1.u8 {d9}, [r3], r1 ; store v op2
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r3], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r3], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r3], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0], r1 ; store u oq1
|
||||
vst1.u8 {d17}, [r3], r1 ; store v oq1
|
||||
vst1.u8 {d18}, [r0], r1 ; store u oq2
|
||||
vst1.u8 {d19}, [r3], r1 ; store v oq2
|
||||
vst1.u8 {d8}, [r0@64], r1 ; store u op2
|
||||
vst1.u8 {d9}, [r12@64], r1 ; store v op2
|
||||
vst1.u8 {d10}, [r0@64], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r12@64], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0@64], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r12@64], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r12@64], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0@64], r1 ; store u oq1
|
||||
vst1.u8 {d17}, [r12@64], r1 ; store v oq1
|
||||
vst1.u8 {d18}, [r0@64], r1 ; store u oq2
|
||||
vst1.u8 {d19}, [r12@64], r1 ; store v oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
|
||||
; r2 unsigned char blimit
|
||||
; r3 unsigned char limit
|
||||
; sp unsigned char thresh,
|
||||
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
vdup.s8 q2, r12 ; thresh
|
||||
add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
sub sp, sp, #32
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r12], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r12], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r12], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
|
||||
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r0], r1
|
||||
vld1.u8 {d11}, [r0], r1
|
||||
vld1.u8 {d13}, [r0], r1
|
||||
vld1.u8 {d15}, [r0], r1
|
||||
vld1.u8 {d17}, [r0], r1
|
||||
vld1.u8 {d19}, [r0], r1
|
||||
vld1.u8 {d21}, [r0], r1
|
||||
vld1.u8 {d21}, [r12], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
|
@ -180,133 +168,11 @@
|
|||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]!
|
||||
vst1.u8 {q10}, [r12]!
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #4
|
||||
|
||||
add r2, r0, r1
|
||||
|
||||
add r3, r2, r1
|
||||
|
||||
vld1.u8 {q3}, [sp]!
|
||||
vld1.u8 {q10}, [sp]!
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
add r12, r3, r1
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2
|
||||
vst1.8 {d6}, [r0]
|
||||
vst1.8 {d8}, [r2]
|
||||
vst1.8 {d10}, [r3]
|
||||
vst1.8 {d12}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d14}, [r12]
|
||||
vst1.8 {d16}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d18}, [r0]
|
||||
vst1.8 {d20}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst1.8 {d7}, [r2]
|
||||
vst1.8 {d9}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst1.8 {d11}, [r3]
|
||||
vst1.8 {d13}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d15}, [r12]
|
||||
vst1.8 {d17}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d19}, [r0]
|
||||
vst1.8 {d21}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r3, r3, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ;load u data
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r3], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
sub sp, sp, #32
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]!
|
||||
vst1.u8 {q10}, [r12]!
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3
|
||||
sub r3, r3, r1, lsl #3
|
||||
|
||||
vld1.u8 {q3}, [sp]!
|
||||
vld1.u8 {q10}, [sp]!
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r12, r12, r1, lsl #3
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
|
@ -326,23 +192,118 @@
|
|||
|
||||
;store op2, op1, op0, oq0, oq1, oq2
|
||||
vst1.8 {d6}, [r0], r1
|
||||
vst1.8 {d7}, [r3], r1
|
||||
vst1.8 {d7}, [r12], r1
|
||||
vst1.8 {d8}, [r0], r1
|
||||
vst1.8 {d9}, [r3], r1
|
||||
vst1.8 {d9}, [r12], r1
|
||||
vst1.8 {d10}, [r0], r1
|
||||
vst1.8 {d11}, [r3], r1
|
||||
vst1.8 {d11}, [r12], r1
|
||||
vst1.8 {d12}, [r0], r1
|
||||
vst1.8 {d13}, [r3], r1
|
||||
vst1.8 {d13}, [r12], r1
|
||||
vst1.8 {d14}, [r0], r1
|
||||
vst1.8 {d15}, [r3], r1
|
||||
vst1.8 {d15}, [r12], r1
|
||||
vst1.8 {d16}, [r0], r1
|
||||
vst1.8 {d17}, [r3], r1
|
||||
vst1.8 {d17}, [r12], r1
|
||||
vst1.8 {d18}, [r0], r1
|
||||
vst1.8 {d19}, [r3], r1
|
||||
vst1.8 {d20}, [r0], r1
|
||||
vst1.8 {d21}, [r3], r1
|
||||
vst1.8 {d19}, [r12], r1
|
||||
vst1.8 {d20}, [r0]
|
||||
vst1.8 {d21}, [r12]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const unsigned char *blimit,
|
||||
; const unsigned char *limit,
|
||||
; const unsigned char *thresh,
|
||||
; unsigned char *v)
|
||||
; Loads 8 rows of 8 bytes from each of u and v (starting 4 columns before
; the pointers passed in), transposes them, calls vp8_mbloop_filter_neon,
; transposes back and stores all 8 rows of each plane.
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 blimit - not touched here; vp8_mbloop_filter_neon duplicates r2 itself with vdup, i.e. uses it as a value
|
||||
; NOTE(review): the prototype comment above this header still shows pointer types - confirm which is intended
|
||||
; r3 limit - not touched here; vp8_mbloop_filter_neon duplicates r3 itself with vdup, i.e. uses it as a value
|
||||
; sp thresh - loaded as a word and duplicated with vdup below, i.e. used as a value
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||
push {lr}
|
||||
ldr r12, [sp, #4] ; load thresh (the push moved sp down by 4, so this is the caller's sp+0)
|
||||
sub r0, r0, #4 ; move u pointer back by 4 columns
|
||||
vdup.u8 q2, r12 ; thresh
|
||||
ldr r12, [sp, #8] ; load v ptr (caller's sp+4)
|
||||
sub r12, r12, #4 ; move v pointer back by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ;load u data (u rows fill d6, d8, ... d20: the low halves of q3-q10)
|
||||
vld1.u8 {d7}, [r12], r1 ;load v data (v rows fill d7, d9, ... d21: the high halves of q3-q10)
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
vld1.u8 {d9}, [r12], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r12], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r12], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r12], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r12], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r12], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r12], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
sub r0, r0, r1, lsl #3 ; rewind u pointer over the 8 rows just loaded
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r12, r12, r1, lsl #3 ; rewind v pointer over the 8 rows just loaded (relies on the call above leaving r12 intact)
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2 (every 8-byte row is written back in full, u and v alternating)
|
||||
vst1.8 {d6}, [r0], r1
|
||||
vst1.8 {d7}, [r12], r1
|
||||
vst1.8 {d8}, [r0], r1
|
||||
vst1.8 {d9}, [r12], r1
|
||||
vst1.8 {d10}, [r0], r1
|
||||
vst1.8 {d11}, [r12], r1
|
||||
vst1.8 {d12}, [r0], r1
|
||||
vst1.8 {d13}, [r12], r1
|
||||
vst1.8 {d14}, [r0], r1
|
||||
vst1.8 {d15}, [r12], r1
|
||||
vst1.8 {d16}, [r0], r1
|
||||
vst1.8 {d17}, [r12], r1
|
||||
vst1.8 {d18}, [r0], r1
|
||||
vst1.8 {d19}, [r12], r1
|
||||
vst1.8 {d20}, [r0]
|
||||
vst1.8 {d21}, [r12]
|
||||
|
||||
pop {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_neon()
|
||||
|
@ -350,41 +311,33 @@
|
|||
; functions do the necessary load, transpose (if necessary), preserve (if
|
||||
; necessary) and store.
|
||||
|
||||
; TODO:
|
||||
; The vertical filter writes p3/q3 back out because two 4 element writes are
|
||||
; much simpler than ordering and writing two 3 element sets (or three 2 element
|
||||
; sets, or whichever other combinations are possible).
|
||||
; If we can preserve q3 and q10, the vertical filter will be able to avoid
|
||||
; storing those values on the stack and reading them back after the filter.
|
||||
|
||||
; r0,r1 PRESERVE
|
||||
; r2 flimit
|
||||
; r3 PRESERVE
|
||||
; q1 limit
|
||||
; r2 mblimit
|
||||
; r3 limit
|
||||
|
||||
; q2 thresh
|
||||
; q3 p3
|
||||
; q3 p3 PRESERVE
|
||||
; q4 p2
|
||||
; q5 p1
|
||||
; q6 p0
|
||||
; q7 q0
|
||||
; q8 q1
|
||||
; q9 q2
|
||||
; q10 q3
|
||||
; q10 q3 PRESERVE
|
||||
|
||||
|vp8_mbloop_filter_neon| PROC
|
||||
adr r12, mblf_coeff
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
||||
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q1, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q0
|
||||
vmax.u8 q1, q1, q0
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
|
||||
|
@ -392,51 +345,53 @@
|
|||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
|
||||
vmax.u8 q15, q15, q3
|
||||
vmax.u8 q15, q15, q1
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r2] ; flimit
|
||||
vdup.u8 q1, r3 ; limit
|
||||
vdup.u8 q2, r2 ; mblimit
|
||||
|
||||
vld1.u8 {q0}, [r12]!
|
||||
vmov.u8 q0, #0x80 ; 0x80
|
||||
|
||||
vadd.u8 q2, q2, q2 ; flimit * 2
|
||||
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15
|
||||
|
||||
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
|
||||
vmov.u16 q11, #3 ; #3
|
||||
|
||||
; vp8_filter
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
|
||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||
|
||||
veor q8, q8, q0 ; qs1
|
||||
veor q4, q4, q0 ; ps2
|
||||
veor q9, q9, q0 ; qs2
|
||||
|
||||
vorr q14, q13, q14 ; vp8_hevmask
|
||||
|
||||
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; qs0 - ps0
|
||||
vsubl.s8 q13, d15, d13
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
|
||||
vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
|
||||
vadd.s16 q11, q13, q13
|
||||
vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
|
||||
|
||||
vand q15, q15, q12 ; vp8_filter_mask
|
||||
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q11
|
||||
vmul.i16 q13, q13, q11
|
||||
|
||||
vld1.u8 {q12}, [r12]! ; #3
|
||||
vmov.u8 q12, #3 ; #3
|
||||
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
|
||||
vld1.u8 {q11}, [r12]! ; #4
|
||||
vmov.u8 q11, #4 ; #4
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
|
@ -444,27 +399,23 @@
|
|||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
|
||||
vld1.u8 {q15}, [r12]! ; #63
|
||||
;
|
||||
vand q13, q1, q14 ; Filter2 &= hev
|
||||
vmov.u16 q15, #63 ; #63
|
||||
|
||||
vld1.u8 {d7}, [r12]! ; #9
|
||||
vand q13, q1, q14 ; Filter2 &= hev
|
||||
|
||||
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
||||
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
||||
|
||||
vld1.u8 {d6}, [r12]! ; #18
|
||||
vmov q0, q15
|
||||
|
||||
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
||||
|
||||
vmov q10, q15
|
||||
vmov q11, q15
|
||||
vmov q12, q15
|
||||
|
||||
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
||||
|
||||
vld1.u8 {d5}, [r12]! ; #27
|
||||
|
||||
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
||||
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
|
@ -472,46 +423,47 @@
|
|||
; roughly 1/7th difference across boundary
|
||||
; roughly 2/7th difference across boundary
|
||||
; roughly 3/7th difference across boundary
|
||||
vmov q11, q15
|
||||
|
||||
vmov.u8 d5, #9 ; #9
|
||||
vmov.u8 d4, #18 ; #18
|
||||
|
||||
vmov q13, q15
|
||||
vmov q14, q15
|
||||
|
||||
vmlal.s8 q10, d2, d7 ; Filter2 * 9
|
||||
vmlal.s8 q11, d3, d7
|
||||
vmlal.s8 q12, d2, d6 ; Filter2 * 18
|
||||
vmlal.s8 q13, d3, d6
|
||||
vmlal.s8 q14, d2, d5 ; Filter2 * 27
|
||||
vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
|
||||
vmlal.s8 q11, d3, d5
|
||||
vmov.u8 d5, #27 ; #27
|
||||
vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
|
||||
vmlal.s8 q13, d3, d4
|
||||
vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
|
||||
vmlal.s8 q15, d3, d5
|
||||
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||
vqshrn.s16 d21, q11, #7
|
||||
|
||||
vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||
vqshrn.s16 d1, q11, #7
|
||||
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
|
||||
vqshrn.s16 d25, q13, #7
|
||||
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
|
||||
vqshrn.s16 d29, q15, #7
|
||||
|
||||
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
|
||||
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
|
||||
vmov.u8 q1, #0x80 ; 0x80
|
||||
|
||||
vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
|
||||
vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
|
||||
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
|
||||
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
|
||||
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
|
||||
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
|
||||
veor q9, q11, q0 ; *oq2 = s^0x80
|
||||
veor q4, q10, q0 ; *op2 = s^0x80
|
||||
veor q8, q13, q0 ; *oq1 = s^0x80
|
||||
veor q5, q12, q0 ; *op1 = s^0x80
|
||||
veor q7, q15, q0 ; *oq0 = s^0x80
|
||||
veor q6, q14, q0 ; *op0 = s^0x80
|
||||
|
||||
veor q9, q11, q1 ; *oq2 = s^0x80
|
||||
veor q4, q0, q1 ; *op2 = s^0x80
|
||||
veor q8, q13, q1 ; *oq1 = s^0x80
|
||||
veor q5, q12, q1 ; *op1 = s^0x80
|
||||
veor q7, q15, q1 ; *oq0 = s^0x80
|
||||
veor q6, q14, q1 ; *op0 = s^0x80
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_mbloop_filter_neon|
|
||||
|
||||
AREA mbloopfilter_dat, DATA, READONLY
|
||||
mblf_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
|
||||
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
|
||||
DCD 0x1b1b1b1b, 0x1b1b1b1b
|
||||
;-----------------
|
||||
|
||||
END
|
||||
|
|
|
@ -10,8 +10,8 @@
|
|||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "recon.h"
|
||||
#include "blockd.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
|
||||
|
||||
|
|
|
@ -113,10 +113,7 @@
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
|
||||
idct_coeff
|
||||
DCD 0x4e7b4e7b, 0x8a8c8a8c
|
||||
|
||||
|
|
|
@ -15,6 +15,17 @@
|
|||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
filter16_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
|
@ -33,7 +44,7 @@
|
|||
|vp8_sixtap_predict16x16_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
adrl r12, filter16_coeff
|
||||
adr r12, filter16_coeff
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
||||
|
@ -476,18 +487,4 @@ secondpass_only_inner_loop_neon
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter16_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
||||
|
|
|
@ -15,6 +15,17 @@
|
|||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
filter4_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
|
@ -25,7 +36,7 @@
|
|||
|vp8_sixtap_predict_neon| PROC
|
||||
push {r4, lr}
|
||||
|
||||
adrl r12, filter4_coeff
|
||||
adr r12, filter4_coeff
|
||||
ldr r4, [sp, #8] ;load parameters from stack
|
||||
ldr lr, [sp, #12] ;load parameters from stack
|
||||
|
||||
|
@ -407,18 +418,5 @@ secondpass_filter4x4_only
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter4_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
||||
|
|
|
@ -15,6 +15,17 @@
|
|||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
|
@ -25,7 +36,7 @@
|
|||
|vp8_sixtap_predict8x4_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
adrl r12, filter8_coeff
|
||||
adr r12, filter8_coeff
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
||||
|
@ -458,18 +469,5 @@ secondpass_filter8x4_only
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
||||
|
|
|
@ -15,6 +15,17 @@
|
|||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
|
@ -25,7 +36,7 @@
|
|||
|vp8_sixtap_predict8x8_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
adrl r12, filter8_coeff
|
||||
adr r12, filter8_coeff
|
||||
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
@ -509,18 +520,5 @@ filt_blk2d_spo8x8_loop_neon
|
|||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
|
||||
;One word each is reserved. Label filter_coeff can be used to access the data.
|
||||
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
||||
|
|
|
@ -10,10 +10,10 @@
|
|||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "blockd.h"
|
||||
#include "reconintra.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/common/reconintra.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "recon.h"
|
||||
#include "vp8/common/recon.h"
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern void vp8_build_intra_predictors_mby_neon_func(
|
||||
|
|
|
@ -1,87 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
#include "onyxd_int.h"
|
||||
#endif
|
||||
|
||||
#define DEFINE(sym, val) int sym = val;
|
||||
|
||||
/*
|
||||
#define BLANK() asm volatile("\n->" : : )
|
||||
*/
|
||||
|
||||
/*
|
||||
* int main(void)
|
||||
* {
|
||||
*/
|
||||
|
||||
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
|
||||
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
|
||||
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
|
||||
DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
|
||||
DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
|
||||
DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
|
||||
DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
|
||||
DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
|
||||
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
|
||||
|
||||
DEFINE(detok_scan, offsetof(DETOK, scan));
|
||||
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
|
||||
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
|
||||
DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
|
||||
DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
|
||||
DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
|
||||
|
||||
DEFINE(detok_A, offsetof(DETOK, A));
|
||||
DEFINE(detok_L, offsetof(DETOK, L));
|
||||
|
||||
DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
|
||||
DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
|
||||
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
|
||||
DEFINE(detok_eob, offsetof(DETOK, eob));
|
||||
|
||||
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
|
||||
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
|
||||
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
|
||||
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
|
||||
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
|
||||
|
||||
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
|
||||
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
|
||||
#endif
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
/*
|
||||
* return 0;
|
||||
* }
|
||||
*/
|
|
@ -12,8 +12,6 @@
|
|||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
|
||||
|
||||
const unsigned char vp8_block2left[25] =
|
||||
{
|
||||
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
|
|
|
@ -28,11 +28,6 @@ void vpx_log(const char *format, ...);
|
|||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
#define Y1CONTEXT 0
|
||||
#define UCONTEXT 1
|
||||
#define VCONTEXT 2
|
||||
#define Y2CONTEXT 3
|
||||
|
||||
#define MB_FEATURE_TREE_PROBS 3
|
||||
#define MAX_MB_SEGMENTS 4
|
||||
|
||||
|
@ -48,6 +43,11 @@ typedef struct
|
|||
int r, c;
|
||||
} POS;
|
||||
|
||||
#define PLANE_TYPE_Y_NO_DC 0
|
||||
#define PLANE_TYPE_Y2 1
|
||||
#define PLANE_TYPE_UV 2
|
||||
#define PLANE_TYPE_Y_WITH_DC 3
|
||||
|
||||
|
||||
typedef char ENTROPY_CONTEXT;
|
||||
typedef struct
|
||||
|
@ -58,8 +58,6 @@ typedef struct
|
|||
ENTROPY_CONTEXT y2;
|
||||
} ENTROPY_CONTEXT_PLANES;
|
||||
|
||||
extern const int vp8_block2type[25];
|
||||
|
||||
extern const unsigned char vp8_block2left[25];
|
||||
extern const unsigned char vp8_block2above[25];
|
||||
|
||||
|
@ -139,16 +137,11 @@ typedef enum
|
|||
modes for the Y blocks to the left and above us; for interframes, there
|
||||
is a single probability table. */
|
||||
|
||||
typedef struct
|
||||
union b_mode_info
|
||||
{
|
||||
B_PREDICTION_MODE mode;
|
||||
union
|
||||
{
|
||||
int as_int;
|
||||
MV as_mv;
|
||||
} mv;
|
||||
} B_MODE_INFO;
|
||||
|
||||
B_PREDICTION_MODE as_mode;
|
||||
int_mv mv;
|
||||
};
|
||||
|
||||
typedef enum
|
||||
{
|
||||
|
@ -163,38 +156,26 @@ typedef struct
|
|||
{
|
||||
MB_PREDICTION_MODE mode, uv_mode;
|
||||
MV_REFERENCE_FRAME ref_frame;
|
||||
union
|
||||
{
|
||||
int as_int;
|
||||
MV as_mv;
|
||||
} mv;
|
||||
int_mv mv;
|
||||
|
||||
unsigned char partitioning;
|
||||
unsigned char mb_skip_coeff; /* does this mb have coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
unsigned char dc_diff;
|
||||
unsigned char need_to_clamp_mvs;
|
||||
|
||||
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
|
||||
unsigned char force_no_skip; /* encoder only */
|
||||
} MB_MODE_INFO;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
MB_MODE_INFO mbmi;
|
||||
B_MODE_INFO bmi[16];
|
||||
union b_mode_info bmi[16];
|
||||
} MODE_INFO;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
short *qcoeff;
|
||||
short *dqcoeff;
|
||||
unsigned char *predictor;
|
||||
short *diff;
|
||||
short *reference;
|
||||
|
||||
short *dequant;
|
||||
|
||||
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
|
||||
|
@ -208,15 +189,13 @@ typedef struct
|
|||
|
||||
int eob;
|
||||
|
||||
B_MODE_INFO bmi;
|
||||
|
||||
union b_mode_info bmi;
|
||||
} BLOCKD;
|
||||
|
||||
typedef struct
|
||||
typedef struct MacroBlockD
|
||||
{
|
||||
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
/* not used DECLARE_ALIGNED(16, short, reference[384]); */
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
@ -273,6 +252,9 @@ typedef struct
|
|||
int mb_to_top_edge;
|
||||
int mb_to_bottom_edge;
|
||||
|
||||
int ref_frame_cost[MAX_REF_FRAMES];
|
||||
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
unsigned int frames_till_alt_ref_frame;
|
||||
vp8_subpix_fn_t subpixel_predict;
|
||||
|
@ -282,6 +264,16 @@ typedef struct
|
|||
|
||||
void *current_bc;
|
||||
|
||||
int corrupted;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
/* This is an intermediate buffer currently used in sub-pixel motion search
|
||||
* to keep a copy of the reference area. This buffer can be used for other
|
||||
* purpose.
|
||||
*/
|
||||
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
|
||||
#endif
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
struct VP8_COMMON_RTCD *rtcd;
|
||||
#endif
|
||||
|
@ -291,4 +283,20 @@ typedef struct
|
|||
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
|
||||
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
|
||||
|
||||
/* Copy the per-block mode info of the current macroblock into its BLOCKD
 * entries. When the macroblock mode is SPLITMV or B_PRED, each of the 16
 * entries xd->mode_info_context->bmi[i] is copied to xd->block[i].bmi.
 * For any other mode the function does nothing.
 */
static void update_blockd_bmi(MACROBLOCKD *xd)
|
||||
{
|
||||
int i;
|
||||
int is_4x4;
|
||||
is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
|
||||
(xd->mode_info_context->mbmi.mode == B_PRED);
|
||||
|
||||
if (is_4x4)
|
||||
{
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
xd->block[i].bmi = xd->mode_info_context->bmi[i];
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __INC_BLOCKD_H */
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
/* Update probabilities for the nodes in the token entropy tree.
|
||||
Generated file included by entropy.c */
|
||||
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] =
|
||||
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
|
||||
{
|
||||
{
|
||||
{
|
||||
|
|
|
@ -97,7 +97,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
bindex = (b_row & 3) * 4 + (b_col & 3);
|
||||
|
||||
if (mi[mb_index].mbmi.mode == B_PRED)
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].mode);
|
||||
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
|
||||
else
|
||||
fprintf(mvs, "xx ");
|
||||
|
||||
|
|
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "defaultcoefcounts.h"
|
||||
|
||||
/* Generated file, included by entropy.c */
|
||||
|
||||
const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[MAX_ENTROPY_TOKENS] =
|
||||
{
|
||||
|
||||
{
|
||||
/* Block Type ( 0 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
|
||||
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
|
||||
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
|
||||
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
|
||||
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
|
||||
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
|
||||
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
|
||||
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
|
||||
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 1 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
|
||||
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
|
||||
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
|
||||
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
|
||||
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
|
||||
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
|
||||
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
|
||||
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
|
||||
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
|
||||
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
|
||||
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
|
||||
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
|
||||
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 2 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
|
||||
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
|
||||
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
|
||||
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
|
||||
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
|
||||
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
|
||||
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
|
||||
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
|
||||
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 3 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
|
||||
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
|
||||
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
|
||||
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
|
||||
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
|
||||
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
|
||||
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
|
||||
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
|
||||
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
|
||||
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
|
||||
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
|
||||
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
|
||||
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
|
||||
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
|
||||
},
|
||||
},
|
||||
};
|
|
@ -8,214 +8,14 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef __DEFAULTCOEFCOUNTS_H
|
||||
#define __DEFAULTCOEFCOUNTS_H
|
||||
|
||||
/* Generated file, included by entropy.c */
|
||||
#include "entropy.h"
|
||||
|
||||
static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens] =
|
||||
{
|
||||
extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
|
||||
[COEF_BANDS]
|
||||
[PREV_COEF_CONTEXTS]
|
||||
[MAX_ENTROPY_TOKENS];
|
||||
|
||||
{
|
||||
/* Block Type ( 0 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
|
||||
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
|
||||
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
|
||||
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
|
||||
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
|
||||
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
|
||||
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
|
||||
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
|
||||
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 1 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
|
||||
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
|
||||
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
|
||||
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
|
||||
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
|
||||
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
|
||||
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
|
||||
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
|
||||
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
|
||||
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
|
||||
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
|
||||
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
|
||||
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 2 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
|
||||
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
|
||||
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
|
||||
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
|
||||
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
|
||||
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
|
||||
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
|
||||
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
|
||||
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
},
|
||||
{
|
||||
/* Block Type ( 3 ) */
|
||||
{
|
||||
/* Coeff Band ( 0 ) */
|
||||
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
|
||||
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
|
||||
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 1 ) */
|
||||
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
|
||||
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
|
||||
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 2 ) */
|
||||
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
|
||||
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
|
||||
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 3 ) */
|
||||
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
|
||||
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
|
||||
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 4 ) */
|
||||
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
|
||||
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
|
||||
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 5 ) */
|
||||
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
|
||||
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 6 ) */
|
||||
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
|
||||
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
|
||||
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
|
||||
},
|
||||
{
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
|
||||
},
|
||||
},
|
||||
};
|
||||
#endif //__DEFAULTCOEFCOUNTS_H
|
||||
|
|
|
@ -26,8 +26,32 @@ typedef vp8_prob Prob;
|
|||
|
||||
#include "coefupdateprobs.h"
|
||||
|
||||
DECLARE_ALIGNED(16, cuchar, vp8_coef_bands[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
DECLARE_ALIGNED(16, cuchar, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
|
||||
{
|
||||
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, cuchar, vp8_coef_bands[16]) =
|
||||
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
|
||||
|
||||
DECLARE_ALIGNED(16, cuchar, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
|
||||
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
|
||||
|
||||
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
||||
{
|
||||
0, 1, 4, 8,
|
||||
|
@ -36,6 +60,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
|
|||
7, 11, 14, 15,
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
|
||||
{
|
||||
1, 2, 6, 7,
|
||||
3, 5, 8, 13,
|
||||
4, 9, 12, 14,
|
||||
10, 11, 15, 16
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
|
||||
|
||||
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
|
||||
|
@ -57,7 +89,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
|
|||
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
|
||||
};
|
||||
|
||||
struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens];
|
||||
struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
|
||||
|
||||
/* Trees for extra bits. Probabilities are constant and
|
||||
do not depend on previously encoded bits */
|
||||
|
@ -106,23 +138,20 @@ static void init_bit_trees()
|
|||
init_bit_tree(cat6, 11);
|
||||
}
|
||||
|
||||
|
||||
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
|
||||
|
||||
vp8_extra_bit_struct vp8_extra_bits[12] =
|
||||
{
|
||||
{ 0, 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, bcc1, 1, 5},
|
||||
{ cat2, Pcat2, bcc2, 2, 7},
|
||||
{ cat3, Pcat3, bcc3, 3, 11},
|
||||
{ cat4, Pcat4, bcc4, 4, 19},
|
||||
{ cat5, Pcat5, bcc5, 5, 35},
|
||||
{ cat6, Pcat6, bcc6, 11, 67},
|
||||
{ 0, 0, 0, 0, 0}
|
||||
{ 0, 0, 0, 0},
|
||||
{ 0, 0, 0, 1},
|
||||
{ 0, 0, 0, 2},
|
||||
{ 0, 0, 0, 3},
|
||||
{ 0, 0, 0, 4},
|
||||
{ cat1, Pcat1, 1, 5},
|
||||
{ cat2, Pcat2, 2, 7},
|
||||
{ cat3, Pcat3, 3, 11},
|
||||
{ cat4, Pcat4, 4, 19},
|
||||
{ cat5, Pcat5, 5, 35},
|
||||
{ cat6, Pcat6, 11, 67},
|
||||
{ 0, 0, 0, 0}
|
||||
};
|
||||
#include "defaultcoefcounts.h"
|
||||
|
||||
|
@ -140,10 +169,12 @@ void vp8_default_coef_probs(VP8_COMMON *pc)
|
|||
|
||||
do
|
||||
{
|
||||
unsigned int branch_ct [vp8_coef_tokens-1] [2];
|
||||
unsigned int branch_ct [ENTROPY_NODES] [2];
|
||||
vp8_tree_probs_from_distribution(
|
||||
vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree,
|
||||
pc->fc.coef_probs [h][i][k], branch_ct, default_coef_counts [h][i][k],
|
||||
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
|
||||
pc->fc.coef_probs[h][i][k],
|
||||
branch_ct,
|
||||
vp8_default_coef_counts[h][i][k],
|
||||
256, 1);
|
||||
|
||||
}
|
||||
|
|
|
@ -24,25 +24,23 @@
|
|||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define vp8_coef_tokens 12
|
||||
#define MAX_ENTROPY_TOKENS vp8_coef_tokens
|
||||
#define MAX_ENTROPY_TOKENS 12
|
||||
#define ENTROPY_NODES 11
|
||||
|
||||
extern const vp8_tree_index vp8_coef_tree[];
|
||||
|
||||
extern struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens];
|
||||
extern struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_tree_p tree;
|
||||
const vp8_prob *prob;
|
||||
vp8bc_index_t *prob_bc;
|
||||
int Len;
|
||||
int base_val;
|
||||
} vp8_extra_bit_struct;
|
||||
|
@ -86,15 +84,16 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
|
|||
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
|
||||
# define PREV_COEF_CONTEXTS 3
|
||||
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
|
||||
|
||||
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
|
||||
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
|
||||
|
||||
struct VP8Common;
|
||||
void vp8_default_coef_probs(struct VP8Common *);
|
||||
|
||||
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
|
||||
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
|
||||
extern short vp8_default_zig_zag_mask[16];
|
||||
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];
|
||||
|
||||
|
|
|
@ -33,11 +33,11 @@ typedef enum
|
|||
SUBMVREF_LEFT_ABOVE_ZED
|
||||
} sumvfref_t;
|
||||
|
||||
int vp8_mv_cont(const MV *l, const MV *a)
|
||||
int vp8_mv_cont(const int_mv *l, const int_mv *a)
|
||||
{
|
||||
int lez = (l->row == 0 && l->col == 0);
|
||||
int aez = (a->row == 0 && a->col == 0);
|
||||
int lea = (l->row == a->row && l->col == a->col);
|
||||
int lez = (l->as_int == 0);
|
||||
int aez = (a->as_int == 0);
|
||||
int lea = (l->as_int == a->as_int);
|
||||
|
||||
if (lea && lez)
|
||||
return SUBMVREF_LEFT_ABOVE_ZED;
|
||||
|
|
|
@ -25,7 +25,7 @@ extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
|
|||
|
||||
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
|
||||
|
||||
extern int vp8_mv_cont(const MV *l, const MV *a);
|
||||
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
|
||||
#define SUBMVREF_COUNT 5
|
||||
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@ enum
|
|||
{
|
||||
mv_max = 1023, /* max absolute value of a MV component */
|
||||
MVvals = (2 * mv_max) + 1, /* # possible values "" */
|
||||
mvfp_max = 255, /* max absolute value of a full pixel MV component */
|
||||
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
|
||||
|
||||
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
|
||||
mvnum_short = 8, /* magnitudes 0 through 7 */
|
||||
|
|
|
@ -13,10 +13,12 @@
|
|||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
|
||||
static void extend_plane_borders
|
||||
static void copy_and_extend_plane
|
||||
(
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* pitch */
|
||||
int sp, /* source pitch */
|
||||
unsigned char *d, /* destination */
|
||||
int dp, /* destination pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
|
@ -25,7 +27,6 @@ static void extend_plane_borders
|
|||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
|
||||
int i;
|
||||
unsigned char *src_ptr1, *src_ptr2;
|
||||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
|
@ -34,68 +35,73 @@ static void extend_plane_borders
|
|||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = s - el;
|
||||
dest_ptr2 = s + w;
|
||||
dest_ptr1 = d - el;
|
||||
dest_ptr2 = d + w;
|
||||
|
||||
for (i = 0; i < h - 0 + 1; i++)
|
||||
for (i = 0; i < h; i++)
|
||||
{
|
||||
/* Some linkers will complain if we call vpx_memset with el set to a
|
||||
* constant 0.
|
||||
*/
|
||||
if (el)
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], er);
|
||||
src_ptr1 += sp;
|
||||
src_ptr2 += sp;
|
||||
dest_ptr1 += sp;
|
||||
dest_ptr2 += sp;
|
||||
dest_ptr1 += dp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
|
||||
/* Now copy the top and bottom source lines into each line of the respective borders */
|
||||
src_ptr1 = s - el;
|
||||
src_ptr2 = s + sp * (h - 1) - el;
|
||||
dest_ptr1 = s + sp * (-et) - el;
|
||||
dest_ptr2 = s + sp * (h) - el;
|
||||
linesize = el + er + w + 1;
|
||||
/* Now copy the top and bottom lines into each line of the respective
|
||||
* borders
|
||||
*/
|
||||
src_ptr1 = d - el;
|
||||
src_ptr2 = d + dp * (h - 1) - el;
|
||||
dest_ptr1 = d + dp * (-et) - el;
|
||||
dest_ptr2 = d + dp * (h) - el;
|
||||
linesize = el + er + w;
|
||||
|
||||
for (i = 0; i < (int)et; i++)
|
||||
for (i = 0; i < et; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
|
||||
dest_ptr1 += sp;
|
||||
dest_ptr1 += dp;
|
||||
}
|
||||
|
||||
for (i = 0; i < (int)eb; i++)
|
||||
for (i = 0; i < eb; i++)
|
||||
{
|
||||
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
|
||||
dest_ptr2 += sp;
|
||||
dest_ptr2 += dp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst)
|
||||
{
|
||||
int er = 0xf & (16 - (width & 0xf));
|
||||
int eb = 0xf & (16 - (height & 0xf));
|
||||
int et = dst->border;
|
||||
int el = dst->border;
|
||||
int eb = dst->border + dst->y_height - src->y_height;
|
||||
int er = dst->border + dst->y_width - src->y_width;
|
||||
|
||||
/* check for non multiples of 16 */
|
||||
if (er != 0 || eb != 0)
|
||||
{
|
||||
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
|
||||
copy_and_extend_plane(src->y_buffer, src->y_stride,
|
||||
dst->y_buffer, dst->y_stride,
|
||||
src->y_height, src->y_width,
|
||||
et, el, eb, er);
|
||||
|
||||
/* adjust for uv */
|
||||
height = (height + 1) >> 1;
|
||||
width = (width + 1) >> 1;
|
||||
er = 0x7 & (8 - (width & 0x7));
|
||||
eb = 0x7 & (8 - (height & 0x7));
|
||||
et = dst->border >> 1;
|
||||
el = dst->border >> 1;
|
||||
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
|
||||
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
|
||||
|
||||
if (er || eb)
|
||||
{
|
||||
extend_plane_borders(ybf->u_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
|
||||
extend_plane_borders(ybf->v_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
|
||||
}
|
||||
}
|
||||
copy_and_extend_plane(src->u_buffer, src->uv_stride,
|
||||
dst->u_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
|
||||
copy_and_extend_plane(src->v_buffer, src->uv_stride,
|
||||
dst->v_buffer, dst->uv_stride,
|
||||
src->uv_height, src->uv_width,
|
||||
et, el, eb, er);
|
||||
}
|
||||
|
||||
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
|
||||
{
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
void Extend(YV12_BUFFER_CONFIG *ybf);
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
|
||||
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height);
|
||||
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
|
||||
YV12_BUFFER_CONFIG *dst);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -10,13 +10,10 @@
|
|||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
|
||||
static const int bilinear_filters[8][2] =
|
||||
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
|
@ -28,8 +25,7 @@ static const int bilinear_filters[8][2] =
|
|||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
static const short sub_pel_filters[8][6] =
|
||||
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
|
@ -40,12 +36,9 @@ static const short sub_pel_filters[8][6] =
|
|||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
|
||||
|
||||
|
||||
};
|
||||
|
||||
void vp8_filter_block2d_first_pass
|
||||
static void filter_block2d_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int *output_ptr,
|
||||
|
@ -89,7 +82,7 @@ void vp8_filter_block2d_first_pass
|
|||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_second_pass
|
||||
static void filter_block2d_second_pass
|
||||
(
|
||||
int *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
|
@ -136,7 +129,7 @@ void vp8_filter_block2d_second_pass
|
|||
}
|
||||
|
||||
|
||||
void vp8_filter_block2d
|
||||
static void filter_block2d
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
|
@ -146,42 +139,16 @@ void vp8_filter_block2d
|
|||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; /* Temp data bufffer used in filtering */
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
|
||||
|
||||
|
||||
void vp8_block_variation_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int *HVar,
|
||||
int *VVar
|
||||
)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *Ptr = src_ptr;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
*HVar += abs((int)Ptr[j] - (int)Ptr[j+1]);
|
||||
*VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]);
|
||||
}
|
||||
|
||||
Ptr += src_pixels_per_line;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void vp8_sixtap_predict_c
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
|
@ -195,10 +162,10 @@ void vp8_sixtap_predict_c
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
void vp8_sixtap_predict8x8_c
|
||||
(
|
||||
|
@ -212,17 +179,17 @@ void vp8_sixtap_predict8x8_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
@ -238,17 +205,17 @@ void vp8_sixtap_predict8x4_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
int FData[13*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
@ -264,17 +231,17 @@ void vp8_sixtap_predict16x16_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; /* Temp data bufffer used in filtering */
|
||||
int FData[21*24]; /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
/* then filter vertically... */
|
||||
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
||||
|
@ -283,57 +250,50 @@ void vp8_sixtap_predict16x16_c
|
|||
*
|
||||
* ROUTINE : filter_block2d_bil_first_pass
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_stride : Stride of source block.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement first-pass
|
||||
* of 2-D separable filter.
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the horizontal direction to produce the filtered output
|
||||
* block. Used to implement first-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Produces unsigned short output to retain precision for next pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_first_pass
|
||||
static void filter_block2d_bil_first_pass
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_stride,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
src_ptr += src_stride - width;
|
||||
dst_ptr += width;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,60 +301,51 @@ void vp8_filter_block2d_bil_first_pass
|
|||
*
|
||||
* ROUTINE : filter_block2d_bil_second_pass
|
||||
*
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* UINT32 pixel_step : Offset between filter input samples (see notes).
|
||||
* UINT32 output_height : Input block height.
|
||||
* UINT32 output_width : Input block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
* INPUTS : INT32 *src_ptr : Pointer to source block.
|
||||
* UINT32 dst_pitch : Destination block pitch.
|
||||
* UINT32 height : Block height.
|
||||
* UINT32 width : Block width.
|
||||
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
|
||||
* either horizontal or vertical direction to produce the
|
||||
* filtered output block. Used to implement second-pass
|
||||
* of 2-D separable filter.
|
||||
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
|
||||
* in the vertical direction to produce the filtered output
|
||||
* block. Used to implement second-pass of 2-D separable filter.
|
||||
*
|
||||
* SPECIAL NOTES : Requires unsigned short input as produced by filter_block2d_bil_first_pass.
|
||||
* Two filter taps should sum to VP8_FILTER_WEIGHT.
|
||||
* pixel_step defines whether the filter is applied
|
||||
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
|
||||
* It defines the offset required to move from one input
|
||||
* to the next.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil_second_pass
|
||||
static void filter_block2d_bil_second_pass
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const int *vp8_filter
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
int Temp;
|
||||
|
||||
for (i = 0; i < output_height; i++)
|
||||
for (i = 0; i < height; i++)
|
||||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
for (j = 0; j < width; j++)
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
dst_ptr += dst_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -404,11 +355,14 @@ void vp8_filter_block2d_bil_second_pass
|
|||
* ROUTINE : filter_block2d_bil
|
||||
*
|
||||
* INPUTS : UINT8 *src_ptr : Pointer to source block.
|
||||
* UINT32 src_pixels_per_line : Stride of input block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* UINT32 src_pitch : Stride of source block.
|
||||
* UINT32 dst_pitch : Stride of destination block.
|
||||
* INT32 *HFilter : Array of 2 horizontal filter taps.
|
||||
* INT32 *VFilter : Array of 2 vertical filter taps.
|
||||
* INT32 Width : Block width
|
||||
* INT32 Height : Block height
|
||||
*
|
||||
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
|
||||
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
|
||||
*
|
||||
* RETURNS : void
|
||||
*
|
||||
|
@ -419,26 +373,26 @@ void vp8_filter_block2d_bil_second_pass
|
|||
* SPECIAL NOTES : The largest block size can be handled here is 16x16
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_filter_block2d_bil
|
||||
static void filter_block2d_bil
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned char *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int dst_pitch,
|
||||
const int *HFilter,
|
||||
const int *VFilter,
|
||||
const short *HFilter,
|
||||
const short *VFilter,
|
||||
int Width,
|
||||
int Height
|
||||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
|
||||
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
|
||||
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
|
||||
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
|
||||
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
|
||||
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
||||
|
@ -452,11 +406,11 @@ void vp8_bilinear_predict4x4_c
|
|||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
#if 0
|
||||
{
|
||||
int i;
|
||||
|
@ -464,19 +418,19 @@ void vp8_bilinear_predict4x4_c
|
|||
unsigned char temp2[16];
|
||||
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
if (temp1[i] != temp2[i])
|
||||
{
|
||||
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
|
||||
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
|
||||
}
|
||||
|
||||
|
@ -490,13 +444,13 @@ void vp8_bilinear_predict8x8_c
|
|||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
|
||||
}
|
||||
|
||||
|
@ -510,13 +464,13 @@ void vp8_bilinear_predict8x4_c
|
|||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
|
||||
}
|
||||
|
||||
|
@ -530,11 +484,11 @@ void vp8_bilinear_predict16x16_c
|
|||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const int *HFilter;
|
||||
const int *VFilter;
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -9,14 +9,14 @@
|
|||
*/
|
||||
|
||||
|
||||
#ifndef DETOKENIZE_ARM_H
|
||||
#define DETOKENIZE_ARM_H
|
||||
#ifndef FILTER_H
|
||||
#define FILTER_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
#if CONFIG_ARM_ASM_DETOK
|
||||
void vp8_init_detokenizer(VP8D_COMP *dx);
|
||||
void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
|
||||
#endif
|
||||
#endif
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
#endif
|
||||
extern const short vp8_bilinear_filters[8][2];
|
||||
extern const short vp8_sub_pel_filters[8][6];
|
||||
|
||||
#endif //FILTER_H
|
|
@ -11,54 +11,23 @@
|
|||
|
||||
#include "findnearmv.h"
|
||||
|
||||
#define FINDNEAR_SEARCH_SITES 3
|
||||
/* Lookup table of 4 rows x 16 byte entries.
 *
 * NOTE(review): the values look like the raster index (0..15) of the first
 * 4x4 sub-block of each partition for four macroblock split layouts
 * (two halves top/bottom, two halves left/right, four quarters, sixteen
 * individual blocks), with unused trailing slots zero-filled. Confirm
 * against the code that indexes this table before relying on that reading.
 */
const unsigned char vp8_mbsplit_offset[4][16] = {
    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15}
};
|
||||
|
||||
/* Predict motion vectors using those from already-decoded nearby blocks.
|
||||
Note that we only consider one 4x4 subblock from each candidate 16x16
|
||||
macroblock. */
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned int as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates rapid equality tests */
|
||||
|
||||
static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
|
||||
{
|
||||
MV xmv;
|
||||
xmv = x->mbmi.mv.as_mv;
|
||||
|
||||
if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe])
|
||||
{
|
||||
xmv.row *= -1;
|
||||
xmv.col *= -1;
|
||||
}
|
||||
|
||||
mvp->as_mv = xmv;
|
||||
}
|
||||
|
||||
|
||||
void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
MV *nearest,
|
||||
MV *nearby,
|
||||
MV *best_mv,
|
||||
int_mv *nearest,
|
||||
int_mv *nearby,
|
||||
int_mv *best_mv,
|
||||
int cnt[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
|
@ -82,7 +51,7 @@ void vp8_find_near_mvs
|
|||
if (above->mbmi.mv.as_int)
|
||||
{
|
||||
(++mv)->as_int = above->mbmi.mv.as_int;
|
||||
mv_bias(above, refframe, mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
|
||||
++cntx;
|
||||
}
|
||||
|
||||
|
@ -97,7 +66,7 @@ void vp8_find_near_mvs
|
|||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = left->mbmi.mv.as_int;
|
||||
mv_bias(left, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
|
@ -119,7 +88,7 @@ void vp8_find_near_mvs
|
|||
int_mv this_mv;
|
||||
|
||||
this_mv.as_int = aboveleft->mbmi.mv.as_int;
|
||||
mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias);
|
||||
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
|
||||
|
||||
if (this_mv.as_int != mv->as_int)
|
||||
{
|
||||
|
@ -162,13 +131,14 @@ void vp8_find_near_mvs
|
|||
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
|
||||
|
||||
/* Set up return values */
|
||||
*best_mv = near_mvs[0].as_mv;
|
||||
*nearest = near_mvs[CNT_NEAREST].as_mv;
|
||||
*nearby = near_mvs[CNT_NEAR].as_mv;
|
||||
best_mv->as_int = near_mvs[0].as_int;
|
||||
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
|
||||
nearby->as_int = near_mvs[CNT_NEAR].as_int;
|
||||
|
||||
vp8_clamp_mv(nearest, xd);
|
||||
vp8_clamp_mv(nearby, xd);
|
||||
vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
|
||||
//TODO: move clamp outside findnearmv
|
||||
vp8_clamp_mv2(nearest, xd);
|
||||
vp8_clamp_mv2(nearby, xd);
|
||||
vp8_clamp_mv2(best_mv, xd);
|
||||
}
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
|
@ -183,26 +153,3 @@ vp8_prob *vp8_mv_ref_probs(
|
|||
return p;
|
||||
}
|
||||
|
||||
const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return cur_mb->bmi + b - 1;
|
||||
}
|
||||
|
||||
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return cur_mb->bmi + b - 4;
|
||||
}
|
||||
|
|
|
@ -17,11 +17,65 @@
|
|||
#include "modecont.h"
|
||||
#include "treecoder.h"
|
||||
|
||||
|
||||
/* Negate *mvp in place when the two sign biases disagree.
 *
 * refmb_ref_frame_sign_bias : sign bias already looked up by the caller for
 *                             the neighbouring macroblock's reference frame.
 * refframe                  : index into ref_frame_sign_bias for the frame
 *                             the current macroblock predicts from.
 * mvp                       : motion vector to adjust (read and written).
 * ref_frame_sign_bias       : table of per-reference-frame sign biases.
 *
 * If refmb_ref_frame_sign_bias differs from ref_frame_sign_bias[refframe],
 * both the row and the column component are multiplied by -1; otherwise the
 * vector is written back unchanged.
 */
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe,
                    int_mv *mvp, const int *ref_frame_sign_bias)
{
    MV xmv;
    xmv = mvp->as_mv;

    if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
    {
        xmv.row *= -1;
        xmv.col *= -1;
    }

    mvp->as_mv = xmv;
}
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
|
||||
{
|
||||
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
|
||||
|
||||
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
|
||||
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
|
||||
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
|
||||
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
|
||||
}
|
||||
|
||||
/* Clamp *mv in place to the caller-supplied bounds.
 *
 * mv                : motion vector to clamp (read and written).
 * mb_to_left_edge   : minimum allowed column value.
 * mb_to_right_edge  : maximum allowed column value.
 * mb_to_top_edge    : minimum allowed row value.
 * mb_to_bottom_edge : maximum allowed row value.
 *
 * Unlike vp8_clamp_mv2-style clamping, no margin is added here: the bounds
 * are used exactly as passed. Each component is first raised to its minimum
 * and then lowered to its maximum, so the maximum wins if the bounds cross.
 */
static void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge,
                         int mb_to_top_edge, int mb_to_bottom_edge)
{
    mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
                    mb_to_left_edge : mv->as_mv.col;
    mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
                    mb_to_right_edge : mv->as_mv.col;
    mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
                    mb_to_top_edge : mv->as_mv.row;
    mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
                    mb_to_bottom_edge : mv->as_mv.row;
}
|
||||
/* Report whether *mv falls outside the caller-supplied bounds.
 *
 * mv                : motion vector to test (not modified).
 * mb_to_left_edge   : minimum allowed column value.
 * mb_to_right_edge  : maximum allowed column value.
 * mb_to_top_edge    : minimum allowed row value.
 * mb_to_bottom_edge : maximum allowed row value.
 *
 * Returns 1 if either component is strictly below its minimum or strictly
 * above its maximum, and 0 otherwise. All four comparisons are always
 * evaluated and OR-ed together (no short-circuit).
 */
static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
                                        int mb_to_right_edge, int mb_to_top_edge,
                                        int mb_to_bottom_edge)
{
    unsigned int need_to_clamp;
    need_to_clamp = (mv->as_mv.col < mb_to_left_edge) ? 1 : 0;
    need_to_clamp |= (mv->as_mv.col > mb_to_right_edge) ? 1 : 0;
    need_to_clamp |= (mv->as_mv.row < mb_to_top_edge) ? 1 : 0;
    need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge) ? 1 : 0;
    return need_to_clamp;
}
|
||||
|
||||
void vp8_find_near_mvs
|
||||
(
|
||||
MACROBLOCKD *xd,
|
||||
const MODE_INFO *here,
|
||||
MV *nearest, MV *nearby, MV *best,
|
||||
int_mv *nearest, int_mv *nearby, int_mv *best,
|
||||
int near_mv_ref_cts[4],
|
||||
int refframe,
|
||||
int *ref_frame_sign_bias
|
||||
|
@ -31,12 +85,89 @@ vp8_prob *vp8_mv_ref_probs(
|
|||
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
|
||||
);
|
||||
|
||||
const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
|
||||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
|
||||
|
||||
#define LEFT_TOP_MARGIN (16 << 3)
|
||||
#define RIGHT_BOTTOM_MARGIN (16 << 3)
|
||||
static int left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
if(cur_mb->mbmi.mode != SPLITMV)
|
||||
return cur_mb->mbmi.mv.as_int;
|
||||
b += 16;
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->mv.as_int;
|
||||
}
|
||||
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
/* On L edge, get from MB to left of us */
|
||||
--cur_mb;
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 3)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 1)->as_mode;
|
||||
}
|
||||
|
||||
static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
/* On top edge, get from MB above us */
|
||||
cur_mb -= mi_stride;
|
||||
|
||||
switch (cur_mb->mbmi.mode)
|
||||
{
|
||||
case B_PRED:
|
||||
return (cur_mb->bmi + b + 12)->as_mode;
|
||||
case DC_PRED:
|
||||
return B_DC_PRED;
|
||||
case V_PRED:
|
||||
return B_VE_PRED;
|
||||
case H_PRED:
|
||||
return B_HE_PRED;
|
||||
case TM_PRED:
|
||||
return B_TM_PRED;
|
||||
default:
|
||||
return B_DC_PRED;
|
||||
}
|
||||
}
|
||||
|
||||
return (cur_mb->bmi + b - 4)->as_mode;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -10,16 +10,61 @@
|
|||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "g_common.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
#if HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#elif defined(_WIN32)
|
||||
#include <windows.h>
|
||||
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
/* Return the processor count reported by the platform, never less than 1.
 *
 * The count starts at a fallback of 16, which is what survives when no
 * query below is compiled in (neither HAVE_UNISTD_H nor _WIN32, or
 * HAVE_UNISTD_H without either _SC_NPROCESSORS_ONLN or _SC_NPROC_ONLN).
 *
 *   HAVE_UNISTD_H : sysconf() with whichever of the two names is defined.
 *   _WIN32        : dwNumberOfProcessors from GetNativeSystemInfo when that
 *                   entry point can be resolved from kernel32.dll, else from
 *                   GetSystemInfo.
 *
 * Declared with (void) so the definition is a proper C prototype.
 */
static int get_cpu_count(void)
{
    int core_count = 16;

#if HAVE_UNISTD_H
#if defined(_SC_NPROCESSORS_ONLN)
    core_count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
    core_count = sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
    {
        PGNSI pGNSI;
        SYSTEM_INFO sysinfo;

        /* Call GetNativeSystemInfo if supported or
         * GetSystemInfo otherwise. */

        pGNSI = (PGNSI) GetProcAddress(
                    GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
        if (pGNSI != NULL)
            pGNSI(&sysinfo);
        else
            GetSystemInfo(&sysinfo);

        core_count = sysinfo.dwNumberOfProcessors;
    }
#else
    /* other platforms */
#endif

    /* A non-positive result (e.g. a failed query) is reported as 1. */
    return core_count > 0 ? core_count : 1;
}
|
||||
#endif
|
||||
|
||||
void vp8_machine_specific_config(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -43,6 +88,12 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
vp8_build_intra_predictors_mby;
|
||||
rtcd->recon.build_intra_predictors_mby_s =
|
||||
vp8_build_intra_predictors_mby_s;
|
||||
rtcd->recon.build_intra_predictors_mbuv =
|
||||
vp8_build_intra_predictors_mbuv;
|
||||
rtcd->recon.build_intra_predictors_mbuv_s =
|
||||
vp8_build_intra_predictors_mbuv_s;
|
||||
rtcd->recon.intra4x4_predict =
|
||||
vp8_intra4x4_predict;
|
||||
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
|
||||
|
@ -57,17 +108,19 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
|
||||
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb = vp8_blend_mb_c;
|
||||
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
|
||||
rtcd->postproc.down = vp8_mbpost_proc_down_c;
|
||||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
|
||||
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
|
||||
rtcd->postproc.blend_b = vp8_blend_b_c;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -80,4 +133,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
vp8_arch_arm_common_init(ctx);
|
||||
#endif
|
||||
|
||||
#if CONFIG_MULTITHREAD
|
||||
ctx->processor_core_count = get_cpu_count();
|
||||
#endif /* CONFIG_MULTITHREAD */
|
||||
}
|
||||
|
|
|
@ -9,162 +9,149 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
|
||||
unsigned char *v_ptr, int y_stride, int uv_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
|
||||
const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm)
|
||||
static void lf_init_lut(loop_filter_info_n *lfi)
|
||||
{
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
LOOPFILTERTYPE lft = cm->filter_type;
|
||||
int sharpness_lvl = cm->sharpness_level;
|
||||
int frame_type = cm->frame_type;
|
||||
int i, j;
|
||||
int filt_lvl;
|
||||
|
||||
int block_inside_limit = 0;
|
||||
int HEVThresh;
|
||||
const int yhedge_boost = 2;
|
||||
const int uvhedge_boost = 2;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
||||
if (frame_type == KEY_FRAME)
|
||||
if (filt_lvl >= 40)
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
|
||||
}
|
||||
else if (filt_lvl >= 20)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
|
||||
}
|
||||
else if (filt_lvl >= 15)
|
||||
{
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 3;
|
||||
else if (filt_lvl >= 20)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
|
||||
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
lfi->mode_lf_lut[DC_PRED] = 1;
|
||||
lfi->mode_lf_lut[V_PRED] = 1;
|
||||
lfi->mode_lf_lut[H_PRED] = 1;
|
||||
lfi->mode_lf_lut[TM_PRED] = 1;
|
||||
lfi->mode_lf_lut[B_PRED] = 0;
|
||||
|
||||
lfi->mode_lf_lut[ZEROMV] = 1;
|
||||
lfi->mode_lf_lut[NEARESTMV] = 2;
|
||||
lfi->mode_lf_lut[NEARMV] = 2;
|
||||
lfi->mode_lf_lut[NEWMV] = 2;
|
||||
lfi->mode_lf_lut[SPLITMV] = 3;
|
||||
|
||||
}
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* For each possible value for the loop filter fill out limits */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
int block_inside_limit = 0;
|
||||
|
||||
/* Set loop filter paramaeters that control sharpness. */
|
||||
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
||||
|
@ -179,181 +166,143 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||
if (block_inside_limit < 1)
|
||||
block_inside_limit = 1;
|
||||
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl + yhedge_boost;
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
lfi[i].flim[j] = filt_lvl;
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl + uvhedge_boost;
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
lfi[i].uvflim[j] = filt_lvl;
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Set up the function pointers depending on the type of loop filtering selected */
|
||||
if (lft == NORMAL_LOOPFILTER)
|
||||
{
|
||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
|
||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v);
|
||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h);
|
||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h);
|
||||
}
|
||||
else
|
||||
{
|
||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v);
|
||||
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v);
|
||||
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h);
|
||||
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h);
|
||||
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
|
||||
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
|
||||
SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
||||
* each frame. Check last_frame_type to skip the function most of times.
|
||||
*/
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
||||
void vp8_loop_filter_init(VP8_COMMON *cm)
|
||||
{
|
||||
int HEVThresh;
|
||||
int i, j;
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
int i;
|
||||
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
/* init limits for given sharpness*/
|
||||
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
|
||||
/* init LUT for lvl and hev thr picking */
|
||||
lf_init_lut(lfi);
|
||||
|
||||
/* init hev threshold const vectors */
|
||||
for(i = 0; i < 4 ; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
||||
if (frame_type == KEY_FRAME)
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filt_lvl >= 40)
|
||||
HEVThresh = 3;
|
||||
else if (filt_lvl >= 20)
|
||||
HEVThresh = 2;
|
||||
else if (filt_lvl >= 15)
|
||||
HEVThresh = 1;
|
||||
else
|
||||
HEVThresh = 0;
|
||||
}
|
||||
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
/*lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
/*lfi[i].flim[j] = filt_lvl;*/
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
/*lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
/*lfi[i].uvflim[j] = filt_lvl;*/
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
|
||||
void vp8_loop_filter_frame_init(VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl)
|
||||
{
|
||||
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
|
||||
int seg, /* segment number */
|
||||
ref, /* index in ref_lf_deltas */
|
||||
mode; /* index in mode_lf_deltas */
|
||||
|
||||
if (mbd->mode_ref_lf_delta_enabled)
|
||||
loop_filter_info_n *lfi = &cm->lf_info;
|
||||
|
||||
/* update limits if sharpness has changed */
|
||||
if(cm->last_sharpness_level != cm->sharpness_level)
|
||||
{
|
||||
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
|
||||
cm->last_sharpness_level = cm->sharpness_level;
|
||||
}
|
||||
|
||||
for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
|
||||
{
|
||||
int lvl_seg = default_filt_lvl;
|
||||
int lvl_ref, lvl_mode;
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (mbd->segmentation_enabled)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
{
|
||||
lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
}
|
||||
else /* Delta Value */
|
||||
{
|
||||
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
|
||||
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
/* we could get rid of this if we assume that deltas are set to
|
||||
* zero when not in use; encoder always uses deltas
|
||||
*/
|
||||
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
|
||||
continue;
|
||||
}
|
||||
|
||||
lvl_ref = lvl_seg;
|
||||
|
||||
/* INTRA_FRAME */
|
||||
ref = INTRA_FRAME;
|
||||
|
||||
/* Apply delta for reference frame */
|
||||
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for mode */
|
||||
if (mbmi->ref_frame == INTRA_FRAME)
|
||||
/* Apply delta for Intra modes */
|
||||
mode = 0; /* B_PRED */
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
mode = 1; /* all the rest of Intra modes */
|
||||
lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
|
||||
/* LAST, GOLDEN, ALT */
|
||||
for(ref = 1; ref < MAX_REF_FRAMES; ref++)
|
||||
{
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
if (mbmi->mode == B_PRED)
|
||||
*filter_level += mbd->mode_lf_deltas[0];
|
||||
int lvl_ref = lvl_seg;
|
||||
|
||||
/* Apply delta for reference frame */
|
||||
lvl_ref += mbd->ref_lf_deltas[ref];
|
||||
|
||||
/* Apply delta for Inter modes */
|
||||
for (mode = 1; mode < 4; mode++)
|
||||
{
|
||||
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
|
||||
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
|
||||
|
||||
lfi->lvl[seg][ref][mode] = lvl_mode;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Zero motion mode */
|
||||
if (mbmi->mode == ZEROMV)
|
||||
*filter_level += mbd->mode_lf_deltas[1];
|
||||
|
||||
/* Split MB motion mode */
|
||||
else if (mbmi->mode == SPLITMV)
|
||||
*filter_level += mbd->mode_lf_deltas[3];
|
||||
|
||||
/* All other inter motion modes (Nearest, Near, New) */
|
||||
else
|
||||
*filter_level += mbd->mode_lf_deltas[2];
|
||||
}
|
||||
|
||||
/* Range check */
|
||||
if (*filter_level > MAX_LOOP_FILTER)
|
||||
*filter_level = MAX_LOOP_FILTER;
|
||||
else if (*filter_level < 0)
|
||||
*filter_level = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl
|
||||
MACROBLOCKD *mbd
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
|
@ -365,101 +314,108 @@ void vp8_loop_filter_frame
|
|||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mbd->mode_info_context++; /* step to next MB */
|
||||
mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
u_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
v_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
|
||||
mbd->mode_info_context++; /* Skip border mb */
|
||||
mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_frame_yonly
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl,
|
||||
int sharpness_lvl
|
||||
int default_filt_lvl
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
/* Point at base of Mb MODE_INFO list */
|
||||
const MODE_INFO *mode_info_context = cm->mi;
|
||||
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
#if 0
|
||||
if(default_filt_lvl == 0) /* no filter applied */
|
||||
return;
|
||||
#endif
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
|
@ -469,72 +425,106 @@ void vp8_loop_filter_frame_yonly
|
|||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
/* Apply any context driven MB level adjustment */
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
|
||||
const int seg = mode_info_context->mbmi.segment_id;
|
||||
const int ref_frame = mode_info_context->mbmi.ref_frame;
|
||||
|
||||
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context ++; /* step to next MB */
|
||||
mode_info_context ++; /* step to next MB */
|
||||
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context ++; /* Skip border mb */
|
||||
mode_info_context ++; /* Skip border mb */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_partial_frame
|
||||
(
|
||||
VP8_COMMON *cm,
|
||||
MACROBLOCKD *mbd,
|
||||
int default_filt_lvl,
|
||||
int sharpness_lvl,
|
||||
int Fraction
|
||||
int default_filt_lvl
|
||||
)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
|
||||
int i;
|
||||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
/*int mb_rows = post->y_height >> 4;*/
|
||||
int mb_cols = post->y_width >> 4;
|
||||
|
||||
int linestocopy;
|
||||
int linestocopy, i;
|
||||
|
||||
loop_filter_info_n *lfi_n = &cm->lf_info;
|
||||
loop_filter_info lfi;
|
||||
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
const MODE_INFO *mode_info_context;
|
||||
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
|
||||
int lvl_seg[MAX_MB_SEGMENTS];
|
||||
|
||||
linestocopy = (post->y_height >> (4 + Fraction));
|
||||
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
|
||||
|
||||
/* 3 is a magic number. 4 is probably magic too */
|
||||
linestocopy = (post->y_height >> (4 + 3));
|
||||
|
||||
if (linestocopy < 1)
|
||||
linestocopy = 1;
|
||||
|
@ -542,32 +532,27 @@ void vp8_loop_filter_partial_frame
|
|||
linestocopy <<= 4;
|
||||
|
||||
/* Note the baseline filter values for each segment */
|
||||
/* See vp8_loop_filter_frame_init. Rather than call that for each change
|
||||
* to default_filt_lvl, copy the relevant calculation here.
|
||||
*/
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
/* Abs value */
|
||||
{ /* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
{
|
||||
lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
}
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
lvl_seg[i] = default_filt_lvl
|
||||
+ mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
lvl_seg[i] = (lvl_seg[i] > 0) ?
|
||||
((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
||||
|
@ -577,28 +562,64 @@ void vp8_loop_filter_partial_frame
|
|||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
{
|
||||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
|
||||
mode_info_context->mbmi.mode != SPLITMV &&
|
||||
mode_info_context->mbmi.mb_skip_coeff);
|
||||
|
||||
if (alt_flt_enabled)
|
||||
filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
|
||||
else
|
||||
filter_level = default_filt_lvl;
|
||||
|
||||
if (filter_level)
|
||||
{
|
||||
if (mb_col > 0)
|
||||
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (cm->filter_type == NORMAL_LOOPFILTER)
|
||||
{
|
||||
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
|
||||
lfi.mblim = lfi_n->mblim[filter_level];
|
||||
lfi.blim = lfi_n->blim[filter_level];
|
||||
lfi.lim = lfi_n->lim[filter_level];
|
||||
lfi.hev_thr = lfi_n->hev_thr[hev_index];
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
|
||||
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (mb_col > 0)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
|
||||
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
|
||||
|
||||
if (!skip_lf)
|
||||
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
|
||||
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
|
||||
}
|
||||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context += 1; /* step to next MB */
|
||||
mode_info_context += 1; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context += 1; /* Skip border mb */
|
||||
mode_info_context += 1; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#define loopfilter_h
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_config.h"
|
||||
|
||||
#define MAX_LOOP_FILTER 63
|
||||
|
||||
|
@ -22,32 +23,45 @@ typedef enum
|
|||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
/* FRK
|
||||
* Need to align this structure so when it is declared and
|
||||
#if ARCH_ARM
|
||||
#define SIMD_WIDTH 1
|
||||
#else
|
||||
#define SIMD_WIDTH 16
|
||||
#endif
|
||||
|
||||
/* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(16, signed char, lim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, flim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, thr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, mbthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvlim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvthr[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbflim[16]);
|
||||
DECLARE_ALIGNED(16, signed char, uvmbthr[16]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
|
||||
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
|
||||
unsigned char lvl[4][4][4];
|
||||
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
|
||||
unsigned char mode_lf_lut[10];
|
||||
} loop_filter_info_n;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
const unsigned char * mblim;
|
||||
const unsigned char * blim;
|
||||
const unsigned char * lim;
|
||||
const unsigned char * hev_thr;
|
||||
} loop_filter_info;
|
||||
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const signed char *flimit,\
|
||||
const signed char *limit, const signed char *thresh, int count)
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#define prototype_loopfilter_block(sym) \
|
||||
void sym(unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int ystride, int uv_stride, loop_filter_info *lfi, int simpler)
|
||||
void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
|
||||
int ystride, int uv_stride, loop_filter_info *lfi)
|
||||
|
||||
#define prototype_simple_loopfilter(sym) \
|
||||
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "x86/loopfilter_x86.h"
|
||||
|
@ -77,38 +91,39 @@ extern prototype_loopfilter_block(vp8_lf_normal_mb_h);
|
|||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
|
||||
|
||||
|
||||
#ifndef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_c
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_v);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_mb_v);
|
||||
|
||||
#ifndef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_b_v);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_b_v);
|
||||
|
||||
#ifndef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_c
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_mb_h);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_mb_h);
|
||||
|
||||
#ifndef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
|
||||
#endif
|
||||
extern prototype_loopfilter_block(vp8_lf_simple_b_h);
|
||||
extern prototype_simple_loopfilter(vp8_lf_simple_b_h);
|
||||
|
||||
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
|
||||
typedef prototype_simple_loopfilter((*vp8_slf_block_fn_t));
|
||||
|
||||
typedef struct
|
||||
{
|
||||
vp8_lf_block_fn_t normal_mb_v;
|
||||
vp8_lf_block_fn_t normal_b_v;
|
||||
vp8_lf_block_fn_t normal_mb_h;
|
||||
vp8_lf_block_fn_t normal_b_h;
|
||||
vp8_lf_block_fn_t simple_mb_v;
|
||||
vp8_lf_block_fn_t simple_b_v;
|
||||
vp8_lf_block_fn_t simple_mb_h;
|
||||
vp8_lf_block_fn_t simple_b_h;
|
||||
vp8_slf_block_fn_t simple_mb_v;
|
||||
vp8_slf_block_fn_t simple_b_v;
|
||||
vp8_slf_block_fn_t simple_mb_h;
|
||||
vp8_slf_block_fn_t simple_b_h;
|
||||
} vp8_loopfilter_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -121,10 +136,33 @@ typedef void loop_filter_uvfunction
|
|||
(
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
unsigned char *v
|
||||
);
|
||||
|
||||
/* assorted loopfilter functions which get used elsewhere */
|
||||
struct VP8Common;
|
||||
struct MacroBlockD;
|
||||
|
||||
void vp8_loop_filter_init(struct VP8Common *cm);
|
||||
|
||||
void vp8_loop_filter_frame_init(struct VP8Common *cm,
|
||||
struct MacroBlockD *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame(struct VP8Common *cm, struct MacroBlockD *mbd);
|
||||
|
||||
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
|
||||
struct MacroBlockD *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
|
||||
struct MacroBlockD *mbd,
|
||||
int default_filt_lvl);
|
||||
|
||||
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
|
||||
int sharpness_lvl);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -28,8 +28,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
|
|||
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
||||
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
|
||||
static __inline signed char vp8_filter_mask(uc limit, uc blimit,
|
||||
uc p3, uc p2, uc p1, uc p0,
|
||||
uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
signed char mask = 0;
|
||||
mask |= (abs(p3 - p2) > limit) * -1;
|
||||
|
@ -38,13 +39,13 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
|
|||
mask |= (abs(q1 - q0) > limit) * -1;
|
||||
mask |= (abs(q2 - q1) > limit) * -1;
|
||||
mask |= (abs(q3 - q2) > limit) * -1;
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
|
||||
mask = ~mask;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
hev |= (abs(p1 - p0) > thresh) * -1;
|
||||
|
@ -52,7 +53,8 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
|
|||
return hev;
|
||||
}
|
||||
|
||||
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
|
||||
static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
|
||||
uc *op0, uc *oq0, uc *oq1)
|
||||
|
||||
{
|
||||
signed char ps0, qs0;
|
||||
|
@ -102,9 +104,9 @@ void vp8_loop_filter_horizontal_edge_c
|
|||
(
|
||||
unsigned char *s,
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
|
@ -117,11 +119,11 @@ void vp8_loop_filter_horizontal_edge_c
|
|||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
|
||||
|
@ -134,9 +136,9 @@ void vp8_loop_filter_vertical_edge_c
|
|||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
|
@ -149,10 +151,10 @@ void vp8_loop_filter_vertical_edge_c
|
|||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
|
||||
|
||||
|
@ -161,7 +163,7 @@ void vp8_loop_filter_vertical_edge_c
|
|||
while (++i < count * 8);
|
||||
}
|
||||
|
||||
static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
||||
static __inline void vp8_mbfilter(signed char mask, uc hev,
|
||||
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
|
||||
{
|
||||
signed char s, u;
|
||||
|
@ -220,9 +222,9 @@ void vp8_mbloop_filter_horizontal_edge_c
|
|||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
|
@ -236,11 +238,11 @@ void vp8_mbloop_filter_horizontal_edge_c
|
|||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
|
||||
s[0*p], s[1*p], s[2*p], s[3*p]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
|
||||
|
||||
|
@ -255,9 +257,9 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
const unsigned char *blimit,
|
||||
const unsigned char *limit,
|
||||
const unsigned char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
|
@ -268,10 +270,10 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
do
|
||||
{
|
||||
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
mask = vp8_filter_mask(limit[0], blimit[0],
|
||||
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
|
||||
|
||||
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
|
||||
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
|
||||
|
||||
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
|
||||
|
||||
|
@ -282,13 +284,13 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
}
|
||||
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
||||
static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
@ -321,47 +323,37 @@ void vp8_loop_filter_simple_horizontal_edge_c
|
|||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
(void) thresh;
|
||||
|
||||
do
|
||||
{
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
while (++i < 16);
|
||||
}
|
||||
|
||||
void vp8_loop_filter_simple_vertical_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p,
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count
|
||||
const unsigned char *blimit
|
||||
)
|
||||
{
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
(void) thresh;
|
||||
|
||||
do
|
||||
{
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
|
||||
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
}
|
||||
while (++i < count * 8);
|
||||
while (++i < 16);
|
||||
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@ typedef enum
|
|||
DEST = 1
|
||||
} BLOCKSET;
|
||||
|
||||
void vp8_setup_block
|
||||
static void setup_block
|
||||
(
|
||||
BLOCKD *b,
|
||||
int mv_stride,
|
||||
|
@ -43,7 +43,8 @@ void vp8_setup_block
|
|||
|
||||
}
|
||||
|
||||
void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
|
||||
static void setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
||||
{
|
||||
int block;
|
||||
|
||||
|
@ -64,16 +65,16 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
|||
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
|
||||
vp8_setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
|
||||
setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
}
|
||||
}
|
||||
|
@ -124,6 +125,6 @@ void vp8_build_block_doffsets(MACROBLOCKD *x)
|
|||
{
|
||||
|
||||
/* handle the destination pitch features */
|
||||
vp8_setup_macroblock(x, DEST);
|
||||
vp8_setup_macroblock(x, PRED);
|
||||
setup_macroblock(x, DEST);
|
||||
setup_macroblock(x, PRED);
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#ifndef __INC_MV_H
|
||||
#define __INC_MV_H
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
@ -18,4 +19,10 @@ typedef struct
|
|||
short col;
|
||||
} MV;
|
||||
|
||||
/* A motion vector that can be read either through its MV fields (as_mv)
 * or as one 32-bit word (as_int) occupying the same storage.
 * NOTE(review): presumably MV is exactly two shorts, so as_int covers the
 * whole vector - confirm against the full MV definition.
 */
typedef union
|
||||
{
|
||||
uint32_t as_int;
|
||||
MV as_mv;
|
||||
} int_mv; /* facilitates faster equality tests and copies */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,7 @@ extern "C"
|
|||
#endif
|
||||
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "type_aliases.h"
|
||||
#include "ppflags.h"
|
||||
|
@ -45,7 +46,8 @@ extern "C"
|
|||
typedef enum
|
||||
{
|
||||
USAGE_STREAM_FROM_SERVER = 0x0,
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1
|
||||
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
|
||||
USAGE_CONSTRAINED_QUALITY = 0x2
|
||||
} END_USAGE;
|
||||
|
||||
|
||||
|
@ -107,6 +109,7 @@ extern "C"
|
|||
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
|
||||
int Sharpness; // parameter used for sharpening output: recommendation 0:
|
||||
int cpu_used;
|
||||
unsigned int rc_max_intra_bitrate_pct;
|
||||
|
||||
// mode ->
|
||||
//(0)=Realtime/Live Encoding. This mode is optimized for realtime encoding (for example, capturing
|
||||
|
@ -137,8 +140,9 @@ extern "C"
|
|||
|
||||
int end_usage; // vbr or cbr
|
||||
|
||||
// shoot to keep buffer full at all times by undershooting a bit 95 recommended
|
||||
// buffer targeting aggressiveness
|
||||
int under_shoot_pct;
|
||||
int over_shoot_pct;
|
||||
|
||||
// buffering parameters
|
||||
int starting_buffer_level; // in seconds
|
||||
|
@ -149,6 +153,7 @@ extern "C"
|
|||
int fixed_q;
|
||||
int worst_allowed_q;
|
||||
int best_allowed_q;
|
||||
int cq_level;
|
||||
|
||||
// allow internal resizing ( currently disabled in the build !!!!!)
|
||||
int allow_spatial_resampling;
|
||||
|
@ -179,16 +184,20 @@ extern "C"
|
|||
int token_partitions; // how many token partitions to create for multi core decoding
|
||||
int encode_breakout; // early breakout encode threshold : for video conf recommend 800
|
||||
|
||||
int error_resilient_mode; // if running over udp networks provides decodable frames after a
|
||||
// dropped packet
|
||||
unsigned int error_resilient_mode; // Bitfield defining the error
|
||||
// resiliency features to enable. Can provide
|
||||
// decodable frames after losses in previous
|
||||
// frames and decodable partitions after
|
||||
// losses in the same frame.
|
||||
|
||||
int arnr_max_frames;
|
||||
int arnr_strength ;
|
||||
int arnr_type ;
|
||||
|
||||
|
||||
struct vpx_fixed_buf two_pass_stats_in;
|
||||
struct vpx_codec_pkt_list *output_pkt_list;
|
||||
|
||||
vp8e_tuning tuning;
|
||||
} VP8_CONFIG;
|
||||
|
||||
|
||||
|
@ -202,9 +211,9 @@ extern "C"
|
|||
|
||||
// receive a frame's worth of data; the caller can assume that a copy of this frame is made
|
||||
// and not just a copy of the pointer..
|
||||
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
|
||||
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
|
||||
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
|
||||
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, int64_t *time_stamp, int64_t *time_end, int flush);
|
||||
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
#include "entropy.h"
|
||||
#include "idct.h"
|
||||
#include "recon.h"
|
||||
#if CONFIG_POSTPROC
|
||||
#include "postproc.h"
|
||||
#endif
|
||||
|
||||
/*#ifdef PACKET_TESTING*/
|
||||
#include "header.h"
|
||||
|
@ -35,13 +37,15 @@ void vp8_initialize_common(void);
|
|||
|
||||
#define NUM_YV12_BUFFERS 4
|
||||
|
||||
#define MAX_PARTITIONS 9
|
||||
|
||||
typedef struct frame_contexts
|
||||
{
|
||||
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
|
||||
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
|
||||
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
|
||||
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
|
||||
MV_CONTEXT mvc[2];
|
||||
MV_CONTEXT pre_mvc[2]; /* used to avoid calculating the mvcost for the frame if mvc doesn't change. */
|
||||
} FRAME_CONTEXT;
|
||||
|
@ -73,7 +77,9 @@ typedef struct VP8_COMMON_RTCD
|
|||
vp8_recon_rtcd_vtable_t recon;
|
||||
vp8_subpix_rtcd_vtable_t subpix;
|
||||
vp8_loopfilter_rtcd_vtable_t loopfilter;
|
||||
#if CONFIG_POSTPROC
|
||||
vp8_postproc_rtcd_vtable_t postproc;
|
||||
#endif
|
||||
int flags;
|
||||
#else
|
||||
int unused;
|
||||
|
@ -81,6 +87,7 @@ typedef struct VP8_COMMON_RTCD
|
|||
} VP8_COMMON_RTCD;
|
||||
|
||||
typedef struct VP8Common
|
||||
|
||||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
|
@ -105,7 +112,8 @@ typedef struct VP8Common
|
|||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
|
||||
|
||||
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
@ -119,7 +127,6 @@ typedef struct VP8Common
|
|||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int simpler_lpf;
|
||||
int use_bilinear_mc_filter;
|
||||
int full_pixel;
|
||||
|
||||
|
@ -140,16 +147,15 @@ typedef struct VP8Common
|
|||
|
||||
MODE_INFO *mip; /* Base of allocated array */
|
||||
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
|
||||
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
|
||||
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
|
||||
|
||||
|
||||
INTERPOLATIONFILTERTYPE mcomp_filter_type;
|
||||
LOOPFILTERTYPE last_filter_type;
|
||||
LOOPFILTERTYPE filter_type;
|
||||
loop_filter_info lf_info[MAX_LOOP_FILTER+1];
|
||||
prototype_loopfilter_block((*lf_mbv));
|
||||
prototype_loopfilter_block((*lf_mbh));
|
||||
prototype_loopfilter_block((*lf_bv));
|
||||
prototype_loopfilter_block((*lf_bh));
|
||||
|
||||
loop_filter_info_n lf_info;
|
||||
|
||||
int filter_level;
|
||||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
|
@ -196,13 +202,12 @@ typedef struct VP8Common
|
|||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
VP8_COMMON_RTCD rtcd;
|
||||
#endif
|
||||
#if CONFIG_MULTITHREAD
|
||||
int processor_core_count;
|
||||
#endif
|
||||
#if CONFIG_POSTPROC
|
||||
struct postproc_state postproc_state;
|
||||
#endif
|
||||
} VP8_COMMON;
|
||||
|
||||
|
||||
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
|
||||
void vp8_init_loop_filter(VP8_COMMON *cm);
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
|
||||
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,6 +22,7 @@ extern "C"
|
|||
#include "vpx_scale/yv12config.h"
|
||||
#include "ppflags.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
|
||||
typedef void *VP8D_PTR;
|
||||
typedef struct
|
||||
|
@ -31,6 +32,8 @@ extern "C"
|
|||
int Version;
|
||||
int postprocess;
|
||||
int max_threads;
|
||||
int error_concealment;
|
||||
int input_partition;
|
||||
} VP8D_CONFIG;
|
||||
typedef enum
|
||||
{
|
||||
|
@ -50,11 +53,11 @@ extern "C"
|
|||
|
||||
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
|
||||
|
||||
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
|
||||
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
|
||||
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
|
||||
|
||||
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
|
||||
|
||||
VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf);
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
|
||||
|
||||
/* global constants */
|
||||
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
|
||||
|
@ -41,13 +41,32 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
|||
{ RGB_TO_YUV(0xFF0000) } /* Red */
|
||||
};
|
||||
|
||||
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
|
||||
/* One color triple per entry (B_MODE_COUNT rows), each produced from a
 * 24-bit RGB literal by RGB_TO_YUV.
 * NOTE(review): 0xed00f5 below is a magenta-like RGB value, so its
 * "Dark Blue" label looks off - confirm which of the two is intended.
 */
static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
|
||||
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
|
||||
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
|
||||
{ RGB_TO_YUV(0xff3366) }, /* Coral */
|
||||
{ RGB_TO_YUV(0x3366ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
|
||||
{ RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
|
||||
{ RGB_TO_YUV(0xff6633) }, /* Orange */
|
||||
{ RGB_TO_YUV(0x33ccff) }, /* Light Blue */
|
||||
{ RGB_TO_YUV(0x8ab800) }, /* Green */
|
||||
{ RGB_TO_YUV(0xffcc33) }, /* Light Orange */
|
||||
{ RGB_TO_YUV(0x33ffcc) }, /* Aqua */
|
||||
{ RGB_TO_YUV(0x66ff33) }, /* Light Green */
|
||||
{ RGB_TO_YUV(0xccff33) }, /* Yellow */
|
||||
};
|
||||
|
||||
/* One color triple per reference frame (MAX_REF_FRAMES rows), each produced
 * from a 24-bit RGB literal (red in bits 16-23, green in 8-15, blue in 0-7).
 */
static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x00ff00) }, /* Green */
|
||||
{ RGB_TO_YUV(0x0000ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
|
||||
{ RGB_TO_YUV(0xff0000) }, /* Red */
|
||||
};
|
||||
#endif
|
||||
|
||||
static const short kernel5[] =
|
||||
{
|
||||
|
@ -192,7 +211,7 @@ void vp8_post_proc_down_and_across_c
|
|||
}
|
||||
}
|
||||
|
||||
int vp8_q2mbl(int x)
|
||||
static int q2mbl(int x)
|
||||
{
|
||||
if (x < 20) x = 20;
|
||||
|
||||
|
@ -295,8 +314,8 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
|
|||
(void) flag;
|
||||
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
|
||||
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
|
||||
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
|
||||
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
|
||||
POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
|
||||
|
@ -476,7 +495,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
|||
* edges unblended to give distinction to macro blocks in areas
|
||||
* filled with the same color block.
|
||||
*/
|
||||
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
|
@ -484,10 +503,10 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
|||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
y += stride + 2;
|
||||
for (i = 0; i < 14; i++)
|
||||
y += 2*stride + 2;
|
||||
for (i = 0; i < 12; i++)
|
||||
{
|
||||
for (j = 0; j < 14; j++)
|
||||
for (j = 0; j < 12; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
|
@ -511,6 +530,104 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
|||
}
|
||||
}
|
||||
|
||||
/* Blend only the edge of the macro block. Leave center
|
||||
* unblended to allow for other visualizations to be layered.
|
||||
*
* Every touched sample s becomes (s*alpha + c*((1<<16)-alpha)) >> 16, where
* c is y1, u1 or v1 for the matching plane, so alpha is the 16-bit
* fixed-point weight of the existing pixel.
* NOTE(review): presumably alpha lies in [0, 1<<16] - confirm with callers.
*
* y, u, v point at the top-left sample of the block in each plane.
* stride is the luma row pitch; the chroma planes are stepped with stride/2.
*
* Luma: a 2-sample-wide frame around the 16x16 block.
* Chroma: a 1-sample-wide frame around the 8x8 block.
*/
|
||||
void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
/* pre-scaled contribution of the overlay color for each plane */
int y1_const = y1*((1<<16)-alpha);
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
/* top two luma rows: blend all 16 columns */
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
/* middle twelve luma rows: blend only the two leftmost and two rightmost columns */
for (i = 0; i < 12; i++)
|
||||
{
|
||||
y[0] = (y[0]*alpha + y1_const)>>16;
|
||||
y[1] = (y[1]*alpha + y1_const)>>16;
|
||||
y[14] = (y[14]*alpha + y1_const)>>16;
|
||||
y[15] = (y[15]*alpha + y1_const)>>16;
|
||||
y += stride;
|
||||
}
|
||||
|
||||
/* bottom two luma rows: blend all 16 columns */
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
/* chroma rows are stepped with half the luma stride */
stride >>= 1;
|
||||
|
||||
/* top chroma row: blend all 8 columns of u and v */
for (j = 0; j < 8; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
u += stride;
|
||||
v += stride;
|
||||
|
||||
/* middle six chroma rows: blend only the first and last column */
for (i = 0; i < 6; i++)
|
||||
{
|
||||
u[0] = (u[0]*alpha + u1_const)>>16;
|
||||
v[0] = (v[0]*alpha + v1_const)>>16;
|
||||
|
||||
u[7] = (u[7]*alpha + u1_const)>>16;
|
||||
v[7] = (v[7]*alpha + v1_const)>>16;
|
||||
|
||||
u += stride;
|
||||
v += stride;
|
||||
}
|
||||
|
||||
/* bottom chroma row: blend all 8 columns of u and v */
for (j = 0; j < 8; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
}
|
||||
|
||||
/* Blend a 4x4 luma region and the 2x2 regions of both chroma planes with a
 * single color (y1, u1, v1).
 *
 * Every sample s becomes (s*alpha + c*((1<<16)-alpha)) >> 16, where c is the
 * color component for that plane, so alpha is the 16-bit fixed-point weight
 * of the existing pixel.
 * NOTE(review): presumably alpha lies in [0, 1<<16] - confirm with callers.
 *
 * y, u, v point at the top-left sample of the region in each plane.
 * stride is the luma row pitch; the chroma planes are stepped with stride/2.
 */
void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
{
|
||||
int i, j;
|
||||
/* pre-scaled contribution of the overlay color for each plane */
int y1_const = y1*((1<<16)-alpha);
|
||||
int u1_const = u1*((1<<16)-alpha);
|
||||
int v1_const = v1*((1<<16)-alpha);
|
||||
|
||||
/* luma: 4 rows of 4 samples */
for (i = 0; i < 4; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
{
|
||||
y[j] = (y[j]*alpha + y1_const)>>16;
|
||||
}
|
||||
y += stride;
|
||||
}
|
||||
|
||||
/* chroma rows are stepped with half the luma stride */
stride >>= 1;
|
||||
|
||||
/* chroma: 2 rows of 2 samples in both u and v */
for (i = 0; i < 2; i++)
|
||||
{
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
u[j] = (u[j]*alpha + u1_const)>>16;
|
||||
v[j] = (v[j]*alpha + v1_const)>>16;
|
||||
}
|
||||
u += stride;
|
||||
v += stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
|
||||
{
|
||||
int dx;
|
||||
|
@ -522,7 +639,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
|||
dy = *y1 - y0;
|
||||
|
||||
*x1 = width;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*y1 = ((width-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*x1 < 0)
|
||||
|
@ -531,7 +648,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
|||
dy = *y1 - y0;
|
||||
|
||||
*x1 = 0;
|
||||
if (dy)
|
||||
if (dx)
|
||||
*y1 = ((0-x0)*dy)/dx + y0;
|
||||
}
|
||||
if (*y1 > height)
|
||||
|
@ -540,7 +657,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
|||
dy = *y1 - y0;
|
||||
|
||||
*y1 = height;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*x1 = ((height-y0)*dx)/dy + x0;
|
||||
}
|
||||
if (*y1 < 0)
|
||||
|
@ -549,7 +666,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
|||
dy = *y1 - y0;
|
||||
|
||||
*y1 = 0;
|
||||
if (dx)
|
||||
if (dy)
|
||||
*x1 = ((0-y0)*dx)/dy + x0;
|
||||
}
|
||||
}
|
||||
|
@ -561,10 +678,12 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
|
|||
#define RTCD_VTABLE(oci) NULL
|
||||
#endif
|
||||
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
|
||||
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
|
||||
{
|
||||
char message[512];
|
||||
int q = oci->filter_level * 10 / 6;
|
||||
int flags = ppflags->post_proc_flag;
|
||||
int deblock_level = ppflags->deblocking_level;
|
||||
int noise_level = ppflags->noise_level;
|
||||
|
||||
if (!oci->frame_to_show)
|
||||
return -1;
|
||||
|
@ -621,8 +740,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL1)
|
||||
#if CONFIG_POSTPROC_VISUALIZER
|
||||
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
|
||||
{
|
||||
char message[512];
|
||||
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
|
||||
(oci->frame_type == KEY_FRAME),
|
||||
oci->refresh_golden_frame,
|
||||
|
@ -633,7 +754,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL2)
|
||||
if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
|
@ -665,7 +786,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL3)
|
||||
if (flags & VP8D_DEBUG_TXT_DC_DIFF)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
|
@ -683,11 +804,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
for (j = 0; j < mb_cols; j++)
|
||||
{
|
||||
char zz[4];
|
||||
int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED &&
|
||||
mi[mb_index].mbmi.mode != SPLITMV &&
|
||||
mi[mb_index].mbmi.mb_skip_coeff);
|
||||
|
||||
if (oci->frame_type == KEY_FRAME)
|
||||
sprintf(zz, "a");
|
||||
else
|
||||
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
|
||||
sprintf(zz, "%c", dc_diff + '0');
|
||||
|
||||
vp8_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index ++;
|
||||
|
@ -700,78 +824,162 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
}
|
||||
}
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL4)
|
||||
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
|
||||
{
|
||||
char message[512];
|
||||
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
#if 0
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int mb_rows = post->y_height >> 4;
|
||||
int mb_cols = post->y_width >> 4;
|
||||
int mb_index = 0;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
{
|
||||
char zz[4];
|
||||
|
||||
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
|
||||
vp8_blit_text(zz, y_ptr, post->y_stride);
|
||||
mb_index ++;
|
||||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/* Draw motion vectors */
|
||||
if (flags & VP8D_DEBUG_LEVEL5)
|
||||
if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
int mb_cols = width >> 4;
|
||||
unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
int x0, y0;
|
||||
|
||||
for (y0 = 8; y0 < (height + 8); y0 += 16)
|
||||
for (y0 = 0; y0 < height; y0 += 16)
|
||||
{
|
||||
for (x0 = 8; x0 < (width + 8); x0 += 16)
|
||||
for (x0 = 0; x0 < width; x0 += 16)
|
||||
{
|
||||
int x1, y1;
|
||||
if (mi->mbmi.mode >= NEARESTMV)
|
||||
int x1, y1;
|
||||
|
||||
if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
|
||||
{
|
||||
mi++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (mi->mbmi.mode == SPLITMV)
|
||||
{
|
||||
switch (mi->mbmi.partitioning)
|
||||
{
|
||||
case 0 : /* mv_top_bottom */
|
||||
{
|
||||
union b_mode_info *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 8 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+8, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 1 : /* mv_left_right */
|
||||
{
|
||||
union b_mode_info *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 8 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+8, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
case 2 : /* mv_quarters */
|
||||
{
|
||||
union b_mode_info *bmi = &mi->bmi[0];
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[2];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 + 4 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+4, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[8];
|
||||
|
||||
x1 = x0 + 4 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+4, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
|
||||
|
||||
bmi = &mi->bmi[10];
|
||||
|
||||
x1 = x0 +12 + (mv->col >> 3);
|
||||
y1 = y0 +12 + (mv->row >> 3);
|
||||
|
||||
constrain_line (x0+12, &x1, y0+12, &y1, width, height);
|
||||
vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
|
||||
break;
|
||||
}
|
||||
default :
|
||||
{
|
||||
union b_mode_info *bmi = mi->bmi;
|
||||
int bx0, by0;
|
||||
|
||||
for (by0 = y0; by0 < (y0+16); by0 += 4)
|
||||
{
|
||||
for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
|
||||
{
|
||||
MV *mv = &bmi->mv.as_mv;
|
||||
|
||||
x1 = bx0 + 2 + (mv->col >> 3);
|
||||
y1 = by0 + 2 + (mv->row >> 3);
|
||||
|
||||
constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
|
||||
vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
|
||||
|
||||
bmi++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (mi->mbmi.mode >= NEARESTMV)
|
||||
{
|
||||
MV *mv = &mi->mbmi.mv.as_mv;
|
||||
const int lx0 = x0 + 8;
|
||||
const int ly0 = y0 + 8;
|
||||
|
||||
x1 = x0 + (mv->col >> 3);
|
||||
y1 = y0 + (mv->row >> 3);
|
||||
x1 = lx0 + (mv->col >> 3);
|
||||
y1 = ly0 + (mv->row >> 3);
|
||||
|
||||
if (x1 != x0 && y1 != y0)
|
||||
if (x1 != lx0 && y1 != ly0)
|
||||
{
|
||||
constrain_line (x0, &x1, y0-1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
|
||||
constrain_line (lx0, &x1, ly0-1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
|
||||
|
||||
constrain_line (x0, &x1, y0+1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
|
||||
constrain_line (lx0, &x1, ly0+1, &y1, width, height);
|
||||
vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
|
||||
}
|
||||
else
|
||||
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
|
||||
vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
mi++;
|
||||
|
@ -779,9 +987,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
}
|
||||
|
||||
/* Color in block modes */
|
||||
if (flags & VP8D_DEBUG_LEVEL6)
|
||||
if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
|
||||
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
|
||||
{
|
||||
int i, j;
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
|
@ -791,18 +1000,54 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
for (y = 0; y < height; y += 16)
|
||||
{
|
||||
for (j = 0; j < width; j += 16)
|
||||
for (x = 0; x < width; x += 16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
if (mi->mbmi.mode == B_PRED &&
|
||||
((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
|
||||
{
|
||||
int by, bx;
|
||||
unsigned char *yl, *ul, *vl;
|
||||
union b_mode_info *bmi = mi->bmi;
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
yl = y_ptr + x;
|
||||
ul = u_ptr + (x>>1);
|
||||
vl = v_ptr + (x>>1);
|
||||
|
||||
for (by = 0; by < 16; by += 4)
|
||||
{
|
||||
for (bx = 0; bx < 16; bx += 4)
|
||||
{
|
||||
if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
|
||||
|| (ppflags->display_mb_modes_flag & B_PRED))
|
||||
{
|
||||
Y = B_PREDICTION_MODE_colors[bmi->as_mode][0];
|
||||
U = B_PREDICTION_MODE_colors[bmi->as_mode][1];
|
||||
V = B_PREDICTION_MODE_colors[bmi->as_mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
|
||||
(yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
bmi++;
|
||||
}
|
||||
|
||||
yl += y_stride*4;
|
||||
ul += y_stride*1;
|
||||
vl += y_stride*1;
|
||||
}
|
||||
}
|
||||
else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
|
||||
{
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
|
@ -815,9 +1060,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
}
|
||||
|
||||
/* Color in frame reference blocks */
|
||||
if (flags & VP8D_DEBUG_LEVEL7)
|
||||
if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
|
||||
{
|
||||
int i, j;
|
||||
int y, x;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
|
@ -827,18 +1072,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
for (y = 0; y < height; y += 16)
|
||||
{
|
||||
for (j = 0; j < width; j +=16)
|
||||
for (x = 0; x < width; x +=16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
|
||||
{
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
|
||||
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
|
||||
}
|
||||
|
||||
mi++;
|
||||
}
|
||||
|
@ -849,6 +1097,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
mi++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*dest = oci->post_proc_buffer;
|
||||
|
||||
|
|
|
@ -24,7 +24,15 @@
|
|||
char whiteclamp[16], char bothclamp[16],\
|
||||
unsigned int w, unsigned int h, int pitch)
|
||||
|
||||
#define prototype_postproc_blend_mb(sym)\
|
||||
#define prototype_postproc_blend_mb_inner(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_mb_outer(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#define prototype_postproc_blend_b(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
|
@ -52,22 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
|
|||
#endif
|
||||
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb
|
||||
#define vp8_postproc_blend_mb vp8_blend_mb_c
|
||||
#ifndef vp8_postproc_blend_mb_inner
|
||||
#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
|
||||
extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb_outer
|
||||
#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
|
||||
|
||||
#ifndef vp8_postproc_blend_b
|
||||
#define vp8_postproc_blend_b vp8_blend_b_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_b(vp8_postproc_blend_b);
|
||||
|
||||
typedef prototype_postproc((*vp8_postproc_fn_t));
|
||||
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
|
||||
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
|
||||
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
|
||||
typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
|
||||
typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
|
||||
typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
|
||||
typedef struct
|
||||
{
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_fn_t blend_mb;
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
|
||||
vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
|
||||
vp8_postproc_blend_b_fn_t blend_b;
|
||||
} vp8_postproc_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -89,7 +111,7 @@ struct postproc_state
|
|||
#include "onyxc_int.h"
|
||||
#include "ppflags.h"
|
||||
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
|
||||
int deblock_level, int noise_level, int flags);
|
||||
vp8_ppflags_t *flags);
|
||||
|
||||
|
||||
void vp8_de_noise(YV12_BUFFER_CONFIG *source,
|
||||
|
|
|
@ -13,17 +13,28 @@
|
|||
#define __INC_PPFLAGS_H
|
||||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_LEVEL1 = 1<<3,
|
||||
VP8D_DEBUG_LEVEL2 = 1<<4,
|
||||
VP8D_DEBUG_LEVEL3 = 1<<5,
|
||||
VP8D_DEBUG_LEVEL4 = 1<<6,
|
||||
VP8D_DEBUG_LEVEL5 = 1<<7,
|
||||
VP8D_DEBUG_LEVEL6 = 1<<8,
|
||||
VP8D_DEBUG_LEVEL7 = 1<<9
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
|
||||
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
|
||||
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
|
||||
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
|
||||
VP8D_DEBUG_DRAW_MV = 1<<7,
|
||||
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
|
||||
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
int display_ref_frame_flag;
|
||||
int display_mb_modes_flag;
|
||||
int display_b_modes_flag;
|
||||
int display_mv_flag;
|
||||
} vp8_ppflags_t;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,44 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "blockd.h"
|
||||
|
||||
|
||||
void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons)
|
||||
{
|
||||
int diff;
|
||||
int sign;
|
||||
int last_dc = *lastdc;
|
||||
int this_dc = *thisdc;
|
||||
|
||||
if (*cons > DCPREDCNTTHRESH)
|
||||
{
|
||||
this_dc += last_dc;
|
||||
}
|
||||
|
||||
diff = abs(last_dc - this_dc);
|
||||
sign = (last_dc >> 31) ^(this_dc >> 31);
|
||||
sign |= (!last_dc | !this_dc);
|
||||
|
||||
if (sign)
|
||||
{
|
||||
*cons = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (diff <= DCPREDSIMTHRESH * quant)
|
||||
(*cons)++ ;
|
||||
}
|
||||
|
||||
*thisdc = this_dc;
|
||||
*lastdc = this_dc;
|
||||
}
|
|
@ -1,18 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __PREDICTDC_H
|
||||
#define __PREDICTDC_H
|
||||
|
||||
void uvvp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
|
||||
void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
|
||||
|
||||
#endif
|
|
@ -1,46 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* Module Title : preproc.h
|
||||
*
|
||||
* Description : simple preprocessor
|
||||
*
|
||||
****************************************************************************/
|
||||
|
||||
#ifndef __INC_PREPROC_H
|
||||
#define __INC_PREPROC_H
|
||||
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
unsigned char *frame_buffer;
|
||||
int frame;
|
||||
unsigned int *fixed_divide;
|
||||
|
||||
unsigned char *frame_buffer_alloc;
|
||||
unsigned int *fixed_divide_alloc;
|
||||
} pre_proc_instance;
|
||||
|
||||
/****************************************************************************
|
||||
* Functions.
|
||||
****************************************************************************/
|
||||
void pre_proc_machine_specific_config(void);
|
||||
void delete_pre_proc(pre_proc_instance *ppi);
|
||||
int init_pre_proc(pre_proc_instance *ppi, int frame_size);
|
||||
extern void spatial_filter_c(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int width, int height, int pitch, int strength);
|
||||
extern void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
|
||||
|
||||
#endif
|
|
@ -26,6 +26,9 @@
|
|||
#define prototype_build_intra_predictors(sym) \
|
||||
void sym(MACROBLOCKD *x)
|
||||
|
||||
#define prototype_intra4x4_predict(sym) \
|
||||
void sym(BLOCKD *x, int b_mode, unsigned char *predictor)
|
||||
|
||||
struct vp8_recon_rtcd_vtable;
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
|
@ -88,11 +91,30 @@ extern prototype_build_intra_predictors\
|
|||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mby_s);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mbuv
|
||||
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mbuv);
|
||||
|
||||
#ifndef vp8_recon_build_intra_predictors_mbuv_s
|
||||
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s
|
||||
#endif
|
||||
extern prototype_build_intra_predictors\
|
||||
(vp8_recon_build_intra_predictors_mbuv_s);
|
||||
|
||||
#ifndef vp8_recon_intra4x4_predict
|
||||
#define vp8_recon_intra4x4_predict vp8_intra4x4_predict
|
||||
#endif
|
||||
extern prototype_intra4x4_predict\
|
||||
(vp8_recon_intra4x4_predict);
|
||||
|
||||
|
||||
typedef prototype_copy_block((*vp8_copy_block_fn_t));
|
||||
typedef prototype_recon_block((*vp8_recon_fn_t));
|
||||
typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
|
||||
typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
|
||||
typedef prototype_intra4x4_predict((*vp8_intra4x4_pred_fn_t));
|
||||
typedef struct vp8_recon_rtcd_vtable
|
||||
{
|
||||
vp8_copy_block_fn_t copy16x16;
|
||||
|
@ -105,6 +127,9 @@ typedef struct vp8_recon_rtcd_vtable
|
|||
vp8_recon_mb_fn_t recon_mby;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby_s;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mby;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mbuv_s;
|
||||
vp8_build_intra_pred_fn_t build_intra_predictors_mbuv;
|
||||
vp8_intra4x4_pred_fn_t intra4x4_predict;
|
||||
} vp8_recon_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -113,6 +138,5 @@ typedef struct vp8_recon_rtcd_vtable
|
|||
#define RECON_INVOKE(ctx,fn) vp8_recon_##fn
|
||||
#endif
|
||||
|
||||
void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
|
||||
void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
|
||||
#endif
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "recon.h"
|
||||
#include "subpixel.h"
|
||||
#include "blockd.h"
|
||||
|
@ -18,12 +19,6 @@
|
|||
#include "onyxc_int.h"
|
||||
#endif
|
||||
|
||||
/* use this define on systems where unaligned int reads and writes are
|
||||
* not allowed, i.e. ARM architectures
|
||||
*/
|
||||
/*#define MUST_BE_ALIGNED*/
|
||||
|
||||
|
||||
static const int bbb[4] = {0, 2, 8, 10};
|
||||
|
||||
|
||||
|
@ -39,7 +34,7 @@ void vp8_copy_mem16x16_c(
|
|||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
#ifdef MUST_BE_ALIGNED
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
|
@ -58,10 +53,10 @@ void vp8_copy_mem16x16_c(
|
|||
dst[15] = src[15];
|
||||
|
||||
#else
|
||||
((int *)dst)[0] = ((int *)src)[0] ;
|
||||
((int *)dst)[1] = ((int *)src)[1] ;
|
||||
((int *)dst)[2] = ((int *)src)[2] ;
|
||||
((int *)dst)[3] = ((int *)src)[3] ;
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
|
||||
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
|
||||
|
||||
#endif
|
||||
src += src_stride;
|
||||
|
@ -81,7 +76,7 @@ void vp8_copy_mem8x8_c(
|
|||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
#ifdef MUST_BE_ALIGNED
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
|
@ -91,8 +86,8 @@ void vp8_copy_mem8x8_c(
|
|||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((int *)dst)[0] = ((int *)src)[0] ;
|
||||
((int *)dst)[1] = ((int *)src)[1] ;
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
@ -111,7 +106,7 @@ void vp8_copy_mem8x4_c(
|
|||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#ifdef MUST_BE_ALIGNED
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
dst[2] = src[2];
|
||||
|
@ -121,8 +116,8 @@ void vp8_copy_mem8x4_c(
|
|||
dst[6] = src[6];
|
||||
dst[7] = src[7];
|
||||
#else
|
||||
((int *)dst)[0] = ((int *)src)[0] ;
|
||||
((int *)dst)[1] = ((int *)src)[1] ;
|
||||
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
|
||||
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
|
||||
#endif
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
|
@ -154,13 +149,13 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
|
|||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#ifdef MUST_BE_ALIGNED
|
||||
#if !(CONFIG_FAST_UNALIGNED)
|
||||
pred_ptr[0] = ptr[0];
|
||||
pred_ptr[1] = ptr[1];
|
||||
pred_ptr[2] = ptr[2];
|
||||
pred_ptr[3] = ptr[3];
|
||||
#else
|
||||
*(int *)pred_ptr = *(int *)ptr ;
|
||||
*(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
|
||||
#endif
|
||||
pred_ptr += pitch;
|
||||
ptr += d->pre_stride;
|
||||
|
@ -168,7 +163,7 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
|
|||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
|
@ -187,7 +182,7 @@ void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
|||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
|
@ -207,12 +202,12 @@ void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
|
|||
}
|
||||
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
if (x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
|
@ -246,7 +241,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
|||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 8);
|
||||
build_inter_predictors2b(x, d0, 8);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
|
||||
|
@ -257,158 +252,132 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
|
|||
}
|
||||
|
||||
/*encoder only*/
|
||||
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
|
||||
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *pred_ptr = x->predictor;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *pred_ptr = x->predictor;
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
|
||||
}
|
||||
}
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
|
||||
unsigned char *ptr_base = x->pre.y_buffer;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
|
||||
}
|
||||
|
||||
mv_row = x->block[16].bmi.mv.as_mv.row;
|
||||
mv_col = x->block[16].bmi.mv.as_mv.col;
|
||||
pre_stride >>= 1;
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, dst_v, dst_uvstride);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
build_inter_predictors4b(x, d, 16);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, 16);
|
||||
else
|
||||
{
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
vp8_build_inter_predictors4b(x, d, 16);
|
||||
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (i = 16; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
build_inter_predictors2b(x, d0, 8);
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 16);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
|
||||
{
|
||||
|
||||
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
if (x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
int offset;
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
unsigned char *uptr, *vptr;
|
||||
unsigned char *pred_ptr = x->predictor;
|
||||
unsigned char *upred_ptr = &x->predictor[256];
|
||||
unsigned char *vpred_ptr = &x->predictor[320];
|
||||
|
||||
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
|
||||
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
|
||||
int pre_stride = x->block[0].pre_stride;
|
||||
|
||||
ptr_base = x->pre.y_buffer;
|
||||
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
|
||||
}
|
||||
|
||||
mv_row = x->block[16].bmi.mv.as_mv.row;
|
||||
mv_col = x->block[16].bmi.mv.as_mv.col;
|
||||
pre_stride >>= 1;
|
||||
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
|
||||
uptr = x->pre.u_buffer + offset;
|
||||
vptr = x->pre.v_buffer + offset;
|
||||
|
||||
if ((mv_row | mv_col) & 7)
|
||||
{
|
||||
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
|
||||
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
|
||||
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
|
||||
}
|
||||
vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
|
||||
&x->predictor[320], 16, 8);
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
|
||||
if (x->mode_info_context->mbmi.partitioning < 3)
|
||||
{
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
BLOCKD *d = &x->block[bbb[i]];
|
||||
vp8_build_inter_predictors4b(x, d, 16);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (i = 0; i < 16; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 16);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (i = 16; i < 24; i += 2)
|
||||
{
|
||||
BLOCKD *d0 = &x->block[i];
|
||||
BLOCKD *d1 = &x->block[i+1];
|
||||
|
||||
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
|
||||
vp8_build_inter_predictors2b(x, d0, 8);
|
||||
else
|
||||
{
|
||||
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
|
||||
vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
vp8_build_inter4x4_predictors_mb(x);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -492,202 +461,5 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
|
|||
}
|
||||
|
||||
|
||||
/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
|
||||
* situation, we can write the result directly to dst buffer instead of writing it to predictor
|
||||
* buffer and then copying it to dst buffer.
|
||||
*/
|
||||
static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
|
||||
{
|
||||
int r;
|
||||
unsigned char *ptr_base;
|
||||
unsigned char *ptr;
|
||||
/*unsigned char *pred_ptr = d->predictor;*/
|
||||
int dst_stride = d->dst_stride;
|
||||
int pre_stride = d->pre_stride;
|
||||
|
||||
ptr_base = *(d->base_pre);
|
||||
|
||||
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
|
||||
{
|
||||
ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, dst_stride);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
|
||||
ptr = ptr_base;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
#ifdef MUST_BE_ALIGNED
|
||||
dst_ptr[0] = ptr[0];
|
||||
dst_ptr[1] = ptr[1];
|
||||
dst_ptr[2] = ptr[2];
|
||||
dst_ptr[3] = ptr[3];
|
||||
#else
|
||||
*(int *)dst_ptr = *(int *)ptr ;
|
||||
#endif
|
||||
dst_ptr += dst_stride;
|
||||
ptr += pre_stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Build the inter prediction for a whole macroblock, writing directly into
 * the destination buffers in x->dst rather than into x->predictor.
 *
 * Non-SPLITMV macroblocks use the single macroblock motion vector for the
 * 16x16 luma area and the motion vector stored in block 16 for both 8x8
 * chroma areas. SPLITMV macroblocks are predicted per partition / per pair
 * of 4x4 blocks.
 *
 * As elsewhere in this file, the low three bits of a motion-vector
 * component select the sub-pixel position and the remaining bits are the
 * whole-pixel displacement.
 */
void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
    unsigned char *dst_ptr = x->dst.y_buffer;

    if (x->mode_info_context->mbmi.mode != SPLITMV)
    {
        int offset;
        unsigned char *ptr;
        unsigned char *uptr, *vptr;
        unsigned char *udst_ptr = x->dst.u_buffer;
        unsigned char *vdst_ptr = x->dst.v_buffer;

        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;

        /* NOTE(review): the reference stride is taken from the destination
         * frame (was x->block[0].pre_stride) - presumably both frames share
         * one stride; confirm against the caller. */
        int pre_stride = x->dst.y_stride;

        /* ---- luma, 16x16 ---- */
        ptr = x->pre.y_buffer + (mv_row >> 3) * pre_stride + (mv_col >> 3);

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride);
        }

        /* ---- chroma, two 8x8 planes sharing block 16's motion vector ---- */
        mv_row = x->block[16].bmi.mv.as_mv.row;
        mv_col = x->block[16].bmi.mv.as_mv.col;
        pre_stride >>= 1;   /* chroma stride is derived as half the luma stride */
        offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
        uptr = x->pre.u_buffer + offset;
        vptr = x->pre.v_buffer + offset;

        if ((mv_row | mv_col) & 7)
        {
            x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
            x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
        }
        else
        {
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
            RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
        }
    }
    else
    {
        /*
         * The original author noted that this whole branch is never
         * executed, so the direct-to-destination changes here are untested;
         * if something turns out to be wrong, fall back to the logic in
         * build_inter_predictors_mb.
         *
         * NOTE(review): dst_ptr is never advanced in this branch, so every
         * call below - including the chroma blocks 16..23 - is handed
         * x->dst.y_buffer. That looks like each sub-block overwrites the
         * previous one; confirm and fix the per-block destination offsets
         * before this path is ever enabled.
         */
        int i;

        if (x->mode_info_context->mbmi.partitioning < 3)
        {
            /* Four 8x8 luma partitions, selected through bbb[]. */
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
                unsigned char *ptr = *(d->base_pre) + d->pre
                                     + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride
                                     + (d->bmi.mv.as_mv.col >> 3);

                if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
                {
                    x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
                }
                else
                {
                    RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride);
                }
            }
        }
        else
        {
            /* Sixteen 4x4 luma blocks, handled as horizontal pairs. */
            for (i = 0; i < 16; i += 2)
            {
                BLOCKD *d0 = &x->block[i];
                BLOCKD *d1 = &x->block[i+1];

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                {
                    /* Both blocks share a motion vector: predict them as one 8x4. */
                    unsigned char *ptr = *(d0->base_pre) + d0->pre
                                         + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride
                                         + (d0->bmi.mv.as_mv.col >> 3);

                    if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                    {
                        x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
                    }
                    else
                    {
                        RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
                    }
                }
                else
                {
                    vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
                    vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
                }
            }
        }

        /* Chroma blocks 16..23, again as pairs, using the chroma stride. */
        for (i = 16; i < 24; i += 2)
        {
            BLOCKD *d0 = &x->block[i];
            BLOCKD *d1 = &x->block[i+1];

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            {
                unsigned char *ptr = *(d0->base_pre) + d0->pre
                                     + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride
                                     + (d0->bmi.mv.as_mv.col >> 3);

                if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                {
                    x->subpixel_predict8x4(ptr, d0->pre_stride,
                                           d0->bmi.mv.as_mv.col & 7,
                                           d0->bmi.mv.as_mv.row & 7,
                                           dst_ptr, x->dst.uv_stride);
                }
                else
                {
                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
                            d0->pre_stride, dst_ptr, x->dst.uv_stride);
                }
            }
            else
            {
                vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
                vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
            }
        }
    }
}
|
||||
|
|
|
@ -13,9 +13,15 @@
|
|||
#define __INC_RECONINTER_H
|
||||
|
||||
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
|
||||
unsigned char *dst_y,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_ystride,
|
||||
int dst_uvstride);
|
||||
|
||||
extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x);
|
||||
|
||||
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
|
||||
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
|
||||
extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x);
|
||||
|
|
|
@ -14,9 +14,4 @@
|
|||
|
||||
extern void init_intra_left_above_pixels(MACROBLOCKD *x);
|
||||
|
||||
extern void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x);
|
||||
|
||||
extern void vp8_predict_intra4x4(BLOCKD *x, int b_mode, unsigned char *Predictor);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "reconintra.h"
|
||||
|
||||
void vp8_predict_intra4x4(BLOCKD *x,
|
||||
void vp8_intra4x4_predict(BLOCKD *x,
|
||||
int b_mode,
|
||||
unsigned char *predictor)
|
||||
{
|
||||
|
@ -313,89 +313,3 @@ void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
|
|||
}
|
||||
|
||||
|
||||
/*
 * Reconstruct a macroblock coded with per-block 4x4 intra prediction.
 *
 * After vp8_intra_prediction_down_copy(), each of the 16 luma blocks is
 * handled strictly in index order: its prediction is generated into
 * b->predictor, then the recon routine from rtcd is called with that
 * predictor, b->diff and the block's destination (*b->base_dst + b->dst).
 * The chroma planes are finished by vp8_recon_intra_mbuv().
 */
void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
    vp8_intra_prediction_down_copy(x);

/* One predict + reconstruct step for the 4x4 block that bp points to. */
#define VP8_PREDICT_RECON_4X4(bp)                                          \
    do {                                                                   \
        vp8_predict_intra4x4((bp), (bp)->bmi.mode, (bp)->predictor);       \
        RECON_INVOKE(rtcd, recon)((bp)->predictor, (bp)->diff,             \
                                  *((bp)->base_dst) + (bp)->dst,           \
                                  (bp)->dst_stride);                       \
    } while (0)

#if ARCH_ARM
    {
        /* ARM builds keep the 16 steps written out rather than looped. */
        BLOCKD *blk = &x->block[0];

        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  0 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  1 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  2 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  3 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  4 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  5 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  6 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  7 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  8 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block  9 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block 10 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block 11 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block 12 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block 13 */
        VP8_PREDICT_RECON_4X4(blk); blk += 1;   /* block 14 */
        VP8_PREDICT_RECON_4X4(blk);             /* block 15 */
    }
#else
    {
        int idx;

        for (idx = 0; idx < 16; idx++)
        {
            BLOCKD *blk = &x->block[idx];

            VP8_PREDICT_RECON_4X4(blk);
        }
    }
#endif

#undef VP8_PREDICT_RECON_4X4

    vp8_recon_intra_mbuv(rtcd, x);
}
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#ifndef _PTHREAD_EMULATION
|
||||
#define _PTHREAD_EMULATION
|
||||
|
||||
#define VPXINFINITE 10000 /* 10second. */
|
||||
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
|
||||
|
||||
/* Thread management macros */
|
||||
#ifdef _WIN32
|
||||
|
@ -26,7 +26,7 @@
|
|||
#define pthread_t HANDLE
|
||||
#define pthread_attr_t DWORD
|
||||
#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
|
||||
#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
|
||||
#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
|
||||
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
#define pthread_cancel(thread) terminate_thread(thread,0)
|
||||
|
@ -36,6 +36,7 @@
|
|||
#define pthread_self() GetCurrentThreadId()
|
||||
#else
|
||||
#ifdef __APPLE__
|
||||
#include <mach/mach_init.h>
|
||||
#include <mach/semaphore.h>
|
||||
#include <mach/task.h>
|
||||
#include <time.h>
|
||||
|
@ -58,9 +59,9 @@
|
|||
#ifdef _WIN32
|
||||
#define sem_t HANDLE
|
||||
#define pause(voidpara) __asm PAUSE
|
||||
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL)
|
||||
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE))
|
||||
#define sem_post(sem) SetEvent(*sem)
|
||||
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
|
||||
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
|
||||
#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
|
||||
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
|
||||
#define thread_sleep(nms) Sleep(nms)
|
||||
|
||||
|
@ -88,4 +89,6 @@
|
|||
#define x86_pause_hint()
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define ALLOC_FAILURE -2
|
|
@ -14,18 +14,18 @@
|
|||
; /****************************************************************************
|
||||
; * Notes:
|
||||
; *
|
||||
; * This implementation makes use of 16 bit fixed point verio of two multiply
|
||||
; * This implementation makes use of 16 bit fixed point version of two multiply
|
||||
; * constants:
|
||||
; * 1. sqrt(2) * cos (pi/8)
|
||||
; * 2. sqrt(2) * sin (pi/8)
|
||||
; * Becuase the first constant is bigger than 1, to maintain the same 16 bit
|
||||
; * fixed point prrcision as the second one, we use a trick of
|
||||
; * 2. sqrt(2) * sin (pi/8)
|
||||
; * Because the first constant is bigger than 1, to maintain the same 16 bit
|
||||
; * fixed point precision as the second one, we use a trick of
|
||||
; * x * a = x + x*(a-1)
|
||||
; * so
|
||||
; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
|
||||
; *
|
||||
; * For the second constant, becuase of the 16bit version is 35468, which
|
||||
; * is bigger than 32768, in signed 16 bit multiply, it become a negative
|
||||
; * For the second constant, because of the 16bit version is 35468, which
|
||||
; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative
|
||||
; * number.
|
||||
; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
|
||||
; *
|
||||
|
|
|
@ -32,9 +32,6 @@ sym(idct_dequant_0_2x_sse2):
|
|||
mov rdx, arg(1) ; dequant
|
||||
mov rax, arg(0) ; qcoeff
|
||||
|
||||
; Zero out xmm7, for use unpacking
|
||||
pxor xmm7, xmm7
|
||||
|
||||
movd xmm4, [rax]
|
||||
movd xmm5, [rdx]
|
||||
|
||||
|
@ -43,9 +40,12 @@ sym(idct_dequant_0_2x_sse2):
|
|||
|
||||
pmullw xmm4, xmm5
|
||||
|
||||
; Zero out xmm5, for use unpacking
|
||||
pxor xmm5, xmm5
|
||||
|
||||
; clear coeffs
|
||||
movd [rax], xmm7
|
||||
movd [rax+32], xmm7
|
||||
movd [rax], xmm5
|
||||
movd [rax+32], xmm5
|
||||
;pshufb
|
||||
pshuflw xmm4, xmm4, 00000000b
|
||||
pshufhw xmm4, xmm4, 00000000b
|
||||
|
@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2):
|
|||
lea rcx, [3*rcx]
|
||||
movq xmm3, [rax+rcx]
|
||||
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
punpcklbw xmm2, xmm7
|
||||
punpcklbw xmm3, xmm7
|
||||
punpcklbw xmm0, xmm5
|
||||
punpcklbw xmm1, xmm5
|
||||
punpcklbw xmm2, xmm5
|
||||
punpcklbw xmm3, xmm5
|
||||
|
||||
mov rax, arg(3) ; dst
|
||||
movsxd rdx, dword ptr arg(4) ; dst_stride
|
||||
|
@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2):
|
|||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
packuswb xmm0, xmm5
|
||||
packuswb xmm1, xmm5
|
||||
packuswb xmm2, xmm5
|
||||
packuswb xmm3, xmm5
|
||||
|
||||
; store blocks back out
|
||||
movq [rax], xmm0
|
||||
|
@ -102,6 +102,7 @@ sym(idct_dequant_full_2x_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -347,6 +348,7 @@ sym(idct_dequant_full_2x_sse2):
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -377,8 +379,8 @@ sym(idct_dequant_dc_0_2x_sse2):
|
|||
mov rdi, arg(3) ; dst
|
||||
mov rdx, arg(5) ; dc
|
||||
|
||||
; Zero out xmm7, for use unpacking
|
||||
pxor xmm7, xmm7
|
||||
; Zero out xmm5, for use unpacking
|
||||
pxor xmm5, xmm5
|
||||
|
||||
; load up 2 dc words here == 2*16 = doubleword
|
||||
movd xmm4, [rdx]
|
||||
|
@ -398,10 +400,10 @@ sym(idct_dequant_dc_0_2x_sse2):
|
|||
psraw xmm4, 3
|
||||
|
||||
; Predict buffer needs to be expanded from bytes to words
|
||||
punpcklbw xmm0, xmm7
|
||||
punpcklbw xmm1, xmm7
|
||||
punpcklbw xmm2, xmm7
|
||||
punpcklbw xmm3, xmm7
|
||||
punpcklbw xmm0, xmm5
|
||||
punpcklbw xmm1, xmm5
|
||||
punpcklbw xmm2, xmm5
|
||||
punpcklbw xmm3, xmm5
|
||||
|
||||
; Add to predict buffer
|
||||
paddw xmm0, xmm4
|
||||
|
@ -410,10 +412,10 @@ sym(idct_dequant_dc_0_2x_sse2):
|
|||
paddw xmm3, xmm4
|
||||
|
||||
; pack up before storing
|
||||
packuswb xmm0, xmm7
|
||||
packuswb xmm1, xmm7
|
||||
packuswb xmm2, xmm7
|
||||
packuswb xmm3, xmm7
|
||||
packuswb xmm0, xmm5
|
||||
packuswb xmm1, xmm5
|
||||
packuswb xmm2, xmm5
|
||||
packuswb xmm3, xmm5
|
||||
|
||||
; Load destination stride before writing out,
|
||||
; doesn't need to persist
|
||||
|
@ -441,6 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -692,6 +695,7 @@ sym(idct_dequant_dc_full_2x_sse2):
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
|
|
@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 2
|
||||
SAVE_XMM
|
||||
SAVE_XMM 6
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
movdqa xmm4, xmm0
|
||||
punpcklqdq xmm0, xmm3 ;d1 a1
|
||||
punpckhqdq xmm4, xmm3 ;c1 b1
|
||||
movd xmm7, eax
|
||||
movd xmm6, eax
|
||||
|
||||
movdqa xmm1, xmm4 ;c1 b1
|
||||
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
|
||||
|
@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
|
||||
movdqa xmm3, xmm4 ;ip[4] ip[0]
|
||||
|
||||
pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03
|
||||
pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
|
||||
|
||||
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
|
||||
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
|
||||
|
@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2):
|
|||
punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
|
||||
punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
|
||||
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
paddw xmm5, xmm7
|
||||
paddw xmm1, xmm7
|
||||
paddw xmm5, xmm6
|
||||
paddw xmm1, xmm6
|
||||
|
||||
psraw xmm5, 3
|
||||
psraw xmm1, 3
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -122,12 +122,10 @@ next8_h:
|
|||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
movq mm2, [rdx] ; flimit mm2
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx] ; blimit
|
||||
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm5
|
||||
pxor mm5, mm5
|
||||
pcmpeqb mm1, mm5 ; mask mm1
|
||||
|
@ -230,7 +228,7 @@ next8_h:
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -406,9 +404,9 @@ next8_v:
|
|||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
mov rdx, arg(2) ;blimit ;
|
||||
|
||||
movq mm2, [rdx] ;flimit mm2
|
||||
movq mm4, [rdx] ;blimit
|
||||
movq mm1, mm3 ; mm1=mm3=p0
|
||||
|
||||
movq mm7, mm6 ; mm7=mm6=q0
|
||||
|
@ -419,10 +417,7 @@ next8_v:
|
|||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm0; ; mask
|
||||
|
||||
pxor mm0, mm0
|
||||
|
@ -603,7 +598,7 @@ next8_v:
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -719,17 +714,15 @@ next8_mbh:
|
|||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
movq mm2, [rdx] ; flimit mm2
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx] ; blimit
|
||||
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm5
|
||||
pxor mm5, mm5
|
||||
pcmpeqb mm1, mm5 ; mask mm1
|
||||
|
||||
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
|
||||
; mm6 = p0,
|
||||
|
||||
; calculate high edge variance
|
||||
|
@ -922,7 +915,7 @@ next8_mbh:
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -1108,9 +1101,9 @@ next8_mbv:
|
|||
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
psrlw mm5, 1 ; abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ;
|
||||
mov rdx, arg(2) ;blimit ;
|
||||
|
||||
movq mm2, [rdx] ;flimit mm2
|
||||
movq mm4, [rdx] ;blimit
|
||||
movq mm1, mm3 ; mm1=mm3=p0
|
||||
|
||||
movq mm7, mm6 ; mm7=mm6=q0
|
||||
|
@ -1121,10 +1114,7 @@ next8_mbv:
|
|||
paddusb mm1, mm1 ; abs(q0-p0)*2
|
||||
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
paddb mm2, mm2 ; flimit*2 (less than 255)
|
||||
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por mm1, mm0; ; mask
|
||||
|
||||
pxor mm0, mm0
|
||||
|
@ -1392,16 +1382,13 @@ next8_mbv:
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
|
|||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
|
||||
movsxd rcx, dword ptr arg(5) ;count
|
||||
mov rcx, 2 ; count
|
||||
nexts8_h:
|
||||
mov rdx, arg(3) ;limit
|
||||
movq mm7, [rdx]
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm3, [rdx] ;
|
||||
paddb mm3, mm3 ; flimit*2 (less than 255)
|
||||
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||
add rdi, rax
|
||||
|
@ -1445,7 +1428,7 @@ nexts8_h:
|
|||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor mm3, mm3
|
||||
pcmpeqb mm5, mm3
|
||||
|
||||
|
@ -1515,16 +1498,13 @@ nexts8_h:
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
|
||||
sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
|
|||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
|
||||
lea rsi, [rsi + rax*4- 2]; ;
|
||||
movsxd rcx, dword ptr arg(5) ;count
|
||||
mov rcx, 2 ; count
|
||||
nexts8_v:
|
||||
|
||||
lea rdi, [rsi + rax];
|
||||
|
@ -1602,14 +1582,10 @@ nexts8_v:
|
|||
paddusb mm5, mm5 ; abs(p0-q0)*2
|
||||
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit ; get blimit
|
||||
movq mm7, [rdx]
|
||||
mov rdx, arg(3) ; get limit
|
||||
movq mm6, [rdx]
|
||||
paddb mm7, mm7 ; flimit*2 (less than 255)
|
||||
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor mm7, mm7
|
||||
pcmpeqb mm5, mm7 ; mm5 = mask
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@
|
|||
psubusb xmm6, xmm5 ; p1-=p0
|
||||
|
||||
por xmm6, xmm4 ; abs(p1 - p0)
|
||||
mov rdx, arg(2) ; get flimit
|
||||
mov rdx, arg(2) ; get blimit
|
||||
|
||||
movdqa t1, xmm6 ; save to t1
|
||||
|
||||
|
@ -123,7 +123,7 @@
|
|||
psubusb xmm1, xmm7
|
||||
por xmm2, xmm3 ; abs(p1-q1)
|
||||
|
||||
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
|
||||
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
|
||||
|
||||
movdqa xmm3, xmm0 ; q0
|
||||
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
|
||||
|
@ -134,13 +134,11 @@
|
|||
psrlw xmm2, 1 ; abs(p1-q1)/2
|
||||
|
||||
psubusb xmm5, xmm3 ; p0-=q0
|
||||
paddb xmm4, xmm4 ; flimit*2 (less than 255)
|
||||
|
||||
psubusb xmm3, xmm6 ; q0-=p0
|
||||
por xmm5, xmm3 ; abs(p0 - q0)
|
||||
|
||||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
movdqa xmm4, t0 ; hev get abs (q1 - q0)
|
||||
|
||||
|
@ -150,7 +148,7 @@
|
|||
|
||||
movdqa xmm2, XMMWORD PTR [rdx] ; hev
|
||||
|
||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
psubusb xmm4, xmm2 ; hev
|
||||
|
||||
psubusb xmm3, xmm2 ; hev
|
||||
|
@ -278,7 +276,7 @@
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -288,7 +286,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -338,7 +336,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -584,7 +582,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
|
|||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
|
@ -634,7 +632,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
movdqa xmm4, XMMWORD PTR [rdx]; limit
|
||||
|
||||
pmaxub xmm0, xmm7
|
||||
mov rdx, arg(2) ; flimit
|
||||
mov rdx, arg(2) ; blimit
|
||||
|
||||
psubusb xmm0, xmm4
|
||||
movdqa xmm5, xmm2 ; q1
|
||||
|
@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
psrlw xmm5, 1 ; abs(p1-q1)/2
|
||||
psubusb xmm6, xmm3 ; q0-p0
|
||||
|
||||
movdqa xmm2, XMMWORD PTR [rdx]; flimit
|
||||
movdqa xmm4, XMMWORD PTR [rdx]; blimit
|
||||
|
||||
mov rdx, arg(4) ; get thresh
|
||||
|
||||
por xmm1, xmm6 ; abs(q0-p0)
|
||||
paddb xmm2, xmm2 ; flimit*2 (less than 255)
|
||||
|
||||
movdqa xmm6, t0 ; get abs (q1 - q0)
|
||||
|
||||
|
@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
|
||||
|
||||
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
|
||||
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
|
||||
|
||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
|
||||
|
||||
por xmm1, xmm0 ; mask
|
||||
|
@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -1024,7 +1020,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
|
|||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
|
@ -1091,7 +1087,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
|
@ -1249,7 +1245,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
|
|||
;(
|
||||
; unsigned char *u,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *blimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; unsigned char *v
|
||||
|
@ -1318,7 +1314,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1376,17 +1372,14 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
|
||||
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
|
||||
mov rdx, arg(2) ;flimit ; get flimit
|
||||
mov rdx, arg(2) ;blimit
|
||||
movdqa xmm3, XMMWORD PTR [rdx]
|
||||
mov rdx, arg(3) ;limit
|
||||
movdqa xmm7, XMMWORD PTR [rdx]
|
||||
|
||||
paddb xmm3, xmm3 ; flimit*2 (less than 255)
|
||||
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
mov rdi, rsi ; rdi points to row +1 for indirect addressing
|
||||
add rdi, rax
|
||||
|
@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor xmm3, xmm3
|
||||
pcmpeqb xmm5, xmm3
|
||||
|
||||
|
@ -1493,17 +1481,14 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
|
|||
;(
|
||||
; unsigned char *src_ptr,
|
||||
; int src_pixel_step,
|
||||
; const char *flimit,
|
||||
; const char *limit,
|
||||
; const char *thresh,
|
||||
; int count
|
||||
; const char *blimit,
|
||||
;)
|
||||
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
|
||||
sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
||||
push rbp ; save old base pointer value.
|
||||
mov rbp, rsp ; set new base pointer value.
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SHADOW_ARGS_TO_STACK 3
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx ; save callee-saved reg
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
|
|||
paddusb xmm5, xmm5 ; abs(p0-q0)*2
|
||||
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
|
||||
|
||||
mov rdx, arg(2) ;flimit
|
||||
mov rdx, arg(2) ;blimit
|
||||
movdqa xmm7, XMMWORD PTR [rdx]
|
||||
mov rdx, arg(3) ; get limit
|
||||
movdqa xmm6, XMMWORD PTR [rdx]
|
||||
paddb xmm7, xmm7 ; flimit*2 (less than 255)
|
||||
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
|
||||
|
||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
|
||||
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
|
||||
pxor xmm7, xmm7
|
||||
pcmpeqb xmm5, xmm7 ; mm5 = mask
|
||||
|
||||
|
|
|
@ -9,30 +9,18 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "loopfilter.h"
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||
|
||||
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
|
||||
|
@ -42,113 +30,77 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
|
|||
#if HAVE_MMX
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
|
||||
if (v_ptr)
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
|
||||
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -156,113 +108,65 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
|
|||
/* Horizontal MB filtering */
|
||||
#if HAVE_SSE2
|
||||
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
|
||||
}
|
||||
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
|
||||
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
|
||||
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
|
||||
}
|
||||
|
||||
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi)
|
||||
{
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
|
||||
|
||||
if (u_ptr)
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
|
||||
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
|
||||
}
|
||||
|
||||
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
|
||||
{
|
||||
(void) u_ptr;
|
||||
(void) v_ptr;
|
||||
(void) uv_stride;
|
||||
(void) simpler_lpf;
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
|
||||
int y_stride,
|
||||
loop_filter_info *lfi)
|
||||
{
|
||||
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
|
|||
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
|
||||
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
|
|||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
|
||||
|
@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
|
|||
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
|
||||
extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
|
||||
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
|
||||
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
|
|||
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
|
||||
|
||||
#undef vp8_lf_simple_mb_v
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
|
||||
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
|
||||
|
||||
#undef vp8_lf_simple_b_v
|
||||
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
|
||||
|
||||
#undef vp8_lf_simple_mb_h
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
|
||||
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
|
||||
|
||||
#undef vp8_lf_simple_b_h
|
||||
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2
|
||||
|
|
|
@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
|
|
@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
@ -229,3 +229,460 @@ sym(vp8_copy_mem16x16_sse2):
|
|||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_intra_pred_uv_dc_mmx2(   -- DC predictor: fills an 8x8 block at dst with the rounded mean of the 8 pixels above and the 8 pixels left of src
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose border pixels are read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc_mmx2)
|
||||
sym(vp8_intra_pred_uv_dc_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; from top: sum the 8 pixels of the row directly above src
|
||||
mov rsi, arg(2) ;src;
|
||||
movsxd rax, dword ptr arg(3) ;src_stride;
|
||||
sub rsi, rax ; rsi = src - src_stride (row above)
|
||||
pxor mm0, mm0 ; mm0 = 0, reference operand for psadbw
|
||||
movq mm1, [rsi] ; load the 8 pixels above the block
|
||||
psadbw mm1, mm0 ; sum of absolute differences against zero = sum of the 8 bytes, in the low word
|
||||
|
||||
; from left: accumulate the 8 pixels of the column left of src into ecx
|
||||
dec rsi ; rsi = src - src_stride - 1 (top-left neighbour)
|
||||
lea rdi, [rax*3] ; rdi = 3 * src_stride
|
||||
movzx ecx, byte [rsi+rax] ; left pixel of row 0
|
||||
movzx edx, byte [rsi+rax*2] ; left pixel of row 1
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi] ; left pixel of row 2
|
||||
add ecx, edx
|
||||
lea rsi, [rsi+rax*4] ; advance four rows; rsi now addresses the left pixel of row 3
|
||||
movzx edx, byte [rsi] ; left pixel of row 3
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax] ; left pixel of row 4
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2] ; left pixel of row 5
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi] ; left pixel of row 6
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*4] ; left pixel of row 7
|
||||
add ecx, edx
|
||||
|
||||
; add up: dc = (top sum + left sum + 8) >> 4
|
||||
pextrw edx, mm1, 0x0 ; edx = top-row sum (low word of mm1)
|
||||
lea edx, [edx+ecx+8] ; add the left sum plus the rounding term
|
||||
sar edx, 4 ; divide by the 16 contributing pixels
|
||||
movd mm1, edx
|
||||
pshufw mm1, mm1, 0x0 ; replicate the low word into all four words
|
||||
packuswb mm1, mm1 ; pack words to bytes: mm1 = dc in all 8 bytes
|
||||
|
||||
; write out: store the same 8 bytes to each of the 8 dst rows
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
lea rax, [rcx*3] ; rax = 3 * dst_stride
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
lea rdi, [rdi+rcx*4] ; move down to rows 4..7
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_intra_pred_uv_dctop_mmx2(   -- DC predictor using only the row above: fills an 8x8 block at dst with the rounded mean of the 8 pixels above src
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose top border is read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dctop_mmx2)
|
||||
sym(vp8_intra_pred_uv_dctop_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; from top: sum the 8 pixels of the row directly above src
|
||||
mov rsi, arg(2) ;src;
|
||||
movsxd rax, dword ptr arg(3) ;src_stride;
|
||||
sub rsi, rax ; rsi = src - src_stride (row above)
|
||||
pxor mm0, mm0 ; mm0 = 0, reference operand for psadbw
|
||||
movq mm1, [rsi] ; load the 8 pixels above the block
|
||||
psadbw mm1, mm0 ; low word of mm1 = sum of the 8 bytes
|
||||
|
||||
; add up: dc = (top sum + 4) >> 3
|
||||
paddw mm1, [GLOBAL(dc_4)] ; rounding term (dc_4 holds words of 4)
|
||||
psraw mm1, 3 ; divide by the 8 contributing pixels
|
||||
pshufw mm1, mm1, 0x0 ; replicate the low word into all four words
|
||||
packuswb mm1, mm1 ; pack words to bytes: mm1 = dc in all 8 bytes
|
||||
|
||||
; write out: store the same 8 bytes to each of the 8 dst rows
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
lea rax, [rcx*3] ; rax = 3 * dst_stride
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
lea rdi, [rdi+rcx*4] ; move down to rows 4..7
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_intra_pred_uv_dcleft_mmx2(   -- DC predictor using only the left column: fills an 8x8 block at dst with the rounded mean of the 8 pixels left of src
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose left border is read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dcleft_mmx2)
|
||||
sym(vp8_intra_pred_uv_dcleft_mmx2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; from left: accumulate the 8 pixels of the column left of src
|
||||
mov rsi, arg(2) ;src;
|
||||
movsxd rax, dword ptr arg(3) ;src_stride;
|
||||
dec rsi ; rsi = src - 1 (left pixel of row 0)
|
||||
lea rdi, [rax*3] ; rdi = 3 * src_stride
|
||||
movzx ecx, byte [rsi] ; left pixel of row 0
|
||||
movzx edx, byte [rsi+rax] ; left pixel of row 1
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2] ; left pixel of row 2
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi] ; left pixel of row 3
|
||||
add ecx, edx
|
||||
lea rsi, [rsi+rax*4] ; advance to the left pixel of row 4
|
||||
movzx edx, byte [rsi] ; left pixel of row 4
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax] ; left pixel of row 5
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rax*2] ; left pixel of row 6
|
||||
add ecx, edx
|
||||
movzx edx, byte [rsi+rdi] ; left pixel of row 7
|
||||
lea edx, [ecx+edx+4] ; final add folded with the rounding term: edx = left sum + 4
|
||||
|
||||
; add up: dc = (left sum + 4) >> 3
|
||||
shr edx, 3 ; divide by the 8 contributing pixels
|
||||
movd mm1, edx
|
||||
pshufw mm1, mm1, 0x0 ; replicate the low word into all four words
|
||||
packuswb mm1, mm1 ; pack words to bytes: mm1 = dc in all 8 bytes
|
||||
|
||||
; write out: store the same 8 bytes to each of the 8 dst rows
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
lea rax, [rcx*3] ; rax = 3 * dst_stride
|
||||
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
lea rdi, [rdi+rcx*4] ; move down to rows 4..7
|
||||
movq [rdi ], mm1
|
||||
movq [rdi+rcx ], mm1
|
||||
movq [rdi+rcx*2], mm1
|
||||
movq [rdi+rax ], mm1
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_intra_pred_uv_dc128_mmx(   -- constant predictor: fills an 8x8 block at dst with the value 128; src and src_stride are not read
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): unused by this routine
|
||||
; int src_stride           ; arg(3): unused by this routine
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_dc128_mmx)
|
||||
sym(vp8_intra_pred_uv_dc128_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
GET_GOT rbx
|
||||
; end prolog
|
||||
|
||||
; write out: store 8 bytes of 128 to each of the 8 dst rows
|
||||
movq mm1, [GLOBAL(dc_128)] ; mm1 = eight bytes of 128
|
||||
mov rax, arg(0) ;dst;
|
||||
movsxd rdx, dword ptr arg(1) ;dst_stride
|
||||
lea rcx, [rdx*3] ; rcx = 3 * dst_stride
|
||||
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
lea rax, [rax+rdx*4] ; move down to rows 4..7
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
|
||||
; begin epilog
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_intra_pred_uv_tm_sse2(   -- "tm" predictor (also instantiated as _ssse3): each output pixel = saturate(left[row] + top[col] - topleft), over an 8x8 block
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose border pixels are read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_tm 1
|
||||
global sym(vp8_intra_pred_uv_tm_%1)
|
||||
sym(vp8_intra_pred_uv_tm_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
; read top row and widen it to 8 words in xmm1
|
||||
mov edx, 4 ; loop counter: 4 iterations, 2 rows each
|
||||
mov rsi, arg(2) ;src;
|
||||
movsxd rax, dword ptr arg(3) ;src_stride;
|
||||
sub rsi, rax ; rsi = src - src_stride (row above)
|
||||
pxor xmm0, xmm0 ; xmm0 = 0, used to zero-extend bytes to words
|
||||
%ifidn %1, ssse3
|
||||
movdqa xmm2, [GLOBAL(dc_1024)] ; pshufb mask: every word is 0x0400, i.e. byte indices (0, 4)
|
||||
%endif
|
||||
movq xmm1, [rsi] ; the 8 pixels above the block
|
||||
punpcklbw xmm1, xmm0 ; zero-extend to 8 words
|
||||
|
||||
; set up left ptrs and subtract topleft
|
||||
movd xmm3, [rsi-1] ; low byte = top-left neighbour; movd clears bytes 4..15
|
||||
lea rsi, [rsi+rax-1] ; rsi = src - 1 (left pixel of row 0)
|
||||
%ifidn %1, sse2
|
||||
punpcklbw xmm3, xmm0 ; zero-extend to words
|
||||
pshuflw xmm3, xmm3, 0x0 ; replicate word 0 across the low four words
|
||||
punpcklqdq xmm3, xmm3 ; duplicate the low half: top-left in all 8 words
|
||||
%else
|
||||
pshufb xmm3, xmm2 ; each word = (byte 0, byte 4) = top-left zero-extended, in all 8 words
|
||||
%endif
|
||||
psubw xmm1, xmm3 ; xmm1 = top[col] - topleft, as signed words
|
||||
|
||||
; set up dest ptrs
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
|
||||
vp8_intra_pred_uv_tm_%1_loop:
|
||||
movd xmm3, [rsi] ; low byte = left pixel of the current row
|
||||
movd xmm5, [rsi+rax] ; low byte = left pixel of the next row
|
||||
%ifidn %1, sse2
|
||||
punpcklbw xmm3, xmm0
|
||||
punpcklbw xmm5, xmm0
|
||||
pshuflw xmm3, xmm3, 0x0
|
||||
pshuflw xmm5, xmm5, 0x0
|
||||
punpcklqdq xmm3, xmm3 ; left pixel of the current row broadcast to 8 words
|
||||
punpcklqdq xmm5, xmm5 ; left pixel of the next row broadcast to 8 words
|
||||
%else
|
||||
pshufb xmm3, xmm2 ; same broadcast via the dc_1024 mask
|
||||
pshufb xmm5, xmm2
|
||||
%endif
|
||||
paddw xmm3, xmm1 ; left + (top - topleft) for the current row
|
||||
paddw xmm5, xmm1 ; same for the next row
|
||||
packuswb xmm3, xmm5 ; saturate to 0..255: low 8 bytes = current row, high 8 bytes = next row
|
||||
movq [rdi ], xmm3 ; store the current row
|
||||
movhps[rdi+rcx], xmm3 ; store the next row from the high half
|
||||
lea rsi, [rsi+rax*2] ; advance two source rows
|
||||
lea rdi, [rdi+rcx*2] ; advance two destination rows
|
||||
dec edx
|
||||
jnz vp8_intra_pred_uv_tm_%1_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
vp8_intra_pred_uv_tm sse2
|
||||
vp8_intra_pred_uv_tm ssse3
|
||||
|
||||
;void vp8_intra_pred_uv_ve_mmx(   -- vertical predictor: copies the 8 pixels above src into each of the 8 rows of the block at dst
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose top border is read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
global sym(vp8_intra_pred_uv_ve_mmx)
|
||||
sym(vp8_intra_pred_uv_ve_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
; end prolog
|
||||
|
||||
; read from top: load the row directly above src
|
||||
mov rax, arg(2) ;src;
|
||||
movsxd rdx, dword ptr arg(3) ;src_stride;
|
||||
sub rax, rdx ; rax = src - src_stride (row above)
|
||||
movq mm1, [rax] ; mm1 = the 8 pixels above the block
|
||||
|
||||
; write out: store that row to each of the 8 dst rows (rax and rdx are reused for dst)
|
||||
mov rax, arg(0) ;dst;
|
||||
movsxd rdx, dword ptr arg(1) ;dst_stride
|
||||
lea rcx, [rdx*3] ; rcx = 3 * dst_stride
|
||||
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
lea rax, [rax+rdx*4] ; move down to rows 4..7
|
||||
movq [rax ], mm1
|
||||
movq [rax+rdx ], mm1
|
||||
movq [rax+rdx*2], mm1
|
||||
movq [rax+rcx ], mm1
|
||||
|
||||
; begin epilog
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
;void vp8_intra_pred_uv_ho_mmx2(   -- horizontal predictor (also instantiated as _ssse3): fills each of the 8 rows at dst with the pixel left of that row in src
|
||||
; unsigned char *dst,      ; arg(0): top-left of the 8x8 block to write
|
||||
; int dst_stride,          ; arg(1): bytes between consecutive dst rows
|
||||
; unsigned char *src,      ; arg(2): top-left of the block whose left border is read
|
||||
; int src_stride           ; arg(3): bytes between consecutive src rows
|
||||
; )
|
||||
%macro vp8_intra_pred_uv_ho 1
|
||||
global sym(vp8_intra_pred_uv_ho_%1)
|
||||
sym(vp8_intra_pred_uv_ho_%1):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 4
|
||||
push rsi
|
||||
push rdi
|
||||
%ifidn %1, ssse3
|
||||
%ifndef GET_GOT_SAVE_ARG
|
||||
push rbx ; rbx is written below as 3*src_stride; saved here when GET_GOT_SAVE_ARG is not defined
|
||||
%endif
|
||||
GET_GOT rbx
|
||||
%endif
|
||||
; end prolog
|
||||
|
||||
; read from left and write out
|
||||
%ifidn %1, mmx2
|
||||
mov edx, 4 ; loop counter: 4 iterations, 2 rows each
|
||||
%endif
|
||||
mov rsi, arg(2) ;src;
|
||||
movsxd rax, dword ptr arg(3) ;src_stride;
|
||||
mov rdi, arg(0) ;dst;
|
||||
movsxd rcx, dword ptr arg(1) ;dst_stride
|
||||
%ifidn %1, ssse3
|
||||
lea rdx, [rcx*3] ; rdx = 3 * dst_stride
|
||||
movdqa xmm2, [GLOBAL(dc_00001111)] ; pshufb mask: 8 x index 0 then 8 x index 1 (only GLOBAL() use in this routine)
|
||||
lea rbx, [rax*3] ; rbx = 3 * src_stride
|
||||
%endif
|
||||
dec rsi ; rsi = src - 1 (left pixel of row 0)
|
||||
%ifidn %1, mmx2
|
||||
vp8_intra_pred_uv_ho_%1_loop:
|
||||
movd mm0, [rsi] ; low byte = left pixel of the current row
|
||||
movd mm1, [rsi+rax] ; low byte = left pixel of the next row
|
||||
punpcklbw mm0, mm0 ; duplicate each byte: low word = the pixel twice
|
||||
punpcklbw mm1, mm1
|
||||
pshufw mm0, mm0, 0x0 ; replicate the low word: pixel in all 8 bytes
|
||||
pshufw mm1, mm1, 0x0
|
||||
movq [rdi ], mm0 ; store the current row
|
||||
movq [rdi+rcx], mm1 ; store the next row
|
||||
lea rsi, [rsi+rax*2] ; advance two source rows
|
||||
lea rdi, [rdi+rcx*2] ; advance two destination rows
|
||||
dec edx
|
||||
jnz vp8_intra_pred_uv_ho_%1_loop
|
||||
%else
|
||||
movd xmm0, [rsi] ; left pixel of row 0 in byte 0
|
||||
movd xmm3, [rsi+rax] ; left pixel of row 1
|
||||
movd xmm1, [rsi+rax*2] ; left pixel of row 2
|
||||
movd xmm4, [rsi+rbx] ; left pixel of row 3
|
||||
punpcklbw xmm0, xmm3 ; byte 0 = row 0 pixel, byte 1 = row 1 pixel
|
||||
punpcklbw xmm1, xmm4 ; byte 0 = row 2 pixel, byte 1 = row 3 pixel
|
||||
pshufb xmm0, xmm2 ; low 8 bytes = row 0 pixel, high 8 bytes = row 1 pixel
|
||||
pshufb xmm1, xmm2 ; low 8 bytes = row 2 pixel, high 8 bytes = row 3 pixel
|
||||
movq [rdi ], xmm0 ; row 0
|
||||
movhps [rdi+rcx], xmm0 ; row 1
|
||||
movq [rdi+rcx*2], xmm1 ; row 2
|
||||
movhps [rdi+rdx], xmm1 ; row 3
|
||||
lea rsi, [rsi+rax*4] ; advance four source rows
|
||||
lea rdi, [rdi+rcx*4] ; advance four destination rows
|
||||
movd xmm0, [rsi] ; same sequence for rows 4..7
|
||||
movd xmm3, [rsi+rax]
|
||||
movd xmm1, [rsi+rax*2]
|
||||
movd xmm4, [rsi+rbx]
|
||||
punpcklbw xmm0, xmm3
|
||||
punpcklbw xmm1, xmm4
|
||||
pshufb xmm0, xmm2
|
||||
pshufb xmm1, xmm2
|
||||
movq [rdi ], xmm0 ; row 4
|
||||
movhps [rdi+rcx], xmm0 ; row 5
|
||||
movq [rdi+rcx*2], xmm1 ; row 6
|
||||
movhps [rdi+rdx], xmm1 ; row 7
|
||||
%endif
|
||||
|
||||
; begin epilog (mirrors the prolog in reverse order)
|
||||
%ifidn %1, ssse3
|
||||
RESTORE_GOT
|
||||
%ifndef GET_GOT_SAVE_ARG
|
||||
pop rbx ; matches the conditional push in the prolog
|
||||
%endif
|
||||
%endif
|
||||
pop rdi
|
||||
pop rsi
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
%endmacro
|
||||
|
||||
vp8_intra_pred_uv_ho mmx2
|
||||
vp8_intra_pred_uv_ho ssse3
|
||||
|
||||
SECTION_RODATA
|
||||
dc_128: ; fill value loaded by vp8_intra_pred_uv_dc128_mmx
|
||||
times 8 db 128 ; eight bytes of 128 (one 8-pixel row)
|
||||
dc_4: ; rounding term added by vp8_intra_pred_uv_dctop_mmx2 before its shift by 3
|
||||
times 4 dw 4 ; four words of 4
|
||||
align 16 ; the constant below is loaded with movdqa
|
||||
dc_1024: ; pshufb mask used by vp8_intra_pred_uv_tm_ssse3
|
||||
times 8 dw 0x400 ; each word selects source bytes (0, 4)
|
||||
align 16 ; the constant below is loaded with movdqa
|
||||
dc_00001111: ; pshufb mask used by vp8_intra_pred_uv_ho_ssse3
|
||||
times 8 db 0 ; low 8 output bytes take source byte 0
|
||||
times 8 db 1 ; high 8 output bytes take source byte 1
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "recon_x86.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#define build_intra_predictors_mbuv_prototype(sym) \
|
||||
void sym(unsigned char *dst, int dst_stride, \
|
||||
const unsigned char *src, int src_stride)
|
||||
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
|
||||
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
|
||||
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
|
||||
|
||||
/*
 * Shared driver for the x86 chroma (U/V) intra predictors.
 *
 * Reads the macroblock's uv_mode, selects one assembly predictor for it and
 * calls that predictor twice: once for the U plane and once for the V plane.
 *
 * x          - macroblock descriptor; supplies uv_mode, up_available,
 *              left_available and the dst.u_buffer / dst.v_buffer /
 *              dst.uv_stride values that are passed as the predictors'
 *              src / src_stride arguments.
 * dst_u      - destination passed to the predictor for the U plane.
 * dst_v      - destination passed to the predictor for the V plane.
 * dst_stride - destination stride, used for both planes.
 * tm_func    - predictor to use when the mode is TM_PRED.
 * ho_func    - predictor to use when the mode is H_PRED.
 *
 * Any mode other than V_PRED, H_PRED, TM_PRED or DC_PRED returns without
 * calling a predictor.
 */
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
|
||||
unsigned char *dst_u,
|
||||
unsigned char *dst_v,
|
||||
int dst_stride,
|
||||
build_intra_predictors_mbuv_fn_t tm_func,
|
||||
build_intra_predictors_mbuv_fn_t ho_func)
|
||||
{
|
||||
int mode = x->mode_info_context->mbmi.uv_mode;
|
||||
build_intra_predictors_mbuv_fn_t fn;
|
||||
/* The source planes are read with the frame's chroma stride. */
int src_stride = x->dst.uv_stride;
|
||||
|
||||
switch (mode) {
|
||||
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
|
||||
case H_PRED: fn = ho_func; break;
|
||||
case TM_PRED: fn = tm_func; break;
|
||||
/* DC prediction has four variants, chosen by which neighbours exist. */
case DC_PRED:
|
||||
if (x->up_available) {
|
||||
if (x->left_available) {
|
||||
fn = vp8_intra_pred_uv_dc_mmx2; break;
|
||||
} else {
|
||||
fn = vp8_intra_pred_uv_dctop_mmx2; break;
|
||||
}
|
||||
} else if (x->left_available) {
|
||||
fn = vp8_intra_pred_uv_dcleft_mmx2; break;
|
||||
} else {
|
||||
fn = vp8_intra_pred_uv_dc128_mmx; break;
|
||||
}
|
||||
/* Not reached: every branch above already breaks out of the switch. */
break;
|
||||
/* Unknown mode: leave both destinations untouched. */
default: return;
|
||||
}
|
||||
|
||||
/* Same predictor for both chroma planes; only the buffers differ. */
fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
|
||||
fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
|
||||
}
|
||||
|
||||
/*
 * SSE2 entry point for chroma intra prediction into the predictor buffer.
 *
 * Writes U to x->predictor[256] and V to x->predictor[320] with a
 * destination stride of 8, using the SSE2 TM predictor and the MMX2
 * horizontal predictor.
 */
void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
|
||||
&x->predictor[320], 8,
|
||||
vp8_intra_pred_uv_tm_sse2,
|
||||
vp8_intra_pred_uv_ho_mmx2);
|
||||
}
|
||||
|
||||
/*
 * SSSE3 entry point for chroma intra prediction into the predictor buffer.
 *
 * Writes U to x->predictor[256] and V to x->predictor[320] with a
 * destination stride of 8, using the SSSE3 TM and horizontal predictors.
 */
void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
|
||||
&x->predictor[320], 8,
|
||||
vp8_intra_pred_uv_tm_ssse3,
|
||||
vp8_intra_pred_uv_ho_ssse3);
|
||||
}
|
||||
|
||||
/*
 * SSE2 entry point for the "_s" variant of chroma intra prediction.
 *
 * Passes x->dst.u_buffer / x->dst.v_buffer as the destinations with
 * x->dst.uv_stride as the destination stride, using the SSE2 TM predictor
 * and the MMX2 horizontal predictor. The shared helper also reads its
 * source from those same buffers, so this variant appears to predict in
 * place in the destination frame.
 */
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
|
||||
x->dst.v_buffer, x->dst.uv_stride,
|
||||
vp8_intra_pred_uv_tm_sse2,
|
||||
vp8_intra_pred_uv_ho_mmx2);
|
||||
}
|
||||
|
||||
/*
 * SSSE3 entry point for the "_s" variant of chroma intra prediction.
 *
 * Passes x->dst.u_buffer / x->dst.v_buffer as the destinations with
 * x->dst.uv_stride as the destination stride, using the SSSE3 TM and
 * horizontal predictors. The shared helper also reads its source from
 * those same buffers, so this variant appears to predict in place in the
 * destination frame.
 */
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
|
||||
{
|
||||
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
|
||||
x->dst.v_buffer, x->dst.uv_stride,
|
||||
vp8_intra_pred_uv_tm_ssse3,
|
||||
vp8_intra_pred_uv_ho_ssse3);
|
||||
}
|
|
@ -46,6 +46,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx);
|
|||
extern prototype_recon_block(vp8_recon2b_sse2);
|
||||
extern prototype_recon_block(vp8_recon4b_sse2);
|
||||
extern prototype_copy_block(vp8_copy_mem16x16_sse2);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon2
|
||||
|
@ -57,6 +59,26 @@ extern prototype_copy_block(vp8_copy_mem16x16_sse2);
|
|||
#undef vp8_recon_copy16x16
|
||||
#define vp8_recon_copy16x16 vp8_copy_mem16x16_sse2
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mbuv
|
||||
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_sse2
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mbuv_s
|
||||
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_SSSE3
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
|
||||
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_build_intra_predictors_mbuv
|
||||
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_ssse3
|
||||
|
||||
#undef vp8_recon_build_intra_predictors_mbuv_s
|
||||
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -113,97 +113,6 @@ nextrow:
|
|||
ret
|
||||
|
||||
|
||||
;
|
||||
; THIS FUNCTION APPEARS TO BE UNUSED
|
||||
;
|
||||
;void vp8_filter_block1d_v6_mmx
|
||||
;(
|
||||
; short *src_ptr,
|
||||
; unsigned char *output_ptr,
|
||||
; unsigned int pixels_per_line,
|
||||
; unsigned int pixel_step,
|
||||
; unsigned int output_height,
|
||||
; unsigned int output_width,
|
||||
; short * vp8_filter
|
||||
;)
|
||||
global sym(vp8_filter_block1d_v6_mmx)
|
||||
sym(vp8_filter_block1d_v6_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movq mm5, [GLOBAL(rd)]
|
||||
push rbx
|
||||
mov rbx, arg(6) ;vp8_filter
|
||||
movq mm1, [rbx + 16] ; do both the negative taps first!!!
|
||||
movq mm2, [rbx + 32] ;
|
||||
movq mm6, [rbx + 48] ;
|
||||
movq mm7, [rbx + 64] ;
|
||||
|
||||
movsxd rdx, dword ptr arg(2) ;pixels_per_line
|
||||
mov rdi, arg(1) ;output_ptr
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
sub rsi, rdx
|
||||
sub rsi, rdx
|
||||
movsxd rcx, DWORD PTR arg(4) ;output_height
|
||||
movsxd rax, DWORD PTR arg(5) ;output_width ; destination pitch?
|
||||
pxor mm0, mm0 ; mm0 = 00000000
|
||||
|
||||
|
||||
nextrow_v:
|
||||
movq mm3, [rsi+rdx] ; mm3 = p0..p3 = row -1
|
||||
pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
|
||||
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
|
||||
pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
|
||||
pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi] ; mm4 = p0..p3 = row -2
|
||||
pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
|
||||
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
|
||||
pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
|
||||
pmullw mm4, [rbx +80] ; mm4 *= kernel 5 modifiers.
|
||||
paddsw mm3, mm4 ; mm3 += mm4
|
||||
|
||||
|
||||
paddsw mm3, mm5 ; mm3 += round value
|
||||
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
|
||||
packuswb mm3, mm0 ; pack and saturate
|
||||
|
||||
movd [rdi],mm3 ; store the results in the destination
|
||||
|
||||
add rdi,rax;
|
||||
|
||||
dec rcx ; decrement count
|
||||
jnz nextrow_v ; next row
|
||||
|
||||
pop rbx
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
;void vp8_filter_block1dc_v6_mmx
|
||||
;(
|
||||
; short *src_ptr,
|
||||
|
|
|
@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 7
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
;SAVE_XMM ;xmm6, xmm7 are not used here.
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
;RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
|
|
@ -39,6 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -107,6 +108,7 @@ filter_block1d8_h6_rowloop_ssse3:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -162,6 +164,7 @@ filter_block1d8_h4_rowloop_ssse3:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -179,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -194,10 +197,6 @@ sym(vp8_filter_block1d16_h6_ssse3):
|
|||
|
||||
mov rdi, arg(2) ;output_ptr
|
||||
|
||||
;;
|
||||
;; cmp esi, DWORD PTR [rax]
|
||||
;; je vp8_filter_block1d16_h4_ssse3
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
|
||||
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
|
||||
|
@ -271,61 +270,7 @@ filter_block1d16_h6_rowloop_ssse3:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
vp8_filter_block1d16_h4_ssse3:
|
||||
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
|
||||
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
|
||||
|
||||
mov rsi, arg(0) ;src_ptr
|
||||
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
|
||||
movsxd rcx, dword ptr arg(4) ;output_height
|
||||
movsxd rdx, dword ptr arg(3) ;output_pitch
|
||||
|
||||
filter_block1d16_h4_rowloop_ssse3:
|
||||
movdqu xmm1, XMMWORD PTR [rsi - 2]
|
||||
|
||||
movdqa xmm2, xmm1
|
||||
pshufb xmm1, [GLOBAL(shuf2b)]
|
||||
pshufb xmm2, [GLOBAL(shuf3b)]
|
||||
pmaddubsw xmm1, xmm5
|
||||
|
||||
movdqu xmm3, XMMWORD PTR [rsi + 6]
|
||||
|
||||
pmaddubsw xmm2, xmm6
|
||||
movdqa xmm0, xmm3
|
||||
pshufb xmm3, [GLOBAL(shuf3b)]
|
||||
pshufb xmm0, [GLOBAL(shuf2b)]
|
||||
|
||||
paddsw xmm1, [GLOBAL(rd)]
|
||||
paddsw xmm1, xmm2
|
||||
|
||||
pmaddubsw xmm0, xmm5
|
||||
pmaddubsw xmm3, xmm6
|
||||
|
||||
psraw xmm1, 7
|
||||
packuswb xmm1, xmm1
|
||||
lea rsi, [rsi + rax]
|
||||
paddsw xmm3, xmm0
|
||||
paddsw xmm3, [GLOBAL(rd)]
|
||||
psraw xmm3, 7
|
||||
packuswb xmm3, xmm3
|
||||
|
||||
punpcklqdq xmm1, xmm3
|
||||
|
||||
movdqa XMMWORD Ptr [rdi], xmm1
|
||||
|
||||
add rdi, rdx
|
||||
dec rcx
|
||||
jnz filter_block1d16_h4_rowloop_ssse3
|
||||
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -344,6 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -451,6 +397,7 @@ filter_block1d4_h4_rowloop_ssse3:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -471,6 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -566,6 +514,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -638,6 +587,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -656,6 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -728,6 +679,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -776,6 +728,7 @@ vp8_filter_block1d8_v4_ssse3_loop:
|
|||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
@ -932,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
@ -1195,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
|
|||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 6
|
||||
SAVE_XMM
|
||||
SAVE_XMM 7
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "subpixel.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
|
||||
extern const short vp8_six_tap_mmx[8][6*8];
|
||||
extern const short vp8_bilinear_filters_mmx[8][2*8];
|
||||
|
|
|
@ -9,25 +9,21 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_ports/x86.h"
|
||||
#include "g_common.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "pragmas.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "vp8/common/g_common.h"
|
||||
#include "vp8/common/subpixel.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/recon.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/common/pragmas.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
|
||||
int flags = x86_simd_caps();
|
||||
int mmx_enabled = flags & HAS_MMX;
|
||||
int xmm_enabled = flags & HAS_SSE;
|
||||
int wmt_enabled = flags & HAS_SSE2;
|
||||
int SSSE3Enabled = flags & HAS_SSSE3;
|
||||
|
||||
/* Note:
|
||||
*
|
||||
|
@ -39,7 +35,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
/* Override default functions with fastest ones for this CPU. */
|
||||
#if HAVE_MMX
|
||||
|
||||
if (mmx_enabled)
|
||||
if (flags & HAS_MMX)
|
||||
{
|
||||
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
|
||||
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
|
||||
|
@ -67,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
|
@ -83,11 +79,15 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
#endif
|
||||
#if HAVE_SSE2
|
||||
|
||||
if (wmt_enabled)
|
||||
if (flags & HAS_SSE2)
|
||||
{
|
||||
rtcd->recon.recon2 = vp8_recon2b_sse2;
|
||||
rtcd->recon.recon4 = vp8_recon4b_sse2;
|
||||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_sse2;
|
||||
rtcd->recon.build_intra_predictors_mbuv =
|
||||
vp8_build_intra_predictors_mbuv_sse2;
|
||||
rtcd->recon.build_intra_predictors_mbuv_s =
|
||||
vp8_build_intra_predictors_mbuv_s_sse2;
|
||||
|
||||
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
|
||||
|
||||
|
@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
|
||||
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
|
||||
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
|
||||
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
|
||||
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
|
||||
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
|
||||
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
|
||||
|
||||
#if CONFIG_POSTPROC
|
||||
|
@ -118,7 +118,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
|
||||
#if HAVE_SSSE3
|
||||
|
||||
if (SSSE3Enabled)
|
||||
if (flags & HAS_SSSE3)
|
||||
{
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
|
||||
|
@ -126,6 +126,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
|
|||
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
|
||||
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
|
||||
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3;
|
||||
|
||||
rtcd->recon.build_intra_predictors_mbuv =
|
||||
vp8_build_intra_predictors_mbuv_ssse3;
|
||||
rtcd->recon.build_intra_predictors_mbuv_s =
|
||||
vp8_build_intra_predictors_mbuv_s_ssse3;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -11,23 +11,24 @@
|
|||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
#include "blockd.h"
|
||||
#include "pragmas.h"
|
||||
#include "postproc.h"
|
||||
#include "dboolhuff.h"
|
||||
#include "dequantize.h"
|
||||
#include "onyxd_int.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/common/pragmas.h"
|
||||
#include "vp8/decoder/dequantize.h"
|
||||
#include "vp8/decoder/onyxd_int.h"
|
||||
|
||||
void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
int flags = pbi->common.rtcd.flags;
|
||||
int has_edsp = flags & HAS_EDSP;
|
||||
int has_media = flags & HAS_MEDIA;
|
||||
int has_neon = flags & HAS_NEON;
|
||||
|
||||
#if HAVE_ARMV5TE
|
||||
if (flags & HAS_EDSP)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV6
|
||||
if (has_media)
|
||||
if (flags & HAS_MEDIA)
|
||||
{
|
||||
pbi->dequant.block = vp8_dequantize_b_v6;
|
||||
pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
|
||||
|
@ -35,17 +36,11 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
|
|||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
|
||||
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
|
||||
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
|
||||
#if 0 /*For use with RTCD, when implemented*/
|
||||
pbi->dboolhuff.start = vp8dx_start_decode_c;
|
||||
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
|
||||
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
|
||||
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
if (has_neon)
|
||||
if (flags & HAS_NEON)
|
||||
{
|
||||
pbi->dequant.block = vp8_dequantize_b_neon;
|
||||
pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
|
||||
|
@ -54,12 +49,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
|
|||
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
|
||||
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
|
||||
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
|
||||
#if 0 /*For use with RTCD, when implemented*/
|
||||
pbi->dboolhuff.start = vp8dx_start_decode_c;
|
||||
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
|
||||
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
|
||||
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
*/
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "idct.h"
|
||||
#include "dequantize.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/decoder/dequantize.h"
|
||||
|
||||
void vp8_dequant_dc_idct_add_y_block_v6
|
||||
(short *q, short *dq, unsigned char *pre,
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
#ifndef DBOOLHUFF_ARM_H
|
||||
#define DBOOLHUFF_ARM_H
|
||||
|
||||
/* JLK
|
||||
* There are currently no arm-optimized versions of
|
||||
* these functions. As they are implemented, they
|
||||
* can be uncommented below and added to
|
||||
* arm/dsystemdependent.c
|
||||
*
|
||||
* The existing asm code is likely so different as
|
||||
* to be useless. However, it's been left (for now)
|
||||
* for reference.
|
||||
*/
|
||||
#if 0
|
||||
#if HAVE_ARMV6
|
||||
#undef vp8_dbool_start
|
||||
#define vp8_dbool_start vp8dx_start_decode_v6
|
||||
|
||||
#undef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
|
||||
|
||||
#undef vp8_dbool_debool
|
||||
#define vp8_dbool_debool vp8_decode_bool_v6
|
||||
|
||||
#undef vp8_dbool_devalue
|
||||
#define vp8_dbool_devalue vp8_decode_value_v6
|
||||
#endif /* HAVE_ARMV6 */
|
||||
|
||||
#if HAVE_ARMV7
|
||||
#undef vp8_dbool_start
|
||||
#define vp8_dbool_start vp8dx_start_decode_neon
|
||||
|
||||
#undef vp8_dbool_fill
|
||||
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
|
||||
|
||||
#undef vp8_dbool_debool
|
||||
#define vp8_dbool_debool vp8_decode_bool_neon
|
||||
|
||||
#undef vp8_dbool_devalue
|
||||
#define vp8_dbool_devalue vp8_decode_value_neon
|
||||
#endif /* HAVE_ARMV7 */
|
||||
#endif
|
||||
#endif /* DBOOLHUFF_ARM_H */
|
|
@ -10,9 +10,8 @@
|
|||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "dequantize.h"
|
||||
#include "predictdc.h"
|
||||
#include "idct.h"
|
||||
#include "vp8/decoder/dequantize.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
#if HAVE_ARMV7
|
||||
|
@ -27,7 +26,6 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
|||
|
||||
void vp8_dequantize_b_neon(BLOCKD *d)
|
||||
{
|
||||
int i;
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
short *DQC = d->dequant;
|
||||
|
@ -39,7 +37,6 @@ void vp8_dequantize_b_neon(BLOCKD *d)
|
|||
#if HAVE_ARMV6
|
||||
void vp8_dequantize_b_v6(BLOCKD *d)
|
||||
{
|
||||
int i;
|
||||
short *DQ = d->dqcoeff;
|
||||
short *Q = d->qcoeff;
|
||||
short *DQC = d->dequant;
|
||||
|
|
|
@ -1,320 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_decode_mb_tokens_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
INCLUDE vpx_asm_offsets.asm
|
||||
|
||||
l_qcoeff EQU 0
|
||||
l_i EQU 4
|
||||
l_type EQU 8
|
||||
l_stop EQU 12
|
||||
l_c EQU 16
|
||||
l_l_ptr EQU 20
|
||||
l_a_ptr EQU 24
|
||||
l_bc EQU 28
|
||||
l_coef_ptr EQU 32
|
||||
l_stacksize EQU 64
|
||||
|
||||
|
||||
;; constant offsets -- these should be created at build time
|
||||
c_block2above_offset EQU 25
|
||||
c_entropy_nodes EQU 11
|
||||
c_dct_eob_token EQU 11
|
||||
|
||||
|vp8_decode_mb_tokens_v6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
sub sp, sp, #l_stacksize
|
||||
mov r7, r1 ; type
|
||||
mov r9, r0 ; detoken
|
||||
|
||||
ldr r1, [r9, #detok_current_bc]
|
||||
ldr r0, [r9, #detok_qcoeff_start_ptr]
|
||||
mov r11, #0 ; i
|
||||
mov r3, #16 ; stop
|
||||
|
||||
cmp r7, #1 ; type ?= 1
|
||||
addeq r11, r11, #24 ; i = 24
|
||||
addeq r3, r3, #8 ; stop = 24
|
||||
addeq r0, r0, #3, 24 ; qcoefptr += 24*16
|
||||
|
||||
str r0, [sp, #l_qcoeff]
|
||||
str r11, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
str r1, [sp, #l_bc]
|
||||
|
||||
add lr, r9, r7, lsl #2 ; detoken + type*4
|
||||
|
||||
ldr r8, [r1, #bool_decoder_user_buffer]
|
||||
|
||||
ldr r10, [lr, #detok_coef_probs]
|
||||
ldr r5, [r1, #bool_decoder_count]
|
||||
ldr r6, [r1, #bool_decoder_range]
|
||||
ldr r4, [r1, #bool_decoder_value]
|
||||
|
||||
str r10, [sp, #l_coef_ptr]
|
||||
|
||||
BLOCK_LOOP
|
||||
ldr r3, [r9, #detok_ptr_block2leftabove]
|
||||
ldr r1, [r9, #detok_L]
|
||||
ldr r2, [r9, #detok_A]
|
||||
ldrb r12, [r3, r11]! ; block2left[i]
|
||||
ldrb r3, [r3, #c_block2above_offset]; block2above[i]
|
||||
|
||||
cmp r7, #0 ; c = !type
|
||||
moveq r7, #1
|
||||
movne r7, #0
|
||||
|
||||
ldrb r0, [r1, r12]! ; *(L += block2left[i])
|
||||
ldrb r3, [r2, r3]! ; *(A += block2above[i])
|
||||
mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
|
||||
|
||||
; VP8_COMBINEENTROPYCONTEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) != 0)
|
||||
cmp r0, #0 ; *l ?= 0
|
||||
movne r0, #1
|
||||
cmp r3, #0 ; *a ?= 0
|
||||
addne r0, r0, #1 ; t
|
||||
|
||||
str r1, [sp, #l_l_ptr] ; save &l
|
||||
str r2, [sp, #l_a_ptr] ; save &a
|
||||
smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
|
||||
mov r1, #0 ; t = 0
|
||||
str r7, [sp, #l_c]
|
||||
|
||||
;align 4
|
||||
COEFF_LOOP
|
||||
ldr r3, [r9, #detok_ptr_coef_bands_x]
|
||||
ldr lr, [r9, #detok_coef_tree_ptr]
|
||||
;STALL
|
||||
ldrb r3, [r3, r7] ; coef_bands_x[c]
|
||||
;STALL
|
||||
;STALL
|
||||
add r0, r0, r3 ; Prob += coef_bands_x[c]
|
||||
|
||||
get_token_loop
|
||||
ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; (range << 8) - (1 << 8)
|
||||
mov r10, #1 ; 1
|
||||
|
||||
smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
|
||||
;++
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
|
||||
addhs r1, r1, #1 ; t += 1
|
||||
movhs r4, r3 ; value -= bigsplit (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
; movlo r6, r2 ; range = split
|
||||
|
||||
ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
|
||||
sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; 24 - count
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
|
||||
|
||||
cmp r1, #0 ; t ?= 0
|
||||
bgt get_token_loop ; while (t > 0)
|
||||
|
||||
cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
|
||||
beq END_OF_BLOCK ; break
|
||||
|
||||
rsb lr, r1, #0 ; v = -t;
|
||||
|
||||
cmp lr, #4 ; if(v > FOUR_TOKEN)
|
||||
ble SKIP_EXTRABITS
|
||||
|
||||
ldr r3, [r9, #detok_teb_base_ptr]
|
||||
mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
|
||||
add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
|
||||
|
||||
ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
|
||||
ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
|
||||
|
||||
extrabits_loop
|
||||
add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
|
||||
|
||||
ldrb r2, [r3, #4] ; probability. why +4?
|
||||
mov r3, r6, lsl #8 ; range << 8
|
||||
sub r3, r3, #256 ; (range << 8) - (1 << 8)
|
||||
|
||||
smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
ldrb r12, [r8] ; *bufptr
|
||||
;++
|
||||
|
||||
subs r10, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r10 ; value = value - (split << 24)
|
||||
subhs r2, r6, r2 ; range = range - split
|
||||
addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; shift - leading zeros in split
|
||||
sub r3, r3, #24 ; don't count first 3 bytes
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range = range << shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
|
||||
addle r5, r5, #8 ; count += BR_COUNT
|
||||
addle r8, r8, #1 ; bufptr++
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
subs r0, r0, #1 ; bits_count --
|
||||
bpl extrabits_loop
|
||||
|
||||
|
||||
SKIP_EXTRABITS
|
||||
ldr r11, [sp, #l_qcoeff]
|
||||
ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
|
||||
|
||||
cmp r1, #0 ; check for nonzero token - if (t)
|
||||
beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table check
|
||||
|
||||
add r3, r6, #1 ; range + 1
|
||||
mov r2, r3, lsr #1 ; split = (range + 1) >> 1
|
||||
|
||||
subs r3, r4, r2, lsl #24 ; value - (split<<24)
|
||||
movhs r4, r3 ; value -= (split << 24)
|
||||
subhs r2, r6, r2 ; range -= split
|
||||
mvnhs r3, lr ; -v
|
||||
addhs lr, r3, #1 ; v = (v ^ -1) + 1
|
||||
|
||||
; NORMALIZE
|
||||
clz r3, r2 ; leading 0s in split
|
||||
sub r3, r3, #24 ; shift
|
||||
subs r5, r5, r3 ; count -= shift
|
||||
mov r6, r2, lsl r3 ; range <<= shift
|
||||
mov r4, r4, lsl r3 ; value <<= shift
|
||||
ldrleb r2, [r8], #1 ; *(bufptr++)
|
||||
addle r5, r5, #8 ; count += 8
|
||||
rsble r3, r5, #24 ; BR_COUNT - count
|
||||
orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
|
||||
|
||||
add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
cmn r1, #1 ; t < -ONE_TOKEN
|
||||
|
||||
addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
|
||||
|
||||
mvn r1, #1 ; t = ~1 = -2 (same value the C code uses)
|
||||
|
||||
SKIP_EOB_CHECK
|
||||
ldr r7, [sp, #l_c] ; c
|
||||
ldr r3, [r9, #detok_scan]
|
||||
add r1, r1, #2 ; t+= 2
|
||||
cmp r7, #15 ; c will be one higher after the increment below
|
||||
|
||||
ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
|
||||
add r7, r7, #1 ; c++
|
||||
add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
|
||||
|
||||
str r7, [sp, #l_c] ; store c
|
||||
strh lr, [r3] ; qcoef_ptr[scan[c]] = v
|
||||
|
||||
blt COEFF_LOOP
|
||||
|
||||
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
|
||||
|
||||
END_OF_BLOCK
|
||||
ldr r3, [sp, #l_type] ; type
|
||||
ldr r10, [sp, #l_coef_ptr] ; coef_ptr
|
||||
ldr r0, [sp, #l_qcoeff] ; qcoeff
|
||||
ldr r11, [sp, #l_i] ; i
|
||||
ldr r12, [sp, #l_stop] ; stop
|
||||
|
||||
cmp r3, #0 ; type ?= 0
|
||||
moveq r1, #1
|
||||
movne r1, #0
|
||||
add r3, r11, r9 ; detok + i
|
||||
|
||||
cmp r7, r1 ; c ?= !type
|
||||
strb r7, [r3, #detok_eob] ; eob[i] = c
|
||||
|
||||
ldr r7, [sp, #l_l_ptr] ; l
|
||||
ldr r2, [sp, #l_a_ptr] ; a
|
||||
movne r3, #1 ; t
|
||||
moveq r3, #0
|
||||
|
||||
add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
|
||||
add r11, r11, #1 ; i++
|
||||
strb r3, [r7] ; *l = t
|
||||
strb r3, [r2] ; *a = t
|
||||
str r0, [sp, #l_qcoeff] ; qcoeff
|
||||
str r11, [sp, #l_i] ; i
|
||||
|
||||
cmp r11, r12 ; i < stop
|
||||
ldr r7, [sp, #l_type] ; type
|
||||
|
||||
blt BLOCK_LOOP
|
||||
|
||||
cmp r11, #25 ; i ?= 25
|
||||
bne ln2_decode_mb_to
|
||||
|
||||
ldr r12, [r9, #detok_qcoeff_start_ptr]
|
||||
ldr r10, [r9, #detok_coef_probs]
|
||||
mov r7, #0 ; type/i = 0
|
||||
mov r3, #16 ; stop = 16
|
||||
str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
|
||||
str r7, [sp, #l_i]
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
|
||||
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln2_decode_mb_to
|
||||
cmp r11, #16 ; i ?= 16
|
||||
bne ln1_decode_mb_to
|
||||
|
||||
mov r10, #detok_coef_probs
|
||||
add r10, r10, #2*4 ; coef_probs[type]
|
||||
ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
|
||||
|
||||
mov r7, #2 ; type = 2
|
||||
mov r3, #24 ; stop = 24
|
||||
|
||||
str r7, [sp, #l_type]
|
||||
str r3, [sp, #l_stop]
|
||||
|
||||
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
|
||||
b BLOCK_LOOP
|
||||
|
||||
ln1_decode_mb_to
|
||||
ldr r2, [sp, #l_bc]
|
||||
mov r0, #0
|
||||
nop
|
||||
|
||||
str r8, [r2, #bool_decoder_user_buffer]
|
||||
str r5, [r2, #bool_decoder_count]
|
||||
str r4, [r2, #bool_decoder_value]
|
||||
str r6, [r2, #bool_decoder_range]
|
||||
|
||||
add sp, sp, #l_stacksize
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_decode_mb_tokens_v6|
|
||||
|
||||
END
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
ldr r1, [sp, #4] ; stride
|
||||
|
||||
adr r12, _CONSTANTS_
|
||||
adr r12, cospi8sqrt2minus1 ; pointer to the first constant
|
||||
|
||||
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
|
||||
vmul.i16 q2, q4, q6
|
||||
|
@ -123,7 +123,6 @@
|
|||
ENDP ; |vp8_dequant_idct_add_neon|
|
||||
|
||||
; Constant Pool
|
||||
_CONSTANTS_ EQU cospi8sqrt2minus1
|
||||
cospi8sqrt2minus1 DCD 0x4e7b4e7b
|
||||
sinpi8sqrt2 DCD 0x8a8c8a8c
|
||||
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
*/
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "idct.h"
|
||||
#include "dequantize.h"
|
||||
#include "vp8/common/idct.h"
|
||||
#include "vp8/decoder/dequantize.h"
|
||||
|
||||
/* place these declarations here because we don't want to maintain them
|
||||
* outside of this scope
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
ldr r1, [sp, #4]
|
||||
vld1.32 {d31[1]}, [r12]
|
||||
|
||||
adr r2, _CONSTANTS_
|
||||
adr r2, cospi8sqrt2minus1 ; pointer to the first constant
|
||||
|
||||
ldrh r12, [r1], #2 ; lo *dc
|
||||
ldrh r1, [r1] ; hi *dc
|
||||
|
@ -198,7 +198,6 @@
|
|||
ENDP ; |idct_dequant_dc_full_2x_neon|
|
||||
|
||||
; Constant Pool
|
||||
_CONSTANTS_ EQU cospi8sqrt2minus1
|
||||
cospi8sqrt2minus1 DCD 0x4e7b
|
||||
; because the lowest bit in 0x8a8c is 0, we can pre-shift this
|
||||
sinpi8sqrt2 DCD 0x4546
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче