Bug 693057 - Upgrade libvpx decoder to v0.9.7-p1, r=cpearce,khuey

--HG--
rename : media/libvpx/vp8/common/filter_c.c => media/libvpx/vp8/common/filter.c
rename : media/libvpx/vpx_scale/generic/scalesystemdependant.c => media/libvpx/vpx_scale/generic/scalesystemdependent.c
This commit is contained in:
Timothy B. Terriberry 2011-10-13 17:37:21 -07:00
Родитель f0c60879a9
Коммит 7a9faf2d18
158 изменённых файлов: 6228 добавлений и 5812 удалений

Просмотреть файл

@ -5356,16 +5356,41 @@ if test -n "$MOZ_WEBM"; then
fi
CFLAGS="-I${LIBVPX_DIR}/include $CFLAGS"
LDFLAGS="-L${LIBVPX_DIR}/lib $LDFLAGS"
MOZ_NATIVE_LIBVPX_DEC_TEST=
MOZ_CHECK_HEADER(vpx/vpx_decoder.h,
[if test ! -f "${LIBVPX_DIR}/include/vpx/vpx_decoder.h"; then
AC_MSG_ERROR([vpx/vpx_decoder.h found, but is not in ${LIBVPX_DIR}/include])
fi],
AC_MSG_ERROR([--with-system-libvpx requested but vpx/vpx_decoder.h not found]))
AC_CHECK_LIB(vpx, vpx_codec_dec_init_ver,
[MOZ_NATIVE_LIBVPX=1
MOZ_LIBVPX_INCLUDES="-I${LIBVPX_DIR}/include"
MOZ_LIBVPX_LIBS="-L${LIBVPX_DIR}/lib -lvpx"],
[MOZ_NATIVE_LIBVPX_DEC_TEST=1],
([--with-system-libvpx requested but symbol vpx_codec_dec_init_ver not found]))
if test -n "$MOZ_NATIVE_LIBVPX_DEC_TEST" ; then
AC_MSG_CHECKING([for libvpx version >= v0.9.7])
dnl We need at least v0.9.7 to fix several crash bugs (for which we
dnl had local patches prior to v0.9.7).
dnl
dnl This is a terrible test for the library version, but we don't
dnl have a good one. There is no version number in a public header,
dnl and testing the headers still doesn't guarantee we link against
dnl the right version. While we could call vpx_codec_version() at
dnl run-time, that would break cross-compiling. There are no
dnl additional exported symbols between the v0.9.7 release and the
dnl v0.9.6 one to check for.
AC_TRY_COMPILE([
#include <vpx/vpx_decoder.h>
#if !defined(VPX_CODEC_USE_INPUT_PARTITION)
#error "test failed."
#endif
],
[return 0;],
[AC_MSG_RESULT([yes])
MOZ_NATIVE_LIBVPX=1
MOZ_LIBVPX_INCLUDES="-I${LIBVPX_DIR}/include"
MOZ_LIBVPX_LIBS="-L${LIBVPX_DIR}/lib -lvpx"],
[AC_MSG_RESULT([no])
AC_MSG_ERROR([--with-system-libvpx requested but it is not v0.9.7 or later])])
fi
CFLAGS=$_SAVE_CFLAGS
LDFLAGS=$_SAVE_LDFLAGS
LIBS=$_SAVE_LIBS

Просмотреть файл

@ -130,12 +130,13 @@ CSRCS += \
alloccommon.c \
blockd.c \
debugmodes.c \
defaultcoefcounts.c \
dsystemdependent.c \
entropy.c \
entropymode.c \
entropymv.c \
extend.c \
filter_c.c \
filter.c \
findnearmv.c \
idctllm.c \
invtrans.c \
@ -145,7 +146,6 @@ CSRCS += \
modecont.c \
modecontext.c \
postproc.c \
predictdc.c \
quant_common.c \
recon.c \
reconinter.c \
@ -173,7 +173,7 @@ CSRCS += \
vpx_mem.c \
gen_scalers.c \
vpxscale.c \
scalesystemdependant.c \
scalesystemdependent.c \
yv12config.c \
yv12extend.c \
$(NULL)
@ -186,6 +186,7 @@ CSRCS += \
idct_blk_mmx.c \
idct_blk_sse2.c \
loopfilter_x86.c \
recon_wrapper_sse2.c \
vp8_asm_stubs.c \
x86_systemdependent.c \
x86_dsystemdependent.c \
@ -284,11 +285,6 @@ VPX_ASFILES = \
dequantizeb_neon.asm \
$(NULL)
# The ARM asm needs to extract the offsets of various C struct members.
# We need a program that runs on the host to pull them out of a .o file.
HOST_CSRCS = obj_int_extract.c
HOST_PROGRAM = host_obj_int_extract$(HOST_BIN_SUFFIX)
ifdef VPX_AS_CONVERSION
# The ARM asm is written in ARM RVCT syntax, but we actually build it with
# gas using GNU syntax. Add some rules to perform the conversion.
@ -303,8 +299,6 @@ else
ASFILES += $(VPX_ASFILES)
endif
GARBAGE += vpx_asm_offsets.$(OBJ_SUFFIX) vpx_asm_offsets.asm
endif
include $(topsrcdir)/config/rules.mk
@ -318,8 +312,3 @@ filter_c.o: filter_c.c Makefile.in
$(CC) -o $@ -c $(patsubst -xO[45],-xO3,$(COMPILE_CFLAGS)) $<
endif
endif
ifdef VPX_ARM_ASM
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
./$(HOST_PROGRAM) rvds $< $(if $(VPX_AS_CONVERSION),| $(VPX_AS_CONVERSION)) > $@
endif

33
media/libvpx/build/make/ads2gas.pl Normal file → Executable file
Просмотреть файл

@ -21,8 +21,14 @@ print "@ This file was created from a .asm file\n";
print "@ using the ads2gas.pl script.\n";
print "\t.equ DO1STROUNDING, 0\n";
# Stack of procedure names.
@proc_stack = ();
while (<STDIN>)
{
# Load and store alignment
s/@/,:/g;
# Comment character
s/;/@/g;
@ -117,8 +123,8 @@ while (<STDIN>)
# put the colon at the end of the line in the macro
s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;
# Strip ALIGN
s/\sALIGN/@ ALIGN/g;
# ALIGN directive
s/ALIGN/.balign/g;
# Strip ARM
s/\sARM/@ ARM/g;
@ -130,9 +136,23 @@ while (<STDIN>)
# Strip PRESERVE8
s/\sPRESERVE8/@ PRESERVE8/g;
# Strip PROC and ENDPROC
s/\sPROC/@/g;
s/\sENDP/@/g;
# Use PROC and ENDP to give the symbols a .size directive.
# This makes them show up properly in debugging tools like gdb and valgrind.
if (/\bPROC\b/)
{
my $proc;
/^_([\.0-9A-Z_a-z]\w+)\b/;
$proc = $1;
push(@proc_stack, $proc) if ($proc);
s/\bPROC\b/@ $&/;
}
if (/\bENDP\b/)
{
my $proc;
s/\bENDP\b/@ $&/;
$proc = pop(@proc_stack);
$_ = "\t.size $proc, .-$proc".$_ if ($proc);
}
# EQU directive
s/(.*)EQU(.*)/.equ $1, $2/;
@ -151,3 +171,6 @@ while (<STDIN>)
next if /^\s*END\s*$/;
print;
}
# Mark that this object doesn't need an executable stack.
printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n");

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -0,0 +1,39 @@
diff --git a/media/libvpx/vpx/vp8.h b/media/libvpx/vpx/vp8.h
--- a/media/libvpx/vpx/vp8.h
+++ b/media/libvpx/vpx/vp8.h
@@ -41,33 +41,33 @@ enum vp8_com_control_id
VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */
VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */
VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */
VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */
VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */
VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */
VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */
VP8_COMMON_CTRL_ID_MAX,
- VP8_DECODER_CTRL_ID_START = 256,
+ VP8_DECODER_CTRL_ID_START = 256
};
/*!\brief post process flags
*
* The set of macros define VP8 decoder post processing flags
*/
enum vp8_postproc_level
{
VP8_NOFILTERING = 0,
VP8_DEBLOCK = 1<<0,
VP8_DEMACROBLOCK = 1<<1,
VP8_ADDNOISE = 1<<2,
VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */
VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */
VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */
- VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */
+ VP8_DEBUG_TXT_RATE_INFO = 1<<6 /**< print video rate info (encoder only) */
};
/*!\brief post process flags
*
* This define a structure that describe the post processing settings. For
* the best objective measure (using the PSNR metric) set post_proc_flag
* to VP8_DEBLOCK and deblocking_level to 1.
*/

Просмотреть файл

@ -16,69 +16,6 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
static __inline signed char vp8_signed_char_clamp(int t)
{
t = (t < -128 ? -128 : t);
diff --git a/media/libvpx/vpx/internal/vpx_codec_internal.h b/media/libvpx/vpx/internal/vpx_codec_internal.h
--- a/media/libvpx/vpx/internal/vpx_codec_internal.h
+++ b/media/libvpx/vpx/internal/vpx_codec_internal.h
@@ -316,17 +316,17 @@ struct vpx_codec_iface
/*!\brief Callback function pointer / user data pair storage */
typedef struct vpx_codec_priv_cb_pair
{
union
{
vpx_codec_put_frame_cb_fn_t put_frame;
vpx_codec_put_slice_cb_fn_t put_slice;
- };
+ } fn;
void *user_priv;
} vpx_codec_priv_cb_pair_t;
/*!\brief Instance private storage
*
* This structure is allocated by the algorithm's init function. It can be
* extended in one of two ways. First, a second, algorithm specific structure
diff --git a/media/libvpx/vpx/src/vpx_decoder.c b/media/libvpx/vpx/src/vpx_decoder.c
--- a/media/libvpx/vpx/src/vpx_decoder.c
+++ b/media/libvpx/vpx/src/vpx_decoder.c
@@ -165,17 +165,17 @@ vpx_codec_err_t vpx_codec_register_put_f
if (!ctx || !cb)
res = VPX_CODEC_INVALID_PARAM;
else if (!ctx->iface || !ctx->priv
|| !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
res = VPX_CODEC_ERROR;
else
{
- ctx->priv->dec.put_frame_cb.put_frame = cb;
+ ctx->priv->dec.put_frame_cb.fn.put_frame = cb;
ctx->priv->dec.put_frame_cb.user_priv = user_priv;
res = VPX_CODEC_OK;
}
return SAVE_STATUS(ctx, res);
}
@@ -187,17 +187,17 @@ vpx_codec_err_t vpx_codec_register_put_s
if (!ctx || !cb)
res = VPX_CODEC_INVALID_PARAM;
else if (!ctx->iface || !ctx->priv
|| !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME))
res = VPX_CODEC_ERROR;
else
{
- ctx->priv->dec.put_slice_cb.put_slice = cb;
+ ctx->priv->dec.put_slice_cb.fn.put_slice = cb;
ctx->priv->dec.put_slice_cb.user_priv = user_priv;
res = VPX_CODEC_OK;
}
return SAVE_STATUS(ctx, res);
}
diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h
--- a/media/libvpx/vpx_ports/mem.h
+++ b/media/libvpx/vpx_ports/mem.h
@ -172,8 +109,8 @@ diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h
+ asm volatile ("pause \n\t")
#else
#if ARCH_X86_64
/* No pause intrinsic for windows x64 */
#define x86_pause_hint()
#define x86_pause_hint()\
_mm_pause();
#else
#define x86_pause_hint()\
__asm pause

Просмотреть файл

@ -51,16 +51,17 @@ fi
# These are relative to SDK source dir.
commonFiles=(
vp8/vp8_dx_iface.c
vp8/common/alloccommon.c
vp8/common/blockd.c
vp8/common/debugmodes.c
vp8/common/defaultcoefcounts.c
vp8/common/entropy.c
vp8/common/entropymode.c
vp8/common/entropymv.c
vp8/common/extend.c
vp8/common/filter_c.c
vp8/common/filter.c
vp8/common/findnearmv.c
vp8/common/generic/systemdependent.c
vp8/common/idctllm.c
vp8/common/invtrans.c
vp8/common/loopfilter.c
@ -69,7 +70,6 @@ commonFiles=(
vp8/common/modecont.c
vp8/common/modecontext.c
vp8/common/postproc.c
vp8/common/predictdc.c
vp8/common/quant_common.c
vp8/common/recon.c
vp8/common/reconinter.c
@ -84,9 +84,10 @@ commonFiles=(
vp8/common/arm/filter_arm.c
vp8/common/arm/loopfilter_arm.c
vp8/common/arm/reconintra_arm.c
vp8/common/arm/vpx_asm_offsets.c
vp8/common/arm/neon/recon_neon.c
vp8/common/generic/systemdependent.c
vp8/common/x86/loopfilter_x86.c
vp8/common/x86/recon_wrapper_sse2.c
vp8/common/x86/vp8_asm_stubs.c
vp8/common/x86/x86_systemdependent.c
vp8/decoder/dboolhuff.c
@ -94,19 +95,18 @@ commonFiles=(
vp8/decoder/decodframe.c
vp8/decoder/dequantize.c
vp8/decoder/detokenize.c
vp8/decoder/reconintra_mt.c
vp8/decoder/generic/dsystemdependent.c
vp8/decoder/idct_blk.c
vp8/decoder/onyxd_if.c
vp8/decoder/reconintra_mt.c
vp8/decoder/threading.c
vp8/decoder/arm/arm_dsystemdependent.c
vp8/decoder/arm/dequantize_arm.c
vp8/decoder/arm/armv6/idct_blk_v6.c
vp8/decoder/arm/neon/idct_blk_neon.c
vp8/decoder/generic/dsystemdependent.c
vp8/decoder/x86/idct_blk_mmx.c
vp8/decoder/x86/idct_blk_sse2.c
vp8/decoder/x86/x86_dsystemdependent.c
vp8/vp8_dx_iface.c
vpx/src/vpx_codec.c
vpx/src/vpx_decoder.c
vpx/src/vpx_decoder_compat.c
@ -114,7 +114,7 @@ commonFiles=(
vpx/src/vpx_image.c
vpx_mem/vpx_mem.c
vpx_scale/generic/gen_scalers.c
vpx_scale/generic/scalesystemdependant.c
vpx_scale/generic/scalesystemdependent.c
vpx_scale/generic/vpxscale.c
vpx_scale/generic/yv12config.c
vpx_scale/generic/yv12extend.c
@ -128,6 +128,7 @@ commonFiles=(
vp8/common/entropymode.h
vp8/common/entropymv.h
vp8/common/extend.h
vp8/common/filter.h
vp8/common/findnearmv.h
vp8/common/g_common.h
vp8/common/header.h
@ -142,8 +143,6 @@ commonFiles=(
vp8/common/postproc.h
vp8/common/ppflags.h
vp8/common/pragmas.h
vp8/common/predictdc.h
vp8/common/preproc.h
vp8/common/quant_common.h
vp8/common/recon.h
vp8/common/reconinter.h
@ -156,7 +155,7 @@ commonFiles=(
vp8/common/threading.h
vp8/common/treecoder.h
vp8/common/type_aliases.h
vp8/common/vpxerrors.h
vp8/common/arm/bilinearfilter_arm.h
vp8/common/arm/idct_arm.h
vp8/common/arm/loopfilter_arm.h
vp8/common/arm/recon_arm.h
@ -174,9 +173,7 @@ commonFiles=(
vp8/decoder/onyxd_int.h
vp8/decoder/reconintra_mt.h
vp8/decoder/treereader.h
vp8/decoder/arm/dboolhuff_arm.h
vp8/decoder/arm/dequantize_arm.h
vp8/decoder/arm/detokenize_arm.h
vp8/decoder/x86/dequantize_x86.h
vpx/internal/vpx_codec_internal.h
vpx/vp8cx.h
@ -240,7 +237,6 @@ commonFiles=(
vp8/common/arm/neon/recon16x16mb_neon.asm
vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
vp8/common/arm/neon/save_neon_reg.asm
vp8/decoder/arm/detokenize.asm
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
vp8/decoder/arm/armv6/dequant_idct_v6.asm
vp8/decoder/arm/armv6/dequantize_v6.asm
@ -318,17 +314,5 @@ done
# Patch to compile with Sun Studio on Solaris
patch -p3 < solaris.patch
# Patch to fix link with xcode4
patch -p1 < xcode4.patch
# Patch to fix data race on global function pointers
patch -p3 < bug640935.patch
# Patch to avoid text relocations on ARM
patch -p3 < bug646815.patch
# Patch to fix alignment problems with using ARM asm in Thumb mode.
patch -p3 < bug666931.patch
# Patch to make chroma planes 16-byte aligned.
patch -p3 < bug671818.patch
# Patch to fix errors including C headers in C++
patch -p3 < compile_errors.patch

Просмотреть файл

@ -16,18 +16,20 @@
#include "findnearmv.h"
#include "entropymode.h"
#include "systemdependent.h"
#include "vpxerrors.h"
extern void vp8_init_scan_order_mask();
void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
static void update_mode_info_border(MODE_INFO *mi, int rows, int cols)
{
int i;
vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
for (i = 0; i < rows; i++)
{
/* TODO(holmer): Bug? This updates the last element of each row
* rather than the border element!
*/
vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
}
}
@ -44,9 +46,11 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
vpx_free(oci->above_context);
vpx_free(oci->mip);
vpx_free(oci->prev_mip);
oci->above_context = 0;
oci->mip = 0;
oci->prev_mip = 0;
}
@ -66,12 +70,12 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
for (i = 0; i < NUM_YV12_BUFFERS; i++)
{
oci->fb_idx_ref_cnt[0] = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
oci->fb_idx_ref_cnt[i] = 0;
oci->yv12_fb[i].flags = 0;
if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
return 1;
}
}
@ -88,13 +92,13 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
return 1;
}
if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
return 1;
}
oci->mb_rows = height >> 4;
@ -106,21 +110,39 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
if (!oci->mip)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
return 1;
}
oci->mi = oci->mip + oci->mode_info_stride + 1;
/* allocate memory for last frame MODE_INFO array */
#if CONFIG_ERROR_CONCEALMENT
oci->prev_mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));
if (!oci->prev_mip)
{
vp8_de_alloc_frame_buffers(oci);
return 1;
}
oci->prev_mi = oci->prev_mip + oci->mode_info_stride + 1;
#else
oci->prev_mip = NULL;
oci->prev_mi = NULL;
#endif
oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
if (!oci->above_context)
{
vp8_de_alloc_frame_buffers(oci);
return ALLOC_FAILURE;
return 1;
}
vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
#if CONFIG_ERROR_CONCEALMENT
update_mode_info_border(oci->prev_mi, oci->mb_rows, oci->mb_cols);
#endif
return 0;
}
@ -130,32 +152,32 @@ void vp8_setup_version(VP8_COMMON *cm)
{
case 0:
cm->no_lpf = 0;
cm->simpler_lpf = 0;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 0;
cm->full_pixel = 0;
break;
case 1:
cm->no_lpf = 0;
cm->simpler_lpf = 1;
cm->filter_type = SIMPLE_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 0;
break;
case 2:
cm->no_lpf = 1;
cm->simpler_lpf = 0;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 0;
break;
case 3:
cm->no_lpf = 1;
cm->simpler_lpf = 1;
cm->filter_type = SIMPLE_LOOPFILTER;
cm->use_bilinear_mc_filter = 1;
cm->full_pixel = 1;
break;
default:
/*4,5,6,7 are reserved for future use*/
cm->no_lpf = 0;
cm->simpler_lpf = 0;
cm->filter_type = NORMAL_LOOPFILTER;
cm->use_bilinear_mc_filter = 0;
cm->full_pixel = 0;
break;
@ -170,7 +192,7 @@ void vp8_create_common(VP8_COMMON *oci)
oci->mb_no_coeff_skip = 1;
oci->no_lpf = 0;
oci->simpler_lpf = 0;
oci->filter_type = NORMAL_LOOPFILTER;
oci->use_bilinear_mc_filter = 0;
oci->full_pixel = 0;
oci->multi_token_partition = ONE_PARTITION;

Просмотреть файл

@ -11,27 +11,30 @@
#include "vpx_ports/config.h"
#include "vpx_ports/arm.h"
#include "g_common.h"
#include "pragmas.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
#include "idct.h"
#include "onyxc_int.h"
#include "vp8/common/g_common.h"
#include "vp8/common/pragmas.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
#include "vp8/common/idct.h"
#include "vp8/common/onyxc_int.h"
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
int flags = arm_cpu_caps();
int has_edsp = flags & HAS_EDSP;
int has_media = flags & HAS_MEDIA;
int has_neon = flags & HAS_NEON;
rtcd->flags = flags;
/* Override default functions with fastest ones for this CPU. */
#if HAVE_ARMV5TE
if (flags & HAS_EDSP)
{
}
#endif
#if HAVE_ARMV6
if (has_media)
if (flags & HAS_MEDIA)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
@ -51,9 +54,11 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
rtcd->loopfilter.simple_mb_v =
vp8_loop_filter_simple_vertical_edge_armv6;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
rtcd->loopfilter.simple_mb_h =
vp8_loop_filter_simple_horizontal_edge_armv6;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
@ -66,7 +71,7 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
#endif
#if HAVE_ARMV7
if (has_neon)
if (flags & HAS_NEON)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;

Просмотреть файл

@ -15,33 +15,33 @@
AREA |.text|, CODE, READONLY ; name this block of code
;-------------------------------------
; r0 unsigned char *src_ptr,
; r1 unsigned short *output_ptr,
; r2 unsigned int src_pixels_per_line,
; r3 unsigned int output_height,
; stack unsigned int output_width,
; stack const short *vp8_filter
; r0 unsigned char *src_ptr,
; r1 unsigned short *dst_ptr,
; r2 unsigned int src_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp8_filter
;-------------------------------------
; The output is transposed stroed in output array to make it easy for second pass filtering.
|vp8_filter_block2d_bil_first_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; output width
ldr r4, [sp, #36] ; width
mov r12, r3 ; outer-loop counter
sub r2, r2, r4 ; src increment for height loop
;;IF ARCHITECTURE=6
pld [r0]
;;ENDIF
add r7, r2, r4 ; preload next row
pld [r0, r7]
sub r2, r2, r4 ; src increment for height loop
ldr r5, [r11] ; load up filter coefficients
mov r3, r3, lsl #1 ; output_height*2
mov r3, r3, lsl #1 ; height*2
add r3, r3, #2 ; plus 2 to make output buffer 4-bit aligned since height is actually (height+1)
mov r11, r1 ; save output_ptr for each row
mov r11, r1 ; save dst_ptr for each row
cmp r5, #128 ; if filter coef = 128, then skip the filter
beq bil_null_1st_filter
@ -96,9 +96,8 @@
add r0, r0, r2 ; move to next input row
subs r12, r12, #1
;;IF ARCHITECTURE=6
pld [r0]
;;ENDIF
add r9, r2, r4, lsl #1 ; adding back block width
pld [r0, r9] ; preload next row
add r11, r11, #2 ; move over to next column
mov r1, r11
@ -140,17 +139,17 @@
;---------------------------------
; r0 unsigned short *src_ptr,
; r1 unsigned char *output_ptr,
; r2 int output_pitch,
; r3 unsigned int output_height,
; stack unsigned int output_width,
; stack const short *vp8_filter
; r1 unsigned char *dst_ptr,
; r2 int dst_pitch,
; r3 unsigned int height,
; stack unsigned int width,
; stack const short *vp8_filter
;---------------------------------
|vp8_filter_block2d_bil_second_pass_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r4, [sp, #36] ; output width
ldr r4, [sp, #36] ; width
ldr r5, [r11] ; load up filter coefficients
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix

Просмотреть файл

@ -22,9 +22,7 @@
;push {r4-r7}
;preload
pld [r0]
pld [r0, r1]
pld [r0, r1, lsl #1]
pld [r0, #31] ; preload for next 16x16 block
ands r4, r0, #15
beq copy_mem16x16_fast
@ -90,6 +88,8 @@ copy_mem16x16_1_loop
ldrneb r6, [r0, #2]
ldrneb r7, [r0, #3]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_1_loop
ldmia sp!, {r4 - r7}
@ -121,6 +121,8 @@ copy_mem16x16_4_loop
ldrne r6, [r0, #8]
ldrne r7, [r0, #12]
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_4_loop
ldmia sp!, {r4 - r7}
@ -148,6 +150,7 @@ copy_mem16x16_8_loop
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_8_loop
ldmia sp!, {r4 - r7}
@ -171,6 +174,7 @@ copy_mem16x16_fast_loop
;stm r2, {r4-r7}
add r2, r2, r3
pld [r0, #31] ; preload for next 16x16 block
bne copy_mem16x16_fast_loop
ldmia sp!, {r4 - r7}

Просмотреть файл

@ -10,6 +10,8 @@
EXPORT |vp8_filter_block2d_first_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
EXPORT |vp8_filter_block2d_second_pass_armv6|
EXPORT |vp8_filter4_block2d_second_pass_armv6|
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
@ -40,11 +42,6 @@
add r12, r3, #16 ; square off the output
sub sp, sp, #4
;;IF ARCHITECTURE=6
;pld [r0, #-2]
;;pld [r0, #30]
;;ENDIF
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
@ -101,15 +98,10 @@
bne width_loop_1st_6
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [r0, r2]
;;pld [r0, r9]
;;ENDIF
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r1, r1, #2 ; move over to next column
str r1, [sp]
@ -120,6 +112,192 @@
ENDP
; --------------------------
; 16x16 version
; -----------------------------
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
add r4, r2, #18 ; preload next low
pld [r0, r4]
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_16_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_16_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_16_6
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r11, r2, #34 ; adding back block width(=16)
pld [r0, r11] ; preload next low
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_16_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
; --------------------------
; 8x8 version
; -----------------------------
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
stmdb sp!, {r4 - r11, lr}
ldr r11, [sp, #40] ; vp8_filter address
ldr r7, [sp, #36] ; output height
add r4, r2, #10 ; preload next low
pld [r0, r4]
sub r2, r2, r3 ; inside loop increments input array,
; so the height loop only needs to add
; r2 - width to the input pointer
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
add r12, r3, #16 ; square off the output
sub sp, sp, #4
ldr r4, [r11] ; load up packed filter coefficients
ldr r5, [r11, #4]
ldr r6, [r11, #8]
str r1, [sp] ; push destination to stack
mov r7, r7, lsl #16 ; height is top part of counter
; six tap filter
|height_loop_1st_8_6|
ldrb r8, [r0, #-2] ; load source data
ldrb r9, [r0, #-1]
ldrb r10, [r0], #2
orr r7, r7, r3, lsr #2 ; construct loop counter
|width_loop_1st_8_6|
ldrb r11, [r0, #-1]
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
ldrb r9, [r0]
smuad lr, lr, r4 ; apply the filter
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smuad r8, r8, r4
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
smlad lr, r10, r5, lr
ldrb r10, [r0, #1]
smlad r8, r11, r5, r8
ldrb r11, [r0, #2]
sub r7, r7, #1
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
smlad lr, r9, r6, lr
smlad r11, r10, r6, r8
ands r10, r7, #0xff ; test loop counter
add lr, lr, #0x40 ; round_shift_and_clamp
ldrneb r8, [r0, #-2] ; load data for next loop
usat lr, #8, lr, asr #7
add r11, r11, #0x40
ldrneb r9, [r0, #-1]
usat r11, #8, r11, asr #7
strh lr, [r1], r12 ; result is transposed and stored, which
; will make second pass filtering easier.
ldrneb r10, [r0], #2
strh r11, [r1], r12
bne width_loop_1st_8_6
ldr r1, [sp] ; load and update dst address
subs r7, r7, #0x10000
add r0, r0, r2 ; move to next input line
add r11, r2, #18 ; adding back block width(=8)
pld [r0, r11] ; preload next low
add r1, r1, #2 ; move over to next column
str r1, [sp]
bne height_loop_1st_8_6
add sp, sp, #4
ldmia sp!, {r4 - r11, pc}
ENDP
;---------------------------------
; r0 short *src_ptr,
; r1 unsigned char *output_ptr,
@ -262,6 +440,10 @@
|vp8_filter_block2d_first_pass_only_armv6| PROC
stmdb sp!, {r4 - r11, lr}
add r7, r2, r3 ; preload next low
add r7, r7, #2
pld [r0, r7]
ldr r4, [sp, #36] ; output pitch
ldr r11, [sp, #40] ; HFilter address
sub sp, sp, #8
@ -330,16 +512,15 @@
bne width_loop_1st_only_6
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
;;IF ARCHITECTURE=6
;pld [r0, r2]
;;pld [r0, r9]
;;ENDIF
ldr lr, [sp] ; load back output pitch
ldr r12, [sp, #4] ; load back output pitch
subs r7, r7, #1
add r0, r0, r12 ; updata src for next loop
add r11, r12, r3 ; preload next low
add r11, r11, #2
pld [r0, r11]
add r1, r1, lr ; update dst for next loop
bne height_loop_1st_only_6

Просмотреть файл

@ -53,14 +53,11 @@ count RN r5
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
;r2 const char *flimit,
;r2 const char *blimit,
;r3 const char *limit,
;stack const char *thresh,
;stack int count
;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
;for flimit. Same way applies to limit and thresh.
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
@ -72,14 +69,18 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
ldr r4, [r2], #4 ; flimit
ldrb r4, [r2] ; blimit
ldr r10, [src], pstep ; p2
ldr r2, [r3], #4 ; limit
ldrb r2, [r3] ; limit
ldr r11, [src], pstep ; p1
uadd8 r4, r4, r4 ; flimit * 2
ldr r3, [r6], #4 ; thresh
orr r4, r4, r4, lsl #8
ldrb r3, [r6] ; thresh
orr r2, r2, r2, lsl #8
mov count, count, lsl #1 ; 4-in-parallel
uadd8 r4, r4, r2 ; flimit * 2 + limit
orr r4, r4, r4, lsl #16
orr r3, r3, r3, lsl #8
orr r2, r2, r2, lsl #16
orr r3, r3, r3, lsl #16
|Hnext8|
; vp8_filter_mask() function
@ -253,12 +254,6 @@ count RN r5
subs count, count, #1
;pld [src]
;pld [src, pstep]
;pld [src, pstep, lsl #1]
;pld [src, pstep, lsl #2]
;pld [src, pstep, lsl #3]
ldrne r9, [src], pstep ; p3
ldrne r10, [src], pstep ; p2
ldrne r11, [src], pstep ; p1
@ -281,14 +276,18 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r9, [src], pstep ; p3
ldr r4, [r2], #4 ; flimit
ldrb r4, [r2] ; blimit
ldr r10, [src], pstep ; p2
ldr r2, [r3], #4 ; limit
ldrb r2, [r3] ; limit
ldr r11, [src], pstep ; p1
uadd8 r4, r4, r4 ; flimit * 2
ldr r3, [r6], #4 ; thresh
orr r4, r4, r4, lsl #8
ldrb r3, [r6] ; thresh
orr r2, r2, r2, lsl #8
mov count, count, lsl #1 ; 4-in-parallel
uadd8 r4, r4, r2 ; flimit * 2 + limit
orr r4, r4, r4, lsl #16
orr r3, r3, r3, lsl #8
orr r2, r2, r2, lsl #16
orr r3, r3, r3, lsl #16
|MBHnext8|
@ -590,15 +589,19 @@ count RN r5
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
ldr r4, [r2], #4 ; flimit
ldrb r4, [r2] ; blimit
ldr r7, [src], pstep
ldr r2, [r3], #4 ; limit
ldrb r2, [r3] ; limit
ldr r8, [src], pstep
uadd8 r4, r4, r4 ; flimit * 2
ldr r3, [r12], #4 ; thresh
orr r4, r4, r4, lsl #8
ldrb r3, [r12] ; thresh
orr r2, r2, r2, lsl #8
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
uadd8 r4, r4, r2 ; flimit * 2 + limit
orr r4, r4, r4, lsl #16
orr r3, r3, r3, lsl #8
orr r2, r2, r2, lsl #16
orr r3, r3, r3, lsl #16
|Vnext8|
@ -857,18 +860,26 @@ count RN r5
sub src, src, #4 ; move src pointer down by 4
ldr count, [sp, #40] ; count for 8-in-parallel
ldr r12, [sp, #36] ; load thresh address
pld [src, #23] ; preload for next block
sub sp, sp, #16 ; create temp buffer
ldr r6, [src], pstep ; load source data
ldr r4, [r2], #4 ; flimit
ldrb r4, [r2] ; blimit
pld [src, #23]
ldr r7, [src], pstep
ldr r2, [r3], #4 ; limit
ldrb r2, [r3] ; limit
pld [src, #23]
ldr r8, [src], pstep
uadd8 r4, r4, r4 ; flimit * 2
ldr r3, [r12], #4 ; thresh
orr r4, r4, r4, lsl #8
ldrb r3, [r12] ; thresh
orr r2, r2, r2, lsl #8
pld [src, #23]
ldr lr, [src], pstep
mov count, count, lsl #1 ; 4-in-parallel
uadd8 r4, r4, r2 ; flimit * 2 + limit
orr r4, r4, r4, lsl #16
orr r3, r3, r3, lsl #8
orr r2, r2, r2, lsl #16
orr r3, r3, r3, lsl #16
|MBVnext8|
; vp8_filter_mask() function
@ -908,6 +919,7 @@ count RN r5
str lr, [sp, #8]
ldr lr, [src], pstep
TRANSPOSE_MATRIX r6, r7, r8, lr, r9, r10, r11, r12
ldr lr, [sp, #8] ; load back (f)limit accumulator
@ -956,6 +968,7 @@ count RN r5
beq mbvskip_filter ; skip filtering
;vp8_hevmask() function
;calculate high edge variance
@ -1123,6 +1136,7 @@ count RN r5
smlabb r8, r6, lr, r7
smlatb r6, r6, lr, r7
smlabb r9, r10, lr, r7
smlatb r10, r10, lr, r7
ssat r8, #8, r8, asr #7
ssat r6, #8, r6, asr #7
@ -1242,9 +1256,13 @@ count RN r5
sub src, src, #4
subs count, count, #1
pld [src, #23] ; preload for next block
ldrne r6, [src], pstep ; load source data
pld [src, #23]
ldrne r7, [src], pstep
pld [src, #23]
ldrne r8, [src], pstep
pld [src, #23]
ldrne lr, [src], pstep
bne MBVnext8

Просмотреть файл

@ -45,35 +45,28 @@
MEND
src RN r0
pstep RN r1
;r0 unsigned char *src_ptr,
;r1 int src_pixel_step,
;r2 const char *flimit,
;r3 const char *limit,
;stack const char *thresh,
;stack int count
; All 16 elements in flimit are equal. So, in the code, only one load is needed
; for flimit. Same applies to limit. thresh is not used in simple looopfilter
;r2 const char *blimit
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldr r12, [r3] ; limit
ldrb r12, [r2] ; blimit
ldr r3, [src, -pstep, lsl #1] ; p1
ldr r4, [src, -pstep] ; p0
ldr r5, [src] ; q0
ldr r6, [src, pstep] ; q1
ldr r7, [r2] ; flimit
orr r12, r12, r12, lsl #8 ; blimit
ldr r2, c0x80808080
ldr r9, [sp, #40] ; count for 8-in-parallel
uadd8 r7, r7, r7 ; flimit * 2
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
uadd8 r12, r7, r12 ; flimit * 2 + limit
orr r12, r12, r12, lsl #16 ; blimit
mov r9, #4 ; double the count. we're doing 4 at a time
mov lr, #0 ; need 0 in a couple places
|simple_hnext8|
@ -148,30 +141,32 @@ pstep RN r1
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
stmdb sp!, {r4 - r11, lr}
ldr r12, [r2] ; r12: flimit
ldrb r12, [r2] ; r12: blimit
ldr r2, c0x80808080
ldr r7, [r3] ; limit
orr r12, r12, r12, lsl #8
; load soure data to r7, r8, r9, r10
ldrh r3, [src, #-2]
pld [src, #23] ; preload for next block
ldrh r4, [src], pstep
uadd8 r12, r12, r12 ; flimit * 2
orr r12, r12, r12, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
uadd8 r12, r12, r7 ; flimit * 2 + limit
pkhbt r7, r3, r4, lsl #16
ldrh r3, [src, #-2]
pld [src, #23]
ldrh r4, [src], pstep
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
pkhbt r8, r5, r6, lsl #16
ldrh r5, [src, #-2]
pld [src, #23]
ldrh r6, [src], pstep
mov r11, r11, lsl #1 ; 4-in-parallel
mov r11, #4 ; double the count. we're doing 4 at a time
|simple_vnext8|
; vp8_simple_filter_mask() function
@ -259,19 +254,23 @@ pstep RN r1
; load soure data to r7, r8, r9, r10
ldrneh r3, [src, #-2]
pld [src, #23] ; preload for next block
ldrneh r4, [src], pstep
ldrneh r5, [src, #-2]
pld [src, #23]
ldrneh r6, [src], pstep
pkhbt r7, r3, r4, lsl #16
ldrneh r3, [src, #-2]
pld [src, #23]
ldrneh r4, [src], pstep
pkhbt r8, r5, r6, lsl #16
ldrneh r5, [src, #-2]
pld [src, #23]
ldrneh r6, [src], pstep
bne simple_vnext8

Просмотреть файл

@ -35,6 +35,9 @@
adr r12, filter8_coeff
sub r0, r0, r1, lsl #1
add r3, r1, #10 ; preload next low
pld [r0, r3]
add r2, r12, r2, lsl #4 ;calculate filter location
add r0, r0, #3 ;adjust src only for loading convinience
@ -110,6 +113,9 @@
add r0, r0, r1 ; move to next input line
add r11, r1, #18 ; preload next low. adding back block width(=8), which is subtracted earlier
pld [r0, r11]
bne first_pass_hloop_v6
;second pass filter
@ -243,8 +249,6 @@ skip_secondpass_hloop
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
filter8_coeff

Просмотреть файл

@ -10,128 +10,29 @@
#include <math.h>
#include "subpixel.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
static const short bilinear_filters[8][2] =
{
{ 128, 0 },
{ 112, 16 },
{ 96, 32 },
{ 80, 48 },
{ 64, 64 },
{ 48, 80 },
{ 32, 96 },
{ 16, 112 }
};
extern void vp8_filter_block2d_bil_first_pass_armv6
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
);
extern void vp8_filter_block2d_bil_second_pass_armv6
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
);
#if 0
void vp8_filter_block2d_bil_first_pass_6
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i, j;
for ( i=0; i<output_height; i++ )
{
for ( j=0; j<output_width; j++ )
{
/* Apply bilinear filter */
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
}
}
void vp8_filter_block2d_bil_second_pass_6
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int output_height,
unsigned int output_width,
const short *vp8_filter
)
{
unsigned int i,j;
int Temp;
for ( i=0; i<output_height; i++ )
{
for ( j=0; j<output_width; j++ )
{
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[output_width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT/2);
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
/*src_ptr += src_pixels_per_line - output_width;*/
output_ptr += output_pitch;
}
}
#endif
#include "vp8/common/filter.h"
#include "vp8/common/subpixel.h"
#include "bilinearfilter_arm.h"
void vp8_filter_block2d_bil_armv6
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned char *dst_ptr,
unsigned int src_pitch,
unsigned int dst_pitch,
const short *HFilter,
const short *VFilter,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
unsigned short FData[36*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
/* pixel_step = 1; */
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
vp8_filter_block2d_bil_second_pass_armv6(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
@ -148,8 +49,8 @@ void vp8_bilinear_predict4x4_armv6
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
@ -167,8 +68,8 @@ void vp8_bilinear_predict8x8_armv6
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
@ -186,8 +87,8 @@ void vp8_bilinear_predict8x4_armv6
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
@ -205,8 +106,8 @@ void vp8_bilinear_predict16x16_armv6
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}

Просмотреть файл

@ -0,0 +1,35 @@
/*
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef BILINEARFILTER_ARM_H
#define BILINEARFILTER_ARM_H
extern void vp8_filter_block2d_bil_first_pass_armv6
(
const unsigned char *src_ptr,
unsigned short *dst_ptr,
unsigned int src_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
);
extern void vp8_filter_block2d_bil_second_pass_armv6
(
const unsigned short *src_ptr,
unsigned char *dst_ptr,
int dst_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
);
#endif /* BILINEARFILTER_ARM_H */

Просмотреть файл

@ -11,26 +11,10 @@
#include "vpx_ports/config.h"
#include <math.h>
#include "subpixel.h"
#include "vp8/common/filter.h"
#include "vp8/common/subpixel.h"
#include "vpx_ports/mem.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
{
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
{ 0, -6, 123, 12, -1, 0 },
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
{ 0, -9, 93, 50, -6, 0 },
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
extern void vp8_filter_block2d_first_pass_armv6
(
unsigned char *src_ptr,
@ -41,6 +25,28 @@ extern void vp8_filter_block2d_first_pass_armv6
const short *vp8_filter
);
// 8x8
extern void vp8_filter_block2d_first_pass_8x8_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
// 16x16
extern void vp8_filter_block2d_first_pass_16x16_armv6
(
unsigned char *src_ptr,
short *output_ptr,
unsigned int src_pixels_per_line,
unsigned int output_width,
unsigned int output_height,
const short *vp8_filter
);
extern void vp8_filter_block2d_second_pass_armv6
(
short *src_ptr,
@ -93,11 +99,11 @@ void vp8_sixtap_predict_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* Vfilter is null. First pass only */
if (xoffset && !yoffset)
@ -129,47 +135,6 @@ void vp8_sixtap_predict_armv6
}
}
#if 0
void vp8_sixtap_predict8x4_armv6
(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch
)
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
/*if (xoffset && !yoffset)
{
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
}*/
/* Hfilter is null. Second pass only */
/*else if (!xoffset && yoffset)
{
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
}
else
{
if (yoffset & 0x1)
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
else*/
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
/*}*/
}
#endif
void vp8_sixtap_predict8x8_armv6
(
unsigned char *src_ptr,
@ -182,10 +147,10 @@ void vp8_sixtap_predict8x8_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
@ -200,12 +165,12 @@ void vp8_sixtap_predict8x8_armv6
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
}
}
@ -224,10 +189,10 @@ void vp8_sixtap_predict16x16_armv6
{
const short *HFilter;
const short *VFilter;
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
if (xoffset && !yoffset)
{
@ -242,12 +207,12 @@ void vp8_sixtap_predict16x16_armv6
{
if (yoffset & 0x1)
{
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
else
{
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
}
}

Просмотреть файл

@ -9,135 +9,107 @@
*/
#include "vpx_ports/config.h"
#include <math.h>
#include "loopfilter.h"
#include "onyxc_int.h"
#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/onyxc_int.h"
#if HAVE_ARMV6
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
#endif
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
#if HAVE_ARMV7
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
unsigned char blimit, unsigned char limit, unsigned char thresh);
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
unsigned char blimit, unsigned char limit, unsigned char thresh,
unsigned char *v);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
#endif
#if HAVE_ARMV6
/*ARMV6 loopfilter functions*/
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + 12, y_stride, blimit);
}
#endif
@ -145,93 +117,60 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
/* NEON loopfilter functions */
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
unsigned char mblim = *lfi->mblim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr);
vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
}
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
unsigned char blim = *lfi->blim;
unsigned char lim = *lfi->lim;
unsigned char hev_thr = *lfi->hev_thr;
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr);
vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
}
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_neon(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
}
#endif

Просмотреть файл

@ -12,15 +12,17 @@
#ifndef LOOPFILTER_ARM_H
#define LOOPFILTER_ARM_H
#include "vpx_config.h"
#if HAVE_ARMV6
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_armv6);
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_armv6);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
@ -36,28 +38,29 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6
#undef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_armv6
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
#undef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_armv6
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
#endif
#endif
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif /* HAVE_ARMV6 */
#if HAVE_ARMV7
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
extern prototype_simple_loopfilter(vp8_loop_filter_mbvs_neon);
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_neon);
extern prototype_simple_loopfilter(vp8_loop_filter_mbhs_neon);
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_neon);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_lf_normal_mb_v
@ -83,7 +86,8 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
#endif
#endif
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif
#endif /* HAVE_ARMV7 */
#endif /* LOOPFILTER_ARM_H */

Просмотреть файл

@ -350,10 +350,7 @@ filt_blk2d_spo16x16_loop_neon
ENDP
;-----------------
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
bifilter16_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

Просмотреть файл

@ -123,10 +123,7 @@ skip_secondpass_filter
ENDP
;-----------------
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
bifilter4_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

Просмотреть файл

@ -128,10 +128,7 @@ skip_secondpass_filter
ENDP
;-----------------
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
bifilter8x4_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

Просмотреть файл

@ -176,10 +176,7 @@ skip_secondpass_filter
ENDP
;-----------------
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
bifilter8_coeff
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112

Просмотреть файл

@ -20,19 +20,16 @@
|vp8_short_inv_walsh4x4_neon| PROC
; read in all four lines of values: d0->d3
vldm.64 r0, {q0, q1}
vld1.i16 {q0-q1}, [r0@128]
; first for loop
vadd.s16 d4, d0, d3 ;a = [0] + [12]
vadd.s16 d5, d1, d2 ;b = [4] + [8]
vsub.s16 d6, d1, d2 ;c = [4] - [8]
vsub.s16 d7, d0, d3 ;d = [0] - [12]
vadd.s16 d6, d1, d2 ;b = [4] + [8]
vsub.s16 d5, d0, d3 ;d = [0] - [12]
vsub.s16 d7, d1, d2 ;c = [4] - [8]
vadd.s16 d0, d4, d5 ;a + b
vadd.s16 d1, d6, d7 ;c + d
vsub.s16 d2, d4, d5 ;a - b
vsub.s16 d3, d7, d6 ;d - c
vadd.s16 q0, q2, q3 ; a+b d+c
vsub.s16 q1, q2, q3 ; a-b d-c
vtrn.32 d0, d2 ;d0: 0 1 8 9
;d2: 2 3 10 11
@ -47,29 +44,22 @@
; second for loop
vadd.s16 d4, d0, d3 ;a = [0] + [3]
vadd.s16 d5, d1, d2 ;b = [1] + [2]
vsub.s16 d6, d1, d2 ;c = [1] - [2]
vsub.s16 d7, d0, d3 ;d = [0] - [3]
vadd.s16 d6, d1, d2 ;b = [1] + [2]
vsub.s16 d5, d0, d3 ;d = [0] - [3]
vsub.s16 d7, d1, d2 ;c = [1] - [2]
vadd.s16 d0, d4, d5 ;e = a + b
vadd.s16 d1, d6, d7 ;f = c + d
vsub.s16 d2, d4, d5 ;g = a - b
vsub.s16 d3, d7, d6 ;h = d - c
vmov.i16 q8, #3
vmov.i16 q2, #3
vadd.i16 q0, q0, q2 ;e/f += 3
vadd.i16 q1, q1, q2 ;g/h += 3
vadd.s16 q0, q2, q3 ; a+b d+c
vsub.s16 q1, q2, q3 ; a-b d-c
vadd.i16 q0, q0, q8 ;e/f += 3
vadd.i16 q1, q1, q8 ;g/h += 3
vshr.s16 q0, q0, #3 ;e/f >> 3
vshr.s16 q1, q1, #3 ;g/h >> 3
vtrn.32 d0, d2
vtrn.32 d1, d3
vtrn.16 d0, d1
vtrn.16 d2, d3
vstmia.16 r1!, {q0}
vstmia.16 r1!, {q1}
vst4.i16 {d0,d1,d2,d3}, [r1@128]
bx lr
ENDP ; |vp8_short_inv_walsh4x4_neon|
@ -77,19 +67,13 @@
;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
|vp8_short_inv_walsh4x4_1_neon| PROC
; load a full line into a neon register
vld1.16 {q0}, [r0]
; extract first element and replicate
vdup.16 q1, d0[0]
; add 3 to all values
vmov.i16 q2, #3
vadd.i16 q3, q1, q2
; right shift
vshr.s16 q3, q3, #3
; write it back
vstmia.16 r1!, {q3}
vstmia.16 r1!, {q3}
ldrsh r2, [r0] ; load input[0]
add r3, r2, #3 ; add 3
add r2, r1, #16 ; base for last 8 output
asr r0, r3, #3 ; right shift 3
vdup.16 q0, r0 ; load and duplicate
vst1.16 {q0}, [r1@128] ; write back 8
vst1.16 {q0}, [r2@128] ; write back last 8
bx lr
ENDP ; |vp8_short_inv_walsh4x4_1_neon|

Просмотреть файл

@ -14,109 +14,97 @@
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; flimit, limit, and thresh should be positive numbers.
; All 16 elements in these variables are equal.
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src
; r1 int pitch
; r2 const signed char *flimit
; r3 const signed char *limit
; sp const signed char *thresh,
; sp+4 int count (unused)
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
|vp8_loop_filter_horizontal_edge_y_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
ldr r12, [sp, #4] ; load thresh pointer
push {lr}
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines
ldr r3, [sp, #4] ; load thresh
add r12, r2, r1
add r1, r1, r1
vld1.u8 {q3}, [r2], r1 ; p3
vld1.u8 {q4}, [r2], r1 ; p2
vld1.u8 {q5}, [r2], r1 ; p1
vld1.u8 {q6}, [r2], r1 ; p0
vld1.u8 {q7}, [r2], r1 ; q0
vld1.u8 {q8}, [r2], r1 ; q1
vld1.u8 {q9}, [r2], r1 ; q2
vld1.u8 {q10}, [r2] ; q3
vld1.s8 {d4[], d5[]}, [r12] ; thresh
sub r0, r0, r1, lsl #1
vdup.u8 q2, r3 ; duplicate thresh
vld1.u8 {q3}, [r2@128], r1 ; p3
vld1.u8 {q4}, [r12@128], r1 ; p2
vld1.u8 {q5}, [r2@128], r1 ; p1
vld1.u8 {q6}, [r12@128], r1 ; p0
vld1.u8 {q7}, [r2@128], r1 ; q0
vld1.u8 {q8}, [r12@128], r1 ; q1
vld1.u8 {q9}, [r2@128] ; q2
vld1.u8 {q10}, [r12@128] ; q3
sub r2, r2, r1, lsl #1
sub r12, r12, r1, lsl #1
bl vp8_loop_filter_neon
vst1.u8 {q5}, [r0], r1 ; store op1
vst1.u8 {q6}, [r0], r1 ; store op0
vst1.u8 {q7}, [r0], r1 ; store oq0
vst1.u8 {q8}, [r0], r1 ; store oq1
vst1.u8 {q5}, [r2@128], r1 ; store op1
vst1.u8 {q6}, [r12@128], r1 ; store op0
vst1.u8 {q7}, [r2@128], r1 ; store oq0
vst1.u8 {q8}, [r12@128], r1 ; store oq1
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
push {lr}
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
ldr r12, [sp, #4] ; load thresh
ldr r2, [sp, #8] ; load v ptr
vdup.u8 q2, r12 ; duplicate thresh
sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines
vld1.u8 {d6}, [r3], r1 ; p3
vld1.u8 {d8}, [r3], r1 ; p2
vld1.u8 {d10}, [r3], r1 ; p1
vld1.u8 {d12}, [r3], r1 ; p0
vld1.u8 {d14}, [r3], r1 ; q0
vld1.u8 {d16}, [r3], r1 ; q1
vld1.u8 {d18}, [r3], r1 ; q2
vld1.u8 {d20}, [r3] ; q3
ldr r3, [sp, #4] ; load thresh pointer
sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines
vld1.u8 {d7}, [r12], r1 ; p3
vld1.u8 {d9}, [r12], r1 ; p2
vld1.u8 {d11}, [r12], r1 ; p1
vld1.u8 {d13}, [r12], r1 ; p0
vld1.u8 {d15}, [r12], r1 ; q0
vld1.u8 {d17}, [r12], r1 ; q1
vld1.u8 {d19}, [r12], r1 ; q2
vld1.u8 {d21}, [r12] ; q3
vld1.s8 {d4[], d5[]}, [r3] ; thresh
vld1.u8 {d6}, [r3@64], r1 ; p3
vld1.u8 {d7}, [r12@64], r1 ; p3
vld1.u8 {d8}, [r3@64], r1 ; p2
vld1.u8 {d9}, [r12@64], r1 ; p2
vld1.u8 {d10}, [r3@64], r1 ; p1
vld1.u8 {d11}, [r12@64], r1 ; p1
vld1.u8 {d12}, [r3@64], r1 ; p0
vld1.u8 {d13}, [r12@64], r1 ; p0
vld1.u8 {d14}, [r3@64], r1 ; q0
vld1.u8 {d15}, [r12@64], r1 ; q0
vld1.u8 {d16}, [r3@64], r1 ; q1
vld1.u8 {d17}, [r12@64], r1 ; q1
vld1.u8 {d18}, [r3@64], r1 ; q2
vld1.u8 {d19}, [r12@64], r1 ; q2
vld1.u8 {d20}, [r3@64] ; q3
vld1.u8 {d21}, [r12@64] ; q3
bl vp8_loop_filter_neon
sub r0, r0, r1, lsl #1
sub r2, r2, r1, lsl #1
vst1.u8 {d10}, [r0], r1 ; store u op1
vst1.u8 {d11}, [r2], r1 ; store v op1
vst1.u8 {d12}, [r0], r1 ; store u op0
vst1.u8 {d13}, [r2], r1 ; store v op0
vst1.u8 {d14}, [r0], r1 ; store u oq0
vst1.u8 {d15}, [r2], r1 ; store v oq0
vst1.u8 {d16}, [r0] ; store u oq1
vst1.u8 {d17}, [r2] ; store v oq1
vst1.u8 {d10}, [r0@64], r1 ; store u op1
vst1.u8 {d11}, [r2@64], r1 ; store v op1
vst1.u8 {d12}, [r0@64], r1 ; store u op0
vst1.u8 {d13}, [r2@64], r1 ; store v op0
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
vst1.u8 {d15}, [r2@64], r1 ; store v oq0
vst1.u8 {d16}, [r0@64] ; store u oq1
vst1.u8 {d17}, [r2@64] ; store v oq1
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
@ -124,39 +112,38 @@
; const signed char *limit,
; const signed char *thresh,
; int count)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
; r0 unsigned char *src
; r1 int pitch
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
|vp8_loop_filter_vertical_edge_y_neon| PROC
stmdb sp!, {lr}
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
sub r2, r0, #4 ; src ptr down by 4 columns
sub r0, r0, #2 ; dst ptr
ldr r12, [sp, #4] ; load thresh pointer
push {lr}
vdup.u8 q0, r2 ; duplicate blimit
vdup.u8 q1, r3 ; duplicate limit
sub r2, r0, #4 ; src ptr down by 4 columns
add r1, r1, r1
ldr r3, [sp, #4] ; load thresh
add r12, r2, r1, asr #1
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
vld1.u8 {d8}, [r2], r1
vld1.u8 {d6}, [r2], r1
vld1.u8 {d8}, [r12], r1
vld1.u8 {d10}, [r2], r1
vld1.u8 {d12}, [r2], r1
vld1.u8 {d12}, [r12], r1
vld1.u8 {d14}, [r2], r1
vld1.u8 {d16}, [r2], r1
vld1.u8 {d16}, [r12], r1
vld1.u8 {d18}, [r2], r1
vld1.u8 {d20}, [r2], r1
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.u8 {d20}, [r12], r1
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
vld1.u8 {d9}, [r2], r1
vld1.u8 {d9}, [r12], r1
vld1.u8 {d11}, [r2], r1
vld1.u8 {d13}, [r2], r1
vld1.u8 {d13}, [r12], r1
vld1.u8 {d15}, [r2], r1
vld1.u8 {d17}, [r2], r1
vld1.u8 {d19}, [r2], r1
vld1.u8 {d21}, [r2]
vld1.u8 {d17}, [r12], r1
vld1.u8 {d19}, [r2]
vld1.u8 {d21}, [r12]
;transpose to 8x16 matrix
vtrn.32 q3, q7
@ -164,6 +151,8 @@
vtrn.32 q5, q9
vtrn.32 q6, q10
vdup.u8 q2, r3 ; duplicate thresh
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
@ -178,28 +167,34 @@
vswp d12, d11
vswp d16, d13
sub r0, r0, #2 ; dst ptr
vswp d14, d12
vswp d16, d15
add r12, r0, r1, asr #1
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r12], r1
ldmia sp!, {pc}
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r12]
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
@ -209,38 +204,36 @@
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
; sp+4 unsigned char *v
|vp8_loop_filter_vertical_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r12, r0, #4 ; move u pointer down by 4 columns
vld1.s8 {d0[], d1[]}, [r2] ; flimit
vld1.s8 {d2[], d3[]}, [r3] ; limit
push {lr}
vdup.u8 q0, r2 ; duplicate blimit
sub r12, r0, #4 ; move u pointer down by 4 columns
ldr r2, [sp, #8] ; load v ptr
vld1.u8 {d6}, [r12], r1 ;load u data
vld1.u8 {d8}, [r12], r1
vld1.u8 {d10}, [r12], r1
vld1.u8 {d12}, [r12], r1
vld1.u8 {d14}, [r12], r1
vld1.u8 {d16}, [r12], r1
vld1.u8 {d18}, [r12], r1
vld1.u8 {d20}, [r12]
vdup.u8 q1, r3 ; duplicate limit
sub r3, r2, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r12], r1 ;load u data
vld1.u8 {d7}, [r3], r1 ;load v data
vld1.u8 {d8}, [r12], r1
vld1.u8 {d9}, [r3], r1
vld1.u8 {d10}, [r12], r1
vld1.u8 {d11}, [r3], r1
vld1.u8 {d12}, [r12], r1
vld1.u8 {d13}, [r3], r1
vld1.u8 {d14}, [r12], r1
vld1.u8 {d15}, [r3], r1
vld1.u8 {d16}, [r12], r1
vld1.u8 {d17}, [r3], r1
vld1.u8 {d18}, [r12], r1
vld1.u8 {d19}, [r3], r1
vld1.u8 {d20}, [r12]
vld1.u8 {d21}, [r3]
ldr r12, [sp, #4] ; load thresh pointer
ldr r12, [sp, #4] ; load thresh
;transpose to 8x16 matrix
vtrn.32 q3, q7
@ -248,6 +241,8 @@
vtrn.32 q5, q9
vtrn.32 q6, q10
vdup.u8 q2, r12 ; duplicate thresh
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
@ -258,18 +253,16 @@
vtrn.8 q7, q8
vtrn.8 q9, q10
vld1.s8 {d4[], d5[]}, [r12] ; thresh
bl vp8_loop_filter_neon
sub r0, r0, #2
sub r2, r2, #2
vswp d12, d11
vswp d16, d13
vswp d14, d12
vswp d16, d15
sub r0, r0, #2
sub r2, r2, #2
;store op1, op0, oq0, oq1
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
@ -288,7 +281,7 @@
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
; void vp8_loop_filter_neon();
@ -308,7 +301,6 @@
; q9 q2
; q10 q3
|vp8_loop_filter_neon| PROC
adr r12, lf_coeff
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
@ -317,42 +309,44 @@
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
vmax.u8 q3, q3, q4
vmax.u8 q15, q11, q12
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
vmax.u8 q15, q15, q3
vadd.u8 q0, q0, q0 ; flimit * 2
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
vcge.u8 q15, q1, q15
vmov.u8 q10, #0x80 ; 0x80
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
vshr.u8 q2, q2, #1 ; a = a / 2
vqadd.u8 q9, q9, q2 ; a = b + a
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
vld1.u8 {q0}, [r12]!
vcge.u8 q15, q1, q15
; vp8_filter() function
; convert to signed
veor q7, q7, q0 ; qs0
veor q6, q6, q0 ; ps0
veor q5, q5, q0 ; ps1
veor q8, q8, q0 ; qs1
veor q7, q7, q10 ; qs0
vshr.u8 q2, q2, #1 ; a = a / 2
veor q6, q6, q10 ; ps0
vld1.u8 {q10}, [r12]!
veor q5, q5, q10 ; ps1
vqadd.u8 q9, q9, q2 ; a = b + a
veor q8, q8, q10 ; qs1
vmov.u8 q10, #3 ; #3
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q11, d15, d13
vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1
vmovl.u8 q4, d20
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
@ -367,7 +361,7 @@
vaddw.s8 q2, q2, d2
vaddw.s8 q11, q11, d3
vld1.u8 {q9}, [r12]!
vmov.u8 q9, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@ -379,29 +373,25 @@
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
; outer tap adjustments: ++vp8_filter >> 1
vrshr.s8 q1, q1, #1
vbic q1, q1, q14 ; vp8_filter &= ~hev
vmov.u8 q0, #0x80 ; 0x80
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
veor q5, q13, q0 ; *op1 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
veor q5, q13, q0 ; *op1 = u^0x80
veor q8, q12, q0 ; *oq1 = u^0x80
bx lr
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
AREA loopfilter_dat, DATA, READONLY
lf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
;-----------------
END

Просмотреть файл

@ -9,108 +9,109 @@
;
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
EXPORT |vp8_loop_filter_bhs_neon|
EXPORT |vp8_loop_filter_mbhs_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
;are equal. So, in the code, only one load is needed
;for flimit. Same way applies to limit and thresh.
; r0 unsigned char *s,
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; //stack(r5) int count --unused
; r0 unsigned char *s, PRESERVE
; r1 int p, PRESERVE
; q1 limit, PRESERVE
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
adr r12, lfhy_coeff
vld1.u8 {q5}, [r0], r1 ; p1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld1.u8 {q6}, [r0], r1 ; p0
vld1.u8 {q0}, [r12]! ; 0x80
vld1.u8 {q7}, [r0], r1 ; q0
vld1.u8 {q10}, [r12]! ; 0x03
vld1.u8 {q8}, [r0] ; q1
sub r3, r0, r1, lsl #1 ; move src pointer down by 2 lines
vld1.u8 {q7}, [r0@128], r1 ; q0
vld1.u8 {q5}, [r3@128], r1 ; p0
vld1.u8 {q8}, [r0@128] ; q1
vld1.u8 {q6}, [r3@128] ; p1
;vp8_filter_mask() function
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
vmov.u8 q0, #0x80 ; 0x80
vmov.s16 q13, #3
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
;vp8_filter() function
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
vadd.u8 q1, q1, q1 ; flimit * 2
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
vcge.u8 q15, q1, q15 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > limit)*-1
;;;;;;;;;;
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
vsubl.s8 q3, d15, d13
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q12, q3, q3
vmul.s16 q2, q2, q13 ; 3 * ( qs0 - ps0)
vmul.s16 q3, q3, q13
vld1.u8 {q9}, [r12]! ; 0x04
vadd.s16 q2, q2, q11
vadd.s16 q3, q3, q12
vmov.u8 q10, #0x03 ; 0x03
vmov.u8 q9, #0x04 ; 0x04
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q3, q3, d9
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d9, q3
;;;;;;;;;;;;;
vand q4, q4, q15 ; vp8_filter &= mask
vand q14, q4, q15 ; vp8_filter &= mask
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vqadd.s8 q2, q14, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q3, q14, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
vshr.s8 q4, q3, #3 ; Filter1 >>= 3
sub r0, r0, r1, lsl #1
sub r0, r0, r1
;calculate output
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
add r3, r0, r1
veor q6, q11, q0 ; *op0 = u^0x80
veor q7, q10, q0 ; *oq0 = u^0x80
vst1.u8 {q6}, [r0] ; store op0
vst1.u8 {q7}, [r3] ; store oq0
vst1.u8 {q6}, [r3@128] ; store op0
vst1.u8 {q7}, [r0@128] ; store oq0
bx lr
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
;-----------------
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
lfhy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
; r0 unsigned char *y
; r1 int ystride
; r2 const unsigned char *blimit
|vp8_loop_filter_bhs_neon| PROC
push {r4, lr}
ldrb r3, [r2] ; load blim from mem
vdup.s8 q1, r3 ; duplicate blim
add r0, r0, r1, lsl #2 ; src = y_ptr + 4 * y_stride
bl vp8_loop_filter_simple_horizontal_edge_neon
; vp8_loop_filter_simple_horizontal_edge_neon preserves r0, r1 and q1
add r0, r0, r1, lsl #2 ; src = y_ptr + 8* y_stride
bl vp8_loop_filter_simple_horizontal_edge_neon
add r0, r0, r1, lsl #2 ; src = y_ptr + 12 * y_stride
pop {r4, lr}
b vp8_loop_filter_simple_horizontal_edge_neon
ENDP ;|vp8_loop_filter_bhs_neon|
; r0 unsigned char *y
; r1 int ystride
; r2 const unsigned char *blimit
|vp8_loop_filter_mbhs_neon| PROC
ldrb r3, [r2] ; load blim from mem
vdup.s8 q1, r3 ; duplicate mblim
b vp8_loop_filter_simple_horizontal_edge_neon
ENDP ;|vp8_loop_filter_bhs_neon|
END

Просмотреть файл

@ -9,60 +9,54 @@
;
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
;EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
EXPORT |vp8_loop_filter_bvs_neon|
EXPORT |vp8_loop_filter_mbvs_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
;are equal. So, in the code, only one load is needed
;for flimit. Same way applies to limit and thresh.
; r0 unsigned char *s,
; r1 int p, //pitch
; r2 const signed char *flimit,
; r3 const signed char *limit,
; stack(r4) const signed char *thresh,
; //stack(r5) int count --unused
; r0 unsigned char *s, PRESERVE
; r1 int p, PRESERVE
; q1 limit, PRESERVE
|vp8_loop_filter_simple_vertical_edge_neon| PROC
sub r0, r0, #2 ; move src pointer down by 2 columns
add r12, r1, r1
add r3, r0, r1
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
vld1.s8 {d2[], d3[]}, [r2] ; flimit
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
adr r12, vlfy_coeff
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r12
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r3], r12
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r12
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r3], r12
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r12
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r3], r12
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r12
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r3], r12
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
vld1.u8 {q0}, [r12]! ; 0x80
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
vld1.u8 {q11}, [r12]! ; 0x03
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
vld1.u8 {q12}, [r12]! ; 0x04
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r12
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r3], r12
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r12
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r3], r12
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r12
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r3], r12
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r12
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r3]
vswp d7, d10
vswp d12, d9
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
;vp8_filter_mask() function
;vp8_hevmask() function
sub r0, r0, r1, lsl #4
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
vmov.u8 q0, #0x80 ; 0x80
vmov.s16 q11, #3
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
@ -70,88 +64,91 @@
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
vadd.u8 q1, q1, q1 ; flimit * 2
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
vcge.u8 q15, q1, q15 ; abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
;vp8_filter() function
;;;;;;;;;;
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
vsubl.s8 q13, d9, d11
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
vqsub.s8 q14, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
vadd.s16 q14, q13, q13
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q14
vmul.s16 q2, q2, q11 ; 3 * ( qs0 - ps0)
vmul.s16 q13, q13, q11
;vqadd.s8 q1, q1, q2
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
vmov.u8 q11, #0x03 ; 0x03
vmov.u8 q12, #0x04 ; 0x04
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d3, q13
vaddw.s8 q2, q2, d28 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d29
vqmovn.s16 d28, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d29, q13
add r0, r0, #1
add r2, r0, r1
;;;;;;;;;;;
add r3, r0, r1
vand q1, q1, q15 ; vp8_filter &= mask
vand q14, q14, q15 ; vp8_filter &= mask
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vqadd.s8 q2, q14, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
vqadd.s8 q3, q14, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
vshr.s8 q14, q3, #3 ; Filter1 >>= 3
;calculate output
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
vqsub.s8 q10, q4, q14 ; u = vp8_signed_char_clamp(qs0 - Filter1)
veor q7, q10, q0 ; *oq0 = u^0x80
veor q6, q11, q0 ; *op0 = u^0x80
add r3, r2, r1
veor q7, q10, q0 ; *oq0 = u^0x80
add r12, r1, r1
vswp d13, d14
add r12, r3, r1
;store op1, op0, oq0, oq1
vst2.8 {d12[0], d13[0]}, [r0]
vst2.8 {d12[1], d13[1]}, [r2]
vst2.8 {d12[2], d13[2]}, [r3]
vst2.8 {d12[3], d13[3]}, [r12], r1
add r0, r12, r1
vst2.8 {d12[4], d13[4]}, [r12]
vst2.8 {d12[5], d13[5]}, [r0], r1
add r2, r0, r1
vst2.8 {d12[6], d13[6]}, [r0]
vst2.8 {d12[7], d13[7]}, [r2], r1
add r3, r2, r1
vst2.8 {d14[0], d15[0]}, [r2]
vst2.8 {d14[1], d15[1]}, [r3], r1
add r12, r3, r1
vst2.8 {d14[2], d15[2]}, [r3]
vst2.8 {d14[3], d15[3]}, [r12], r1
add r0, r12, r1
vst2.8 {d14[4], d15[4]}, [r12]
vst2.8 {d14[5], d15[5]}, [r0], r1
add r2, r0, r1
vst2.8 {d14[6], d15[6]}, [r0]
vst2.8 {d14[7], d15[7]}, [r2]
vst2.8 {d12[0], d13[0]}, [r0], r12
vst2.8 {d12[1], d13[1]}, [r3], r12
vst2.8 {d12[2], d13[2]}, [r0], r12
vst2.8 {d12[3], d13[3]}, [r3], r12
vst2.8 {d12[4], d13[4]}, [r0], r12
vst2.8 {d12[5], d13[5]}, [r3], r12
vst2.8 {d12[6], d13[6]}, [r0], r12
vst2.8 {d12[7], d13[7]}, [r3], r12
vst2.8 {d14[0], d15[0]}, [r0], r12
vst2.8 {d14[1], d15[1]}, [r3], r12
vst2.8 {d14[2], d15[2]}, [r0], r12
vst2.8 {d14[3], d15[3]}, [r3], r12
vst2.8 {d14[4], d15[4]}, [r0], r12
vst2.8 {d14[5], d15[5]}, [r3], r12
vst2.8 {d14[6], d15[6]}, [r0], r12
vst2.8 {d14[7], d15[7]}, [r3]
bx lr
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
;-----------------
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
vlfy_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
; r0 unsigned char *y
; r1 int ystride
; r2 const unsigned char *blimit
|vp8_loop_filter_bvs_neon| PROC
push {r4, lr}
ldrb r3, [r2] ; load blim from mem
mov r4, r0
add r0, r0, #4
vdup.s8 q1, r3 ; duplicate blim
bl vp8_loop_filter_simple_vertical_edge_neon
; vp8_loop_filter_simple_vertical_edge_neon preserves r1 and q1
add r0, r4, #8
bl vp8_loop_filter_simple_vertical_edge_neon
add r0, r4, #12
pop {r4, lr}
b vp8_loop_filter_simple_vertical_edge_neon
ENDP ;|vp8_loop_filter_bvs_neon|
; r0 unsigned char *y
; r1 int ystride
; r2 const unsigned char *blimit
|vp8_loop_filter_mbvs_neon| PROC
ldrb r3, [r2] ; load mblim from mem
vdup.s8 q1, r3 ; duplicate mblim
b vp8_loop_filter_simple_vertical_edge_neon
ENDP ;|vp8_loop_filter_bvs_neon|
END

Просмотреть файл

@ -14,155 +14,143 @@
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
; flimit, limit, and thresh should be positive numbers.
; All 16 elements in these variables are equal.
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; const unsigned char *blimit,
; const unsigned char *limit,
; const unsigned char *thresh)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
stmdb sp!, {lr}
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
ldr r12, [sp, #4] ; load thresh pointer
push {lr}
add r1, r1, r1 ; double stride
ldr r12, [sp, #4] ; load thresh
sub r0, r0, r1, lsl #1 ; move src pointer down by 4 lines
vdup.u8 q2, r12 ; thresh
add r12, r0, r1, lsr #1 ; move src pointer up by 1 line
vld1.u8 {q3}, [r0], r1 ; p3
vld1.s8 {d2[], d3[]}, [r3] ; limit
vld1.u8 {q4}, [r0], r1 ; p2
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.u8 {q5}, [r0], r1 ; p1
vld1.u8 {q6}, [r0], r1 ; p0
vld1.u8 {q7}, [r0], r1 ; q0
vld1.u8 {q8}, [r0], r1 ; q1
vld1.u8 {q9}, [r0], r1 ; q2
vld1.u8 {q10}, [r0], r1 ; q3
vld1.u8 {q3}, [r0@128], r1 ; p3
vld1.u8 {q4}, [r12@128], r1 ; p2
vld1.u8 {q5}, [r0@128], r1 ; p1
vld1.u8 {q6}, [r12@128], r1 ; p0
vld1.u8 {q7}, [r0@128], r1 ; q0
vld1.u8 {q8}, [r12@128], r1 ; q1
vld1.u8 {q9}, [r0@128], r1 ; q2
vld1.u8 {q10}, [r12@128], r1 ; q3
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
add r0, r0, r1
add r2, r0, r1
add r3, r2, r1
sub r12, r12, r1, lsl #2
add r0, r12, r1, lsr #1
vst1.u8 {q4}, [r0] ; store op2
vst1.u8 {q5}, [r2] ; store op1
vst1.u8 {q6}, [r3], r1 ; store op0
add r12, r3, r1
vst1.u8 {q7}, [r3] ; store oq0
vst1.u8 {q8}, [r12], r1 ; store oq1
vst1.u8 {q9}, [r12] ; store oq2
vst1.u8 {q4}, [r12@128],r1 ; store op2
vst1.u8 {q5}, [r0@128],r1 ; store op1
vst1.u8 {q6}, [r12@128], r1 ; store op0
vst1.u8 {q7}, [r0@128],r1 ; store oq0
vst1.u8 {q8}, [r12@128] ; store oq1
vst1.u8 {q9}, [r0@128] ; store oq2
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; const unsigned char *blimit,
; const unsigned char *limit,
; const unsigned char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r3, [sp, #8] ; load v ptr
ldr r12, [sp, #4] ; load thresh pointer
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
push {lr}
ldr r12, [sp, #4] ; load thresh
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
vdup.u8 q2, r12 ; thresh
ldr r12, [sp, #8] ; load v ptr
sub r12, r12, r1, lsl #2 ; move v pointer down by 4 lines
vld1.u8 {d6}, [r0], r1 ; p3
vld1.u8 {d7}, [r3], r1 ; p3
vld1.u8 {d8}, [r0], r1 ; p2
vld1.u8 {d9}, [r3], r1 ; p2
vld1.u8 {d10}, [r0], r1 ; p1
vld1.u8 {d11}, [r3], r1 ; p1
vld1.u8 {d12}, [r0], r1 ; p0
vld1.u8 {d13}, [r3], r1 ; p0
vld1.u8 {d14}, [r0], r1 ; q0
vld1.u8 {d15}, [r3], r1 ; q0
vld1.u8 {d16}, [r0], r1 ; q1
vld1.u8 {d17}, [r3], r1 ; q1
vld1.u8 {d18}, [r0], r1 ; q2
vld1.u8 {d19}, [r3], r1 ; q2
vld1.u8 {d20}, [r0], r1 ; q3
vld1.u8 {d21}, [r3], r1 ; q3
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.u8 {d6}, [r0@64], r1 ; p3
vld1.u8 {d7}, [r12@64], r1 ; p3
vld1.u8 {d8}, [r0@64], r1 ; p2
vld1.u8 {d9}, [r12@64], r1 ; p2
vld1.u8 {d10}, [r0@64], r1 ; p1
vld1.u8 {d11}, [r12@64], r1 ; p1
vld1.u8 {d12}, [r0@64], r1 ; p0
vld1.u8 {d13}, [r12@64], r1 ; p0
vld1.u8 {d14}, [r0@64], r1 ; q0
vld1.u8 {d15}, [r12@64], r1 ; q0
vld1.u8 {d16}, [r0@64], r1 ; q1
vld1.u8 {d17}, [r12@64], r1 ; q1
vld1.u8 {d18}, [r0@64], r1 ; q2
vld1.u8 {d19}, [r12@64], r1 ; q2
vld1.u8 {d20}, [r0@64], r1 ; q3
vld1.u8 {d21}, [r12@64], r1 ; q3
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
sub r3, r3, r1, lsl #3
sub r12, r12, r1, lsl #3
add r0, r0, r1
add r3, r3, r1
add r12, r12, r1
vst1.u8 {d8}, [r0], r1 ; store u op2
vst1.u8 {d9}, [r3], r1 ; store v op2
vst1.u8 {d10}, [r0], r1 ; store u op1
vst1.u8 {d11}, [r3], r1 ; store v op1
vst1.u8 {d12}, [r0], r1 ; store u op0
vst1.u8 {d13}, [r3], r1 ; store v op0
vst1.u8 {d14}, [r0], r1 ; store u oq0
vst1.u8 {d15}, [r3], r1 ; store v oq0
vst1.u8 {d16}, [r0], r1 ; store u oq1
vst1.u8 {d17}, [r3], r1 ; store v oq1
vst1.u8 {d18}, [r0], r1 ; store u oq2
vst1.u8 {d19}, [r3], r1 ; store v oq2
vst1.u8 {d8}, [r0@64], r1 ; store u op2
vst1.u8 {d9}, [r12@64], r1 ; store v op2
vst1.u8 {d10}, [r0@64], r1 ; store u op1
vst1.u8 {d11}, [r12@64], r1 ; store v op1
vst1.u8 {d12}, [r0@64], r1 ; store u op0
vst1.u8 {d13}, [r12@64], r1 ; store v op0
vst1.u8 {d14}, [r0@64], r1 ; store u oq0
vst1.u8 {d15}, [r12@64], r1 ; store v oq0
vst1.u8 {d16}, [r0@64], r1 ; store u oq1
vst1.u8 {d17}, [r12@64], r1 ; store v oq1
vst1.u8 {d18}, [r0@64], r1 ; store u oq2
vst1.u8 {d19}, [r12@64], r1 ; store v oq2
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; int count)
; const unsigned char *blimit,
; const unsigned char *limit,
; const unsigned char *thresh)
; r0 unsigned char *src,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 int count (unused)
; r2 unsigned char blimit
; r3 unsigned char limit
; sp unsigned char thresh,
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
stmdb sp!, {lr}
push {lr}
ldr r12, [sp, #4] ; load thresh
sub r0, r0, #4 ; move src pointer down by 4 columns
vdup.s8 q2, r12 ; thresh
add r12, r0, r1, lsl #3 ; move src pointer down by 8 lines
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
ldr r12, [sp, #4] ; load thresh pointer
vld1.u8 {d7}, [r12], r1 ; load second 8-line src data
vld1.u8 {d8}, [r0], r1
sub sp, sp, #32
vld1.u8 {d9}, [r12], r1
vld1.u8 {d10}, [r0], r1
vld1.u8 {d11}, [r12], r1
vld1.u8 {d12}, [r0], r1
vld1.u8 {d13}, [r12], r1
vld1.u8 {d14}, [r0], r1
vld1.u8 {d15}, [r12], r1
vld1.u8 {d16}, [r0], r1
vld1.u8 {d17}, [r12], r1
vld1.u8 {d18}, [r0], r1
vld1.u8 {d19}, [r12], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
vld1.u8 {d9}, [r0], r1
vld1.u8 {d11}, [r0], r1
vld1.u8 {d13}, [r0], r1
vld1.u8 {d15}, [r0], r1
vld1.u8 {d17}, [r0], r1
vld1.u8 {d19}, [r0], r1
vld1.u8 {d21}, [r0], r1
vld1.u8 {d21}, [r12], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
@ -180,133 +168,11 @@
vtrn.8 q7, q8
vtrn.8 q9, q10
vld1.s8 {d4[], d5[]}, [r12] ; thresh
vld1.s8 {d2[], d3[]}, [r3] ; limit
mov r12, sp
vst1.u8 {q3}, [r12]!
vst1.u8 {q10}, [r12]!
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #4
add r2, r0, r1
add r3, r2, r1
vld1.u8 {q3}, [sp]!
vld1.u8 {q10}, [sp]!
;transpose to 16x8 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
add r12, r3, r1
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0]
vst1.8 {d8}, [r2]
vst1.8 {d10}, [r3]
vst1.8 {d12}, [r12], r1
add r0, r12, r1
vst1.8 {d14}, [r12]
vst1.8 {d16}, [r0], r1
add r2, r0, r1
vst1.8 {d18}, [r0]
vst1.8 {d20}, [r2], r1
add r3, r2, r1
vst1.8 {d7}, [r2]
vst1.8 {d9}, [r3], r1
add r12, r3, r1
vst1.8 {d11}, [r3]
vst1.8 {d13}, [r12], r1
add r0, r12, r1
vst1.8 {d15}, [r12]
vst1.8 {d17}, [r0], r1
add r2, r0, r1
vst1.8 {d19}, [r0]
vst1.8 {d21}, [r2]
ldmia sp!, {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
; const signed char *flimit,
; const signed char *limit,
; const signed char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
stmdb sp!, {lr}
sub r0, r0, #4 ; move src pointer down by 4 columns
vld1.s8 {d2[], d3[]}, [r3] ; limit
ldr r3, [sp, #8] ; load v ptr
ldr r12, [sp, #4] ; load thresh pointer
sub r3, r3, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ;load u data
vld1.u8 {d7}, [r3], r1 ;load v data
vld1.u8 {d8}, [r0], r1
vld1.u8 {d9}, [r3], r1
vld1.u8 {d10}, [r0], r1
vld1.u8 {d11}, [r3], r1
vld1.u8 {d12}, [r0], r1
vld1.u8 {d13}, [r3], r1
vld1.u8 {d14}, [r0], r1
vld1.u8 {d15}, [r3], r1
vld1.u8 {d16}, [r0], r1
vld1.u8 {d17}, [r3], r1
vld1.u8 {d18}, [r0], r1
vld1.u8 {d19}, [r3], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d21}, [r3], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
sub sp, sp, #32
vld1.s8 {d4[], d5[]}, [r12] ; thresh
mov r12, sp
vst1.u8 {q3}, [r12]!
vst1.u8 {q10}, [r12]!
bl vp8_mbloop_filter_neon
sub r0, r0, r1, lsl #3
sub r3, r3, r1, lsl #3
vld1.u8 {q3}, [sp]!
vld1.u8 {q10}, [sp]!
bl vp8_mbloop_filter_neon
sub r12, r12, r1, lsl #3
;transpose to 16x8 matrix
vtrn.32 q3, q7
@ -326,23 +192,118 @@
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0], r1
vst1.8 {d7}, [r3], r1
vst1.8 {d7}, [r12], r1
vst1.8 {d8}, [r0], r1
vst1.8 {d9}, [r3], r1
vst1.8 {d9}, [r12], r1
vst1.8 {d10}, [r0], r1
vst1.8 {d11}, [r3], r1
vst1.8 {d11}, [r12], r1
vst1.8 {d12}, [r0], r1
vst1.8 {d13}, [r3], r1
vst1.8 {d13}, [r12], r1
vst1.8 {d14}, [r0], r1
vst1.8 {d15}, [r3], r1
vst1.8 {d15}, [r12], r1
vst1.8 {d16}, [r0], r1
vst1.8 {d17}, [r3], r1
vst1.8 {d17}, [r12], r1
vst1.8 {d18}, [r0], r1
vst1.8 {d19}, [r3], r1
vst1.8 {d20}, [r0], r1
vst1.8 {d21}, [r3], r1
vst1.8 {d19}, [r12], r1
vst1.8 {d20}, [r0]
vst1.8 {d21}, [r12]
ldmia sp!, {pc}
pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
; const unsigned char *blimit,
; const unsigned char *limit,
; const unsigned char *thresh,
; unsigned char *v)
; r0 unsigned char *u,
; r1 int pitch,
; r2 const signed char *flimit,
; r3 const signed char *limit,
; sp const signed char *thresh,
; sp+4 unsigned char *v
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
push {lr}
ldr r12, [sp, #4] ; load thresh
sub r0, r0, #4 ; move u pointer down by 4 columns
vdup.u8 q2, r12 ; thresh
ldr r12, [sp, #8] ; load v ptr
sub r12, r12, #4 ; move v pointer down by 4 columns
vld1.u8 {d6}, [r0], r1 ;load u data
vld1.u8 {d7}, [r12], r1 ;load v data
vld1.u8 {d8}, [r0], r1
vld1.u8 {d9}, [r12], r1
vld1.u8 {d10}, [r0], r1
vld1.u8 {d11}, [r12], r1
vld1.u8 {d12}, [r0], r1
vld1.u8 {d13}, [r12], r1
vld1.u8 {d14}, [r0], r1
vld1.u8 {d15}, [r12], r1
vld1.u8 {d16}, [r0], r1
vld1.u8 {d17}, [r12], r1
vld1.u8 {d18}, [r0], r1
vld1.u8 {d19}, [r12], r1
vld1.u8 {d20}, [r0], r1
vld1.u8 {d21}, [r12], r1
;transpose to 8x16 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
sub r0, r0, r1, lsl #3
bl vp8_mbloop_filter_neon
sub r12, r12, r1, lsl #3
;transpose to 16x8 matrix
vtrn.32 q3, q7
vtrn.32 q4, q8
vtrn.32 q5, q9
vtrn.32 q6, q10
vtrn.16 q3, q5
vtrn.16 q4, q6
vtrn.16 q7, q9
vtrn.16 q8, q10
vtrn.8 q3, q4
vtrn.8 q5, q6
vtrn.8 q7, q8
vtrn.8 q9, q10
;store op2, op1, op0, oq0, oq1, oq2
vst1.8 {d6}, [r0], r1
vst1.8 {d7}, [r12], r1
vst1.8 {d8}, [r0], r1
vst1.8 {d9}, [r12], r1
vst1.8 {d10}, [r0], r1
vst1.8 {d11}, [r12], r1
vst1.8 {d12}, [r0], r1
vst1.8 {d13}, [r12], r1
vst1.8 {d14}, [r0], r1
vst1.8 {d15}, [r12], r1
vst1.8 {d16}, [r0], r1
vst1.8 {d17}, [r12], r1
vst1.8 {d18}, [r0], r1
vst1.8 {d19}, [r12], r1
vst1.8 {d20}, [r0]
vst1.8 {d21}, [r12]
pop {pc}
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
; void vp8_mbloop_filter_neon()
@ -350,41 +311,33 @@
; functions do the necessary load, transpose (if necessary), preserve (if
; necessary) and store.
; TODO:
; The vertical filter writes p3/q3 back out because two 4 element writes are
; much simpler than ordering and writing two 3 element sets (or three 2 elements
; sets, or whichever other combinations are possible).
; If we can preserve q3 and q10, the vertical filter will be able to avoid
; storing those values on the stack and reading them back after the filter.
; r0,r1 PRESERVE
; r2 flimit
; r3 PRESERVE
; q1 limit
; r2 mblimit
; r3 limit
; q2 thresh
; q3 p3
; q3 p3 PRESERVE
; q4 p2
; q5 p1
; q6 p0
; q7 q0
; q8 q1
; q9 q2
; q10 q3
; q10 q3 PRESERVE
|vp8_mbloop_filter_neon| PROC
adr r12, mblf_coeff
; vp8_filter_mask
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
vabd.u8 q1, q9, q8 ; abs(q2 - q1)
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
vmax.u8 q11, q11, q12
vmax.u8 q12, q13, q14
vmax.u8 q3, q3, q0
vmax.u8 q1, q1, q0
vmax.u8 q15, q11, q12
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
@ -392,51 +345,53 @@
; vp8_hevmask
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
vmax.u8 q15, q15, q3
vmax.u8 q15, q15, q1
vld1.s8 {d4[], d5[]}, [r2] ; flimit
vdup.u8 q1, r3 ; limit
vdup.u8 q2, r2 ; mblimit
vld1.u8 {q0}, [r12]!
vmov.u8 q0, #0x80 ; 0x80
vadd.u8 q2, q2, q2 ; flimit * 2
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
vcge.u8 q15, q1, q15
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
vshr.u8 q1, q1, #1 ; a = a / 2
vqadd.u8 q12, q12, q1 ; a = b + a
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
vmov.u16 q11, #3 ; #3
; vp8_filter
; convert to signed
veor q7, q7, q0 ; qs0
vshr.u8 q1, q1, #1 ; a = a / 2
veor q6, q6, q0 ; ps0
veor q5, q5, q0 ; ps1
vqadd.u8 q12, q12, q1 ; a = b + a
veor q8, q8, q0 ; qs1
veor q4, q4, q0 ; ps2
veor q9, q9, q0 ; qs2
vorr q14, q13, q14 ; vp8_hevmask
vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1
vsubl.s8 q2, d14, d12 ; qs0 - ps0
vsubl.s8 q13, d15, d13
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0)
vadd.s16 q11, q13, q13
vmul.i16 q2, q2, q11 ; 3 * ( qs0 - ps0)
vand q15, q15, q12 ; vp8_filter_mask
vadd.s16 q2, q2, q10
vadd.s16 q13, q13, q11
vmul.i16 q13, q13, q11
vld1.u8 {q12}, [r12]! ; #3
vmov.u8 q12, #3 ; #3
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
vaddw.s8 q13, q13, d3
vld1.u8 {q11}, [r12]! ; #4
vmov.u8 q11, #4 ; #4
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
vqmovn.s16 d2, q2
@ -444,27 +399,23 @@
vand q1, q1, q15 ; vp8_filter &= mask
vld1.u8 {q15}, [r12]! ; #63
;
vand q13, q1, q14 ; Filter2 &= hev
vmov.u16 q15, #63 ; #63
vld1.u8 {d7}, [r12]! ; #9
vand q13, q1, q14 ; Filter2 &= hev
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
vld1.u8 {d6}, [r12]! ; #18
vmov q0, q15
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
vmov q10, q15
vmov q11, q15
vmov q12, q15
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
vld1.u8 {d5}, [r12]! ; #27
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
vbic q1, q1, q14 ; vp8_filter &= ~hev
@ -472,46 +423,47 @@
; roughly 1/7th difference across boundary
; roughly 2/7th difference across boundary
; roughly 3/7th difference across boundary
vmov q11, q15
vmov.u8 d5, #9 ; #9
vmov.u8 d4, #18 ; #18
vmov q13, q15
vmov q14, q15
vmlal.s8 q10, d2, d7 ; Filter2 * 9
vmlal.s8 q11, d3, d7
vmlal.s8 q12, d2, d6 ; Filter2 * 18
vmlal.s8 q13, d3, d6
vmlal.s8 q14, d2, d5 ; Filter2 * 27
vmlal.s8 q0, d2, d5 ; 63 + Filter2 * 9
vmlal.s8 q11, d3, d5
vmov.u8 d5, #27 ; #27
vmlal.s8 q12, d2, d4 ; 63 + Filter2 * 18
vmlal.s8 q13, d3, d4
vmlal.s8 q14, d2, d5 ; 63 + Filter2 * 27
vmlal.s8 q15, d3, d5
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
vqshrn.s16 d21, q11, #7
vqshrn.s16 d0, q0, #7 ; u = clamp((63 + Filter2 * 9)>>7)
vqshrn.s16 d1, q11, #7
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
vqshrn.s16 d25, q13, #7
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
vqshrn.s16 d29, q15, #7
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
vmov.u8 q1, #0x80 ; 0x80
vqsub.s8 q11, q9, q0 ; s = clamp(qs2 - u)
vqadd.s8 q0, q4, q0 ; s = clamp(ps2 + u)
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
veor q9, q11, q0 ; *oq2 = s^0x80
veor q4, q10, q0 ; *op2 = s^0x80
veor q8, q13, q0 ; *oq1 = s^0x80
veor q5, q12, q0 ; *op2 = s^0x80
veor q7, q15, q0 ; *oq0 = s^0x80
veor q6, q14, q0 ; *op0 = s^0x80
veor q9, q11, q1 ; *oq2 = s^0x80
veor q4, q0, q1 ; *op2 = s^0x80
veor q8, q13, q1 ; *oq1 = s^0x80
veor q5, q12, q1 ; *op2 = s^0x80
veor q7, q15, q1 ; *oq0 = s^0x80
veor q6, q14, q1 ; *op0 = s^0x80
bx lr
ENDP ; |vp8_mbloop_filter_neon|
AREA mbloopfilter_dat, DATA, READONLY
mblf_coeff
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212
DCD 0x1b1b1b1b, 0x1b1b1b1b
;-----------------
END

Просмотреть файл

@ -10,8 +10,8 @@
#include "vpx_ports/config.h"
#include "recon.h"
#include "blockd.h"
#include "vp8/common/recon.h"
#include "vp8/common/blockd.h"
extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);

Просмотреть файл

@ -113,10 +113,7 @@
ENDP
;-----------------
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
idct_coeff
DCD 0x4e7b4e7b, 0x8a8c8a8c

Просмотреть файл

@ -15,6 +15,17 @@
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
filter16_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
@ -33,7 +44,7 @@
|vp8_sixtap_predict16x16_neon| PROC
push {r4-r5, lr}
adrl r12, filter16_coeff
adr r12, filter16_coeff
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
@ -476,18 +487,4 @@ secondpass_only_inner_loop_neon
ENDP
;-----------------
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
filter16_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END

Просмотреть файл

@ -15,6 +15,17 @@
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
filter4_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
@ -25,7 +36,7 @@
|vp8_sixtap_predict_neon| PROC
push {r4, lr}
adrl r12, filter4_coeff
adr r12, filter4_coeff
ldr r4, [sp, #8] ;load parameters from stack
ldr lr, [sp, #12] ;load parameters from stack
@ -407,18 +418,5 @@ secondpass_filter4x4_only
ENDP
;-----------------
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
filter4_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END

Просмотреть файл

@ -15,6 +15,17 @@
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
@ -25,7 +36,7 @@
|vp8_sixtap_predict8x4_neon| PROC
push {r4-r5, lr}
adrl r12, filter8_coeff
adr r12, filter8_coeff
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
@ -458,18 +469,5 @@ secondpass_filter8x4_only
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END

Просмотреть файл

@ -15,6 +15,17 @@
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
; r0 unsigned char *src_ptr,
; r1 int src_pixels_per_line,
; r2 int xoffset,
@ -25,7 +36,7 @@
|vp8_sixtap_predict8x8_neon| PROC
push {r4-r5, lr}
adrl r12, filter8_coeff
adr r12, filter8_coeff
ldr r4, [sp, #12] ;load parameters from stack
ldr r5, [sp, #16] ;load parameters from stack
@ -509,18 +520,5 @@ filt_blk2d_spo8x8_loop_neon
ENDP
;-----------------
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
;One word each is reserved. Label filter_coeff can be used to access the data.
;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
filter8_coeff
DCD 0, 0, 128, 0, 0, 0, 0, 0
DCD 0, -6, 123, 12, -1, 0, 0, 0
DCD 2, -11, 108, 36, -8, 1, 0, 0
DCD 0, -9, 93, 50, -6, 0, 0, 0
DCD 3, -16, 77, 77, -16, 3, 0, 0
DCD 0, -6, 50, 93, -9, 0, 0, 0
DCD 1, -8, 36, 108, -11, 2, 0, 0
DCD 0, -1, 12, 123, -6, 0, 0, 0
END

Просмотреть файл

@ -10,10 +10,10 @@
#include "vpx_ports/config.h"
#include "blockd.h"
#include "reconintra.h"
#include "vp8/common/blockd.h"
#include "vp8/common/reconintra.h"
#include "vpx_mem/vpx_mem.h"
#include "recon.h"
#include "vp8/common/recon.h"
#if HAVE_ARMV7
extern void vp8_build_intra_predictors_mby_neon_func(

Просмотреть файл

@ -1,87 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include <stddef.h>
#if CONFIG_VP8_ENCODER
#include "vpx_scale/yv12config.h"
#endif
#if CONFIG_VP8_DECODER
#include "onyxd_int.h"
#endif
#define DEFINE(sym, val) int sym = val;
/*
#define BLANK() asm volatile("\n->" : : )
*/
/*
* int main(void)
* {
*/
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
#endif
#if CONFIG_VP8_DECODER
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
DEFINE(detok_scan, offsetof(DETOK, scan));
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
DEFINE(detok_A, offsetof(DETOK, A));
DEFINE(detok_L, offsetof(DETOK, L));
DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
DEFINE(detok_eob, offsetof(DETOK, eob));
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
#endif
//add asserts for any offset that is not supported by assembly code
//add asserts for any size that is not supported by assembly code
/*
* return 0;
* }
*/

Просмотреть файл

@ -12,8 +12,6 @@
#include "blockd.h"
#include "vpx_mem/vpx_mem.h"
const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
const unsigned char vp8_block2left[25] =
{
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8

Просмотреть файл

@ -28,11 +28,6 @@ void vpx_log(const char *format, ...);
#define DCPREDSIMTHRESH 0
#define DCPREDCNTTHRESH 3
#define Y1CONTEXT 0
#define UCONTEXT 1
#define VCONTEXT 2
#define Y2CONTEXT 3
#define MB_FEATURE_TREE_PROBS 3
#define MAX_MB_SEGMENTS 4
@ -48,6 +43,11 @@ typedef struct
int r, c;
} POS;
#define PLANE_TYPE_Y_NO_DC 0
#define PLANE_TYPE_Y2 1
#define PLANE_TYPE_UV 2
#define PLANE_TYPE_Y_WITH_DC 3
typedef char ENTROPY_CONTEXT;
typedef struct
@ -58,8 +58,6 @@ typedef struct
ENTROPY_CONTEXT y2;
} ENTROPY_CONTEXT_PLANES;
extern const int vp8_block2type[25];
extern const unsigned char vp8_block2left[25];
extern const unsigned char vp8_block2above[25];
@ -139,16 +137,11 @@ typedef enum
modes for the Y blocks to the left and above us; for interframes, there
is a single probability table. */
typedef struct
union b_mode_info
{
B_PREDICTION_MODE mode;
union
{
int as_int;
MV as_mv;
} mv;
} B_MODE_INFO;
B_PREDICTION_MODE as_mode;
int_mv mv;
};
typedef enum
{
@ -163,38 +156,26 @@ typedef struct
{
MB_PREDICTION_MODE mode, uv_mode;
MV_REFERENCE_FRAME ref_frame;
union
{
int as_int;
MV as_mv;
} mv;
int_mv mv;
unsigned char partitioning;
unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
unsigned char dc_diff;
unsigned char need_to_clamp_mvs;
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
unsigned char force_no_skip; /* encoder only */
} MB_MODE_INFO;
typedef struct
{
MB_MODE_INFO mbmi;
B_MODE_INFO bmi[16];
union b_mode_info bmi[16];
} MODE_INFO;
typedef struct
{
short *qcoeff;
short *dqcoeff;
unsigned char *predictor;
short *diff;
short *reference;
short *dequant;
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
@ -208,15 +189,13 @@ typedef struct
int eob;
B_MODE_INFO bmi;
union b_mode_info bmi;
} BLOCKD;
typedef struct
typedef struct MacroBlockD
{
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
/* not used DECLARE_ALIGNED(16, short, reference[384]); */
DECLARE_ALIGNED(16, short, qcoeff[400]);
DECLARE_ALIGNED(16, short, dqcoeff[400]);
DECLARE_ALIGNED(16, char, eobs[25]);
@ -273,6 +252,9 @@ typedef struct
int mb_to_top_edge;
int mb_to_bottom_edge;
int ref_frame_cost[MAX_REF_FRAMES];
unsigned int frames_since_golden;
unsigned int frames_till_alt_ref_frame;
vp8_subpix_fn_t subpixel_predict;
@ -282,6 +264,16 @@ typedef struct
void *current_bc;
int corrupted;
#if ARCH_X86 || ARCH_X86_64
/* This is an intermediate buffer currently used in sub-pixel motion search
* to keep a copy of the reference area. This buffer can be used for other
* purpose.
*/
DECLARE_ALIGNED(32, unsigned char, y_buf[22*32]);
#endif
#if CONFIG_RUNTIME_CPU_DETECT
struct VP8_COMMON_RTCD *rtcd;
#endif
@ -291,4 +283,20 @@ typedef struct
extern void vp8_build_block_doffsets(MACROBLOCKD *x);
extern void vp8_setup_block_dptrs(MACROBLOCKD *x);
static void update_blockd_bmi(MACROBLOCKD *xd)
{
int i;
int is_4x4;
is_4x4 = (xd->mode_info_context->mbmi.mode == SPLITMV) ||
(xd->mode_info_context->mbmi.mode == B_PRED);
if (is_4x4)
{
for (i = 0; i < 16; i++)
{
xd->block[i].bmi = xd->mode_info_context->bmi[i];
}
}
}
#endif /* __INC_BLOCKD_H */

Просмотреть файл

@ -12,7 +12,7 @@
/* Update probabilities for the nodes in the token entropy tree.
Generated file included by entropy.c */
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1] =
const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] =
{
{
{

Просмотреть файл

@ -97,7 +97,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
bindex = (b_row & 3) * 4 + (b_col & 3);
if (mi[mb_index].mbmi.mode == B_PRED)
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].mode);
fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode);
else
fprintf(mvs, "xx ");

Просмотреть файл

@ -0,0 +1,225 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "defaultcoefcounts.h"
/* Generated file, included by entropy.c */
const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS] =
{
{
/* Block Type ( 0 ) */
{
/* Coeff Band ( 0 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
{
/* Coeff Band ( 1 ) */
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
},
{
/* Coeff Band ( 2 ) */
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
},
{
/* Coeff Band ( 3 ) */
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
},
{
/* Coeff Band ( 4 ) */
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
},
{
/* Coeff Band ( 5 ) */
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
},
{
/* Coeff Band ( 6 ) */
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
},
},
{
/* Block Type ( 1 ) */
{
/* Coeff Band ( 0 ) */
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
},
{
/* Coeff Band ( 1 ) */
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
},
{
/* Coeff Band ( 2 ) */
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
},
{
/* Coeff Band ( 3 ) */
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
},
{
/* Coeff Band ( 4 ) */
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
},
{
/* Coeff Band ( 5 ) */
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
},
{
/* Coeff Band ( 6 ) */
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
},
},
{
/* Block Type ( 2 ) */
{
/* Coeff Band ( 0 ) */
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
},
{
/* Coeff Band ( 1 ) */
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
},
{
/* Coeff Band ( 2 ) */
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
},
{
/* Coeff Band ( 3 ) */
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
},
{
/* Coeff Band ( 4 ) */
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
},
{
/* Coeff Band ( 5 ) */
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
},
{
/* Coeff Band ( 6 ) */
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
},
{
/* Block Type ( 3 ) */
{
/* Coeff Band ( 0 ) */
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
},
{
/* Coeff Band ( 1 ) */
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
},
{
/* Coeff Band ( 2 ) */
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
},
{
/* Coeff Band ( 3 ) */
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
},
{
/* Coeff Band ( 4 ) */
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
},
{
/* Coeff Band ( 5 ) */
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
},
{
/* Coeff Band ( 6 ) */
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
},
},
};

Просмотреть файл

@ -8,214 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __DEFAULTCOEFCOUNTS_H
#define __DEFAULTCOEFCOUNTS_H
/* Generated file, included by entropy.c */
#include "entropy.h"
static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens] =
{
extern const unsigned int vp8_default_coef_counts[BLOCK_TYPES]
[COEF_BANDS]
[PREV_COEF_CONTEXTS]
[MAX_ENTROPY_TOKENS];
{
/* Block Type ( 0 ) */
{
/* Coeff Band ( 0 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
{
/* Coeff Band ( 1 ) */
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
},
{
/* Coeff Band ( 2 ) */
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
},
{
/* Coeff Band ( 3 ) */
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
},
{
/* Coeff Band ( 4 ) */
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
},
{
/* Coeff Band ( 5 ) */
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
},
{
/* Coeff Band ( 6 ) */
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
},
},
{
/* Block Type ( 1 ) */
{
/* Coeff Band ( 0 ) */
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
},
{
/* Coeff Band ( 1 ) */
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
},
{
/* Coeff Band ( 2 ) */
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
},
{
/* Coeff Band ( 3 ) */
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
},
{
/* Coeff Band ( 4 ) */
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
},
{
/* Coeff Band ( 5 ) */
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
},
{
/* Coeff Band ( 6 ) */
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
},
},
{
/* Block Type ( 2 ) */
{
/* Coeff Band ( 0 ) */
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
},
{
/* Coeff Band ( 1 ) */
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
},
{
/* Coeff Band ( 2 ) */
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
},
{
/* Coeff Band ( 3 ) */
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
},
{
/* Coeff Band ( 4 ) */
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
},
{
/* Coeff Band ( 5 ) */
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
},
{
/* Coeff Band ( 6 ) */
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
},
},
{
/* Block Type ( 3 ) */
{
/* Coeff Band ( 0 ) */
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
},
{
/* Coeff Band ( 1 ) */
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
},
{
/* Coeff Band ( 2 ) */
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
},
{
/* Coeff Band ( 3 ) */
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
},
{
/* Coeff Band ( 4 ) */
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
},
{
/* Coeff Band ( 5 ) */
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
},
{
/* Coeff Band ( 6 ) */
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
},
{
/* Coeff Band ( 7 ) */
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
},
},
};
#endif //__DEFAULTCOEFCOUNTS_H

Просмотреть файл

@ -26,8 +26,32 @@ typedef vp8_prob Prob;
#include "coefupdateprobs.h"
DECLARE_ALIGNED(16, cuchar, vp8_coef_bands[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
DECLARE_ALIGNED(16, cuchar, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) =
{
0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
DECLARE_ALIGNED(16, cuchar, vp8_coef_bands[16]) =
{ 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7};
DECLARE_ALIGNED(16, cuchar, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) =
{ 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0};
DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
{
0, 1, 4, 8,
@ -36,6 +60,14 @@ DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) =
7, 11, 14, 15,
};
DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) =
{
1, 2, 6, 7,
3, 5, 8, 13,
4, 9, 12, 14,
10, 11, 15, 16
};
DECLARE_ALIGNED(16, short, vp8_default_zig_zag_mask[16]);
const int vp8_mb_feature_data_bits[MB_LVL_MAX] = {7, 6};
@ -57,7 +89,7 @@ const vp8_tree_index vp8_coef_tree[ 22] = /* corresponding _CONTEXT_NODEs */
-DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */
};
struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens];
struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
/* Trees for extra bits. Probabilities are constant and
do not depend on previously encoded bits */
@ -106,23 +138,20 @@ static void init_bit_trees()
init_bit_tree(cat6, 11);
}
static vp8bc_index_t bcc1[1], bcc2[2], bcc3[3], bcc4[4], bcc5[5], bcc6[11];
vp8_extra_bit_struct vp8_extra_bits[12] =
{
{ 0, 0, 0, 0, 0},
{ 0, 0, 0, 0, 1},
{ 0, 0, 0, 0, 2},
{ 0, 0, 0, 0, 3},
{ 0, 0, 0, 0, 4},
{ cat1, Pcat1, bcc1, 1, 5},
{ cat2, Pcat2, bcc2, 2, 7},
{ cat3, Pcat3, bcc3, 3, 11},
{ cat4, Pcat4, bcc4, 4, 19},
{ cat5, Pcat5, bcc5, 5, 35},
{ cat6, Pcat6, bcc6, 11, 67},
{ 0, 0, 0, 0, 0}
{ 0, 0, 0, 0},
{ 0, 0, 0, 1},
{ 0, 0, 0, 2},
{ 0, 0, 0, 3},
{ 0, 0, 0, 4},
{ cat1, Pcat1, 1, 5},
{ cat2, Pcat2, 2, 7},
{ cat3, Pcat3, 3, 11},
{ cat4, Pcat4, 4, 19},
{ cat5, Pcat5, 5, 35},
{ cat6, Pcat6, 11, 67},
{ 0, 0, 0, 0}
};
#include "defaultcoefcounts.h"
@ -140,10 +169,12 @@ void vp8_default_coef_probs(VP8_COMMON *pc)
do
{
unsigned int branch_ct [vp8_coef_tokens-1] [2];
unsigned int branch_ct [ENTROPY_NODES] [2];
vp8_tree_probs_from_distribution(
vp8_coef_tokens, vp8_coef_encodings, vp8_coef_tree,
pc->fc.coef_probs [h][i][k], branch_ct, default_coef_counts [h][i][k],
MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
pc->fc.coef_probs[h][i][k],
branch_ct,
vp8_default_coef_counts[h][i][k],
256, 1);
}

Просмотреть файл

@ -24,25 +24,23 @@
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
#define vp8_coef_tokens 12
#define MAX_ENTROPY_TOKENS vp8_coef_tokens
#define MAX_ENTROPY_TOKENS 12
#define ENTROPY_NODES 11
extern const vp8_tree_index vp8_coef_tree[];
extern struct vp8_token_struct vp8_coef_encodings[vp8_coef_tokens];
extern struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS];
typedef struct
{
vp8_tree_p tree;
const vp8_prob *prob;
vp8bc_index_t *prob_bc;
int Len;
int base_val;
} vp8_extra_bit_struct;
@ -86,15 +84,16 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
# define PREV_COEF_CONTEXTS 3
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]);
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
extern const vp8_prob vp8_coef_update_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
struct VP8Common;
void vp8_default_coef_probs(struct VP8Common *);
extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]);
extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]);
extern short vp8_default_zig_zag_mask[16];
extern const int vp8_mb_feature_data_bits[MB_LVL_MAX];

Просмотреть файл

@ -33,11 +33,11 @@ typedef enum
SUBMVREF_LEFT_ABOVE_ZED
} sumvfref_t;
int vp8_mv_cont(const MV *l, const MV *a)
int vp8_mv_cont(const int_mv *l, const int_mv *a)
{
int lez = (l->row == 0 && l->col == 0);
int aez = (a->row == 0 && a->col == 0);
int lea = (l->row == a->row && l->col == a->col);
int lez = (l->as_int == 0);
int aez = (a->as_int == 0);
int lea = (l->as_int == a->as_int);
if (lea && lez)
return SUBMVREF_LEFT_ABOVE_ZED;

Просмотреть файл

@ -25,7 +25,7 @@ extern const int vp8_mbsplit_count [VP8_NUMMBSPLITS]; /* # of subsets */
extern const vp8_prob vp8_mbsplit_probs [VP8_NUMMBSPLITS-1];
extern int vp8_mv_cont(const MV *l, const MV *a);
extern int vp8_mv_cont(const int_mv *l, const int_mv *a);
#define SUBMVREF_COUNT 5
extern const vp8_prob vp8_sub_mv_ref_prob2 [SUBMVREF_COUNT][VP8_SUBMVREFS-1];

Просмотреть файл

@ -18,6 +18,8 @@ enum
{
mv_max = 1023, /* max absolute value of a MV component */
MVvals = (2 * mv_max) + 1, /* # possible values "" */
mvfp_max = 255, /* max absolute value of a full pixel MV component */
MVfpvals = (2 * mvfp_max) +1, /* # possible full pixel MV values */
mvlong_width = 10, /* Large MVs have 9 bit magnitudes */
mvnum_short = 8, /* magnitudes 0 through 7 */

Просмотреть файл

@ -13,10 +13,12 @@
#include "vpx_mem/vpx_mem.h"
static void extend_plane_borders
static void copy_and_extend_plane
(
unsigned char *s, /* source */
int sp, /* pitch */
int sp, /* source pitch */
unsigned char *d, /* destination */
int dp, /* destination pitch */
int h, /* height */
int w, /* width */
int et, /* extend top border */
@ -25,7 +27,6 @@ static void extend_plane_borders
int er /* extend right border */
)
{
int i;
unsigned char *src_ptr1, *src_ptr2;
unsigned char *dest_ptr1, *dest_ptr2;
@ -34,68 +35,73 @@ static void extend_plane_borders
/* copy the left and right most columns out */
src_ptr1 = s;
src_ptr2 = s + w - 1;
dest_ptr1 = s - el;
dest_ptr2 = s + w;
dest_ptr1 = d - el;
dest_ptr2 = d + w;
for (i = 0; i < h - 0 + 1; i++)
for (i = 0; i < h; i++)
{
/* Some linkers will complain if we call vpx_memset with el set to a
* constant 0.
*/
if (el)
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memset(dest_ptr1, src_ptr1[0], el);
vpx_memcpy(dest_ptr1 + el, src_ptr1, w);
vpx_memset(dest_ptr2, src_ptr2[0], er);
src_ptr1 += sp;
src_ptr2 += sp;
dest_ptr1 += sp;
dest_ptr2 += sp;
dest_ptr1 += dp;
dest_ptr2 += dp;
}
/* Now copy the top and bottom source lines into each line of the respective borders */
src_ptr1 = s - el;
src_ptr2 = s + sp * (h - 1) - el;
dest_ptr1 = s + sp * (-et) - el;
dest_ptr2 = s + sp * (h) - el;
linesize = el + er + w + 1;
/* Now copy the top and bottom lines into each line of the respective
* borders
*/
src_ptr1 = d - el;
src_ptr2 = d + dp * (h - 1) - el;
dest_ptr1 = d + dp * (-et) - el;
dest_ptr2 = d + dp * (h) - el;
linesize = el + er + w;
for (i = 0; i < (int)et; i++)
for (i = 0; i < et; i++)
{
vpx_memcpy(dest_ptr1, src_ptr1, linesize);
dest_ptr1 += sp;
dest_ptr1 += dp;
}
for (i = 0; i < (int)eb; i++)
for (i = 0; i < eb; i++)
{
vpx_memcpy(dest_ptr2, src_ptr2, linesize);
dest_ptr2 += sp;
dest_ptr2 += dp;
}
}
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst)
{
int er = 0xf & (16 - (width & 0xf));
int eb = 0xf & (16 - (height & 0xf));
int et = dst->border;
int el = dst->border;
int eb = dst->border + dst->y_height - src->y_height;
int er = dst->border + dst->y_width - src->y_width;
/* check for non multiples of 16 */
if (er != 0 || eb != 0)
{
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
copy_and_extend_plane(src->y_buffer, src->y_stride,
dst->y_buffer, dst->y_stride,
src->y_height, src->y_width,
et, el, eb, er);
/* adjust for uv */
height = (height + 1) >> 1;
width = (width + 1) >> 1;
er = 0x7 & (8 - (width & 0x7));
eb = 0x7 & (8 - (height & 0x7));
et = dst->border >> 1;
el = dst->border >> 1;
eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
er = (dst->border >> 1) + dst->uv_width - src->uv_width;
if (er || eb)
{
extend_plane_borders(ybf->u_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
extend_plane_borders(ybf->v_buffer, ybf->uv_stride, height, width, 0, 0, eb, er);
}
}
copy_and_extend_plane(src->u_buffer, src->uv_stride,
dst->u_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
copy_and_extend_plane(src->v_buffer, src->uv_stride,
dst->v_buffer, dst->uv_stride,
src->uv_height, src->uv_width,
et, el, eb, er);
}
/* note the extension is only for the last row, for intra prediction purpose */
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
{

Просмотреть файл

@ -14,8 +14,8 @@
#include "vpx_scale/yv12config.h"
void Extend(YV12_BUFFER_CONFIG *ybf);
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr);
void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height);
void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
YV12_BUFFER_CONFIG *dst);
#endif

Просмотреть файл

@ -10,13 +10,10 @@
#include <stdlib.h>
#include "filter.h"
#include "vpx_ports/mem.h"
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
static const int bilinear_filters[8][2] =
DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
{
{ 128, 0 },
{ 112, 16 },
@ -28,8 +25,7 @@ static const int bilinear_filters[8][2] =
{ 16, 112 }
};
static const short sub_pel_filters[8][6] =
DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) =
{
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
@ -40,12 +36,9 @@ static const short sub_pel_filters[8][6] =
{ 0, -6, 50, 93, -9, 0 },
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
{ 0, -1, 12, 123, -6, 0 },
};
void vp8_filter_block2d_first_pass
static void filter_block2d_first_pass
(
unsigned char *src_ptr,
int *output_ptr,
@ -89,7 +82,7 @@ void vp8_filter_block2d_first_pass
}
}
void vp8_filter_block2d_second_pass
static void filter_block2d_second_pass
(
int *src_ptr,
unsigned char *output_ptr,
@ -136,7 +129,7 @@ void vp8_filter_block2d_second_pass
}
void vp8_filter_block2d
static void filter_block2d
(
unsigned char *src_ptr,
unsigned char *output_ptr,
@ -146,42 +139,16 @@ void vp8_filter_block2d
const short *VFilter
)
{
int FData[9*4]; /* Temp data bufffer used in filtering */
int FData[9*4]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
}
void vp8_block_variation_c
(
unsigned char *src_ptr,
int src_pixels_per_line,
int *HVar,
int *VVar
)
{
int i, j;
unsigned char *Ptr = src_ptr;
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
*HVar += abs((int)Ptr[j] - (int)Ptr[j+1]);
*VVar += abs((int)Ptr[j] - (int)Ptr[j+src_pixels_per_line]);
}
Ptr += src_pixels_per_line;
}
}
void vp8_sixtap_predict_c
(
unsigned char *src_ptr,
@ -195,10 +162,10 @@ void vp8_sixtap_predict_c
const short *HFilter;
const short *VFilter;
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
}
void vp8_sixtap_predict8x8_c
(
@ -212,17 +179,17 @@ void vp8_sixtap_predict8x8_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data bufffer used in filtering */
int FData[13*16]; /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
}
@ -238,17 +205,17 @@ void vp8_sixtap_predict8x4_c
{
const short *HFilter;
const short *VFilter;
int FData[13*16]; /* Temp data bufffer used in filtering */
int FData[13*16]; /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
}
@ -264,17 +231,17 @@ void vp8_sixtap_predict16x16_c
{
const short *HFilter;
const short *VFilter;
int FData[21*24]; /* Temp data bufffer used in filtering */
int FData[21*24]; /* Temp data buffer used in filtering */
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
/* First filter 1-D horizontally... */
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
/* then filter verticaly... */
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
}
@ -283,57 +250,50 @@ void vp8_sixtap_predict16x16_c
*
* ROUTINE : filter_block2d_bil_first_pass
*
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_pixels_per_line : Stride of input block.
* UINT32 pixel_step : Offset between filter input samples (see notes).
* UINT32 output_height : Input block height.
* UINT32 output_width : Input block width.
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_stride : Stride of source block.
* UINT32 height : Block height.
* UINT32 width : Block width.
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
*
* OUTPUTS : INT32 *output_ptr : Pointer to filtered block.
* OUTPUTS : INT32 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
* either horizontal or vertical direction to produce the
* filtered output block. Used to implement first-pass
* of 2-D separable filter.
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
* in the horizontal direction to produce the filtered output
* block. Used to implement first-pass of 2-D separable filter.
*
* SPECIAL NOTES : Produces INT32 output to retain precision for next pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
* pixel_step defines whether the filter is applied
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
* It defines the offset required to move from one input
* to the next.
*
****************************************************************************/
void vp8_filter_block2d_bil_first_pass
static void filter_block2d_bil_first_pass
(
unsigned char *src_ptr,
unsigned short *output_ptr,
unsigned int src_pixels_per_line,
int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int *vp8_filter
unsigned char *src_ptr,
unsigned short *dst_ptr,
unsigned int src_stride,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
for (i = 0; i < output_height; i++)
for (i = 0; i < height; i++)
{
for (j = 0; j < output_width; j++)
for (j = 0; j < width; j++)
{
/* Apply bilinear filter */
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
dst_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[1] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_width;
src_ptr += src_stride - width;
dst_ptr += width;
}
}
@ -341,60 +301,51 @@ void vp8_filter_block2d_bil_first_pass
*
* ROUTINE : filter_block2d_bil_second_pass
*
* INPUTS : INT32 *src_ptr : Pointer to source block.
* UINT32 src_pixels_per_line : Stride of input block.
* UINT32 pixel_step : Offset between filter input samples (see notes).
* UINT32 output_height : Input block height.
* UINT32 output_width : Input block width.
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
* INPUTS : INT32 *src_ptr : Pointer to source block.
* UINT32 dst_pitch : Destination block pitch.
* UINT32 height : Block height.
* UINT32 width : Block width.
* INT32 *vp8_filter : Array of 2 bi-linear filter taps.
*
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in
* either horizontal or vertical direction to produce the
* filtered output block. Used to implement second-pass
* of 2-D separable filter.
* FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block
* in the vertical direction to produce the filtered output
* block. Used to implement second-pass of 2-D separable filter.
*
* SPECIAL NOTES : Requires 32-bit input as produced by filter_block2d_bil_first_pass.
* Two filter taps should sum to VP8_FILTER_WEIGHT.
* pixel_step defines whether the filter is applied
* horizontally (pixel_step=1) or vertically (pixel_step=stride).
* It defines the offset required to move from one input
* to the next.
*
****************************************************************************/
void vp8_filter_block2d_bil_second_pass
static void filter_block2d_bil_second_pass
(
unsigned short *src_ptr,
unsigned char *output_ptr,
int output_pitch,
unsigned int src_pixels_per_line,
unsigned int pixel_step,
unsigned int output_height,
unsigned int output_width,
const int *vp8_filter
unsigned char *dst_ptr,
int dst_pitch,
unsigned int height,
unsigned int width,
const short *vp8_filter
)
{
unsigned int i, j;
int Temp;
for (i = 0; i < output_height; i++)
for (i = 0; i < height; i++)
{
for (j = 0; j < output_width; j++)
for (j = 0; j < width; j++)
{
/* Apply filter */
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[pixel_step] * vp8_filter[1]) +
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
((int)src_ptr[width] * vp8_filter[1]) +
(VP8_FILTER_WEIGHT / 2);
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
src_ptr++;
}
/* Next row... */
src_ptr += src_pixels_per_line - output_width;
output_ptr += output_pitch;
dst_ptr += dst_pitch;
}
}
@ -404,11 +355,14 @@ void vp8_filter_block2d_bil_second_pass
* ROUTINE : filter_block2d_bil
*
* INPUTS : UINT8 *src_ptr : Pointer to source block.
* UINT32 src_pixels_per_line : Stride of input block.
* INT32 *HFilter : Array of 2 horizontal filter taps.
* INT32 *VFilter : Array of 2 vertical filter taps.
* UINT32 src_pitch : Stride of source block.
* UINT32 dst_pitch : Stride of destination block.
* INT32 *HFilter : Array of 2 horizontal filter taps.
* INT32 *VFilter : Array of 2 vertical filter taps.
* INT32 Width : Block width
* INT32 Height : Block height
*
* OUTPUTS : UINT16 *output_ptr : Pointer to filtered block.
* OUTPUTS : UINT16 *dst_ptr : Pointer to filtered block.
*
* RETURNS : void
*
@ -419,26 +373,26 @@ void vp8_filter_block2d_bil_second_pass
* SPECIAL NOTES : The largest block size can be handled here is 16x16
*
****************************************************************************/
void vp8_filter_block2d_bil
static void filter_block2d_bil
(
unsigned char *src_ptr,
unsigned char *output_ptr,
unsigned int src_pixels_per_line,
unsigned char *dst_ptr,
unsigned int src_pitch,
unsigned int dst_pitch,
const int *HFilter,
const int *VFilter,
const short *HFilter,
const short *VFilter,
int Width,
int Height
)
{
unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
unsigned short FData[17*16]; /* Temp data buffer used in filtering */
/* First filter 1-D horizontally... */
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width, HFilter);
/* then 1-D vertically... */
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width, VFilter);
}
@ -452,11 +406,11 @@ void vp8_bilinear_predict4x4_c
int dst_pitch
)
{
const int *HFilter;
const int *VFilter;
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
#if 0
{
int i;
@ -464,19 +418,19 @@ void vp8_bilinear_predict4x4_c
unsigned char temp2[16];
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
for (i = 0; i < 16; i++)
{
if (temp1[i] != temp2[i])
{
bilinear_predict4x4_mmx(src_ptr, src_pixels_per_line, xoffset, yoffset, temp1, 4);
vp8_filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
filter_block2d_bil(src_ptr, temp2, src_pixels_per_line, 4, HFilter, VFilter, 4, 4);
}
}
}
#endif
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
}
@ -490,13 +444,13 @@ void vp8_bilinear_predict8x8_c
int dst_pitch
)
{
const int *HFilter;
const int *VFilter;
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
}
@ -510,13 +464,13 @@ void vp8_bilinear_predict8x4_c
int dst_pitch
)
{
const int *HFilter;
const int *VFilter;
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
}
@ -530,11 +484,11 @@ void vp8_bilinear_predict16x16_c
int dst_pitch
)
{
const int *HFilter;
const int *VFilter;
const short *HFilter;
const short *VFilter;
HFilter = bilinear_filters[xoffset];
VFilter = bilinear_filters[yoffset];
HFilter = vp8_bilinear_filters[xoffset];
VFilter = vp8_bilinear_filters[yoffset];
vp8_filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
}

Просмотреть файл

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -9,14 +9,14 @@
*/
#ifndef DETOKENIZE_ARM_H
#define DETOKENIZE_ARM_H
#ifndef FILTER_H
#define FILTER_H
#if HAVE_ARMV6
#if CONFIG_ARM_ASM_DETOK
void vp8_init_detokenizer(VP8D_COMP *dx);
void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
#endif
#endif
#define BLOCK_HEIGHT_WIDTH 4
#define VP8_FILTER_WEIGHT 128
#define VP8_FILTER_SHIFT 7
#endif
extern const short vp8_bilinear_filters[8][2];
extern const short vp8_sub_pel_filters[8][6];
#endif //FILTER_H

Просмотреть файл

@ -11,54 +11,23 @@
#include "findnearmv.h"
#define FINDNEAR_SEARCH_SITES 3
const unsigned char vp8_mbsplit_offset[4][16] = {
{ 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}
};
/* Predict motion vectors using those from already-decoded nearby blocks.
Note that we only consider one 4x4 subblock from each candidate 16x16
macroblock. */
typedef union
{
unsigned int as_int;
MV as_mv;
} int_mv; /* facilitates rapid equality tests */
static void mv_bias(const MODE_INFO *x, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
MV xmv;
xmv = x->mbmi.mv.as_mv;
if (ref_frame_sign_bias[x->mbmi.ref_frame] != ref_frame_sign_bias[refframe])
{
xmv.row *= -1;
xmv.col *= -1;
}
mvp->as_mv = xmv;
}
void vp8_clamp_mv(MV *mv, const MACROBLOCKD *xd)
{
if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
}
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
MV *nearest,
MV *nearby,
MV *best_mv,
int_mv *nearest,
int_mv *nearby,
int_mv *best_mv,
int cnt[4],
int refframe,
int *ref_frame_sign_bias
@ -82,7 +51,7 @@ void vp8_find_near_mvs
if (above->mbmi.mv.as_int)
{
(++mv)->as_int = above->mbmi.mv.as_int;
mv_bias(above, refframe, mv, ref_frame_sign_bias);
mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias);
++cntx;
}
@ -97,7 +66,7 @@ void vp8_find_near_mvs
int_mv this_mv;
this_mv.as_int = left->mbmi.mv.as_int;
mv_bias(left, refframe, &this_mv, ref_frame_sign_bias);
mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
if (this_mv.as_int != mv->as_int)
{
@ -119,7 +88,7 @@ void vp8_find_near_mvs
int_mv this_mv;
this_mv.as_int = aboveleft->mbmi.mv.as_int;
mv_bias(aboveleft, refframe, &this_mv, ref_frame_sign_bias);
mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias);
if (this_mv.as_int != mv->as_int)
{
@ -162,13 +131,14 @@ void vp8_find_near_mvs
near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST];
/* Set up return values */
*best_mv = near_mvs[0].as_mv;
*nearest = near_mvs[CNT_NEAREST].as_mv;
*nearby = near_mvs[CNT_NEAR].as_mv;
best_mv->as_int = near_mvs[0].as_int;
nearest->as_int = near_mvs[CNT_NEAREST].as_int;
nearby->as_int = near_mvs[CNT_NEAR].as_int;
vp8_clamp_mv(nearest, xd);
vp8_clamp_mv(nearby, xd);
vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
//TODO: move clamp outside findnearmv
vp8_clamp_mv2(nearest, xd);
vp8_clamp_mv2(nearby, xd);
vp8_clamp_mv2(best_mv, xd);
}
vp8_prob *vp8_mv_ref_probs(
@ -183,26 +153,3 @@ vp8_prob *vp8_mv_ref_probs(
return p;
}
const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
b += 4;
}
return cur_mb->bmi + b - 1;
}
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
b += 16;
}
return cur_mb->bmi + b - 4;
}

Просмотреть файл

@ -17,11 +17,65 @@
#include "modecont.h"
#include "treecoder.h"
static void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias)
{
MV xmv;
xmv = mvp->as_mv;
if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe])
{
xmv.row *= -1;
xmv.col *= -1;
}
mvp->as_mv = xmv;
}
#define LEFT_TOP_MARGIN (16 << 3)
#define RIGHT_BOTTOM_MARGIN (16 << 3)
static void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd)
{
if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
}
static void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge,
int mb_to_top_edge, int mb_to_bottom_edge)
{
mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ?
mb_to_left_edge : mv->as_mv.col;
mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ?
mb_to_right_edge : mv->as_mv.col;
mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ?
mb_to_top_edge : mv->as_mv.row;
mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ?
mb_to_bottom_edge : mv->as_mv.row;
}
static unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge,
int mb_to_right_edge, int mb_to_top_edge,
int mb_to_bottom_edge)
{
unsigned int need_to_clamp;
need_to_clamp = (mv->as_mv.col < mb_to_left_edge) ? 1 : 0;
need_to_clamp |= (mv->as_mv.col > mb_to_right_edge) ? 1 : 0;
need_to_clamp |= (mv->as_mv.row < mb_to_top_edge) ? 1 : 0;
need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge) ? 1 : 0;
return need_to_clamp;
}
void vp8_find_near_mvs
(
MACROBLOCKD *xd,
const MODE_INFO *here,
MV *nearest, MV *nearby, MV *best,
int_mv *nearest, int_mv *nearby, int_mv *best,
int near_mv_ref_cts[4],
int refframe,
int *ref_frame_sign_bias
@ -31,12 +85,89 @@ vp8_prob *vp8_mv_ref_probs(
vp8_prob p[VP8_MVREFS-1], const int near_mv_ref_ct[4]
);
const B_MODE_INFO *vp8_left_bmi(const MODE_INFO *cur_mb, int b);
extern const unsigned char vp8_mbsplit_offset[4][16];
const B_MODE_INFO *vp8_above_bmi(const MODE_INFO *cur_mb, int b, int mi_stride);
#define LEFT_TOP_MARGIN (16 << 3)
#define RIGHT_BOTTOM_MARGIN (16 << 3)
static int left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 4;
}
return (cur_mb->bmi + b - 1)->mv.as_int;
}
static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
if(cur_mb->mbmi.mode != SPLITMV)
return cur_mb->mbmi.mv.as_int;
b += 16;
}
return (cur_mb->bmi + b - 4)->mv.as_int;
}
static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
/* On L edge, get from MB to left of us */
--cur_mb;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 3)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 1)->as_mode;
}
static B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride)
{
if (!(b >> 2))
{
/* On top edge, get from MB above us */
cur_mb -= mi_stride;
switch (cur_mb->mbmi.mode)
{
case B_PRED:
return (cur_mb->bmi + b + 12)->as_mode;
case DC_PRED:
return B_DC_PRED;
case V_PRED:
return B_VE_PRED;
case H_PRED:
return B_HE_PRED;
case TM_PRED:
return B_TM_PRED;
default:
return B_DC_PRED;
}
}
return (cur_mb->bmi + b - 4)->as_mode;
}
#endif

Просмотреть файл

@ -10,16 +10,61 @@
#include "vpx_ports/config.h"
#include "g_common.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
#include "idct.h"
#include "onyxc_int.h"
#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
#include "vp8/common/idct.h"
#include "vp8/common/onyxc_int.h"
#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
#endif
#endif
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
#if CONFIG_MULTITHREAD
static int get_cpu_count()
{
int core_count = 16;
#if HAVE_UNISTD_H
#if defined(_SC_NPROCESSORS_ONLN)
core_count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
core_count = sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
{
PGNSI pGNSI;
SYSTEM_INFO sysinfo;
/* Call GetNativeSystemInfo if supported or
* GetSystemInfo otherwise. */
pGNSI = (PGNSI) GetProcAddress(
GetModuleHandle(TEXT("kernel32.dll")), "GetNativeSystemInfo");
if (pGNSI != NULL)
pGNSI(&sysinfo);
else
GetSystemInfo(&sysinfo);
core_count = sysinfo.dwNumberOfProcessors;
}
#else
/* other platforms */
#endif
return core_count > 0 ? core_count : 1;
}
#endif
void vp8_machine_specific_config(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
@ -43,6 +88,12 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_build_intra_predictors_mby;
rtcd->recon.build_intra_predictors_mby_s =
vp8_build_intra_predictors_mby_s;
rtcd->recon.build_intra_predictors_mbuv =
vp8_build_intra_predictors_mbuv;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s;
rtcd->recon.intra4x4_predict =
vp8_intra4x4_predict;
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
@ -57,17 +108,19 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_c;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_c;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c;
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
rtcd->postproc.down = vp8_mbpost_proc_down_c;
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
rtcd->postproc.blend_mb = vp8_blend_mb_c;
#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_INTERNAL_STATS)
rtcd->postproc.down = vp8_mbpost_proc_down_c;
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c;
rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c;
rtcd->postproc.blend_b = vp8_blend_b_c;
#endif
#endif
@ -80,4 +133,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
vp8_arch_arm_common_init(ctx);
#endif
#if CONFIG_MULTITHREAD
ctx->processor_core_count = get_cpu_count();
#endif /* CONFIG_MULTITHREAD */
}

Просмотреть файл

@ -9,162 +9,149 @@
*/
#include "vpx_ports/config.h"
#include "vpx_config.h"
#include "loopfilter.h"
#include "onyxc_int.h"
#include "vpx_mem/vpx_mem.h"
typedef unsigned char uc;
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}
void vp8_init_loop_filter(VP8_COMMON *cm)
static void lf_init_lut(loop_filter_info_n *lfi)
{
loop_filter_info *lfi = cm->lf_info;
LOOPFILTERTYPE lft = cm->filter_type;
int sharpness_lvl = cm->sharpness_level;
int frame_type = cm->frame_type;
int i, j;
int filt_lvl;
int block_inside_limit = 0;
int HEVThresh;
const int yhedge_boost = 2;
const int uvhedge_boost = 2;
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; filt_lvl++)
{
int filt_lvl = i;
if (frame_type == KEY_FRAME)
if (filt_lvl >= 40)
{
if (filt_lvl >= 40)
HEVThresh = 2;
else if (filt_lvl >= 15)
HEVThresh = 1;
else
HEVThresh = 0;
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
}
else if (filt_lvl >= 20)
{
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
}
else if (filt_lvl >= 15)
{
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
}
else
{
if (filt_lvl >= 40)
HEVThresh = 3;
else if (filt_lvl >= 20)
HEVThresh = 2;
else if (filt_lvl >= 15)
HEVThresh = 1;
else
HEVThresh = 0;
lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
}
}
lfi->mode_lf_lut[DC_PRED] = 1;
lfi->mode_lf_lut[V_PRED] = 1;
lfi->mode_lf_lut[H_PRED] = 1;
lfi->mode_lf_lut[TM_PRED] = 1;
lfi->mode_lf_lut[B_PRED] = 0;
lfi->mode_lf_lut[ZEROMV] = 1;
lfi->mode_lf_lut[NEARESTMV] = 2;
lfi->mode_lf_lut[NEARMV] = 2;
lfi->mode_lf_lut[NEWMV] = 2;
lfi->mode_lf_lut[SPLITMV] = 3;
}
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl)
{
int i;
/* For each possible value for the loop filter fill out limits */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
{
int filt_lvl = i;
int block_inside_limit = 0;
/* Set loop filter paramaeters that control sharpness. */
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
@ -179,181 +166,143 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
if (block_inside_limit < 1)
block_inside_limit = 1;
for (j = 0; j < 16; j++)
{
lfi[i].lim[j] = block_inside_limit;
lfi[i].mbflim[j] = filt_lvl + yhedge_boost;
lfi[i].mbthr[j] = HEVThresh;
lfi[i].flim[j] = filt_lvl;
lfi[i].thr[j] = HEVThresh;
lfi[i].uvlim[j] = block_inside_limit;
lfi[i].uvmbflim[j] = filt_lvl + uvhedge_boost;
lfi[i].uvmbthr[j] = HEVThresh;
lfi[i].uvflim[j] = filt_lvl;
lfi[i].uvthr[j] = HEVThresh;
}
}
/* Set up the function pointers depending on the type of loop filtering selected */
if (lft == NORMAL_LOOPFILTER)
{
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v);
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h);
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h);
}
else
{
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v);
cm->lf_bv = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v);
cm->lf_mbh = LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h);
cm->lf_bh = LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h);
vpx_memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH);
vpx_memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit),
SIMD_WIDTH);
vpx_memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit),
SIMD_WIDTH);
}
}
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
* each frame. Check last_frame_type to skip the function most of times.
*/
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
void vp8_loop_filter_init(VP8_COMMON *cm)
{
int HEVThresh;
int i, j;
loop_filter_info_n *lfi = &cm->lf_info;
int i;
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
for (i = 0; i <= MAX_LOOP_FILTER; i++)
/* init limits for given sharpness*/
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
/* init LUT for lvl and hev thr picking */
lf_init_lut(lfi);
/* init hev threshold const vectors */
for(i = 0; i < 4 ; i++)
{
int filt_lvl = i;
if (frame_type == KEY_FRAME)
{
if (filt_lvl >= 40)
HEVThresh = 2;
else if (filt_lvl >= 15)
HEVThresh = 1;
else
HEVThresh = 0;
}
else
{
if (filt_lvl >= 40)
HEVThresh = 3;
else if (filt_lvl >= 20)
HEVThresh = 2;
else if (filt_lvl >= 15)
HEVThresh = 1;
else
HEVThresh = 0;
}
for (j = 0; j < 16; j++)
{
/*lfi[i].lim[j] = block_inside_limit;
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
lfi[i].mbthr[j] = HEVThresh;
/*lfi[i].flim[j] = filt_lvl;*/
lfi[i].thr[j] = HEVThresh;
/*lfi[i].uvlim[j] = block_inside_limit;
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
lfi[i].uvmbthr[j] = HEVThresh;
/*lfi[i].uvflim[j] = filt_lvl;*/
lfi[i].uvthr[j] = HEVThresh;
}
vpx_memset(lfi->hev_thr[i], i, SIMD_WIDTH);
}
}
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
void vp8_loop_filter_frame_init(VP8_COMMON *cm,
MACROBLOCKD *mbd,
int default_filt_lvl)
{
MB_MODE_INFO *mbmi = &mbd->mode_info_context->mbmi;
int seg, /* segment number */
ref, /* index in ref_lf_deltas */
mode; /* index in mode_lf_deltas */
if (mbd->mode_ref_lf_delta_enabled)
loop_filter_info_n *lfi = &cm->lf_info;
/* update limits if sharpness has changed */
if(cm->last_sharpness_level != cm->sharpness_level)
{
vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level);
cm->last_sharpness_level = cm->sharpness_level;
}
for(seg = 0; seg < MAX_MB_SEGMENTS; seg++)
{
int lvl_seg = default_filt_lvl;
int lvl_ref, lvl_mode;
/* Note the baseline filter values for each segment */
if (mbd->segmentation_enabled)
{
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
{
lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
}
else /* Delta Value */
{
lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg];
lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63: lvl_seg) : 0;
}
}
if (!mbd->mode_ref_lf_delta_enabled)
{
/* we could get rid of this if we assume that deltas are set to
* zero when not in use; encoder always uses deltas
*/
vpx_memset(lfi->lvl[seg][0], lvl_seg, 4 * 4 );
continue;
}
lvl_ref = lvl_seg;
/* INTRA_FRAME */
ref = INTRA_FRAME;
/* Apply delta for reference frame */
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
lvl_ref += mbd->ref_lf_deltas[ref];
/* Apply delta for mode */
if (mbmi->ref_frame == INTRA_FRAME)
/* Apply delta for Intra modes */
mode = 0; /* B_PRED */
/* Only the split mode BPRED has a further special case */
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
lfi->lvl[seg][ref][mode] = lvl_mode;
mode = 1; /* all the rest of Intra modes */
lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; /* clamp */
lfi->lvl[seg][ref][mode] = lvl_mode;
/* LAST, GOLDEN, ALT */
for(ref = 1; ref < MAX_REF_FRAMES; ref++)
{
/* Only the split mode BPRED has a further special case */
if (mbmi->mode == B_PRED)
*filter_level += mbd->mode_lf_deltas[0];
int lvl_ref = lvl_seg;
/* Apply delta for reference frame */
lvl_ref += mbd->ref_lf_deltas[ref];
/* Apply delta for Inter modes */
for (mode = 1; mode < 4; mode++)
{
lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode];
lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; /* clamp */
lfi->lvl[seg][ref][mode] = lvl_mode;
}
}
else
{
/* Zero motion mode */
if (mbmi->mode == ZEROMV)
*filter_level += mbd->mode_lf_deltas[1];
/* Split MB motion mode */
else if (mbmi->mode == SPLITMV)
*filter_level += mbd->mode_lf_deltas[3];
/* All other inter motion modes (Nearest, Near, New) */
else
*filter_level += mbd->mode_lf_deltas[2];
}
/* Range check */
if (*filter_level > MAX_LOOP_FILTER)
*filter_level = MAX_LOOP_FILTER;
else if (*filter_level < 0)
*filter_level = 0;
}
}
void vp8_loop_filter_frame
(
VP8_COMMON *cm,
MACROBLOCKD *mbd,
int default_filt_lvl
MACROBLOCKD *mbd
)
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
loop_filter_info *lfi = cm->lf_info;
loop_filter_info_n *lfi_n = &cm->lf_info;
loop_filter_info lfi;
FRAME_TYPE frame_type = cm->frame_type;
int mb_row;
int mb_col;
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
int i;
unsigned char *y_ptr, *u_ptr, *v_ptr;
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
/* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
else
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
baseline_filter_level[i] = default_filt_lvl;
}
/* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi;
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
vp8_loop_filter_frame_init(cm, mbd, cm->filter_level);
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
@ -365,101 +314,108 @@ void vp8_loop_filter_frame
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
filter_level = baseline_filter_level[Segment];
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
/* Distance of Mb to the various image edges.
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
* Apply any context driven MB level adjustment
*/
vp8_adjust_mb_lf_value(mbd, &filter_level);
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
if (mb_col > 0)
cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
if (cm->filter_type == NORMAL_LOOPFILTER)
{
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
lfi.blim = lfi_n->blim[filter_level];
lfi.lim = lfi_n->lim[filter_level];
lfi.hev_thr = lfi_n->hev_thr[hev_index];
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
/* don't apply across umv border */
if (mb_row > 0)
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi);
}
else
{
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
/* don't apply across umv border */
if (mb_row > 0)
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
}
}
y_ptr += 16;
u_ptr += 8;
v_ptr += 8;
mbd->mode_info_context++; /* step to next MB */
mode_info_context++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
u_ptr += post->uv_stride * 8 - post->uv_width;
v_ptr += post->uv_stride * 8 - post->uv_width;
mbd->mode_info_context++; /* Skip border mb */
mode_info_context++; /* Skip border mb */
}
}
void vp8_loop_filter_frame_yonly
(
VP8_COMMON *cm,
MACROBLOCKD *mbd,
int default_filt_lvl,
int sharpness_lvl
int default_filt_lvl
)
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
int i;
unsigned char *y_ptr;
int mb_row;
int mb_col;
loop_filter_info *lfi = cm->lf_info;
int baseline_filter_level[MAX_MB_SEGMENTS];
loop_filter_info_n *lfi_n = &cm->lf_info;
loop_filter_info lfi;
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
/* Point at base of Mb MODE_INFO list */
const MODE_INFO *mode_info_context = cm->mi;
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
/* Note the baseline filter values for each segment */
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
/* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
}
}
}
else
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
baseline_filter_level[i] = default_filt_lvl;
}
#if 0
if(default_filt_lvl == 0) /* no filter applied */
return;
#endif
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
vp8_loop_filter_frame_init( cm, mbd, default_filt_lvl);
/* Set up the buffer pointers */
y_ptr = post->y_buffer;
@ -469,72 +425,106 @@ void vp8_loop_filter_frame_yonly
{
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
filter_level = baseline_filter_level[Segment];
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
/* Apply any context driven MB level adjustment */
vp8_adjust_mb_lf_value(mbd, &filter_level);
const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode];
const int seg = mode_info_context->mbmi.segment_id;
const int ref_frame = mode_info_context->mbmi.ref_frame;
filter_level = lfi_n->lvl[seg][ref_frame][mode_index];
if (filter_level)
{
if (mb_col > 0)
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (cm->filter_type == NORMAL_LOOPFILTER)
{
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
lfi.blim = lfi_n->blim[filter_level];
lfi.lim = lfi_n->lim[filter_level];
lfi.hev_thr = lfi_n->hev_thr[hev_index];
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
/* don't apply across umv border */
if (mb_row > 0)
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
/* don't apply across umv border */
if (mb_row > 0)
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
}
else
{
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
/* don't apply across umv border */
if (mb_row > 0)
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
}
}
y_ptr += 16;
mbd->mode_info_context ++; /* step to next MB */
mode_info_context ++; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
mbd->mode_info_context ++; /* Skip border mb */
mode_info_context ++; /* Skip border mb */
}
}
void vp8_loop_filter_partial_frame
(
VP8_COMMON *cm,
MACROBLOCKD *mbd,
int default_filt_lvl,
int sharpness_lvl,
int Fraction
int default_filt_lvl
)
{
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
int i;
unsigned char *y_ptr;
int mb_row;
int mb_col;
/*int mb_rows = post->y_height >> 4;*/
int mb_cols = post->y_width >> 4;
int linestocopy;
int linestocopy, i;
loop_filter_info_n *lfi_n = &cm->lf_info;
loop_filter_info lfi;
loop_filter_info *lfi = cm->lf_info;
int baseline_filter_level[MAX_MB_SEGMENTS];
int filter_level;
int alt_flt_enabled = mbd->segmentation_enabled;
FRAME_TYPE frame_type = cm->frame_type;
(void) sharpness_lvl;
const MODE_INFO *mode_info_context;
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
int lvl_seg[MAX_MB_SEGMENTS];
linestocopy = (post->y_height >> (4 + Fraction));
mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);
/* 3 is a magic number. 4 is probably magic too */
linestocopy = (post->y_height >> (4 + 3));
if (linestocopy < 1)
linestocopy = 1;
@ -542,32 +532,27 @@ void vp8_loop_filter_partial_frame
linestocopy <<= 4;
/* Note the baseline filter values for each segment */
/* See vp8_loop_filter_frame_init. Rather than call that for each change
* to default_filt_lvl, copy the relevant calculation here.
*/
if (alt_flt_enabled)
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
{
/* Abs value */
{ /* Abs value */
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
{
lvl_seg[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
}
/* Delta Value */
else
{
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
lvl_seg[i] = default_filt_lvl
+ mbd->segment_feature_data[MB_LVL_ALT_LF][i];
lvl_seg[i] = (lvl_seg[i] > 0) ?
((lvl_seg[i] > 63) ? 63: lvl_seg[i]) : 0;
}
}
}
else
{
for (i = 0; i < MAX_MB_SEGMENTS; i++)
baseline_filter_level[i] = default_filt_lvl;
}
/* Initialize the loop filter for this frame. */
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
vp8_init_loop_filter(cm);
else if (frame_type != cm->last_frame_type)
vp8_frame_init_loop_filter(lfi, frame_type);
/* Set up the buffer pointers */
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
@ -577,28 +562,64 @@ void vp8_loop_filter_partial_frame
{
for (mb_col = 0; mb_col < mb_cols; mb_col++)
{
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
filter_level = baseline_filter_level[Segment];
int skip_lf = (mode_info_context->mbmi.mode != B_PRED &&
mode_info_context->mbmi.mode != SPLITMV &&
mode_info_context->mbmi.mb_skip_coeff);
if (alt_flt_enabled)
filter_level = lvl_seg[mode_info_context->mbmi.segment_id];
else
filter_level = default_filt_lvl;
if (filter_level)
{
if (mb_col > 0)
cm->lf_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (cm->filter_type == NORMAL_LOOPFILTER)
{
const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level];
lfi.mblim = lfi_n->mblim[filter_level];
lfi.blim = lfi_n->blim[filter_level];
lfi.lim = lfi_n->lim[filter_level];
lfi.hev_thr = lfi_n->hev_thr[hev_index];
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_v)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
if (mbd->mode_info_context->mbmi.dc_diff > 0)
cm->lf_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_h)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, normal_b_h)
(y_ptr, 0, 0, post->y_stride, 0, &lfi);
}
else
{
if (mb_col > 0)
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_v)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_v)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
LF_INVOKE(&cm->rtcd.loopfilter, simple_mb_h)
(y_ptr, post->y_stride, lfi_n->mblim[filter_level]);
if (!skip_lf)
LF_INVOKE(&cm->rtcd.loopfilter, simple_b_h)
(y_ptr, post->y_stride, lfi_n->blim[filter_level]);
}
}
y_ptr += 16;
mbd->mode_info_context += 1; /* step to next MB */
mode_info_context += 1; /* step to next MB */
}
y_ptr += post->y_stride * 16 - post->y_width;
mbd->mode_info_context += 1; /* Skip border mb */
mode_info_context += 1; /* Skip border mb */
}
}

Просмотреть файл

@ -13,6 +13,7 @@
#define loopfilter_h
#include "vpx_ports/mem.h"
#include "vpx_config.h"
#define MAX_LOOP_FILTER 63
@ -22,32 +23,45 @@ typedef enum
SIMPLE_LOOPFILTER = 1
} LOOPFILTERTYPE;
/* FRK
* Need to align this structure so when it is declared and
#if ARCH_ARM
#define SIMD_WIDTH 1
#else
#define SIMD_WIDTH 16
#endif
/* Need to align this structure so when it is declared and
* passed it can be loaded into vector registers.
*/
typedef struct
{
DECLARE_ALIGNED(16, signed char, lim[16]);
DECLARE_ALIGNED(16, signed char, flim[16]);
DECLARE_ALIGNED(16, signed char, thr[16]);
DECLARE_ALIGNED(16, signed char, mbflim[16]);
DECLARE_ALIGNED(16, signed char, mbthr[16]);
DECLARE_ALIGNED(16, signed char, uvlim[16]);
DECLARE_ALIGNED(16, signed char, uvflim[16]);
DECLARE_ALIGNED(16, signed char, uvthr[16]);
DECLARE_ALIGNED(16, signed char, uvmbflim[16]);
DECLARE_ALIGNED(16, signed char, uvmbthr[16]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
unsigned char lvl[4][4][4];
unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
unsigned char mode_lf_lut[10];
} loop_filter_info_n;
typedef struct
{
const unsigned char * mblim;
const unsigned char * blim;
const unsigned char * lim;
const unsigned char * hev_thr;
} loop_filter_info;
#define prototype_loopfilter(sym) \
void sym(unsigned char *src, int pitch, const signed char *flimit,\
const signed char *limit, const signed char *thresh, int count)
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
const unsigned char *limit, const unsigned char *thresh, int count)
#define prototype_loopfilter_block(sym) \
void sym(unsigned char *y, unsigned char *u, unsigned char *v,\
int ystride, int uv_stride, loop_filter_info *lfi, int simpler)
void sym(unsigned char *y, unsigned char *u, unsigned char *v, \
int ystride, int uv_stride, loop_filter_info *lfi)
#define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
#if ARCH_X86 || ARCH_X86_64
#include "x86/loopfilter_x86.h"
@ -77,38 +91,39 @@ extern prototype_loopfilter_block(vp8_lf_normal_mb_h);
#endif
extern prototype_loopfilter_block(vp8_lf_normal_b_h);
#ifndef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_c
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_c
#endif
extern prototype_loopfilter_block(vp8_lf_simple_mb_v);
extern prototype_simple_loopfilter(vp8_lf_simple_mb_v);
#ifndef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_c
#endif
extern prototype_loopfilter_block(vp8_lf_simple_b_v);
extern prototype_simple_loopfilter(vp8_lf_simple_b_v);
#ifndef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_c
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_c
#endif
extern prototype_loopfilter_block(vp8_lf_simple_mb_h);
extern prototype_simple_loopfilter(vp8_lf_simple_mb_h);
#ifndef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_c
#endif
extern prototype_loopfilter_block(vp8_lf_simple_b_h);
extern prototype_simple_loopfilter(vp8_lf_simple_b_h);
typedef prototype_loopfilter_block((*vp8_lf_block_fn_t));
typedef prototype_simple_loopfilter((*vp8_slf_block_fn_t));
typedef struct
{
vp8_lf_block_fn_t normal_mb_v;
vp8_lf_block_fn_t normal_b_v;
vp8_lf_block_fn_t normal_mb_h;
vp8_lf_block_fn_t normal_b_h;
vp8_lf_block_fn_t simple_mb_v;
vp8_lf_block_fn_t simple_b_v;
vp8_lf_block_fn_t simple_mb_h;
vp8_lf_block_fn_t simple_b_h;
vp8_slf_block_fn_t simple_mb_v;
vp8_slf_block_fn_t simple_b_v;
vp8_slf_block_fn_t simple_mb_h;
vp8_slf_block_fn_t simple_b_h;
} vp8_loopfilter_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@ -121,10 +136,33 @@ typedef void loop_filter_uvfunction
(
unsigned char *u, /* source pointer */
int p, /* pitch */
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
unsigned char *v
);
/* assorted loopfilter functions which get used elsewhere */
struct VP8Common;
struct MacroBlockD;
void vp8_loop_filter_init(struct VP8Common *cm);
void vp8_loop_filter_frame_init(struct VP8Common *cm,
struct MacroBlockD *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame(struct VP8Common *cm, struct MacroBlockD *mbd);
void vp8_loop_filter_partial_frame(struct VP8Common *cm,
struct MacroBlockD *mbd,
int default_filt_lvl);
void vp8_loop_filter_frame_yonly(struct VP8Common *cm,
struct MacroBlockD *mbd,
int default_filt_lvl);
void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
int sharpness_lvl);
#endif

Просмотреть файл

@ -28,8 +28,9 @@ static __inline signed char vp8_signed_char_clamp(int t)
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
static __inline signed char vp8_filter_mask(uc limit, uc blimit,
uc p3, uc p2, uc p1, uc p0,
uc q0, uc q1, uc q2, uc q3)
{
signed char mask = 0;
mask |= (abs(p3 - p2) > limit) * -1;
@ -38,13 +39,13 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
mask |= (abs(q1 - q0) > limit) * -1;
mask |= (abs(q2 - q1) > limit) * -1;
mask |= (abs(q3 - q2) > limit) * -1;
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
mask = ~mask;
return mask;
}
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
static __inline signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1)
{
signed char hev = 0;
hev |= (abs(p1 - p0) > thresh) * -1;
@ -52,7 +53,8 @@ static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0,
return hev;
}
static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
static __inline void vp8_filter(signed char mask, uc hev, uc *op1,
uc *op0, uc *oq0, uc *oq1)
{
signed char ps0, qs0;
@ -102,9 +104,9 @@ void vp8_loop_filter_horizontal_edge_c
(
unsigned char *s,
int p, /* pitch */
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
@ -117,11 +119,11 @@ void vp8_loop_filter_horizontal_edge_c
*/
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p);
@ -134,9 +136,9 @@ void vp8_loop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
@ -149,10 +151,10 @@ void vp8_loop_filter_vertical_edge_c
*/
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_filter(mask, hev, s - 2, s - 1, s, s + 1);
@ -161,7 +163,7 @@ void vp8_loop_filter_vertical_edge_c
while (++i < count * 8);
}
static __inline void vp8_mbfilter(signed char mask, signed char hev,
static __inline void vp8_mbfilter(signed char mask, uc hev,
uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
{
signed char s, u;
@ -220,9 +222,9 @@ void vp8_mbloop_filter_horizontal_edge_c
(
unsigned char *s,
int p,
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
@ -236,11 +238,11 @@ void vp8_mbloop_filter_horizontal_edge_c
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4*p], s[-3*p], s[-2*p], s[-1*p],
s[0*p], s[1*p], s[2*p], s[3*p]);
hev = vp8_hevmask(thresh[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
hev = vp8_hevmask(thresh[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p);
@ -255,9 +257,9 @@ void vp8_mbloop_filter_vertical_edge_c
(
unsigned char *s,
int p,
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
const unsigned char *blimit,
const unsigned char *limit,
const unsigned char *thresh,
int count
)
{
@ -268,10 +270,10 @@ void vp8_mbloop_filter_vertical_edge_c
do
{
mask = vp8_filter_mask(limit[i], flimit[i],
mask = vp8_filter_mask(limit[0], blimit[0],
s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]);
hev = vp8_hevmask(thresh[i], s[-2], s[-1], s[0], s[1]);
hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]);
vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2);
@ -282,13 +284,13 @@ void vp8_mbloop_filter_vertical_edge_c
}
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
static __inline signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1)
{
/* Why does this cause problems for win32?
* error C2143: syntax error : missing ';' before 'type'
* (void) limit;
*/
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1;
return mask;
}
@ -321,47 +323,37 @@ void vp8_loop_filter_simple_horizontal_edge_c
(
unsigned char *s,
int p,
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
int count
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
(void) thresh;
do
{
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
mask = vp8_simple_filter_mask(blimit[0], s[-2*p], s[-1*p], s[0*p], s[1*p]);
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
++s;
}
while (++i < count * 8);
while (++i < 16);
}
void vp8_loop_filter_simple_vertical_edge_c
(
unsigned char *s,
int p,
const signed char *flimit,
const signed char *limit,
const signed char *thresh,
int count
const unsigned char *blimit
)
{
signed char mask = 0;
int i = 0;
(void) thresh;
do
{
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
mask = vp8_simple_filter_mask(blimit[0], s[-2], s[-1], s[0], s[1]);
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
s += p;
}
while (++i < count * 8);
while (++i < 16);
}

Просмотреть файл

@ -17,7 +17,7 @@ typedef enum
DEST = 1
} BLOCKSET;
void vp8_setup_block
static void setup_block
(
BLOCKD *b,
int mv_stride,
@ -43,7 +43,8 @@ void vp8_setup_block
}
void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
static void setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
{
int block;
@ -64,16 +65,16 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
for (block = 0; block < 16; block++) /* y blocks */
{
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
}
for (block = 16; block < 20; block++) /* U and V blocks */
{
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
vp8_setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
setup_block(&x->block[block+4], x->dst.uv_stride, v, x->dst.uv_stride,
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
}
}
@ -124,6 +125,6 @@ void vp8_build_block_doffsets(MACROBLOCKD *x)
{
/* handle the destination pitch features */
vp8_setup_macroblock(x, DEST);
vp8_setup_macroblock(x, PRED);
setup_macroblock(x, DEST);
setup_macroblock(x, PRED);
}

Просмотреть файл

@ -11,6 +11,7 @@
#ifndef __INC_MV_H
#define __INC_MV_H
#include "vpx/vpx_integer.h"
typedef struct
{
@ -18,4 +19,10 @@ typedef struct
short col;
} MV;
typedef union
{
uint32_t as_int;
MV as_mv;
} int_mv; /* facilitates faster equality tests and copies */
#endif

Просмотреть файл

@ -18,6 +18,7 @@ extern "C"
#endif
#include "vpx/internal/vpx_codec_internal.h"
#include "vpx/vp8cx.h"
#include "vpx_scale/yv12config.h"
#include "type_aliases.h"
#include "ppflags.h"
@ -45,7 +46,8 @@ extern "C"
typedef enum
{
USAGE_STREAM_FROM_SERVER = 0x0,
USAGE_LOCAL_FILE_PLAYBACK = 0x1
USAGE_LOCAL_FILE_PLAYBACK = 0x1,
USAGE_CONSTRAINED_QUALITY = 0x2
} END_USAGE;
@ -107,6 +109,7 @@ extern "C"
int noise_sensitivity; // parameter used for applying pre processing blur: recommendation 0
int Sharpness; // parameter used for sharpening output: recommendation 0:
int cpu_used;
unsigned int rc_max_intra_bitrate_pct;
// mode ->
//(0)=Realtime/Live Encoding. This mode is optimized for realtim encoding (for example, capturing
@ -137,8 +140,9 @@ extern "C"
int end_usage; // vbr or cbr
// shoot to keep buffer full at all times by undershooting a bit 95 recommended
// buffer targeting aggressiveness
int under_shoot_pct;
int over_shoot_pct;
// buffering parameters
int starting_buffer_level; // in seconds
@ -149,6 +153,7 @@ extern "C"
int fixed_q;
int worst_allowed_q;
int best_allowed_q;
int cq_level;
// allow internal resizing ( currently disabled in the build !!!!!)
int allow_spatial_resampling;
@ -179,16 +184,20 @@ extern "C"
int token_partitions; // how many token partitions to create for multi core decoding
int encode_breakout; // early breakout encode threshold : for video conf recommend 800
int error_resilient_mode; // if running over udp networks provides decodable frames after a
// dropped packet
unsigned int error_resilient_mode; // Bitfield defining the error
// resiliency features to enable. Can provide
// decodable frames after losses in previous
// frames and decodable partitions after
// losses in the same frame.
int arnr_max_frames;
int arnr_strength ;
int arnr_type ;
struct vpx_fixed_buf two_pass_stats_in;
struct vpx_codec_pkt_list *output_pkt_list;
vp8e_tuning tuning;
} VP8_CONFIG;
@ -202,9 +211,9 @@ extern "C"
// receive a frames worth of data caller can assume that a copy of this frame is made
// and not just a copy of the pointer..
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time_stamp);
int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, int64_t *time_stamp, int64_t *time_end, int flush);
int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);

Просмотреть файл

@ -19,7 +19,9 @@
#include "entropy.h"
#include "idct.h"
#include "recon.h"
#if CONFIG_POSTPROC
#include "postproc.h"
#endif
/*#ifdef PACKET_TESTING*/
#include "header.h"
@ -35,13 +37,15 @@ void vp8_initialize_common(void);
#define NUM_YV12_BUFFERS 4
#define MAX_PARTITIONS 9
typedef struct frame_contexts
{
vp8_prob bmode_prob [VP8_BINTRAMODES-1];
vp8_prob ymode_prob [VP8_YMODES-1]; /* interframe intra mode probs */
vp8_prob uv_mode_prob [VP8_UV_MODES-1];
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES];
MV_CONTEXT mvc[2];
MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. */
} FRAME_CONTEXT;
@ -73,7 +77,9 @@ typedef struct VP8_COMMON_RTCD
vp8_recon_rtcd_vtable_t recon;
vp8_subpix_rtcd_vtable_t subpix;
vp8_loopfilter_rtcd_vtable_t loopfilter;
#if CONFIG_POSTPROC
vp8_postproc_rtcd_vtable_t postproc;
#endif
int flags;
#else
int unused;
@ -81,6 +87,7 @@ typedef struct VP8_COMMON_RTCD
} VP8_COMMON_RTCD;
typedef struct VP8Common
{
struct vpx_internal_error_info error;
@ -105,7 +112,8 @@ typedef struct VP8Common
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG temp_scale_frame;
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */
FRAME_TYPE frame_type;
int show_frame;
@ -119,7 +127,6 @@ typedef struct VP8Common
/* profile settings */
int mb_no_coeff_skip;
int no_lpf;
int simpler_lpf;
int use_bilinear_mc_filter;
int full_pixel;
@ -140,16 +147,15 @@ typedef struct VP8Common
MODE_INFO *mip; /* Base of allocated array */
MODE_INFO *mi; /* Corresponds to upper left visible macroblock */
MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */
INTERPOLATIONFILTERTYPE mcomp_filter_type;
LOOPFILTERTYPE last_filter_type;
LOOPFILTERTYPE filter_type;
loop_filter_info lf_info[MAX_LOOP_FILTER+1];
prototype_loopfilter_block((*lf_mbv));
prototype_loopfilter_block((*lf_mbh));
prototype_loopfilter_block((*lf_bv));
prototype_loopfilter_block((*lf_bh));
loop_filter_info_n lf_info;
int filter_level;
int last_sharpness_level;
int sharpness_level;
@ -196,13 +202,12 @@ typedef struct VP8Common
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD rtcd;
#endif
#if CONFIG_MULTITHREAD
int processor_core_count;
#endif
#if CONFIG_POSTPROC
struct postproc_state postproc_state;
#endif
} VP8_COMMON;
void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
void vp8_init_loop_filter(VP8_COMMON *cm);
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val);
#endif

Просмотреть файл

@ -22,6 +22,7 @@ extern "C"
#include "vpx_scale/yv12config.h"
#include "ppflags.h"
#include "vpx_ports/mem.h"
#include "vpx/vpx_codec.h"
typedef void *VP8D_PTR;
typedef struct
@ -31,6 +32,8 @@ extern "C"
int Version;
int postprocess;
int max_threads;
int error_concealment;
int input_partition;
} VP8D_CONFIG;
typedef enum
{
@ -50,11 +53,11 @@ extern "C"
int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags);
int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, int64_t time_stamp);
int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags);
int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
vpx_codec_err_t vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf);

Просмотреть файл

@ -26,7 +26,7 @@
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
/* global constants */
#if CONFIG_POSTPROC_VISUALIZER
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
@ -41,13 +41,32 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
{ RGB_TO_YUV(0xFF0000) } /* Red */
};
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
{
{ RGB_TO_YUV(0x6633ff) }, /* Purple */
{ RGB_TO_YUV(0xcc33ff) }, /* Magenta */
{ RGB_TO_YUV(0xff33cc) }, /* Pink */
{ RGB_TO_YUV(0xff3366) }, /* Coral */
{ RGB_TO_YUV(0x3366ff) }, /* Blue */
{ RGB_TO_YUV(0xed00f5) }, /* Dark Blue */
{ RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */
{ RGB_TO_YUV(0xff6633) }, /* Orange */
{ RGB_TO_YUV(0x33ccff) }, /* Light Blue */
{ RGB_TO_YUV(0x8ab800) }, /* Green */
{ RGB_TO_YUV(0xffcc33) }, /* Light Orange */
{ RGB_TO_YUV(0x33ffcc) }, /* Aqua */
{ RGB_TO_YUV(0x66ff33) }, /* Light Green */
{ RGB_TO_YUV(0xccff33) }, /* Yellow */
};
static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
{
{ RGB_TO_YUV(0x00ff00) }, /* Blue */
{ RGB_TO_YUV(0x0000ff) }, /* Green */
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
{ RGB_TO_YUV(0xff0000) }, /* Red */
};
#endif
static const short kernel5[] =
{
@ -192,7 +211,7 @@ void vp8_post_proc_down_and_across_c
}
}
int vp8_q2mbl(int x)
static int q2mbl(int x)
{
if (x < 20) x = 20;
@ -295,8 +314,8 @@ static void vp8_deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
(void) flag;
POSTPROC_INVOKE(rtcd, downacross)(source->y_buffer, post->y_buffer, source->y_stride, post->y_stride, source->y_height, source->y_width, ppl);
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, vp8_q2mbl(q));
POSTPROC_INVOKE(rtcd, across)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
POSTPROC_INVOKE(rtcd, down)(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q));
POSTPROC_INVOKE(rtcd, downacross)(source->u_buffer, post->u_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
POSTPROC_INVOKE(rtcd, downacross)(source->v_buffer, post->v_buffer, source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl);
@ -476,7 +495,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
*/
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
int y1, int u1, int v1, int alpha, int stride)
{
int i, j;
@ -484,10 +503,10 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
int u1_const = u1*((1<<16)-alpha);
int v1_const = v1*((1<<16)-alpha);
y += stride + 2;
for (i = 0; i < 14; i++)
y += 2*stride + 2;
for (i = 0; i < 12; i++)
{
for (j = 0; j < 14; j++)
for (j = 0; j < 12; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
@ -511,6 +530,104 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
}
}
/* Blend only the edge of the macro block. Leave center
* unblended to allow for other visualizations to be layered.
*/
void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
int y1, int u1, int v1, int alpha, int stride)
{
int i, j;
int y1_const = y1*((1<<16)-alpha);
int u1_const = u1*((1<<16)-alpha);
int v1_const = v1*((1<<16)-alpha);
for (i = 0; i < 2; i++)
{
for (j = 0; j < 16; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
y += stride;
}
for (i = 0; i < 12; i++)
{
y[0] = (y[0]*alpha + y1_const)>>16;
y[1] = (y[1]*alpha + y1_const)>>16;
y[14] = (y[14]*alpha + y1_const)>>16;
y[15] = (y[15]*alpha + y1_const)>>16;
y += stride;
}
for (i = 0; i < 2; i++)
{
for (j = 0; j < 16; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
y += stride;
}
stride >>= 1;
for (j = 0; j < 8; j++)
{
u[j] = (u[j]*alpha + u1_const)>>16;
v[j] = (v[j]*alpha + v1_const)>>16;
}
u += stride;
v += stride;
for (i = 0; i < 6; i++)
{
u[0] = (u[0]*alpha + u1_const)>>16;
v[0] = (v[0]*alpha + v1_const)>>16;
u[7] = (u[7]*alpha + u1_const)>>16;
v[7] = (v[7]*alpha + v1_const)>>16;
u += stride;
v += stride;
}
for (j = 0; j < 8; j++)
{
u[j] = (u[j]*alpha + u1_const)>>16;
v[j] = (v[j]*alpha + v1_const)>>16;
}
}
void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
int y1, int u1, int v1, int alpha, int stride)
{
int i, j;
int y1_const = y1*((1<<16)-alpha);
int u1_const = u1*((1<<16)-alpha);
int v1_const = v1*((1<<16)-alpha);
for (i = 0; i < 4; i++)
{
for (j = 0; j < 4; j++)
{
y[j] = (y[j]*alpha + y1_const)>>16;
}
y += stride;
}
stride >>= 1;
for (i = 0; i < 2; i++)
{
for (j = 0; j < 2; j++)
{
u[j] = (u[j]*alpha + u1_const)>>16;
v[j] = (v[j]*alpha + v1_const)>>16;
}
u += stride;
v += stride;
}
}
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
{
int dx;
@ -522,7 +639,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*x1 = width;
if (dy)
if (dx)
*y1 = ((width-x0)*dy)/dx + y0;
}
if (*x1 < 0)
@ -531,7 +648,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*x1 = 0;
if (dy)
if (dx)
*y1 = ((0-x0)*dy)/dx + y0;
}
if (*y1 > height)
@ -540,7 +657,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*y1 = height;
if (dx)
if (dy)
*x1 = ((height-y0)*dx)/dy + x0;
}
if (*y1 < 0)
@ -549,7 +666,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
dy = *y1 - y0;
*y1 = 0;
if (dx)
if (dy)
*x1 = ((0-y0)*dx)/dy + x0;
}
}
@ -561,10 +678,12 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
#define RTCD_VTABLE(oci) NULL
#endif
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
{
char message[512];
int q = oci->filter_level * 10 / 6;
int flags = ppflags->post_proc_flag;
int deblock_level = ppflags->deblocking_level;
int noise_level = ppflags->noise_level;
if (!oci->frame_to_show)
return -1;
@ -621,8 +740,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
oci->post_proc_buffer.y_stride);
}
if (flags & VP8D_DEBUG_LEVEL1)
#if CONFIG_POSTPROC_VISUALIZER
if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
{
char message[512];
sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
(oci->frame_type == KEY_FRAME),
oci->refresh_golden_frame,
@ -633,7 +754,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
}
if (flags & VP8D_DEBUG_LEVEL2)
if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
{
int i, j;
unsigned char *y_ptr;
@ -665,7 +786,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
}
if (flags & VP8D_DEBUG_LEVEL3)
if (flags & VP8D_DEBUG_TXT_DC_DIFF)
{
int i, j;
unsigned char *y_ptr;
@ -683,11 +804,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
for (j = 0; j < mb_cols; j++)
{
char zz[4];
int dc_diff = !(mi[mb_index].mbmi.mode != B_PRED &&
mi[mb_index].mbmi.mode != SPLITMV &&
mi[mb_index].mbmi.mb_skip_coeff);
if (oci->frame_type == KEY_FRAME)
sprintf(zz, "a");
else
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
sprintf(zz, "%c", dc_diff + '0');
vp8_blit_text(zz, y_ptr, post->y_stride);
mb_index ++;
@ -700,78 +824,162 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
}
if (flags & VP8D_DEBUG_LEVEL4)
if (flags & VP8D_DEBUG_TXT_RATE_INFO)
{
char message[512];
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
#if 0
int i, j;
unsigned char *y_ptr;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int mb_rows = post->y_height >> 4;
int mb_cols = post->y_width >> 4;
int mb_index = 0;
MODE_INFO *mi = oci->mi;
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
/* vp8_filter each macro block */
for (i = 0; i < mb_rows; i++)
{
for (j = 0; j < mb_cols; j++)
{
char zz[4];
sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
vp8_blit_text(zz, y_ptr, post->y_stride);
mb_index ++;
y_ptr += 16;
}
mb_index ++; /* border */
y_ptr += post->y_stride * 16 - post->y_width;
}
#endif
}
/* Draw motion vectors */
if (flags & VP8D_DEBUG_LEVEL5)
if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
{
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
int mb_cols = width >> 4;
unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
int x0, y0;
for (y0 = 8; y0 < (height + 8); y0 += 16)
for (y0 = 0; y0 < height; y0 += 16)
{
for (x0 = 8; x0 < (width + 8); x0 += 16)
for (x0 = 0; x0 < width; x0 += 16)
{
int x1, y1;
if (mi->mbmi.mode >= NEARESTMV)
int x1, y1;
if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
{
mi++;
continue;
}
if (mi->mbmi.mode == SPLITMV)
{
switch (mi->mbmi.partitioning)
{
case 0 : /* mv_top_bottom */
{
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 8 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+8, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[8];
x1 = x0 + 8 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+8, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride);
break;
}
case 1 : /* mv_left_right */
{
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 + 8 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+8, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride);
bmi = &mi->bmi[2];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 + 8 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+8, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride);
break;
}
case 2 : /* mv_quarters */
{
union b_mode_info *bmi = &mi->bmi[0];
MV *mv = &bmi->mv.as_mv;
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[2];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 + 4 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+4, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+4, y1, y_buffer, y_stride);
bmi = &mi->bmi[8];
x1 = x0 + 4 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+4, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride);
bmi = &mi->bmi[10];
x1 = x0 +12 + (mv->col >> 3);
y1 = y0 +12 + (mv->row >> 3);
constrain_line (x0+12, &x1, y0+12, &y1, width, height);
vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride);
break;
}
default :
{
union b_mode_info *bmi = mi->bmi;
int bx0, by0;
for (by0 = y0; by0 < (y0+16); by0 += 4)
{
for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
{
MV *mv = &bmi->mv.as_mv;
x1 = bx0 + 2 + (mv->col >> 3);
y1 = by0 + 2 + (mv->row >> 3);
constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride);
bmi++;
}
}
}
}
}
else if (mi->mbmi.mode >= NEARESTMV)
{
MV *mv = &mi->mbmi.mv.as_mv;
const int lx0 = x0 + 8;
const int ly0 = y0 + 8;
x1 = x0 + (mv->col >> 3);
y1 = y0 + (mv->row >> 3);
x1 = lx0 + (mv->col >> 3);
y1 = ly0 + (mv->row >> 3);
if (x1 != x0 && y1 != y0)
if (x1 != lx0 && y1 != ly0)
{
constrain_line (x0, &x1, y0-1, &y1, width, height);
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
constrain_line (lx0, &x1, ly0-1, &y1, width, height);
vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride);
constrain_line (x0, &x1, y0+1, &y1, width, height);
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
constrain_line (lx0, &x1, ly0+1, &y1, width, height);
vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride);
}
else
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride);
}
mi++;
}
mi++;
@ -779,9 +987,10 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
/* Color in block modes */
if (flags & VP8D_DEBUG_LEVEL6)
if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
&& (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
{
int i, j;
int y, x;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
@ -791,18 +1000,54 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (i = 0; i < height; i += 16)
for (y = 0; y < height; y += 16)
{
for (j = 0; j < width; j += 16)
for (x = 0; x < width; x += 16)
{
int Y = 0, U = 0, V = 0;
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
if (mi->mbmi.mode == B_PRED &&
((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
{
int by, bx;
unsigned char *yl, *ul, *vl;
union b_mode_info *bmi = mi->bmi;
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
yl = y_ptr + x;
ul = u_ptr + (x>>1);
vl = v_ptr + (x>>1);
for (by = 0; by < 16; by += 4)
{
for (bx = 0; bx < 16; bx += 4)
{
if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
|| (ppflags->display_mb_modes_flag & B_PRED))
{
Y = B_PREDICTION_MODE_colors[bmi->as_mode][0];
U = B_PREDICTION_MODE_colors[bmi->as_mode][1];
V = B_PREDICTION_MODE_colors[bmi->as_mode][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
(yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
}
bmi++;
}
yl += y_stride*4;
ul += y_stride*1;
vl += y_stride*1;
}
}
else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
{
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
}
mi++;
}
@ -815,9 +1060,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
}
/* Color in frame reference blocks */
if (flags & VP8D_DEBUG_LEVEL7)
if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
{
int i, j;
int y, x;
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
int width = post->y_width;
int height = post->y_height;
@ -827,18 +1072,21 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
int y_stride = oci->post_proc_buffer.y_stride;
MODE_INFO *mi = oci->mi;
for (i = 0; i < height; i += 16)
for (y = 0; y < height; y += 16)
{
for (j = 0; j < width; j +=16)
for (x = 0; x < width; x +=16)
{
int Y = 0, U = 0, V = 0;
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
{
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
(y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
}
mi++;
}
@ -849,6 +1097,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
mi++;
}
}
#endif
*dest = oci->post_proc_buffer;

Просмотреть файл

@ -24,7 +24,15 @@
char whiteclamp[16], char bothclamp[16],\
unsigned int w, unsigned int h, int pitch)
#define prototype_postproc_blend_mb(sym)\
#define prototype_postproc_blend_mb_inner(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
#define prototype_postproc_blend_mb_outer(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
#define prototype_postproc_blend_b(sym)\
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
int y1, int u1, int v1, int alpha, int stride)
@ -52,22 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
#endif
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
#ifndef vp8_postproc_blend_mb
#define vp8_postproc_blend_mb vp8_blend_mb_c
#ifndef vp8_postproc_blend_mb_inner
#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
#endif
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
#ifndef vp8_postproc_blend_mb_outer
#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
#endif
extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
#ifndef vp8_postproc_blend_b
#define vp8_postproc_blend_b vp8_blend_b_c
#endif
extern prototype_postproc_blend_b(vp8_postproc_blend_b);
typedef prototype_postproc((*vp8_postproc_fn_t));
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
typedef struct
{
vp8_postproc_inplace_fn_t down;
vp8_postproc_inplace_fn_t across;
vp8_postproc_fn_t downacross;
vp8_postproc_addnoise_fn_t addnoise;
vp8_postproc_blend_mb_fn_t blend_mb;
vp8_postproc_inplace_fn_t down;
vp8_postproc_inplace_fn_t across;
vp8_postproc_fn_t downacross;
vp8_postproc_addnoise_fn_t addnoise;
vp8_postproc_blend_mb_inner_fn_t blend_mb_inner;
vp8_postproc_blend_mb_outer_fn_t blend_mb_outer;
vp8_postproc_blend_b_fn_t blend_b;
} vp8_postproc_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@ -89,7 +111,7 @@ struct postproc_state
#include "onyxc_int.h"
#include "ppflags.h"
int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
int deblock_level, int noise_level, int flags);
vp8_ppflags_t *flags);
void vp8_de_noise(YV12_BUFFER_CONFIG *source,

Просмотреть файл

@ -13,17 +13,28 @@
#define __INC_PPFLAGS_H
enum
{
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_LEVEL1 = 1<<3,
VP8D_DEBUG_LEVEL2 = 1<<4,
VP8D_DEBUG_LEVEL3 = 1<<5,
VP8D_DEBUG_LEVEL4 = 1<<6,
VP8D_DEBUG_LEVEL5 = 1<<7,
VP8D_DEBUG_LEVEL6 = 1<<8,
VP8D_DEBUG_LEVEL7 = 1<<9
VP8D_NOFILTERING = 0,
VP8D_DEBLOCK = 1<<0,
VP8D_DEMACROBLOCK = 1<<1,
VP8D_ADDNOISE = 1<<2,
VP8D_DEBUG_TXT_FRAME_INFO = 1<<3,
VP8D_DEBUG_TXT_MBLK_MODES = 1<<4,
VP8D_DEBUG_TXT_DC_DIFF = 1<<5,
VP8D_DEBUG_TXT_RATE_INFO = 1<<6,
VP8D_DEBUG_DRAW_MV = 1<<7,
VP8D_DEBUG_CLR_BLK_MODES = 1<<8,
VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
};
typedef struct
{
int post_proc_flag;
int deblocking_level;
int noise_level;
int display_ref_frame_flag;
int display_mb_modes_flag;
int display_b_modes_flag;
int display_mv_flag;
} vp8_ppflags_t;
#endif

Просмотреть файл

@ -1,44 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "blockd.h"
void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons)
{
int diff;
int sign;
int last_dc = *lastdc;
int this_dc = *thisdc;
if (*cons > DCPREDCNTTHRESH)
{
this_dc += last_dc;
}
diff = abs(last_dc - this_dc);
sign = (last_dc >> 31) ^(this_dc >> 31);
sign |= (!last_dc | !this_dc);
if (sign)
{
*cons = 0;
}
else
{
if (diff <= DCPREDSIMTHRESH * quant)
(*cons)++ ;
}
*thisdc = this_dc;
*lastdc = this_dc;
}

Просмотреть файл

@ -1,18 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef __PREDICTDC_H
#define __PREDICTDC_H
void uvvp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
void vp8_predict_dc(short *lastdc, short *thisdc, short quant, short *cons);
#endif

Просмотреть файл

@ -1,46 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/****************************************************************************
*
* Module Title : preproc.h
*
* Description : simple preprocessor
*
****************************************************************************/
#ifndef __INC_PREPROC_H
#define __INC_PREPROC_H
/****************************************************************************
* Types
****************************************************************************/
typedef struct
{
unsigned char *frame_buffer;
int frame;
unsigned int *fixed_divide;
unsigned char *frame_buffer_alloc;
unsigned int *fixed_divide_alloc;
} pre_proc_instance;
/****************************************************************************
* Functions.
****************************************************************************/
void pre_proc_machine_specific_config(void);
void delete_pre_proc(pre_proc_instance *ppi);
int init_pre_proc(pre_proc_instance *ppi, int frame_size);
extern void spatial_filter_c(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int width, int height, int pitch, int strength);
extern void (*temp_filter)(pre_proc_instance *ppi, unsigned char *s, unsigned char *d, int bytes, int strength);
#endif

Просмотреть файл

@ -26,6 +26,9 @@
#define prototype_build_intra_predictors(sym) \
void sym(MACROBLOCKD *x)
#define prototype_intra4x4_predict(sym) \
void sym(BLOCKD *x, int b_mode, unsigned char *predictor)
struct vp8_recon_rtcd_vtable;
#if ARCH_X86 || ARCH_X86_64
@ -88,11 +91,30 @@ extern prototype_build_intra_predictors\
extern prototype_build_intra_predictors\
(vp8_recon_build_intra_predictors_mby_s);
#ifndef vp8_recon_build_intra_predictors_mbuv
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv
#endif
extern prototype_build_intra_predictors\
(vp8_recon_build_intra_predictors_mbuv);
#ifndef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s
#endif
extern prototype_build_intra_predictors\
(vp8_recon_build_intra_predictors_mbuv_s);
#ifndef vp8_recon_intra4x4_predict
#define vp8_recon_intra4x4_predict vp8_intra4x4_predict
#endif
extern prototype_intra4x4_predict\
(vp8_recon_intra4x4_predict);
typedef prototype_copy_block((*vp8_copy_block_fn_t));
typedef prototype_recon_block((*vp8_recon_fn_t));
typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
typedef prototype_build_intra_predictors((*vp8_build_intra_pred_fn_t));
typedef prototype_intra4x4_predict((*vp8_intra4x4_pred_fn_t));
typedef struct vp8_recon_rtcd_vtable
{
vp8_copy_block_fn_t copy16x16;
@ -105,6 +127,9 @@ typedef struct vp8_recon_rtcd_vtable
vp8_recon_mb_fn_t recon_mby;
vp8_build_intra_pred_fn_t build_intra_predictors_mby_s;
vp8_build_intra_pred_fn_t build_intra_predictors_mby;
vp8_build_intra_pred_fn_t build_intra_predictors_mbuv_s;
vp8_build_intra_pred_fn_t build_intra_predictors_mbuv;
vp8_intra4x4_pred_fn_t intra4x4_predict;
} vp8_recon_rtcd_vtable_t;
#if CONFIG_RUNTIME_CPU_DETECT
@ -113,6 +138,5 @@ typedef struct vp8_recon_rtcd_vtable
#define RECON_INVOKE(ctx,fn) vp8_recon_##fn
#endif
void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
#endif

Просмотреть файл

@ -10,6 +10,7 @@
#include "vpx_ports/config.h"
#include "vpx/vpx_integer.h"
#include "recon.h"
#include "subpixel.h"
#include "blockd.h"
@ -18,12 +19,6 @@
#include "onyxc_int.h"
#endif
/* use this define on systems where unaligned int reads and writes are
* not allowed, i.e. ARM architectures
*/
/*#define MUST_BE_ALIGNED*/
static const int bbb[4] = {0, 2, 8, 10};
@ -39,7 +34,7 @@ void vp8_copy_mem16x16_c(
for (r = 0; r < 16; r++)
{
#ifdef MUST_BE_ALIGNED
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
@ -58,10 +53,10 @@ void vp8_copy_mem16x16_c(
dst[15] = src[15];
#else
((int *)dst)[0] = ((int *)src)[0] ;
((int *)dst)[1] = ((int *)src)[1] ;
((int *)dst)[2] = ((int *)src)[2] ;
((int *)dst)[3] = ((int *)src)[3] ;
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
((uint32_t *)dst)[2] = ((uint32_t *)src)[2] ;
((uint32_t *)dst)[3] = ((uint32_t *)src)[3] ;
#endif
src += src_stride;
@ -81,7 +76,7 @@ void vp8_copy_mem8x8_c(
for (r = 0; r < 8; r++)
{
#ifdef MUST_BE_ALIGNED
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
@ -91,8 +86,8 @@ void vp8_copy_mem8x8_c(
dst[6] = src[6];
dst[7] = src[7];
#else
((int *)dst)[0] = ((int *)src)[0] ;
((int *)dst)[1] = ((int *)src)[1] ;
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
src += src_stride;
dst += dst_stride;
@ -111,7 +106,7 @@ void vp8_copy_mem8x4_c(
for (r = 0; r < 4; r++)
{
#ifdef MUST_BE_ALIGNED
#if !(CONFIG_FAST_UNALIGNED)
dst[0] = src[0];
dst[1] = src[1];
dst[2] = src[2];
@ -121,8 +116,8 @@ void vp8_copy_mem8x4_c(
dst[6] = src[6];
dst[7] = src[7];
#else
((int *)dst)[0] = ((int *)src)[0] ;
((int *)dst)[1] = ((int *)src)[1] ;
((uint32_t *)dst)[0] = ((uint32_t *)src)[0] ;
((uint32_t *)dst)[1] = ((uint32_t *)src)[1] ;
#endif
src += src_stride;
dst += dst_stride;
@ -154,13 +149,13 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
for (r = 0; r < 4; r++)
{
#ifdef MUST_BE_ALIGNED
#if !(CONFIG_FAST_UNALIGNED)
pred_ptr[0] = ptr[0];
pred_ptr[1] = ptr[1];
pred_ptr[2] = ptr[2];
pred_ptr[3] = ptr[3];
#else
*(int *)pred_ptr = *(int *)ptr ;
*(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
#endif
pred_ptr += pitch;
ptr += d->pre_stride;
@ -168,7 +163,7 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf)
}
}
void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
{
unsigned char *ptr_base;
unsigned char *ptr;
@ -187,7 +182,7 @@ void vp8_build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, int pitch)
}
}
void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
{
unsigned char *ptr_base;
unsigned char *ptr;
@ -207,12 +202,12 @@ void vp8_build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, int pitch)
}
/*encoder only*/
void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
{
int i;
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.mode != SPLITMV)
{
unsigned char *uptr, *vptr;
unsigned char *upred_ptr = &x->predictor[256];
@ -246,7 +241,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
vp8_build_inter_predictors2b(x, d0, 8);
build_inter_predictors2b(x, d0, 8);
else
{
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
@ -257,158 +252,132 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
}
/*encoder only*/
void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x)
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = x->predictor;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = x->predictor;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
}
}
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride)
{
int offset;
unsigned char *ptr;
unsigned char *uptr, *vptr;
if ((mv_row | mv_col) & 7)
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
unsigned char *ptr_base = x->pre.y_buffer;
int pre_stride = x->block[0].pre_stride;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_y, dst_ystride);
}
mv_row = x->block[16].bmi.mv.as_mv.row;
mv_col = x->block[16].bmi.mv.as_mv.col;
pre_stride >>= 1;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, dst_u, dst_uvstride);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, dst_v, dst_uvstride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, dst_u, dst_uvstride);
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, dst_v, dst_uvstride);
}
}
void vp8_build_inter4x4_predictors_mb(MACROBLOCKD *x)
{
int i;
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
BLOCKD *d = &x->block[bbb[i]];
build_inter_predictors4b(x, d, 16);
}
}
else
{
int i;
if (x->mode_info_context->mbmi.partitioning < 3)
for (i = 0; i < 16; i += 2)
{
for (i = 0; i < 4; i++)
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, 16);
else
{
BLOCKD *d = &x->block[bbb[i]];
vp8_build_inter_predictors4b(x, d, 16);
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
}
}
}
for (i = 16; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
build_inter_predictors2b(x, d0, 8);
else
{
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
vp8_build_inter_predictors2b(x, d0, 16);
else
{
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
}
}
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
}
}
}
void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
{
if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
x->mode_info_context->mbmi.mode != SPLITMV)
if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *uptr, *vptr;
unsigned char *pred_ptr = x->predictor;
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->block[0].pre_stride;
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, pred_ptr, 16);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, pred_ptr, 16);
}
mv_row = x->block[16].bmi.mv.as_mv.row;
mv_col = x->block[16].bmi.mv.as_mv.col;
pre_stride >>= 1;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, upred_ptr, 8);
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vpred_ptr, 8);
}
vp8_build_inter16x16_predictors_mb(x, x->predictor, &x->predictor[256],
&x->predictor[320], 16, 8);
}
else
{
int i;
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
BLOCKD *d = &x->block[bbb[i]];
vp8_build_inter_predictors4b(x, d, 16);
}
}
else
{
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
vp8_build_inter_predictors2b(x, d0, 16);
else
{
vp8_build_inter_predictors_b(d0, 16, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 16, x->subpixel_predict);
}
}
}
for (i = 16; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
vp8_build_inter_predictors2b(x, d0, 8);
else
{
vp8_build_inter_predictors_b(d0, 8, x->subpixel_predict);
vp8_build_inter_predictors_b(d1, 8, x->subpixel_predict);
}
}
vp8_build_inter4x4_predictors_mb(x);
}
}
@ -492,202 +461,5 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
}
/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
* situation, we can write the result directly to dst buffer instead of writing it to predictor
* buffer and then copying it to dst buffer.
*/
static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
{
int r;
unsigned char *ptr_base;
unsigned char *ptr;
/*unsigned char *pred_ptr = d->predictor;*/
int dst_stride = d->dst_stride;
int pre_stride = d->pre_stride;
ptr_base = *(d->base_pre);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, dst_stride);
}
else
{
ptr_base += d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
ptr = ptr_base;
for (r = 0; r < 4; r++)
{
#ifdef MUST_BE_ALIGNED
dst_ptr[0] = ptr[0];
dst_ptr[1] = ptr[1];
dst_ptr[2] = ptr[2];
dst_ptr[3] = ptr[3];
#else
*(int *)dst_ptr = *(int *)ptr ;
#endif
dst_ptr += dst_stride;
ptr += pre_stride;
}
}
}
void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
{
/*unsigned char *pred_ptr = x->block[0].predictor;
unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
unsigned char *pred_ptr = x->predictor;
unsigned char *dst_ptr = x->dst.y_buffer;
if (x->mode_info_context->mbmi.mode != SPLITMV)
{
int offset;
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *uptr, *vptr;
/*unsigned char *pred_ptr = x->predictor;
unsigned char *upred_ptr = &x->predictor[256];
unsigned char *vpred_ptr = &x->predictor[320];*/
unsigned char *udst_ptr = x->dst.u_buffer;
unsigned char *vdst_ptr = x->dst.v_buffer;
int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
ptr_base = x->pre.y_buffer;
ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
mv_row = x->block[16].bmi.mv.as_mv.row;
mv_col = x->block[16].bmi.mv.as_mv.col;
pre_stride >>= 1;
offset = (mv_row >> 3) * pre_stride + (mv_col >> 3);
uptr = x->pre.u_buffer + offset;
vptr = x->pre.v_buffer + offset;
if ((mv_row | mv_col) & 7)
{
x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, udst_ptr, x->dst.uv_stride);
x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vdst_ptr, x->dst.uv_stride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, pre_stride, udst_ptr, x->dst.uv_stride);
RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, pre_stride, vdst_ptr, x->dst.uv_stride);
}
}
else
{
/* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
* if sth is wrong, go back to what it is in build_inter_predictors_mb.
*/
int i;
if (x->mode_info_context->mbmi.partitioning < 3)
{
for (i = 0; i < 4; i++)
{
BLOCKD *d = &x->block[bbb[i]];
/*vp8_build_inter_predictors4b(x, d, 16);*/
{
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = d->predictor;
ptr_base = *(d->base_pre);
ptr = ptr_base + d->pre + (d->bmi.mv.as_mv.row >> 3) * d->pre_stride + (d->bmi.mv.as_mv.col >> 3);
if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
}
}
}
}
else
{
for (i = 0; i < 16; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
{
/*vp8_build_inter_predictors2b(x, d0, 16);*/
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
}
}
else
{
vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
}
}
}
for (i = 16; i < 24; i += 2)
{
BLOCKD *d0 = &x->block[i];
BLOCKD *d1 = &x->block[i+1];
if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
{
/*vp8_build_inter_predictors2b(x, d0, 8);*/
unsigned char *ptr_base;
unsigned char *ptr;
unsigned char *pred_ptr = d0->predictor;
ptr_base = *(d0->base_pre);
ptr = ptr_base + d0->pre + (d0->bmi.mv.as_mv.row >> 3) * d0->pre_stride + (d0->bmi.mv.as_mv.col >> 3);
if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
{
x->subpixel_predict8x4(ptr, d0->pre_stride,
d0->bmi.mv.as_mv.col & 7,
d0->bmi.mv.as_mv.row & 7,
dst_ptr, x->dst.uv_stride);
}
else
{
RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
d0->pre_stride, dst_ptr, x->dst.uv_stride);
}
}
else
{
vp8_build_inter_predictors_b_s(d0, dst_ptr, x->subpixel_predict);
vp8_build_inter_predictors_b_s(d1, dst_ptr, x->subpixel_predict);
}
}
}
}

Просмотреть файл

@ -13,9 +13,15 @@
#define __INC_RECONINTER_H
extern void vp8_build_inter_predictors_mb(MACROBLOCKD *x);
extern void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x,
unsigned char *dst_y,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_ystride,
int dst_uvstride);
extern void vp8_build_inter_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x);
extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
extern void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, vp8_subpix_fn_t sppf);
extern void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x);

Просмотреть файл

@ -14,9 +14,4 @@
extern void init_intra_left_above_pixels(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x);
extern void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x);
extern void vp8_predict_intra4x4(BLOCKD *x, int b_mode, unsigned char *Predictor);
#endif

Просмотреть файл

@ -14,7 +14,7 @@
#include "vpx_mem/vpx_mem.h"
#include "reconintra.h"
void vp8_predict_intra4x4(BLOCKD *x,
void vp8_intra4x4_predict(BLOCKD *x,
int b_mode,
unsigned char *predictor)
{
@ -313,89 +313,3 @@ void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
}
void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
int i;
vp8_intra_prediction_down_copy(x);
#if ARCH_ARM
{
BLOCKD *b = &x->block[0];
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
b += 1;
vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
#else
for (i = 0; i < 16; i++)
{
BLOCKD *b = &x->block[i];
vp8_predict_intra4x4(b, x->block[i].bmi.mode, x->block[i].predictor);
RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
}
#endif
vp8_recon_intra_mbuv(rtcd, x);
}

Просмотреть файл

@ -12,7 +12,7 @@
#ifndef _PTHREAD_EMULATION
#define _PTHREAD_EMULATION
#define VPXINFINITE 10000 /* 10second. */
#if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD
/* Thread management macros */
#ifdef _WIN32
@ -26,7 +26,7 @@
#define pthread_t HANDLE
#define pthread_attr_t DWORD
#define pthread_create(thhandle,attr,thfunc,tharg) (int)((*thhandle=(HANDLE)_beginthreadex(NULL,0,(unsigned int (__stdcall *)(void *))thfunc,tharg,0,NULL))==NULL)
#define pthread_join(thread, result) ((WaitForSingleObject((thread),VPXINFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
#define pthread_join(thread, result) ((WaitForSingleObject((thread),INFINITE)!=WAIT_OBJECT_0) || !CloseHandle(thread))
#define pthread_detach(thread) if(thread!=NULL)CloseHandle(thread)
#define thread_sleep(nms) Sleep(nms)
#define pthread_cancel(thread) terminate_thread(thread,0)
@ -36,6 +36,7 @@
#define pthread_self() GetCurrentThreadId()
#else
#ifdef __APPLE__
#include <mach/mach_init.h>
#include <mach/semaphore.h>
#include <mach/task.h>
#include <time.h>
@ -58,9 +59,9 @@
#ifdef _WIN32
#define sem_t HANDLE
#define pause(voidpara) __asm PAUSE
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateEvent(NULL,FALSE,FALSE,NULL))==NULL)
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,VPXINFINITE))
#define sem_post(sem) SetEvent(*sem)
#define sem_init(sem, sem_attr1, sem_init_value) (int)((*sem = CreateSemaphore(NULL,0,32768,NULL))==NULL)
#define sem_wait(sem) (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem,INFINITE))
#define sem_post(sem) ReleaseSemaphore(*sem,1,NULL)
#define sem_destroy(sem) if(*sem)((int)(CloseHandle(*sem))==TRUE)
#define thread_sleep(nms) Sleep(nms)
@ -88,4 +89,6 @@
#define x86_pause_hint()
#endif
#endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */
#endif

Просмотреть файл

@ -1,13 +0,0 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define ALLOC_FAILURE -2

Просмотреть файл

@ -14,18 +14,18 @@
; /****************************************************************************
; * Notes:
; *
; * This implementation makes use of 16 bit fixed point verio of two multiply
; * This implementation makes use of 16 bit fixed point version of two multiply
; * constants:
; * 1. sqrt(2) * cos (pi/8)
; * 2. sqrt(2) * sin (pi/8)
; * Becuase the first constant is bigger than 1, to maintain the same 16 bit
; * fixed point prrcision as the second one, we use a trick of
; * 2. sqrt(2) * sin (pi/8)
; * Because the first constant is bigger than 1, to maintain the same 16 bit
; * fixed point precision as the second one, we use a trick of
; * x * a = x + x*(a-1)
; * so
; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
; *
; * For the second constant, becuase of the 16bit version is 35468, which
; * is bigger than 32768, in signed 16 bit multiply, it become a negative
; * For the second constant, because of the 16bit version is 35468, which
; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative
; * number.
; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x
; *

Просмотреть файл

@ -32,9 +32,6 @@ sym(idct_dequant_0_2x_sse2):
mov rdx, arg(1) ; dequant
mov rax, arg(0) ; qcoeff
; Zero out xmm7, for use unpacking
pxor xmm7, xmm7
movd xmm4, [rax]
movd xmm5, [rdx]
@ -43,9 +40,12 @@ sym(idct_dequant_0_2x_sse2):
pmullw xmm4, xmm5
; Zero out xmm5, for use unpacking
pxor xmm5, xmm5
; clear coeffs
movd [rax], xmm7
movd [rax+32], xmm7
movd [rax], xmm5
movd [rax+32], xmm5
;pshufb
pshuflw xmm4, xmm4, 00000000b
pshufhw xmm4, xmm4, 00000000b
@ -62,10 +62,10 @@ sym(idct_dequant_0_2x_sse2):
lea rcx, [3*rcx]
movq xmm3, [rax+rcx]
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
punpcklbw xmm2, xmm7
punpcklbw xmm3, xmm7
punpcklbw xmm0, xmm5
punpcklbw xmm1, xmm5
punpcklbw xmm2, xmm5
punpcklbw xmm3, xmm5
mov rax, arg(3) ; dst
movsxd rdx, dword ptr arg(4) ; dst_stride
@ -77,10 +77,10 @@ sym(idct_dequant_0_2x_sse2):
paddw xmm3, xmm4
; pack up before storing
packuswb xmm0, xmm7
packuswb xmm1, xmm7
packuswb xmm2, xmm7
packuswb xmm3, xmm7
packuswb xmm0, xmm5
packuswb xmm1, xmm5
packuswb xmm2, xmm5
packuswb xmm3, xmm5
; store blocks back out
movq [rax], xmm0
@ -102,6 +102,7 @@ sym(idct_dequant_full_2x_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -347,6 +348,7 @@ sym(idct_dequant_full_2x_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -377,8 +379,8 @@ sym(idct_dequant_dc_0_2x_sse2):
mov rdi, arg(3) ; dst
mov rdx, arg(5) ; dc
; Zero out xmm7, for use unpacking
pxor xmm7, xmm7
; Zero out xmm5, for use unpacking
pxor xmm5, xmm5
; load up 2 dc words here == 2*16 = doubleword
movd xmm4, [rdx]
@ -398,10 +400,10 @@ sym(idct_dequant_dc_0_2x_sse2):
psraw xmm4, 3
; Predict buffer needs to be expanded from bytes to words
punpcklbw xmm0, xmm7
punpcklbw xmm1, xmm7
punpcklbw xmm2, xmm7
punpcklbw xmm3, xmm7
punpcklbw xmm0, xmm5
punpcklbw xmm1, xmm5
punpcklbw xmm2, xmm5
punpcklbw xmm3, xmm5
; Add to predict buffer
paddw xmm0, xmm4
@ -410,10 +412,10 @@ sym(idct_dequant_dc_0_2x_sse2):
paddw xmm3, xmm4
; pack up before storing
packuswb xmm0, xmm7
packuswb xmm1, xmm7
packuswb xmm2, xmm7
packuswb xmm3, xmm7
packuswb xmm0, xmm5
packuswb xmm1, xmm5
packuswb xmm2, xmm5
packuswb xmm3, xmm5
; Load destination stride before writing out,
; doesn't need to persist
@ -441,6 +443,7 @@ sym(idct_dequant_dc_full_2x_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -692,6 +695,7 @@ sym(idct_dequant_dc_full_2x_sse2):
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret

Просмотреть файл

@ -17,7 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 2
SAVE_XMM
SAVE_XMM 6
push rsi
push rdi
; end prolog
@ -41,7 +41,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
movdqa xmm4, xmm0
punpcklqdq xmm0, xmm3 ;d1 a1
punpckhqdq xmm4, xmm3 ;c1 b1
movd xmm7, eax
movd xmm6, eax
movdqa xmm1, xmm4 ;c1 b1
paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0]
@ -66,7 +66,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
pshufd xmm2, xmm1, 4eh ;ip[8] ip[12]
movdqa xmm3, xmm4 ;ip[4] ip[0]
pshufd xmm7, xmm7, 0 ;03 03 03 03 03 03 03 03
pshufd xmm6, xmm6, 0 ;03 03 03 03 03 03 03 03
paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
@ -90,8 +90,8 @@ sym(vp8_short_inv_walsh4x4_sse2):
punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00
punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02
;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
paddw xmm5, xmm7
paddw xmm1, xmm7
paddw xmm5, xmm6
paddw xmm1, xmm6
psraw xmm5, 3
psraw xmm1, 3

Просмотреть файл

@ -16,7 +16,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -122,12 +122,10 @@ next8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
movq mm2, [rdx] ; flimit mm2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx] ; blimit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
@ -230,7 +228,7 @@ next8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -406,9 +404,9 @@ next8_v:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
mov rdx, arg(2) ;blimit ;
movq mm2, [rdx] ;flimit mm2
movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@ -419,10 +417,7 @@ next8_v:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@ -603,7 +598,7 @@ next8_v:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -719,17 +714,15 @@ next8_mbh:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
movq mm2, [rdx] ; flimit mm2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm7, mm2 ; flimit * 2 + limit (less than 255)
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx] ; blimit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm5
pxor mm5, mm5
pcmpeqb mm1, mm5 ; mask mm1
; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm1 = mask, mm0=q0, mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
; mm6 = p0,
; calculate high edge variance
@ -922,7 +915,7 @@ next8_mbh:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -1108,9 +1101,9 @@ next8_mbv:
pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero
psrlw mm5, 1 ; abs(p1-q1)/2
mov rdx, arg(2) ;flimit ;
mov rdx, arg(2) ;blimit ;
movq mm2, [rdx] ;flimit mm2
movq mm4, [rdx] ;blimit
movq mm1, mm3 ; mm1=mm3=p0
movq mm7, mm6 ; mm7=mm6=q0
@ -1121,10 +1114,7 @@ next8_mbv:
paddusb mm1, mm1 ; abs(q0-p0)*2
paddusb mm1, mm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
paddb mm2, mm2 ; flimit*2 (less than 255)
paddb mm4, mm2 ; flimit * 2 + limit (less than 255)
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm1, mm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por mm1, mm0; ; mask
pxor mm0, mm0
@ -1392,16 +1382,13 @@ next8_mbv:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit
;)
global sym(vp8_loop_filter_simple_horizontal_edge_mmx)
sym(vp8_loop_filter_simple_horizontal_edge_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@ -1410,14 +1397,10 @@ sym(vp8_loop_filter_simple_horizontal_edge_mmx):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
movsxd rcx, dword ptr arg(5) ;count
mov rcx, 2 ; count
nexts8_h:
mov rdx, arg(3) ;limit
movq mm7, [rdx]
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit ; get blimit
movq mm3, [rdx] ;
paddb mm3, mm3 ; flimit*2 (less than 255)
paddb mm3, mm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@ -1445,7 +1428,7 @@ nexts8_h:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm3, mm3
pcmpeqb mm5, mm3
@ -1515,16 +1498,13 @@ nexts8_h:
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit
;)
global sym(vp8_loop_filter_simple_vertical_edge_mmx)
sym(vp8_loop_filter_simple_vertical_edge_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SHADOW_ARGS_TO_STACK 3
GET_GOT rbx
push rsi
push rdi
@ -1539,7 +1519,7 @@ sym(vp8_loop_filter_simple_vertical_edge_mmx):
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
lea rsi, [rsi + rax*4- 2]; ;
movsxd rcx, dword ptr arg(5) ;count
mov rcx, 2 ; count
nexts8_v:
lea rdi, [rsi + rax];
@ -1602,14 +1582,10 @@ nexts8_v:
paddusb mm5, mm5 ; abs(p0-q0)*2
paddusb mm5, mm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit ; get blimit
movq mm7, [rdx]
mov rdx, arg(3) ; get limit
movq mm6, [rdx]
paddb mm7, mm7 ; flimit*2 (less than 255)
paddb mm7, mm6 ; flimit * 2 + limit (less than 255)
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb mm5, mm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor mm7, mm7
pcmpeqb mm5, mm7 ; mm5 = mask

Просмотреть файл

@ -110,7 +110,7 @@
psubusb xmm6, xmm5 ; p1-=p0
por xmm6, xmm4 ; abs(p1 - p0)
mov rdx, arg(2) ; get flimit
mov rdx, arg(2) ; get blimit
movdqa t1, xmm6 ; save to t1
@ -123,7 +123,7 @@
psubusb xmm1, xmm7
por xmm2, xmm3 ; abs(p1-q1)
movdqa xmm4, XMMWORD PTR [rdx] ; flimit
movdqa xmm7, XMMWORD PTR [rdx] ; blimit
movdqa xmm3, xmm0 ; q0
pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero
@ -134,13 +134,11 @@
psrlw xmm2, 1 ; abs(p1-q1)/2
psubusb xmm5, xmm3 ; p0-=q0
paddb xmm4, xmm4 ; flimit*2 (less than 255)
psubusb xmm3, xmm6 ; q0-=p0
por xmm5, xmm3 ; abs(p0 - q0)
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255)
movdqa xmm4, t0 ; hev get abs (q1 - q0)
@ -150,7 +148,7 @@
movdqa xmm2, XMMWORD PTR [rdx] ; hev
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
psubusb xmm4, xmm2 ; hev
psubusb xmm3, xmm2 ; hev
@ -278,7 +276,7 @@
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -288,7 +286,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -328,7 +326,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -338,7 +336,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -574,7 +572,7 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -584,7 +582,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -624,7 +622,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@ -634,7 +632,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -904,7 +902,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
movdqa xmm4, XMMWORD PTR [rdx]; limit
pmaxub xmm0, xmm7
mov rdx, arg(2) ; flimit
mov rdx, arg(2) ; blimit
psubusb xmm0, xmm4
movdqa xmm5, xmm2 ; q1
@ -921,12 +919,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
psrlw xmm5, 1 ; abs(p1-q1)/2
psubusb xmm6, xmm3 ; q0-p0
movdqa xmm2, XMMWORD PTR [rdx]; flimit
movdqa xmm4, XMMWORD PTR [rdx]; blimit
mov rdx, arg(4) ; get thresh
por xmm1, xmm6 ; abs(q0-p0)
paddb xmm2, xmm2 ; flimit*2 (less than 255)
movdqa xmm6, t0 ; get abs (q1 - q0)
@ -939,10 +936,9 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm6, xmm7 ; abs(q1 - q0) > thresh
paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255)
psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit
por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
por xmm1, xmm0 ; mask
@ -1014,7 +1010,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -1024,7 +1020,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1081,7 +1077,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@ -1091,7 +1087,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1239,7 +1235,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; int count
@ -1249,7 +1245,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1308,7 +1304,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
;(
; unsigned char *u,
; int src_pixel_step,
; const char *flimit,
; const char *blimit,
; const char *limit,
; const char *thresh,
; unsigned char *v
@ -1318,7 +1314,7 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1376,17 +1372,14 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit,
;)
global sym(vp8_loop_filter_simple_horizontal_edge_sse2)
sym(vp8_loop_filter_simple_horizontal_edge_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1394,13 +1387,8 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
mov rdx, arg(2) ;flimit ; get flimit
mov rdx, arg(2) ;blimit
movdqa xmm3, XMMWORD PTR [rdx]
mov rdx, arg(3) ;limit
movdqa xmm7, XMMWORD PTR [rdx]
paddb xmm3, xmm3 ; flimit*2 (less than 255)
paddb xmm3, xmm7 ; flimit * 2 + limit (less than 255)
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@ -1428,7 +1416,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm3 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm3, xmm3
pcmpeqb xmm5, xmm3
@ -1493,17 +1481,14 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
;(
; unsigned char *src_ptr,
; int src_pixel_step,
; const char *flimit,
; const char *limit,
; const char *thresh,
; int count
; const char *blimit,
;)
global sym(vp8_loop_filter_simple_vertical_edge_sse2)
sym(vp8_loop_filter_simple_vertical_edge_sse2):
push rbp ; save old base pointer value.
mov rbp, rsp ; set new base pointer value.
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SHADOW_ARGS_TO_STACK 3
SAVE_XMM 7
GET_GOT rbx ; save callee-saved reg
push rsi
push rdi
@ -1607,14 +1592,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
paddusb xmm5, xmm5 ; abs(p0-q0)*2
paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2
mov rdx, arg(2) ;flimit
mov rdx, arg(2) ;blimit
movdqa xmm7, XMMWORD PTR [rdx]
mov rdx, arg(3) ; get limit
movdqa xmm6, XMMWORD PTR [rdx]
paddb xmm7, xmm7 ; flimit*2 (less than 255)
paddb xmm7, xmm6 ; flimit * 2 + limit (less than 255)
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit
psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit
pxor xmm7, xmm7
pcmpeqb xmm5, xmm7 ; mm5 = mask

Просмотреть файл

@ -9,30 +9,18 @@
*/
#include "vpx_ports/config.h"
#include "loopfilter.h"
prototype_loopfilter(vp8_loop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_vertical_edge_c);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_c);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
#include "vpx_config.h"
#include "vp8/common/loopfilter.h"
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
prototype_loopfilter(vp8_loop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_vertical_edge_sse2);
prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
@ -42,113 +30,77 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
#if HAVE_MMX
/* Horizontal MB filtering */
void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_mmx(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
}
void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_mmx(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
if (v_ptr)
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
}
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_mmx(y_ptr + 12, y_stride, blimit);
}
#endif
@ -156,113 +108,65 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
/* Horizontal MB filtering */
#if HAVE_SSE2
void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
/* Vertical MB Filtering */
void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr);
}
/* Horizontal B Filtering */
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride);
}
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit);
vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit);
}
/* Vertical B Filtering */
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
int y_stride, int uv_stride, loop_filter_info *lfi)
{
(void) simpler_lpf;
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
if (u_ptr)
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4);
}
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit)
{
(void) u_ptr;
(void) v_ptr;
(void) uv_stride;
(void) simpler_lpf;
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit);
}
#endif
#if 0
void vp8_fast_loop_filter_vertical_edges_sse(unsigned char *y_ptr,
int y_stride,
loop_filter_info *lfi)
{
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
}
#endif

Просмотреть файл

@ -24,10 +24,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bv_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bh_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_mmx);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_mmx);
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_mmx);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_mmx);
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_mmx);
#if !CONFIG_RUNTIME_CPU_DETECT
@ -44,13 +44,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_mmx);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_mmx
#undef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_mmx
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_mmx
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_mmx
#undef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_mmx
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_mmx
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_mmx
@ -63,10 +63,10 @@ extern prototype_loopfilter_block(vp8_loop_filter_mbv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bv_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bh_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_sse2);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
extern prototype_simple_loopfilter(vp8_loop_filter_bvs_sse2);
extern prototype_simple_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
extern prototype_simple_loopfilter(vp8_loop_filter_bhs_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
@ -83,13 +83,13 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_sse2);
#define vp8_lf_normal_b_h vp8_loop_filter_bh_sse2
#undef vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_sse2
#define vp8_lf_simple_mb_v vp8_loop_filter_simple_vertical_edge_sse2
#undef vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_sse2
#undef vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_sse2
#define vp8_lf_simple_mb_h vp8_loop_filter_simple_horizontal_edge_sse2
#undef vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_sse2

Просмотреть файл

@ -26,7 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -256,7 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -456,7 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi

Просмотреть файл

@ -67,7 +67,7 @@ sym(vp8_recon4b_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
SAVE_XMM
SAVE_XMM 7
push rsi
push rdi
; end prolog
@ -229,3 +229,460 @@ sym(vp8_copy_mem16x16_sse2):
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
global sym(vp8_intra_pred_uv_dc_mmx2)
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
; end prolog
; from top
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; from left
dec rsi
lea rdi, [rax*3]
movzx ecx, byte [rsi+rax]
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
movzx edx, byte [rsi+rax*4]
add ecx, edx
; add up
pextrw edx, mm1, 0x0
lea edx, [edx+ecx+8]
sar edx, 4
movd mm1, edx
pshufw mm1, mm1, 0x0
packuswb mm1, mm1
; write out
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
lea rdi, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
global sym(vp8_intra_pred_uv_dctop_mmx2)
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
GET_GOT rbx
push rsi
push rdi
; end prolog
; from top
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; add up
paddw mm1, [GLOBAL(dc_4)]
psraw mm1, 3
pshufw mm1, mm1, 0x0
packuswb mm1, mm1
; write out
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
lea rdi, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
global sym(vp8_intra_pred_uv_dcleft_mmx2)
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
; end prolog
; from left
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
dec rsi
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
lea rsi, [rsi+rax*4]
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
movzx edx, byte [rsi+rdi]
lea edx, [ecx+edx+4]
; add up
shr edx, 3
movd mm1, edx
pshufw mm1, mm1, 0x0
packuswb mm1, mm1
; write out
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
lea rax, [rcx*3]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
lea rdi, [rdi+rcx*4]
movq [rdi ], mm1
movq [rdi+rcx ], mm1
movq [rdi+rcx*2], mm1
movq [rdi+rax ], mm1
; begin epilog
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
global sym(vp8_intra_pred_uv_dc128_mmx)
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
GET_GOT rbx
; end prolog
; write out
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
lea rax, [rax+rdx*4]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
; begin epilog
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1)
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
GET_GOT rbx
push rsi
push rdi
; end prolog
; read top row
mov edx, 4
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
sub rsi, rax
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
%endif
movq xmm1, [rsi]
punpcklbw xmm1, xmm0
; set up left ptrs ans subtract topleft
movd xmm3, [rsi-1]
lea rsi, [rsi+rax-1]
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
punpcklqdq xmm3, xmm3
%else
pshufb xmm3, xmm2
%endif
psubw xmm1, xmm3
; set up dest ptrs
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
vp8_intra_pred_uv_tm_%1_loop:
movd xmm3, [rsi]
movd xmm5, [rsi+rax]
%ifidn %1, sse2
punpcklbw xmm3, xmm0
punpcklbw xmm5, xmm0
pshuflw xmm3, xmm3, 0x0
pshuflw xmm5, xmm5, 0x0
punpcklqdq xmm3, xmm3
punpcklqdq xmm5, xmm5
%else
pshufb xmm3, xmm2
pshufb xmm5, xmm2
%endif
paddw xmm3, xmm1
paddw xmm5, xmm1
packuswb xmm3, xmm5
movq [rdi ], xmm3
movhps[rdi+rcx], xmm3
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz vp8_intra_pred_uv_tm_%1_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
%endmacro
vp8_intra_pred_uv_tm sse2
vp8_intra_pred_uv_tm ssse3
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
global sym(vp8_intra_pred_uv_ve_mmx)
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
; end prolog
; read from top
mov rax, arg(2) ;src;
movsxd rdx, dword ptr arg(3) ;src_stride;
sub rax, rdx
movq mm1, [rax]
; write out
mov rax, arg(0) ;dst;
movsxd rdx, dword ptr arg(1) ;dst_stride
lea rcx, [rdx*3]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
lea rax, [rax+rdx*4]
movq [rax ], mm1
movq [rax+rdx ], mm1
movq [rax+rdx*2], mm1
movq [rax+rcx ], mm1
; begin epilog
UNSHADOW_ARGS
pop rbp
ret
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
; int dst_stride
; unsigned char *src,
; int src_stride,
; )
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1)
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 4
push rsi
push rdi
%ifidn %1, ssse3
%ifndef GET_GOT_SAVE_ARG
push rbx
%endif
GET_GOT rbx
%endif
; end prolog
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
mov rsi, arg(2) ;src;
movsxd rax, dword ptr arg(3) ;src_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
lea rdx, [rcx*3]
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
dec rsi
%ifidn %1, mmx2
vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
movd mm1, [rsi+rax]
punpcklbw mm0, mm0
punpcklbw mm1, mm1
pshufw mm0, mm0, 0x0
pshufw mm1, mm1, 0x0
movq [rdi ], mm0
movq [rdi+rcx], mm1
lea rsi, [rsi+rax*2]
lea rdi, [rdi+rcx*2]
dec edx
jnz vp8_intra_pred_uv_ho_%1_loop
%else
movd xmm0, [rsi]
movd xmm3, [rsi+rax]
movd xmm1, [rsi+rax*2]
movd xmm4, [rsi+rbx]
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
pshufb xmm1, xmm2
movq [rdi ], xmm0
movhps [rdi+rcx], xmm0
movq [rdi+rcx*2], xmm1
movhps [rdi+rdx], xmm1
lea rsi, [rsi+rax*4]
lea rdi, [rdi+rcx*4]
movd xmm0, [rsi]
movd xmm3, [rsi+rax]
movd xmm1, [rsi+rax*2]
movd xmm4, [rsi+rbx]
punpcklbw xmm0, xmm3
punpcklbw xmm1, xmm4
pshufb xmm0, xmm2
pshufb xmm1, xmm2
movq [rdi ], xmm0
movhps [rdi+rcx], xmm0
movq [rdi+rcx*2], xmm1
movhps [rdi+rdx], xmm1
%endif
; begin epilog
%ifidn %1, ssse3
RESTORE_GOT
%ifndef GET_GOT_SAVE_ARG
pop rbx
%endif
%endif
pop rdi
pop rsi
UNSHADOW_ARGS
pop rbp
ret
%endmacro
vp8_intra_pred_uv_ho mmx2
vp8_intra_pred_uv_ho ssse3
SECTION_RODATA
dc_128:
times 8 db 128
dc_4:
times 4 dw 4
align 16
dc_1024:
times 8 dw 0x400
align 16
dc_00001111:
times 8 db 0
times 8 db 1

Просмотреть файл

@ -0,0 +1,96 @@
/*
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "vpx_ports/config.h"
#include "vp8/common/recon.h"
#include "recon_x86.h"
#include "vpx_mem/vpx_mem.h"
#define build_intra_predictors_mbuv_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
const unsigned char *src, int src_stride)
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dctop_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dcleft_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc128_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ho_ssse3);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_ve_mmx);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_sse2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_stride,
build_intra_predictors_mbuv_fn_t tm_func,
build_intra_predictors_mbuv_fn_t ho_func)
{
int mode = x->mode_info_context->mbmi.uv_mode;
build_intra_predictors_mbuv_fn_t fn;
int src_stride = x->dst.uv_stride;
switch (mode) {
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
case H_PRED: fn = ho_func; break;
case TM_PRED: fn = tm_func; break;
case DC_PRED:
if (x->up_available) {
if (x->left_available) {
fn = vp8_intra_pred_uv_dc_mmx2; break;
} else {
fn = vp8_intra_pred_uv_dctop_mmx2; break;
}
} else if (x->left_available) {
fn = vp8_intra_pred_uv_dcleft_mmx2; break;
} else {
fn = vp8_intra_pred_uv_dc128_mmx; break;
}
break;
default: return;
}
fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
}
void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
&x->predictor[320], 8,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
x->dst.v_buffer, x->dst.uv_stride,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}

Просмотреть файл

@ -46,6 +46,8 @@ extern prototype_copy_block(vp8_copy_mem16x16_mmx);
extern prototype_recon_block(vp8_recon2b_sse2);
extern prototype_recon_block(vp8_recon4b_sse2);
extern prototype_copy_block(vp8_copy_mem16x16_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_sse2);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_sse2);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_recon2
@ -57,6 +59,26 @@ extern prototype_copy_block(vp8_copy_mem16x16_sse2);
#undef vp8_recon_copy16x16
#define vp8_recon_copy16x16 vp8_copy_mem16x16_sse2
#undef vp8_recon_build_intra_predictors_mbuv
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_sse2
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_sse2
#endif
#endif
#if HAVE_SSSE3
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_ssse3);
extern prototype_build_intra_predictors(vp8_build_intra_predictors_mbuv_s_ssse3);
#if !CONFIG_RUNTIME_CPU_DETECT
#undef vp8_recon_build_intra_predictors_mbuv
#define vp8_recon_build_intra_predictors_mbuv vp8_build_intra_predictors_mbuv_ssse3
#undef vp8_recon_build_intra_predictors_mbuv_s
#define vp8_recon_build_intra_predictors_mbuv_s vp8_build_intra_predictors_mbuv_s_ssse3
#endif
#endif
#endif

Просмотреть файл

@ -113,97 +113,6 @@ nextrow:
ret
;
; THIS FUNCTION APPEARS TO BE UNUSED
;
;void vp8_filter_block1d_v6_mmx
;(
; short *src_ptr,
; unsigned char *output_ptr,
; unsigned int pixels_per_line,
; unsigned int pixel_step,
; unsigned int output_height,
; unsigned int output_width,
; short * vp8_filter
;)
global sym(vp8_filter_block1d_v6_mmx)
sym(vp8_filter_block1d_v6_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
GET_GOT rbx
push rsi
push rdi
; end prolog
movq mm5, [GLOBAL(rd)]
push rbx
mov rbx, arg(6) ;vp8_filter
movq mm1, [rbx + 16] ; do both the negative taps first!!!
movq mm2, [rbx + 32] ;
movq mm6, [rbx + 48] ;
movq mm7, [rbx + 64] ;
movsxd rdx, dword ptr arg(2) ;pixels_per_line
mov rdi, arg(1) ;output_ptr
mov rsi, arg(0) ;src_ptr
sub rsi, rdx
sub rsi, rdx
movsxd rcx, DWORD PTR arg(4) ;output_height
movsxd rax, DWORD PTR arg(5) ;output_width ; destination pitch?
pxor mm0, mm0 ; mm0 = 00000000
nextrow_v:
movq mm3, [rsi+rdx] ; mm3 = p0..p8 = row -1
pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers.
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 2
pmullw mm4, mm7 ; mm4 *= kernel 4 modifiers.
paddsw mm3, mm4 ; mm3 += mm4
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 0
pmullw mm4, mm2 ; mm4 *= kernel 2 modifiers.
paddsw mm3, mm4 ; mm3 += mm4
movq mm4, [rsi] ; mm4 = p0..p3 = row -2
pmullw mm4, [rbx] ; mm4 *= kernel 0 modifiers.
paddsw mm3, mm4 ; mm3 += mm4
add rsi, rdx ; move source forward 1 line to avoid 3 * pitch
movq mm4, [rsi + 2*rdx] ; mm4 = p0..p3 = row 1
pmullw mm4, mm6 ; mm4 *= kernel 3 modifiers.
paddsw mm3, mm4 ; mm3 += mm4
movq mm4, [rsi + 4*rdx] ; mm4 = p0..p3 = row 3
pmullw mm4, [rbx +80] ; mm4 *= kernel 3 modifiers.
paddsw mm3, mm4 ; mm3 += mm4
paddsw mm3, mm5 ; mm3 += round value
psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128
packuswb mm3, mm0 ; pack and saturate
movd [rdi],mm3 ; store the results in the destination
add rdi,rax;
dec rcx ; decrement count
jnz nextrow_v ; next row
pop rbx
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
;void vp8_filter_block1dc_v6_mmx
;(
; short *src_ptr,

Просмотреть файл

@ -37,7 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -157,7 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 7
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -333,7 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -428,7 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -538,7 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -651,7 +651,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -816,7 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -908,7 +908,6 @@ sym(vp8_unpack_block1d16_h6_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 5
;SAVE_XMM ;xmm6, xmm7 are not used here.
GET_GOT rbx
push rsi
push rdi
@ -948,7 +947,6 @@ unpack_block1d16_h6_sse2_rowloop:
pop rdi
pop rsi
RESTORE_GOT
;RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -969,7 +967,7 @@ sym(vp8_bilinear_predict16x16_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1238,7 +1236,7 @@ sym(vp8_bilinear_predict8x8_sse2):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi

Просмотреть файл

@ -39,6 +39,7 @@ sym(vp8_filter_block1d8_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -107,6 +108,7 @@ filter_block1d8_h6_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -162,6 +164,7 @@ filter_block1d8_h4_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -179,7 +182,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -194,10 +197,6 @@ sym(vp8_filter_block1d16_h6_ssse3):
mov rdi, arg(2) ;output_ptr
;;
;; cmp esi, DWORD PTR [rax]
;; je vp8_filter_block1d16_h4_ssse3
mov rsi, arg(0) ;src_ptr
movdqa xmm4, XMMWORD PTR [rax] ;k0_k5
@ -271,61 +270,7 @@ filter_block1d16_h6_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
vp8_filter_block1d16_h4_ssse3:
movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4
movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixels_per_line
movsxd rcx, dword ptr arg(4) ;output_height
movsxd rdx, dword ptr arg(3) ;output_pitch
filter_block1d16_h4_rowloop_ssse3:
movdqu xmm1, XMMWORD PTR [rsi - 2]
movdqa xmm2, xmm1
pshufb xmm1, [GLOBAL(shuf2b)]
pshufb xmm2, [GLOBAL(shuf3b)]
pmaddubsw xmm1, xmm5
movdqu xmm3, XMMWORD PTR [rsi + 6]
pmaddubsw xmm2, xmm6
movdqa xmm0, xmm3
pshufb xmm3, [GLOBAL(shuf3b)]
pshufb xmm0, [GLOBAL(shuf2b)]
paddsw xmm1, [GLOBAL(rd)]
paddsw xmm1, xmm2
pmaddubsw xmm0, xmm5
pmaddubsw xmm3, xmm6
psraw xmm1, 7
packuswb xmm1, xmm1
lea rsi, [rsi + rax]
paddsw xmm3, xmm0
paddsw xmm3, [GLOBAL(rd)]
psraw xmm3, 7
packuswb xmm3, xmm3
punpcklqdq xmm1, xmm3
movdqa XMMWORD Ptr [rdi], xmm1
add rdi, rdx
dec rcx
jnz filter_block1d16_h4_rowloop_ssse3
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -344,6 +289,7 @@ sym(vp8_filter_block1d4_h6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -451,6 +397,7 @@ filter_block1d4_h4_rowloop_ssse3:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -471,6 +418,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -566,6 +514,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -638,6 +587,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -656,6 +606,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -728,6 +679,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -776,6 +728,7 @@ vp8_filter_block1d8_v4_ssse3_loop:
pop rdi
pop rsi
RESTORE_GOT
RESTORE_XMM
UNSHADOW_ARGS
pop rbp
ret
@ -932,7 +885,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi
@ -1195,7 +1148,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 6
SAVE_XMM
SAVE_XMM 7
GET_GOT rbx
push rsi
push rdi

Просмотреть файл

@ -11,7 +11,7 @@
#include "vpx_ports/config.h"
#include "vpx_ports/mem.h"
#include "subpixel.h"
#include "vp8/common/subpixel.h"
extern const short vp8_six_tap_mmx[8][6*8];
extern const short vp8_bilinear_filters_mmx[8][2*8];

Просмотреть файл

@ -9,25 +9,21 @@
*/
#include "vpx_ports/config.h"
#include "vpx_config.h"
#include "vpx_ports/x86.h"
#include "g_common.h"
#include "subpixel.h"
#include "loopfilter.h"
#include "recon.h"
#include "idct.h"
#include "pragmas.h"
#include "onyxc_int.h"
#include "vp8/common/g_common.h"
#include "vp8/common/subpixel.h"
#include "vp8/common/loopfilter.h"
#include "vp8/common/recon.h"
#include "vp8/common/idct.h"
#include "vp8/common/pragmas.h"
#include "vp8/common/onyxc_int.h"
void vp8_arch_x86_common_init(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
int flags = x86_simd_caps();
int mmx_enabled = flags & HAS_MMX;
int xmm_enabled = flags & HAS_SSE;
int wmt_enabled = flags & HAS_SSE2;
int SSSE3Enabled = flags & HAS_SSSE3;
/* Note:
*
@ -39,7 +35,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
/* Override default functions with fastest ones for this CPU. */
#if HAVE_MMX
if (mmx_enabled)
if (flags & HAS_MMX)
{
rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx;
rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx;
@ -67,9 +63,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_mmx;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_mmx;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_mmx;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_mmx;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_mmx;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_mmx;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_mmx;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_mmx;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_mmx;
#if CONFIG_POSTPROC
@ -83,11 +79,15 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
#endif
#if HAVE_SSE2
if (wmt_enabled)
if (flags & HAS_SSE2)
{
rtcd->recon.recon2 = vp8_recon2b_sse2;
rtcd->recon.recon4 = vp8_recon4b_sse2;
rtcd->recon.copy16x16 = vp8_copy_mem16x16_sse2;
rtcd->recon.build_intra_predictors_mbuv =
vp8_build_intra_predictors_mbuv_sse2;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_sse2;
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_sse2;
@ -101,9 +101,9 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_sse2;
rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_sse2;
rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_sse2;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_sse2;
rtcd->loopfilter.simple_mb_v = vp8_loop_filter_simple_vertical_edge_sse2;
rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_sse2;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_sse2;
rtcd->loopfilter.simple_mb_h = vp8_loop_filter_simple_horizontal_edge_sse2;
rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_sse2;
#if CONFIG_POSTPROC
@ -118,7 +118,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
#if HAVE_SSSE3
if (SSSE3Enabled)
if (flags & HAS_SSSE3)
{
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3;
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3;
@ -126,6 +126,11 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3;
rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3;
rtcd->recon.build_intra_predictors_mbuv =
vp8_build_intra_predictors_mbuv_ssse3;
rtcd->recon.build_intra_predictors_mbuv_s =
vp8_build_intra_predictors_mbuv_s_ssse3;
}
#endif

Просмотреть файл

@ -11,23 +11,24 @@
#include "vpx_ports/config.h"
#include "vpx_ports/arm.h"
#include "blockd.h"
#include "pragmas.h"
#include "postproc.h"
#include "dboolhuff.h"
#include "dequantize.h"
#include "onyxd_int.h"
#include "vp8/common/blockd.h"
#include "vp8/common/pragmas.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/decoder/onyxd_int.h"
void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
{
#if CONFIG_RUNTIME_CPU_DETECT
int flags = pbi->common.rtcd.flags;
int has_edsp = flags & HAS_EDSP;
int has_media = flags & HAS_MEDIA;
int has_neon = flags & HAS_NEON;
#if HAVE_ARMV5TE
if (flags & HAS_EDSP)
{
}
#endif
#if HAVE_ARMV6
if (has_media)
if (flags & HAS_MEDIA)
{
pbi->dequant.block = vp8_dequantize_b_v6;
pbi->dequant.idct_add = vp8_dequant_idct_add_v6;
@ -35,17 +36,11 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6;
#if 0 /*For use with RTCD, when implemented*/
pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
#endif
}
#endif
#if HAVE_ARMV7
if (has_neon)
if (flags & HAS_NEON)
{
pbi->dequant.block = vp8_dequantize_b_neon;
pbi->dequant.idct_add = vp8_dequant_idct_add_neon;
@ -54,12 +49,6 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
#if 0 /*For use with RTCD, when implemented*/
pbi->dboolhuff.start = vp8dx_start_decode_c;
pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c;
pbi->dboolhuff.debool = vp8dx_decode_bool_c;
pbi->dboolhuff.devalue = vp8dx_decode_value_c;
#endif
}
#endif
#endif

Просмотреть файл

@ -9,8 +9,8 @@
*/
#include "vpx_ports/config.h"
#include "idct.h"
#include "dequantize.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
void vp8_dequant_dc_idct_add_y_block_v6
(short *q, short *dq, unsigned char *pre,

Просмотреть файл

@ -1,43 +0,0 @@
#ifndef DBOOLHUFF_ARM_H
#define DBOOLHUFF_ARM_H
/* JLK
* There are currently no arm-optimized versions of
* these functions. As they are implemented, they
* can be uncommented below and added to
* arm/dsystemdependent.c
*
* The existing asm code is likely so different as
* to be useless. However, its been left (for now)
* for reference.
*/
#if 0
#if HAVE_ARMV6
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_v6
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_v6
#undef vp8_dbool_debool
#define vp8_dbool_debool vp8_decode_bool_v6
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_v6
#endif /* HAVE_ARMV6 */
#if HAVE_ARMV7
#undef vp8_dbool_start
#define vp8_dbool_start vp8dx_start_decode_neon
#undef vp8_dbool_fill
#define vp8_dbool_fill vp8_bool_decoder_fill_neon
#undef vp8_dbool_debool
#define vp8_dbool_debool vp8_decode_bool_neon
#undef vp8_dbool_devalue
#define vp8_dbool_devalue vp8_decode_value_neon
#endif /* HAVE_ARMV7 */
#endif
#endif /* DBOOLHUFF_ARM_H */

Просмотреть файл

@ -10,9 +10,8 @@
#include "vpx_ports/config.h"
#include "dequantize.h"
#include "predictdc.h"
#include "idct.h"
#include "vp8/decoder/dequantize.h"
#include "vp8/common/idct.h"
#include "vpx_mem/vpx_mem.h"
#if HAVE_ARMV7
@ -27,7 +26,6 @@ extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
void vp8_dequantize_b_neon(BLOCKD *d)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
short *DQC = d->dequant;
@ -39,7 +37,6 @@ void vp8_dequantize_b_neon(BLOCKD *d)
#if HAVE_ARMV6
void vp8_dequantize_b_v6(BLOCKD *d)
{
int i;
short *DQ = d->dqcoeff;
short *Q = d->qcoeff;
short *DQC = d->dequant;

Просмотреть файл

@ -1,320 +0,0 @@
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
EXPORT |vp8_decode_mb_tokens_v6|
AREA |.text|, CODE, READONLY ; name this block of code
INCLUDE vpx_asm_offsets.asm
l_qcoeff EQU 0
l_i EQU 4
l_type EQU 8
l_stop EQU 12
l_c EQU 16
l_l_ptr EQU 20
l_a_ptr EQU 24
l_bc EQU 28
l_coef_ptr EQU 32
l_stacksize EQU 64
;; constant offsets -- these should be created at build time
c_block2above_offset EQU 25
c_entropy_nodes EQU 11
c_dct_eob_token EQU 11
|vp8_decode_mb_tokens_v6| PROC
stmdb sp!, {r4 - r11, lr}
sub sp, sp, #l_stacksize
mov r7, r1 ; type
mov r9, r0 ; detoken
ldr r1, [r9, #detok_current_bc]
ldr r0, [r9, #detok_qcoeff_start_ptr]
mov r11, #0 ; i
mov r3, #16 ; stop
cmp r7, #1 ; type ?= 1
addeq r11, r11, #24 ; i = 24
addeq r3, r3, #8 ; stop = 24
addeq r0, r0, #3, 24 ; qcoefptr += 24*16
str r0, [sp, #l_qcoeff]
str r11, [sp, #l_i]
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r1, [sp, #l_bc]
add lr, r9, r7, lsl #2 ; detoken + type*4
ldr r8, [r1, #bool_decoder_user_buffer]
ldr r10, [lr, #detok_coef_probs]
ldr r5, [r1, #bool_decoder_count]
ldr r6, [r1, #bool_decoder_range]
ldr r4, [r1, #bool_decoder_value]
str r10, [sp, #l_coef_ptr]
BLOCK_LOOP
ldr r3, [r9, #detok_ptr_block2leftabove]
ldr r1, [r9, #detok_L]
ldr r2, [r9, #detok_A]
ldrb r12, [r3, r11]! ; block2left[i]
ldrb r3, [r3, #c_block2above_offset]; block2above[i]
cmp r7, #0 ; c = !type
moveq r7, #1
movne r7, #0
ldrb r0, [r1, r12]! ; *(L += block2left[i])
ldrb r3, [r2, r3]! ; *(A += block2above[i])
mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11
; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
cmp r0, #0 ; *l ?= 0
movne r0, #1
cmp r3, #0 ; *a ?= 0
addne r0, r0, #1 ; t
str r1, [sp, #l_l_ptr] ; save &l
str r2, [sp, #l_a_ptr] ; save &a
smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES)
mov r1, #0 ; t = 0
str r7, [sp, #l_c]
;align 4
COEFF_LOOP
ldr r3, [r9, #detok_ptr_coef_bands_x]
ldr lr, [r9, #detok_coef_tree_ptr]
;STALL
ldrb r3, [r3, r7] ; coef_bands_x[c]
;STALL
;STALL
add r0, r0, r3 ; Prob += coef_bands_x[c]
get_token_loop
ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1]
mov r3, r6, lsl #8 ; range << 8
sub r3, r3, #256 ; (range << 8) - (1 << 8)
mov r10, #1 ; 1
smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8)
ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr
;++
subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE
addhs r1, r1, #1 ; t += 1
movhs r4, r3 ; value -= bigsplit (split << 24)
subhs r2, r6, r2 ; range -= split
; movlo r6, r2 ; range = split
ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t]
; NORMALIZE
clz r3, r2 ; vp8dx_bitreader_norm[range] + 24
sub r3, r3, #24 ; vp8dx_bitreader_norm[range]
subs r5, r5, r3 ; count -= shift
mov r6, r2, lsl r3 ; range <<= shift
mov r4, r4, lsl r3 ; value <<= shift
; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
addle r5, r5, #8 ; count += 8
rsble r3, r5, #24 ; 24 - count
addle r8, r8, #1 ; bufptr++
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16
cmp r1, #0 ; t ?= 0
bgt get_token_loop ; while (t > 0)
cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN)
beq END_OF_BLOCK ; break
rsb lr, r1, #0 ; v = -t;
cmp lr, #4 ; if(v > FOUR_TOKEN)
ble SKIP_EXTRABITS
ldr r3, [r9, #detok_teb_base_ptr]
mov r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count
add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4)
ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
extrabits_loop
add r3, r0, r7 ; &teb_ptr->Probs[bits_count]
ldrb r2, [r3, #4] ; probability. why +4?
mov r3, r6, lsl #8 ; range << 8
sub r3, r3, #256 ; range << 8 + 1 << 8
smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8)
ldrb r12, [r8] ; *bufptr
;++
subs r10, r4, r2, lsl #24 ; value - (split<<24)
movhs r4, r10 ; value = value - (split << 24)
subhs r2, r6, r2 ; range = range - split
addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<<bits_count)
; NORMALIZE
clz r3, r2 ; shift - leading zeros in split
sub r3, r3, #24 ; don't count first 3 bytes
subs r5, r5, r3 ; count -= shift
mov r6, r2, lsl r3 ; range = range << shift
mov r4, r4, lsl r3 ; value <<= shift
addle r5, r5, #8 ; count += BR_COUNT
addle r8, r8, #1 ; bufptr++
rsble r3, r5, #24 ; BR_COUNT - count
orrle r4, r4, r12, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
subs r0, r0, #1 ; bits_count --
bpl extrabits_loop
SKIP_EXTRABITS
ldr r11, [sp, #l_qcoeff]
ldr r0, [sp, #l_coef_ptr] ; Prob = coef_probs
cmp r1, #0 ; check for nonzero token - if (t)
beq SKIP_EOB_CHECK ; if t is zero, we will skip the eob table chec
add r3, r6, #1 ; range + 1
mov r2, r3, lsr #1 ; split = (range + 1) >> 1
subs r3, r4, r2, lsl #24 ; value - (split<<24)
movhs r4, r3 ; value -= (split << 24)
subhs r2, r6, r2 ; range -= split
mvnhs r3, lr ; -v
addhs lr, r3, #1 ; v = (v ^ -1) + 1
; NORMALIZE
clz r3, r2 ; leading 0s in split
sub r3, r3, #24 ; shift
subs r5, r5, r3 ; count -= shift
mov r6, r2, lsl r3 ; range <<= shift
mov r4, r4, lsl r3 ; value <<= shift
ldrleb r2, [r8], #1 ; *(bufptr++)
addle r5, r5, #8 ; count += 8
rsble r3, r5, #24 ; BR_COUNT - count
orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count)
add r0, r0, #11 ; Prob += ENTROPY_NODES (11)
cmn r1, #1 ; t < -ONE_TOKEN
addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11)
mvn r1, #1 ; t = -1 ???? C is -2
SKIP_EOB_CHECK
ldr r7, [sp, #l_c] ; c
ldr r3, [r9, #detok_scan]
add r1, r1, #2 ; t+= 2
cmp r7, #15 ; c should will be one higher
ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value
add r7, r7, #1 ; c++
add r3, r11, r3, lsl #1 ; qcoeff + scan[c]
str r7, [sp, #l_c] ; store c
strh lr, [r3] ; qcoef_ptr[scan[c]] = v
blt COEFF_LOOP
sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c
END_OF_BLOCK
ldr r3, [sp, #l_type] ; type
ldr r10, [sp, #l_coef_ptr] ; coef_ptr
ldr r0, [sp, #l_qcoeff] ; qcoeff
ldr r11, [sp, #l_i] ; i
ldr r12, [sp, #l_stop] ; stop
cmp r3, #0 ; type ?= 0
moveq r1, #1
movne r1, #0
add r3, r11, r9 ; detok + i
cmp r7, r1 ; c ?= !type
strb r7, [r3, #detok_eob] ; eob[i] = c
ldr r7, [sp, #l_l_ptr] ; l
ldr r2, [sp, #l_a_ptr] ; a
movne r3, #1 ; t
moveq r3, #0
add r0, r0, #32 ; qcoeff += 32 (16 * 2?)
add r11, r11, #1 ; i++
strb r3, [r7] ; *l = t
strb r3, [r2] ; *a = t
str r0, [sp, #l_qcoeff] ; qcoeff
str r11, [sp, #l_i] ; i
cmp r11, r12 ; i < stop
ldr r7, [sp, #l_type] ; type
blt BLOCK_LOOP
cmp r11, #25 ; i ?= 25
bne ln2_decode_mb_to
ldr r12, [r9, #detok_qcoeff_start_ptr]
ldr r10, [r9, #detok_coef_probs]
mov r7, #0 ; type/i = 0
mov r3, #16 ; stop = 16
str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr
str r7, [sp, #l_i]
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0]
b BLOCK_LOOP
ln2_decode_mb_to
cmp r11, #16 ; i ?= 16
bne ln1_decode_mb_to
mov r10, #detok_coef_probs
add r10, r10, #2*4 ; coef_probs[type]
ldr r10, [r9, r10] ; detok + detok_coef_probs[type]
mov r7, #2 ; type = 2
mov r3, #24 ; stop = 24
str r7, [sp, #l_type]
str r3, [sp, #l_stop]
str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type]
b BLOCK_LOOP
ln1_decode_mb_to
ldr r2, [sp, #l_bc]
mov r0, #0
nop
str r8, [r2, #bool_decoder_user_buffer]
str r5, [r2, #bool_decoder_count]
str r4, [r2, #bool_decoder_value]
str r6, [r2, #bool_decoder_range]
add sp, sp, #l_stacksize
ldmia sp!, {r4 - r11, pc}
ENDP ; |vp8_decode_mb_tokens_v6|
END

Просмотреть файл

@ -35,7 +35,7 @@
ldr r1, [sp, #4] ; stride
adr r12, _CONSTANTS_
adr r12, cospi8sqrt2minus1 ; pointer to the first constant
vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon
vmul.i16 q2, q4, q6
@ -123,7 +123,6 @@
ENDP ; |vp8_dequant_idct_add_neon|
; Constant Pool
_CONSTANTS_ EQU cospi8sqrt2minus1
cospi8sqrt2minus1 DCD 0x4e7b4e7b
sinpi8sqrt2 DCD 0x8a8c8a8c

Просмотреть файл

@ -9,8 +9,8 @@
*/
#include "vpx_ports/config.h"
#include "idct.h"
#include "dequantize.h"
#include "vp8/common/idct.h"
#include "vp8/decoder/dequantize.h"
/* place these declarations here because we don't want to maintain them
* outside of this scope

Просмотреть файл

@ -41,7 +41,7 @@
ldr r1, [sp, #4]
vld1.32 {d31[1]}, [r12]
adr r2, _CONSTANTS_
adr r2, cospi8sqrt2minus1 ; pointer to the first constant
ldrh r12, [r1], #2 ; lo *dc
ldrh r1, [r1] ; hi *dc
@ -198,7 +198,6 @@
ENDP ; |idct_dequant_dc_full_2x_neon|
; Constant Pool
_CONSTANTS_ EQU cospi8sqrt2minus1
cospi8sqrt2minus1 DCD 0x4e7b
; because the lowest bit in 0x8a8c is 0, we can pre-shift this
sinpi8sqrt2 DCD 0x4546

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше