зеркало из https://github.com/mozilla/pjs.git
Bug 608066 - Update libvpx to v0.9.5. r=chris,khuey a=b-f
This commit is contained in:
Родитель
0a19a9180e
Коммит
767618fe25
|
@ -165,7 +165,11 @@ MOZ_TREMOR = @MOZ_TREMOR@
|
|||
MOZ_WEBM = @MOZ_WEBM@
|
||||
VPX_AS = @VPX_AS@
|
||||
VPX_ASFLAGS = @VPX_ASFLAGS@
|
||||
VPX_DASH_C_FLAG = @VPX_DASH_C_FLAG@
|
||||
VPX_AS_CONVERSION = @VPX_AS_CONVERSION@
|
||||
VPX_ASM_SUFFIX = @VPX_ASM_SUFFIX@
|
||||
VPX_X86_ASM = @VPX_X86_ASM@
|
||||
VPX_ARM_ASM = @VPX_ARM_ASM@
|
||||
NS_PRINTING = @NS_PRINTING@
|
||||
MOZ_CRASHREPORTER = @MOZ_CRASHREPORTER@
|
||||
MOZ_HELP_VIEWER = @MOZ_HELP_VIEWER@
|
||||
|
|
25
configure.in
25
configure.in
|
@ -4958,7 +4958,11 @@ MOZ_MEDIA=
|
|||
MOZ_WEBM=1
|
||||
VPX_AS=
|
||||
VPX_ASFLAGS=
|
||||
dnl Reset the variable that is actually assigned and AC_SUBST'ed (VPX_DASH_C_FLAG).
VPX_DASH_C_FLAG=
|
||||
VPX_AS_CONVERSION=
|
||||
VPX_ASM_SUFFIX=
|
||||
VPX_X86_ASM=
|
||||
VPX_ARM_ASM=
|
||||
MOZ_PANGO=1
|
||||
MOZ_PERMISSIONS=1
|
||||
MOZ_PLACES=1
|
||||
|
@ -6045,8 +6049,10 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
|
|||
|
||||
|
||||
dnl Detect if we can use an assembler to compile optimized assembly for libvpx.
|
||||
dnl We currently require yasm on all platforms and require yasm 1.1.0 on Win32.
|
||||
dnl We currently require yasm on all x86 platforms and require yasm 1.1.0 on Win32.
|
||||
dnl We currently require gcc on all arm platforms.
|
||||
VPX_AS=$YASM
|
||||
VPX_ASM_SUFFIX=asm
|
||||
|
||||
dnl See if we have assembly on this platform.
|
||||
case "$OS_ARCH:$CPU_ARCH" in
|
||||
|
@ -6093,6 +6099,17 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
|
|||
fi
|
||||
fi
|
||||
;;
|
||||
*:arm*)
|
||||
if test -n "$GNU_AS" ; then
|
||||
VPX_AS=$AS
|
||||
dnl These flags are a lie; they're just used to enable the requisite
|
||||
dnl opcodes; actual arch detection is done at runtime.
|
||||
VPX_ASFLAGS="-march=armv7-a -mfpu=neon"
|
||||
VPX_DASH_C_FLAG="-c"
|
||||
VPX_AS_CONVERSION="$PERL ${srcdir}/media/libvpx/build/make/ads2gas.pl"
|
||||
VPX_ASM_SUFFIX="$ASM_SUFFIX"
|
||||
VPX_ARM_ASM=1
|
||||
fi
|
||||
esac
|
||||
|
||||
if test -n "$COMPILE_ENVIRONMENT" -a -n "$VPX_X86_ASM" -a -z "$VPX_AS"; then
|
||||
|
@ -6101,6 +6118,8 @@ if test -n "$MOZ_WEBM" -a -z "$MOZ_NATIVE_LIBVPX"; then
|
|||
|
||||
if test -n "$VPX_X86_ASM"; then
|
||||
AC_DEFINE(VPX_X86_ASM)
|
||||
elif test -n "$VPX_ARM_ASM"; then
|
||||
AC_DEFINE(VPX_ARM_ASM)
|
||||
else
|
||||
AC_MSG_WARN([No assembler or assembly support for libvpx. Using unoptimized C routines.])
|
||||
fi
|
||||
|
@ -9082,7 +9101,11 @@ AC_SUBST(MOZ_OGG)
|
|||
AC_SUBST(MOZ_ALSA_LIBS)
|
||||
AC_SUBST(VPX_AS)
|
||||
AC_SUBST(VPX_ASFLAGS)
|
||||
AC_SUBST(VPX_DASH_C_FLAG)
|
||||
AC_SUBST(VPX_AS_CONVERSION)
|
||||
AC_SUBST(VPX_ASM_SUFFIX)
|
||||
AC_SUBST(VPX_X86_ASM)
|
||||
AC_SUBST(VPX_ARM_ASM)
|
||||
|
||||
if test "$USING_HCC"; then
|
||||
CC='${topsrcdir}/build/hcc'
|
||||
|
|
|
@ -53,8 +53,10 @@ LOCAL_INCLUDES += \
|
|||
-I$(topsrcdir)/media/libvpx \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/ \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/common/ \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/common/arm \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/common/x86 \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/decoder \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/decoder/arm \
|
||||
-I$(topsrcdir)/media/libvpx/vp8/decoder/x86 \
|
||||
-I$(topsrcdir)/media/libvpx/vpx_codec \
|
||||
-I$(topsrcdir)/media/libvpx/vpx_mem/ \
|
||||
|
@ -64,25 +66,35 @@ LOCAL_INCLUDES += \
|
|||
$(NULL)
|
||||
|
||||
VPATH += \
|
||||
$(srcdir)/build/make \
|
||||
$(srcdir)/vpx \
|
||||
$(srcdir)/vpx/src \
|
||||
$(srcdir)/vpx_mem \
|
||||
$(srcdir)/vpx_mem/include \
|
||||
$(srcdir)/vpx_ports \
|
||||
$(srcdir)/vpx_scale \
|
||||
$(srcdir)/vpx_scale/arm \
|
||||
$(srcdir)/vpx_scale/generic \
|
||||
$(srcdir)/vp8 \
|
||||
$(srcdir)/vp8/common \
|
||||
$(srcdir)/vp8/common/arm \
|
||||
$(srcdir)/vp8/common/arm/armv6 \
|
||||
$(srcdir)/vp8/common/arm/neon \
|
||||
$(srcdir)/vp8/common/generic \
|
||||
$(srcdir)/vp8/common/x86 \
|
||||
$(srcdir)/vp8/decoder \
|
||||
$(srcdir)/vp8/decoder/arm \
|
||||
$(srcdir)/vp8/decoder/arm/armv6 \
|
||||
$(srcdir)/vp8/decoder/arm/neon \
|
||||
$(srcdir)/vp8/decoder/generic \
|
||||
$(srcdir)/vp8/decoder/x86 \
|
||||
$(NULL)
|
||||
|
||||
ASM_SUFFIX=asm
|
||||
#Setup the libvpx assembler config.
|
||||
AS=$(VPX_AS)
|
||||
ASFLAGS=$(VPX_ASFLAGS) -I$(topsrcdir)/media/libvpx/ -I$(topsrcdir)/media/libvpx/vpx_ports/
|
||||
ASFLAGS=$(VPX_ASFLAGS) -I. -I$(topsrcdir)/media/libvpx/ -I$(topsrcdir)/media/libvpx/vpx_ports/
|
||||
AS_DASH_C_FLAG=$(VPX_DASH_C_FLAG)
|
||||
ASM_SUFFIX=$(VPX_ASM_SUFFIX)
|
||||
|
||||
EXPORTS_NAMESPACES = vpx
|
||||
|
||||
|
@ -104,6 +116,7 @@ EXPORTS_vpx = \
|
|||
mem.h \
|
||||
vpx_integer.h \
|
||||
vpx_timer.h \
|
||||
arm.h \
|
||||
x86.h \
|
||||
scale_mode.h \
|
||||
vpxscale.h \
|
||||
|
@ -145,9 +158,9 @@ CSRCS += \
|
|||
dboolhuff.c \
|
||||
decodemv.c \
|
||||
decodframe.c \
|
||||
demode.c \
|
||||
dequantize.c \
|
||||
detokenize.c \
|
||||
reconintra_mt.c \
|
||||
idct_blk.c \
|
||||
onyxd_if.c \
|
||||
threading.c \
|
||||
|
@ -168,6 +181,7 @@ CSRCS += \
|
|||
ifdef VPX_X86_ASM
|
||||
# Building on an x86 platform with a supported assembler, include
|
||||
# the optimized assembly in the build.
|
||||
|
||||
CSRCS += \
|
||||
idct_blk_mmx.c \
|
||||
idct_blk_sse2.c \
|
||||
|
@ -196,7 +210,116 @@ ASFILES += \
|
|||
$(NULL)
|
||||
|
||||
endif
|
||||
|
||||
|
||||
ifdef VPX_ARM_ASM
|
||||
# Building on an ARM platform with a supported assembler, include
|
||||
# the optimized assembly in the build.
|
||||
|
||||
# The Android NDK doesn't pre-define anything to indicate the OS it's on, so
|
||||
# do it for them.
|
||||
ifeq ($(OS_TARGET),Android)
|
||||
DEFINES += -D__linux__
|
||||
endif
|
||||
|
||||
CSRCS += \
|
||||
arm_cpudetect.c \
|
||||
arm_systemdependent.c \
|
||||
bilinearfilter_arm.c \
|
||||
filter_arm.c \
|
||||
loopfilter_arm.c \
|
||||
reconintra_arm.c \
|
||||
arm_dsystemdependent.c \
|
||||
dequantize_arm.c \
|
||||
idct_blk_v6.c \
|
||||
idct_blk_neon.c \
|
||||
recon_neon.c \
|
||||
$(NULL)
|
||||
|
||||
VPX_ASFILES = \
|
||||
detokenize.asm \
|
||||
bilinearfilter_v6.asm \
|
||||
copymem8x4_v6.asm \
|
||||
copymem8x8_v6.asm \
|
||||
copymem16x16_v6.asm \
|
||||
dc_only_idct_add_v6.asm \
|
||||
iwalsh_v6.asm \
|
||||
filter_v6.asm \
|
||||
idct_v6.asm \
|
||||
loopfilter_v6.asm \
|
||||
recon_v6.asm \
|
||||
simpleloopfilter_v6.asm \
|
||||
sixtappredict8x4_v6.asm \
|
||||
bilinearpredict4x4_neon.asm \
|
||||
bilinearpredict8x4_neon.asm \
|
||||
bilinearpredict8x8_neon.asm \
|
||||
bilinearpredict16x16_neon.asm \
|
||||
copymem8x4_neon.asm \
|
||||
copymem8x8_neon.asm \
|
||||
copymem16x16_neon.asm \
|
||||
dc_only_idct_add_neon.asm \
|
||||
iwalsh_neon.asm \
|
||||
loopfilter_neon.asm \
|
||||
loopfiltersimplehorizontaledge_neon.asm \
|
||||
loopfiltersimpleverticaledge_neon.asm \
|
||||
mbloopfilter_neon.asm \
|
||||
recon2b_neon.asm \
|
||||
recon4b_neon.asm \
|
||||
reconb_neon.asm \
|
||||
shortidct4x4llm_1_neon.asm \
|
||||
shortidct4x4llm_neon.asm \
|
||||
sixtappredict4x4_neon.asm \
|
||||
sixtappredict8x4_neon.asm \
|
||||
sixtappredict8x8_neon.asm \
|
||||
sixtappredict16x16_neon.asm \
|
||||
recon16x16mb_neon.asm \
|
||||
buildintrapredictorsmby_neon.asm \
|
||||
save_neon_reg.asm \
|
||||
dequant_dc_idct_v6.asm \
|
||||
dequant_idct_v6.asm \
|
||||
dequantize_v6.asm \
|
||||
idct_dequant_dc_full_2x_neon.asm \
|
||||
idct_dequant_dc_0_2x_neon.asm \
|
||||
dequant_idct_neon.asm \
|
||||
idct_dequant_full_2x_neon.asm \
|
||||
idct_dequant_0_2x_neon.asm \
|
||||
dequantizeb_neon.asm \
|
||||
$(NULL)
|
||||
|
||||
# The ARM asm needs to extract the offsets of various C struct members.
|
||||
# We need a program that runs on the host to pull them out of a .o file.
|
||||
HOST_CSRCS = obj_int_extract.c
|
||||
HOST_PROGRAM = host_obj_int_extract$(HOST_BIN_SUFFIX)
|
||||
|
||||
ifdef VPX_AS_CONVERSION
|
||||
# The ARM asm is written in ARM RVCT syntax, but we actually build it with
|
||||
# gas using GNU syntax. Add some rules to perform the conversion.
|
||||
VPX_CONVERTED_ASFILES = $(addsuffix .$(ASM_SUFFIX), $(VPX_ASFILES))
|
||||
|
||||
ASFILES += $(VPX_CONVERTED_ASFILES)
|
||||
GARBAGE += $(VPX_CONVERTED_ASFILES)
|
||||
|
||||
%.asm.$(ASM_SUFFIX): %.asm
|
||||
$(VPX_AS_CONVERSION) < $< > $@
|
||||
|
||||
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
|
||||
./$(HOST_PROGRAM) rvds $< | $(VPX_AS_CONVERSION) > $@
|
||||
|
||||
detokenize.asm.$(OBJ_SUFFIX): vpx_asm_offsets.asm
|
||||
|
||||
else
|
||||
ASFILES += $(VPX_ASFILES)
|
||||
|
||||
vpx_asm_offsets.asm: vpx_asm_offsets.$(OBJ_SUFFIX) $(HOST_PROGRAM)
|
||||
./$(HOST_PROGRAM) rvds $< > $@
|
||||
|
||||
detokenize.$(OBJ_SUFFIX): vpx_asm_offsets.asm
|
||||
|
||||
endif
|
||||
|
||||
GARBAGE += vpx_asm_offsets.$(OBJ_SUFFIX) vpx_asm_offsets.asm
|
||||
|
||||
endif
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
||||
# Workaround a bug of Sun Studio (CR 6963410)
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
Using libvpx pulled from git://review.webmproject.org/libvpx.git
|
||||
Commit ID: 0dd78af3e9b089eacc9af280adfb5549fc7ecdcd
|
||||
Using the v0.9.5 release pulled from
|
||||
http://webm.googlecode.com/files/libvpx-v0.9.5.zip
|
||||
|
|
|
@ -0,0 +1,150 @@
|
|||
#!/usr/bin/perl
|
||||
##
|
||||
## Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
##
|
||||
## Use of this source code is governed by a BSD-style license
|
||||
## that can be found in the LICENSE file in the root of the source
|
||||
## tree. An additional intellectual property rights grant can be found
|
||||
## in the file PATENTS. All contributing project authors may
|
||||
## be found in the AUTHORS file in the root of the source tree.
|
||||
##
|
||||
|
||||
|
||||
# ads2gas.pl
|
||||
# Author: Eric Fung (efung (at) acm.org)
|
||||
#
|
||||
# Convert ARM Developer Suite 1.0.1 syntax assembly source to GNU as format
|
||||
#
|
||||
# Usage: cat inputfile | perl ads2gas.pl > outputfile
|
||||
#
|
||||
# Emit the fixed preamble of every converted file.
print "@ This file was created from a .asm file\n";
print "@ using the ads2gas.pl script.\n";
print "\t.equ DO1STROUNDING, 0\n";

# Translate STDIN one line at a time.  Each substitution below rewrites one
# piece of ARM (ADS/RVCT) assembler syntax into its GNU as counterpart; the
# rewritten line is printed unless it is the final END directive.
while (<STDIN>)
{
    # Comment character
    s/;/@/g;

    # Hexadecimal constants prefaced by 0x
    s/#&/#0x/g;

    # Convert :OR: to |
    s/:OR:/ | /g;

    # Convert :AND: to &
    s/:AND:/ & /g;

    # Convert :NOT: to ~
    s/:NOT:/ ~ /g;

    # Convert :SHL: to <<
    s/:SHL:/ << /g;

    # Convert :SHR: to >>
    s/:SHR:/ >> /g;

    # Convert ELSE to .else
    # The word-boundary anchors keep this rule from rewriting the first
    # four letters of ELSEIF, which has its own rule below.
    s/\bELSE\b/.else/g;

    # Convert ENDIF to .endif
    s/\bENDIF\b/.endif/g;

    # Convert ELSEIF to .elseif
    s/\bELSEIF\b/.elseif/g;

    # Convert LTORG to .ltorg
    s/LTORG/.ltorg/g;

    # Convert IF :DEF: to .if
    # gcc doesn't have the ability to do a conditional
    # if defined variable that is set by IF :DEF: on
    # armasm, so convert it to a normal .if and then
    # make sure to define a value elsewhere
    if (s/\bIF :DEF:\b/.if /g)
    {
        s/=/==/g;
    }

    # Convert IF to .if
    if (s/\bIF\b/.if/g)
    {
        s/=+/==/g;
    }

    # Convert INCLUDE to .INCLUDE "file"
    s/INCLUDE(\s*)(.*)$/.include $1\"$2\"/;

    # Code directive (ARM vs Thumb)
    s/CODE([0-9][0-9])/.code $1/;

    # No AREA required
    s/^\s*AREA.*$/.text/;

    # DCD to .word
    # This one is for incoming symbols
    s/DCD\s+\|(\w*)\|/.long $1/;

    # DCW to .short
    s/DCW\s+\|(\w*)\|/.short $1/;
    s/DCW(.*)/.short $1/;

    # Constants defined in scope
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

    # RN to .req
    # A register alias line needs none of the later rewrites, so it is
    # printed immediately.
    if (s/RN\s+([Rr]\d+|lr)/.req $1/)
    {
        print;
        next;
    }

    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
    s/IMPORT\s+\|([\$\w]*)\|/.global $1/;

    # No vertical bars required; make additional symbol with prepended
    # underscore
    s/^\|(\$?\w+)\|/_$1\n\t$1:/g;

    # Labels need trailing colon
#   s/^(\w+)/$1:/ if !/EQU/;
    # put the colon at the end of the line in the macro
    s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/;

    # Strip ALIGN
    s/\sALIGN/@ ALIGN/g;

    # Strip ARM
    s/\sARM/@ ARM/g;

    # Strip REQUIRE8
    #s/\sREQUIRE8/@ REQUIRE8/g;
    s/\sREQUIRE8/@ /g;      #EQU cause problem

    # Strip PRESERVE8
    s/\sPRESERVE8/@ PRESERVE8/g;

    # Strip PROC and ENDPROC
    s/\sPROC/@/g;
    s/\sENDP/@/g;

    # EQU directive
    s/(.*)EQU(.*)/.equ $1, $2/;

    # Begin macro definition: the MACRO line itself is dropped and the
    # line that follows it becomes the ".macro" header.
    if (/MACRO/) {
        $_ = <STDIN>;
        s/^/.macro/;
        s/\$//g;                # remove formal param reference
        s/;/@/g;                # change comment characters
    }

    # For macros, use \ to reference formal params
    s/\$/\\/g;

    # End macro definition
    s/MEND/.endm/;

    # No need to tell it where to stop assembling
    next if /^\s*END\s*$/;

    print;
}
|
|
@ -0,0 +1,756 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "vpx_config.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <io.h>
|
||||
#include <share.h>
|
||||
#include "vpx/vpx_integer.h"
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
/* Syntax used when printing the extracted symbol values. */
typedef enum
{
    OUTPUT_FMT_PLAIN = 0,   /* "name = value"       */
    OUTPUT_FMT_RVDS  = 1,   /* "name EQU value"     */
    OUTPUT_FMT_GAS   = 2    /* ".equ name, value"   */
} output_fmt_t;
|
||||
|
||||
/*
 * printf-style diagnostic helper: formats the message onto stderr and
 * hands back whatever vfprintf() returned.
 */
int log_msg(const char *fmt, ...)
{
    va_list args;
    int written;

    va_start(args, fmt);
    written = vfprintf(stderr, fmt, args);
    va_end(args);

    return written;
}
|
||||
|
||||
#if defined(__GNUC__) && __GNUC__
|
||||
|
||||
#if defined(__MACH__)
|
||||
|
||||
#include <mach-o/loader.h>
|
||||
#include <mach-o/nlist.h>
|
||||
|
||||
int parse_macho(uint8_t *base_buf, size_t sz)
|
||||
{
|
||||
int i, j;
|
||||
struct mach_header header;
|
||||
uint8_t *buf = base_buf;
|
||||
int base_data_section = 0;
|
||||
|
||||
memcpy(&header, buf, sizeof(struct mach_header));
|
||||
buf += sizeof(struct mach_header);
|
||||
|
||||
if (header.magic != MH_MAGIC)
|
||||
{
|
||||
log_msg("Bad magic number for object file. 0x%x expected, 0x%x found.\n",
|
||||
header.magic, MH_MAGIC);
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (header.cputype != CPU_TYPE_ARM)
|
||||
{
|
||||
log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_ARM.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (header.filetype != MH_OBJECT)
|
||||
{
|
||||
log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
for (i = 0; i < header.ncmds; i++)
|
||||
{
|
||||
struct load_command lc;
|
||||
struct symtab_command sc;
|
||||
struct segment_command seg_c;
|
||||
|
||||
memcpy(&lc, buf, sizeof(struct load_command));
|
||||
|
||||
if (lc.cmd == LC_SEGMENT)
|
||||
{
|
||||
uint8_t *seg_buf = buf;
|
||||
struct section s;
|
||||
|
||||
memcpy(&seg_c, buf, sizeof(struct segment_command));
|
||||
|
||||
seg_buf += sizeof(struct segment_command);
|
||||
|
||||
for (j = 0; j < seg_c.nsects; j++)
|
||||
{
|
||||
memcpy(&s, seg_buf + (j * sizeof(struct section)), sizeof(struct section));
|
||||
|
||||
// Need to get this offset which is the start of the symbol table
|
||||
// before matching the strings up with symbols.
|
||||
base_data_section = s.offset;
|
||||
}
|
||||
}
|
||||
else if (lc.cmd == LC_SYMTAB)
|
||||
{
|
||||
uint8_t *sym_buf = base_buf;
|
||||
uint8_t *str_buf = base_buf;
|
||||
|
||||
if (base_data_section != 0)
|
||||
{
|
||||
memcpy(&sc, buf, sizeof(struct symtab_command));
|
||||
|
||||
if (sc.cmdsize != sizeof(struct symtab_command))
|
||||
log_msg("Can't find symbol table!\n");
|
||||
|
||||
sym_buf += sc.symoff;
|
||||
str_buf += sc.stroff;
|
||||
|
||||
for (j = 0; j < sc.nsyms; j++)
|
||||
{
|
||||
struct nlist nl;
|
||||
int val;
|
||||
|
||||
memcpy(&nl, sym_buf + (j * sizeof(struct nlist)), sizeof(struct nlist));
|
||||
|
||||
val = *((int *)(base_buf + base_data_section + nl.n_value));
|
||||
|
||||
// Location of string is cacluated each time from the
|
||||
// start of the string buffer. On darwin the symbols
|
||||
// are prefixed by "_". On other platforms it is not
|
||||
// so it needs to be removed. That is the reason for
|
||||
// the +1.
|
||||
printf("%-40s EQU %5d\n", str_buf + nl.n_un.n_strx + 1, val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
buf += lc.cmdsize;
|
||||
}
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
 * Command-line entry point (Mach-O build).
 *
 * Usage: prog [output format] <obj file>
 *
 * When argv[1] is "rvds" or "gas" the object file is argv[2]; otherwise
 * argv[1] is taken as the object file.  The whole file is read into memory
 * and handed to parse_macho().
 *
 * Returns EXIT_SUCCESS when parsing succeeds and EXIT_FAILURE otherwise.
 * The descriptor and the file buffer are released on every path.
 */
int main(int argc, char **argv)
{
    int fd = -1;
    const char *f;
    struct stat stat_buf;
    uint8_t *file_buf = NULL;
    int status = EXIT_FAILURE;
    int fmt_given;

    if (argc < 2 || argc > 3)
    {
        fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
        fprintf(stderr, " <obj file>\tMachO format object file to parse\n");
        fprintf(stderr, "Output Formats:\n");
        fprintf(stderr, " gas - compatible with GNU assembler\n");
        fprintf(stderr, " rvds - compatible with armasm\n");
        goto bail;
    }

    fmt_given = !strcmp(argv[1], "rvds") || !strcmp(argv[1], "gas");

    /* A format word on its own leaves no file name: argv[2] is NULL. */
    if (fmt_given && argc < 3)
    {
        fprintf(stderr, "%s: missing <obj file> argument\n", argv[0]);
        goto bail;
    }

    f = fmt_given ? argv[2] : argv[1];

    fd = open(f, O_RDONLY);

    if (fd < 0)
    {
        perror("Unable to open file");
        goto bail;
    }

    if (fstat(fd, &stat_buf))
    {
        perror("stat");
        goto bail;
    }

    file_buf = malloc(stat_buf.st_size);

    if (!file_buf)
    {
        perror("malloc");
        goto bail;
    }

    if (read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
    {
        perror("read");
        goto bail;
    }

    if (close(fd))
    {
        perror("close");
        fd = -1;            /* never close the same descriptor twice */
        goto bail;
    }

    fd = -1;

    if (!parse_macho(file_buf, stat_buf.st_size))
        status = EXIT_SUCCESS;

bail:

    if (fd >= 0)
        close(fd);

    free(file_buf);
    return status;
}
|
||||
|
||||
#else
|
||||
#include "elf.h"
|
||||
|
||||
/*
 * Copy sizeof(*dst) bytes from buf + ofst into dst.  Jumps to the
 * enclosing function's `bail` label when the read would run past the sz
 * bytes of buf.  The check is written as a subtraction so that a huge
 * ofst cannot wrap around and slip through.  ofst is evaluated twice.
 */
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
        if ((sz) < sizeof(*(dst)) || (ofst) > (sz) - sizeof(*(dst))) goto bail;\
        memcpy((dst), (buf) + (ofst), sizeof(*(dst)));\
    } while(0)

/*
 * Assign memb to val.  Requires an `elf` pointer and a `bail` label in the
 * enclosing function; logs and jumps to `bail` when the file is not
 * little-endian.  No byte swapping is performed.
 */
#define ENDIAN_ASSIGN(val, memb) do {\
        if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
        (val) = (memb);\
    } while(0)

/* ENDIAN_ASSIGN applied to a field in place. */
#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
        ENDIAN_ASSIGN(memb, memb);\
    } while(0)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint8_t *buf; /* Buffer containing ELF data */
|
||||
size_t sz; /* Buffer size */
|
||||
int le_data; /* Data is little-endian */
|
||||
Elf32_Ehdr hdr;
|
||||
} elf_obj_t;
|
||||
|
||||
/*
 * Copy the ELF file header from elf->buf into elf->hdr and validate it:
 * the four magic bytes must match, the class must be ELFCLASS32 and the
 * data encoding must be one of ELFDATA2LSB / ELFDATA2MSB.  On a valid
 * header elf->le_data records whether the file is little-endian.
 *
 * Returns 0 on success.  Returns 1 when the buffer is too small, the
 * identification bytes are wrong, or the file is big-endian (rejected by
 * the ENDIAN_ASSIGN macros).
 */
int parse_elf32_header(elf_obj_t *elf)
{
    int data_enc;

    /* Verify ELF32 header */
    COPY_STRUCT(&elf->hdr, elf->buf, 0, elf->sz);

    if (elf->hdr.e_ident[EI_MAG0] != ELFMAG0
        || elf->hdr.e_ident[EI_MAG1] != ELFMAG1
        || elf->hdr.e_ident[EI_MAG2] != ELFMAG2
        || elf->hdr.e_ident[EI_MAG3] != ELFMAG3)
        goto bail;

    if (elf->hdr.e_ident[EI_CLASS] != ELFCLASS32)
        goto bail;

    data_enc = elf->hdr.e_ident[EI_DATA];

    if (data_enc != ELFDATA2LSB && data_enc != ELFDATA2MSB)
        goto bail;

    elf->le_data = (data_enc == ELFDATA2LSB);

    /* Every multi-byte header field goes through the endian macro. */
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_type);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_machine);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_version);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_entry);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_flags);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_ehsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_phnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr.e_shstrndx);

    return 0;

bail:
    return 1;
}
|
||||
|
||||
/*
 * Read section header number idx of the ELF file into *hdr.
 *
 * Returns 0 on success.  Returns 1 when idx is negative or not below
 * e_shnum, when the header would lie outside the file buffer, or when the
 * file is big-endian.
 */
int parse_elf32_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr)
{
    /* A negative index would otherwise turn into a wrapped file offset. */
    if (idx < 0 || idx >= elf->hdr.e_shnum)
        goto bail;

    COPY_STRUCT(hdr, elf->buf, elf->hdr.e_shoff + idx * elf->hdr.e_shentsize,
                elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_name);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_type);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_flags);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addr);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_offset);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_size);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_link);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_info);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_addralign);
    ENDIAN_ASSIGN_IN_PLACE(hdr->sh_entsize);
    return 0;
bail:
    return 1;
}
|
||||
|
||||
/*
 * Return the NUL-terminated string stored idx bytes into the section with
 * index s_idx.
 *
 * The returned pointer refers into elf->buf and must not be freed.  When
 * the section cannot be parsed, or the string would start or run outside
 * the file buffer, a message is logged and "" is returned instead.
 */
char *parse_elf32_string_table(elf_obj_t *elf, int s_idx, int idx)
{
    Elf32_Shdr shdr;

    if (parse_elf32_section(elf, s_idx, &shdr))
    {
        log_msg("Failed to parse ELF string table: section %d, index %d\n",
                s_idx, idx);
        return "";
    }

    /* The string must start inside the buffer ... */
    if (idx < 0
        || shdr.sh_offset >= elf->sz
        || (size_t)idx >= elf->sz - shdr.sh_offset)
        goto out_of_range;

    /* ... and must be terminated before the buffer ends. */
    if (!memchr(elf->buf + shdr.sh_offset + idx, '\0',
                elf->sz - shdr.sh_offset - (size_t)idx))
        goto out_of_range;

    return (char *)(elf->buf + shdr.sh_offset + idx);

out_of_range:
    log_msg("ELF string out of range: section %d, index %d\n", s_idx, idx);
    return "";
}
|
||||
|
||||
/*
 * Read the symbol table entry located ofst bytes into the file into *sym.
 *
 * Returns 0 on success and 1 when the entry does not fit inside the file
 * buffer or the file is big-endian.
 */
int parse_elf32_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym)
{
    COPY_STRUCT(sym, elf->buf, ofst, elf->sz);

    /* Pass each field through the endian macro. */
    ENDIAN_ASSIGN_IN_PLACE(sym->st_name);
    ENDIAN_ASSIGN_IN_PLACE(sym->st_value);
    ENDIAN_ASSIGN_IN_PLACE(sym->st_size);
    ENDIAN_ASSIGN_IN_PLACE(sym->st_info);
    ENDIAN_ASSIGN_IN_PLACE(sym->st_other);
    ENDIAN_ASSIGN_IN_PLACE(sym->st_shndx);

    return 0;

bail:
    return 1;
}
|
||||
|
||||
int parse_elf32(uint8_t *buf, size_t sz, output_fmt_t mode)
|
||||
{
|
||||
elf_obj_t elf;
|
||||
Elf32_Shdr shdr;
|
||||
unsigned int ofst;
|
||||
int i;
|
||||
Elf32_Off strtab_off; /* save String Table offset for later use */
|
||||
|
||||
memset(&elf, 0, sizeof(elf));
|
||||
elf.buf = buf;
|
||||
elf.sz = sz;
|
||||
|
||||
/* Parse Header */
|
||||
if (parse_elf32_header(&elf))
|
||||
{
|
||||
log_msg("Parse error: File does not appear to be valid ELF32\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i < elf.hdr.e_shnum; i++)
|
||||
{
|
||||
parse_elf32_section(&elf, i, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_STRTAB)
|
||||
{
|
||||
char strtsb_name[128];
|
||||
|
||||
strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
|
||||
|
||||
if (!(strcmp(strtsb_name, ".shstrtab")))
|
||||
{
|
||||
log_msg("found section: %s\n", strtsb_name);
|
||||
strtab_off = shdr.sh_offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Parse all Symbol Tables */
|
||||
for (i = 0; i < elf.hdr.e_shnum; i++)
|
||||
{
|
||||
|
||||
parse_elf32_section(&elf, i, &shdr);
|
||||
|
||||
if (shdr.sh_type == SHT_SYMTAB)
|
||||
{
|
||||
for (ofst = shdr.sh_offset;
|
||||
ofst < shdr.sh_offset + shdr.sh_size;
|
||||
ofst += shdr.sh_entsize)
|
||||
{
|
||||
Elf32_Sym sym;
|
||||
|
||||
parse_elf32_symbol(&elf, ofst, &sym);
|
||||
|
||||
/* For all OBJECTS (data objects), extract the value from the
|
||||
* proper data segment.
|
||||
*/
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
|
||||
log_msg("found data object %s\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name));
|
||||
|
||||
if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
|
||||
&& sym.st_size == 4)
|
||||
{
|
||||
Elf32_Shdr dhdr;
|
||||
int32_t val;
|
||||
char section_name[128];
|
||||
|
||||
parse_elf32_section(&elf, sym.st_shndx, &dhdr);
|
||||
|
||||
/* For explanition - refer to _MSC_VER version of code */
|
||||
strcpy(section_name, (char *)(elf.buf + strtab_off + dhdr.sh_name));
|
||||
log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type);
|
||||
|
||||
if (!(strcmp(section_name, ".bss")))
|
||||
{
|
||||
val = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(&val,
|
||||
elf.buf + dhdr.sh_offset + sym.st_value,
|
||||
sizeof(val));
|
||||
}
|
||||
|
||||
if (!elf.le_data)
|
||||
{
|
||||
log_msg("Big Endian data not supported yet!\n");
|
||||
goto bail;
|
||||
}\
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case OUTPUT_FMT_RVDS:
|
||||
printf("%-40s EQU %5d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
case OUTPUT_FMT_GAS:
|
||||
printf(".equ %-40s, %5d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
break;
|
||||
default:
|
||||
printf("%s = %d\n",
|
||||
parse_elf32_string_table(&elf,
|
||||
shdr.sh_link,
|
||||
sym.st_name),
|
||||
val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mode == OUTPUT_FMT_RVDS)
|
||||
printf(" END\n");
|
||||
|
||||
return 0;
|
||||
bail:
|
||||
log_msg("Parse error: File does not appear to be valid ELF32\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int fd;
|
||||
output_fmt_t mode;
|
||||
char *f;
|
||||
struct stat stat_buf;
|
||||
uint8_t *file_buf;
|
||||
int res;
|
||||
|
||||
if (argc < 2 || argc > 3)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
|
||||
fprintf(stderr, " <obj file>\tELF format object file to parse\n");
|
||||
fprintf(stderr, "Output Formats:\n");
|
||||
fprintf(stderr, " gas - compatible with GNU assembler\n");
|
||||
fprintf(stderr, " rvds - compatible with armasm\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
f = argv[2];
|
||||
|
||||
if (!strcmp(argv[1], "rvds"))
|
||||
mode = OUTPUT_FMT_RVDS;
|
||||
else if (!strcmp(argv[1], "gas"))
|
||||
mode = OUTPUT_FMT_GAS;
|
||||
else
|
||||
f = argv[1];
|
||||
|
||||
|
||||
fd = open(f, O_RDONLY);
|
||||
|
||||
if (fd < 0)
|
||||
{
|
||||
perror("Unable to open file");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (fstat(fd, &stat_buf))
|
||||
{
|
||||
perror("stat");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
file_buf = malloc(stat_buf.st_size);
|
||||
|
||||
if (!file_buf)
|
||||
{
|
||||
perror("malloc");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
|
||||
{
|
||||
perror("read");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (close(fd))
|
||||
{
|
||||
perror("close");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
res = parse_elf32(file_buf, stat_buf.st_size, mode);
|
||||
//res = parse_coff(file_buf, stat_buf.st_size);
|
||||
free(file_buf);
|
||||
|
||||
if (!res)
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
bail:
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
/* See "Microsoft Portable Executable and Common Object File Format Specification"
|
||||
for reference.
|
||||
*/
|
||||
/*
 * Assemble a little-endian 32-bit / 16-bit value from the bytes at x.
 * Each byte is widened to unsigned int before shifting so that a top byte
 * of 0x80 or above is never shifted into the sign bit of an int, and the
 * argument is parenthesised so that expressions such as `p + 8` work.
 */
#define get_le32(x) ((unsigned int)(x)[0]         | \
                     ((unsigned int)(x)[1] << 8)  | \
                     ((unsigned int)(x)[2] << 16) | \
                     ((unsigned int)(x)[3] << 24))
#define get_le16(x) ((unsigned int)(x)[0] | ((unsigned int)(x)[1] << 8))
|
||||
|
||||
/*
 * Parse the COFF object file held in buf (sz bytes) and write one
 * "name EQU value" line per external symbol (storage class 2, section
 * number > 0), followed by an END line, to the file "vpx_asm_offsets.asm"
 * in the current directory.
 *
 * Symbols in a section named ".bss" are written as 0; all others are read
 * from the raw data of the ".data" section at the symbol's value.
 *
 * Returns 0 on success and 1 on failure (more than 96 sections, a
 * truncated or inconsistent file, allocation failure, or the output file
 * cannot be written).
 */
int parse_coff(unsigned __int8 *buf, size_t sz)
{
    unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
    unsigned int sectionrawdata_ptr = 0;
    int have_data_section = 0;
    unsigned int i;
    unsigned __int8 *ptr;
    unsigned __int32 symoffset;
    FILE *fp = NULL;
    int res = 1;

    char **sectionlist = NULL;  //this array holds all section names in their correct order.
    //it is used to check if the symbol is in .bss or .data section.

    /* The file header is 20 bytes long. */
    if (sz < 20)
        return 1;

    nsections = get_le16(buf + 2);
    symtab_ptr = get_le32(buf + 8);
    symtab_sz = get_le32(buf + 12);

    /* Nothing has been allocated yet, so fail directly. */
    if (nsections > 96)
        return 1;

    /* The section headers (40 bytes each) and the symbol table (18 bytes
     * per entry) must lie inside the buffer.
     */
    if (20 + 40 * (size_t)nsections > sz)
        return 1;

    if (symtab_ptr > sz || symtab_sz > (sz - symtab_ptr) / 18)
        return 1;

    strtab_ptr = symtab_ptr + symtab_sz * 18;

    /* calloc leaves every slot NULL, so cleanup may free them all even
     * when the list is only partly filled.
     */
    sectionlist = calloc(nsections ? nsections : 1, sizeof * sectionlist);

    if (sectionlist == NULL)
    {
        perror("malloc");
        return 1;
    }

    /*
    The size of optional header is always zero for an obj file. So, the section header
    follows the file header immediately.
    */

    ptr = buf + 20;     //section header

    for (i = 0; i < nsections; i++)
    {
        char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};

        /* Names are at most 8 bytes and not necessarily terminated. */
        memcpy(sectionname, ptr, 8);

        sectionlist[i] = malloc(strlen(sectionname) + 1);

        if (sectionlist[i] == NULL)
        {
            perror("malloc");
            goto bail;
        }

        strcpy(sectionlist[i], sectionname);

        if (!strcmp(sectionname, ".data"))
        {
            sectionrawdata_ptr = get_le32(ptr + 20);
            have_data_section = 1;
        }

        ptr += 40;
    }

    fp = fopen("vpx_asm_offsets.asm", "w");

    if (fp == NULL)
    {
        perror("open file");
        goto bail;
    }

    /* The compiler puts the data with non-zero offset in .data section, but puts the data with
       zero offset in .bss section. So, if the data is in .bss section, set offset=0.
       Note from Wiki: In an object module compiled from C, the bss section contains
       the local variables (but not functions) that were declared with the static keyword,
       except for those with non-zero initial values. (In C, static variables are initialized
       to zero by default.) It also contains the non-local (both extern and static) variables
       that are also initialized to zero (either explicitly or by default).
       */
    //move to symbol table
    /* COFF symbol table:
        offset      field
        0           Name(*)
        8           Value
        12          SectionNumber
        14          Type
        16          StorageClass
        17          NumberOfAuxSymbols
        */
    ptr = buf + symtab_ptr;

    for (i = 0; i < symtab_sz; i++)
    {
        __int16 section = get_le16(ptr + 12); //section number

        if (section > 0 && ptr[16] == 2)
        {
            /* The section number is a 1-based index into sectionlist. */
            if ((unsigned int)section > nsections)
                goto bail;

            if (get_le32(ptr))
            {
                /* Short name stored inline in the symbol entry. */
                char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
                memcpy(name, ptr, 8);
                fprintf(fp, "%-40s EQU ", name);
            }
            else
            {
                /* Long name: offset into the string table. */
                unsigned int name_ofst = get_le32(ptr + 4);

                if (strtab_ptr >= sz || name_ofst >= sz - strtab_ptr
                    || !memchr(buf + strtab_ptr + name_ofst, '\0',
                               sz - strtab_ptr - name_ofst))
                    goto bail;

                fprintf(fp, "%-40s EQU ",
                        (const char *)(buf + strtab_ptr + name_ofst));
            }

            if (!(strcmp(sectionlist[section-1], ".bss")))
            {
                symoffset = 0;
            }
            else
            {
                unsigned int value = get_le32(ptr + 8);

                /* The value is read from .data, so that section must
                 * exist and the 4 bytes must lie inside the buffer.
                 */
                if (!have_data_section
                    || sectionrawdata_ptr > sz
                    || value > sz - sectionrawdata_ptr
                    || sz - sectionrawdata_ptr - value < 4)
                    goto bail;

                symoffset = get_le32(buf + sectionrawdata_ptr + value);
            }

            fprintf(fp, "%5d\n", (int)symoffset);
        }

        ptr += 18;
    }

    fprintf(fp, " END\n");
    res = 0;

bail:

    /* fclose flushes; a failure there means the output is incomplete. */
    if (fp != NULL && fclose(fp) != 0)
    {
        perror("close file");
        res = 1;
    }

    for (i = 0; i < nsections; i++)
    {
        free(sectionlist[i]);
    }

    free(sectionlist);

    return res;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int fd;
|
||||
output_fmt_t mode;
|
||||
const char *f;
|
||||
struct _stat stat_buf;
|
||||
unsigned __int8 *file_buf;
|
||||
int res;
|
||||
|
||||
if (argc < 2 || argc > 3)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
|
||||
fprintf(stderr, " <obj file>\tELF format object file to parse\n");
|
||||
fprintf(stderr, "Output Formats:\n");
|
||||
fprintf(stderr, " gas - compatible with GNU assembler\n");
|
||||
fprintf(stderr, " rvds - compatible with armasm\n");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
f = argv[2];
|
||||
|
||||
if (!strcmp(argv[1], "rvds"))
|
||||
mode = OUTPUT_FMT_RVDS;
|
||||
else if (!strcmp(argv[1], "gas"))
|
||||
mode = OUTPUT_FMT_GAS;
|
||||
else
|
||||
f = argv[1];
|
||||
|
||||
if (_sopen_s(&fd, f, _O_BINARY, _SH_DENYNO, _S_IREAD | _S_IWRITE))
|
||||
{
|
||||
perror("Unable to open file");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (_fstat(fd, &stat_buf))
|
||||
{
|
||||
perror("stat");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
file_buf = malloc(stat_buf.st_size);
|
||||
|
||||
if (!file_buf)
|
||||
{
|
||||
perror("malloc");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (_read(fd, file_buf, stat_buf.st_size) != stat_buf.st_size)
|
||||
{
|
||||
perror("read");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
if (_close(fd))
|
||||
{
|
||||
perror("close");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
res = parse_coff(file_buf, stat_buf.st_size);
|
||||
|
||||
free(file_buf);
|
||||
|
||||
if (!res)
|
||||
return EXIT_SUCCESS;
|
||||
|
||||
bail:
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
#endif
|
|
@ -1,113 +0,0 @@
|
|||
diff --git a/media/libvpx/vp8/decoder/decodframe.c b/media/libvpx/vp8/decoder/decodframe.c
|
||||
--- a/media/libvpx/vp8/decoder/decodframe.c
|
||||
+++ b/media/libvpx/vp8/decoder/decodframe.c
|
||||
@@ -462,17 +462,17 @@ static void setup_token_decoder(VP8D_COM
|
||||
{
|
||||
partition_size = read_partition_size(partition_size_ptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
partition_size = user_data_end - partition;
|
||||
}
|
||||
|
||||
- if (partition + partition_size > user_data_end)
|
||||
+ if (user_data_end - partition < partition_size)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Truncated packet or corrupt partition "
|
||||
"%d length", i + 1);
|
||||
|
||||
if (vp8dx_start_decode(bool_decoder, IF_RTCD(&pbi->dboolhuff),
|
||||
partition, partition_size))
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate bool decoder %d", i + 1);
|
||||
@@ -564,30 +564,33 @@ static void init_frame(VP8D_COMP *pbi)
|
||||
|
||||
int vp8_decode_frame(VP8D_COMP *pbi)
|
||||
{
|
||||
vp8_reader *const bc = & pbi->bc;
|
||||
VP8_COMMON *const pc = & pbi->common;
|
||||
MACROBLOCKD *const xd = & pbi->mb;
|
||||
const unsigned char *data = (const unsigned char *)pbi->Source;
|
||||
const unsigned char *const data_end = data + pbi->source_sz;
|
||||
- int first_partition_length_in_bytes;
|
||||
+ unsigned int first_partition_length_in_bytes;
|
||||
|
||||
int mb_row;
|
||||
int i, j, k, l;
|
||||
const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
|
||||
|
||||
+ if (data_end - data < 3)
|
||||
+ vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
+ "Truncated packet");
|
||||
pc->frame_type = (FRAME_TYPE)(data[0] & 1);
|
||||
pc->version = (data[0] >> 1) & 7;
|
||||
pc->show_frame = (data[0] >> 4) & 1;
|
||||
first_partition_length_in_bytes =
|
||||
(data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
|
||||
data += 3;
|
||||
|
||||
- if (data + first_partition_length_in_bytes > data_end)
|
||||
+ if (data_end - data < first_partition_length_in_bytes)
|
||||
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Truncated packet or corrupt partition 0 length");
|
||||
vp8_setup_version(pc);
|
||||
|
||||
if (pc->frame_type == KEY_FRAME)
|
||||
{
|
||||
const int Width = pc->Width;
|
||||
const int Height = pc->Height;
|
||||
diff --git a/media/libvpx/vp8/decoder/onyxd_if.c b/media/libvpx/vp8/decoder/onyxd_if.c
|
||||
--- a/media/libvpx/vp8/decoder/onyxd_if.c
|
||||
+++ b/media/libvpx/vp8/decoder/onyxd_if.c
|
||||
@@ -318,45 +318,49 @@ int vp8dx_receive_compressed_data(VP8D_P
|
||||
|
||||
if (ptr == 0)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
pbi->common.error.error_code = VPX_CODEC_OK;
|
||||
|
||||
+ cm->new_fb_idx = get_free_fb (cm);
|
||||
+
|
||||
if (setjmp(pbi->common.error.jmp))
|
||||
{
|
||||
pbi->common.error.setjmp = 0;
|
||||
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
|
||||
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
|
||||
return -1;
|
||||
}
|
||||
|
||||
pbi->common.error.setjmp = 1;
|
||||
|
||||
#if HAVE_ARMV7
|
||||
vp8_push_neon(dx_store_reg);
|
||||
#endif
|
||||
|
||||
vpx_usec_timer_start(&timer);
|
||||
|
||||
//cm->current_video_frame++;
|
||||
pbi->Source = source;
|
||||
pbi->source_sz = size;
|
||||
|
||||
- cm->new_fb_idx = get_free_fb (cm);
|
||||
-
|
||||
retcode = vp8_decode_frame(pbi);
|
||||
|
||||
if (retcode < 0)
|
||||
{
|
||||
#if HAVE_ARMV7
|
||||
vp8_pop_neon(dx_store_reg);
|
||||
#endif
|
||||
pbi->common.error.error_code = VPX_CODEC_ERROR;
|
||||
pbi->common.error.setjmp = 0;
|
||||
+ if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
|
||||
+ cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
|
||||
return retcode;
|
||||
}
|
||||
|
||||
if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
|
||||
vp8_stop_lfthread(pbi);
|
||||
|
||||
if (swap_frame_buffers (cm))
|
||||
{
|
|
@ -1,168 +0,0 @@
|
|||
diff --git a/media/libvpx/vp8/common/blockd.h b/media/libvpx/vp8/common/blockd.h
|
||||
--- a/media/libvpx/vp8/common/blockd.h
|
||||
+++ b/media/libvpx/vp8/common/blockd.h
|
||||
@@ -90,17 +90,17 @@ typedef enum
|
||||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
// Macroblock level features
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, // Use alternate Quantizer ....
|
||||
MB_LVL_ALT_LF = 1, // Use alternate loop filter value...
|
||||
- MB_LVL_MAX = 2, // Number of MB level features supported
|
||||
+ MB_LVL_MAX = 2 // Number of MB level features supported
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
// Segment Feature Masks
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
#define VP8_YMODES (B_PRED + 1)
|
||||
diff --git a/media/libvpx/vp8/common/ppflags.h b/media/libvpx/vp8/common/ppflags.h
|
||||
--- a/media/libvpx/vp8/common/ppflags.h
|
||||
+++ b/media/libvpx/vp8/common/ppflags.h
|
||||
@@ -15,12 +15,12 @@ enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1,
|
||||
VP8D_DEMACROBLOCK = 2,
|
||||
VP8D_ADDNOISE = 4,
|
||||
VP8D_DEBUG_LEVEL1 = 8,
|
||||
VP8D_DEBUG_LEVEL2 = 16,
|
||||
VP8D_DEBUG_LEVEL3 = 32,
|
||||
- VP8D_DEBUG_LEVEL4 = 64,
|
||||
+ VP8D_DEBUG_LEVEL4 = 64
|
||||
};
|
||||
|
||||
#endif
|
||||
diff --git a/media/libvpx/vpx/vp8.h b/media/libvpx/vpx/vp8.h
|
||||
--- a/media/libvpx/vpx/vp8.h
|
||||
+++ b/media/libvpx/vpx/vp8.h
|
||||
@@ -48,17 +48,17 @@ enum vp8_dec_control_id
|
||||
*
|
||||
* The set of macros define VP8 decoder post processing flags
|
||||
*/
|
||||
enum vp8_postproc_level
|
||||
{
|
||||
VP8_NOFILTERING = 0,
|
||||
VP8_DEBLOCK = 1,
|
||||
VP8_DEMACROBLOCK = 2,
|
||||
- VP8_ADDNOISE = 4,
|
||||
+ VP8_ADDNOISE = 4
|
||||
};
|
||||
|
||||
/*!\brief post process flags
|
||||
*
|
||||
* This define a structure that describe the post processing settings. For
|
||||
* the best objective measure (using thet PSNR metric) set post_proc_flag
|
||||
* to VP8_DEBLOCK and deblocking_level to 1.
|
||||
*/
|
||||
diff --git a/media/libvpx/vpx/vpx_codec.h b/media/libvpx/vpx/vpx_codec.h
|
||||
--- a/media/libvpx/vpx/vpx_codec.h
|
||||
+++ b/media/libvpx/vpx/vpx_codec.h
|
||||
@@ -57,17 +57,17 @@ extern "C" {
|
||||
#define DEPRECATED
|
||||
#define DECLSPEC_DEPRECATED /**< \copydoc #DEPRECATED */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*!\brief Decorator indicating a function is potentially unused */
|
||||
#ifdef UNUSED
|
||||
#elif __GNUC__
|
||||
-#define UNUSED __attribute__ ((unused));
|
||||
+#define UNUSED __attribute__ ((unused))
|
||||
#else
|
||||
#define UNUSED
|
||||
#endif
|
||||
|
||||
/*!\brief Current ABI version number
|
||||
*
|
||||
* \internal
|
||||
* If this file is altered in any way that changes the ABI, this value
|
||||
@@ -123,17 +123,17 @@ extern "C" {
|
||||
/*!\brief An application-supplied parameter is not valid.
|
||||
*
|
||||
*/
|
||||
VPX_CODEC_INVALID_PARAM,
|
||||
|
||||
/*!\brief An iterator reached the end of list.
|
||||
*
|
||||
*/
|
||||
- VPX_CODEC_LIST_END,
|
||||
+ VPX_CODEC_LIST_END
|
||||
|
||||
}
|
||||
vpx_codec_err_t;
|
||||
|
||||
|
||||
/*! \brief Codec capabilities bitfield
|
||||
*
|
||||
* Each codec advertises the capabilities it supports as part of its
|
||||
diff --git a/media/libvpx/vpx/vpx_decoder_compat.h b/media/libvpx/vpx/vpx_decoder_compat.h
|
||||
--- a/media/libvpx/vpx/vpx_decoder_compat.h
|
||||
+++ b/media/libvpx/vpx/vpx_decoder_compat.h
|
||||
@@ -73,17 +73,17 @@ extern "C" {
|
||||
/*!\brief An application-supplied parameter is not valid.
|
||||
*
|
||||
*/
|
||||
VPX_DEC_INVALID_PARAM = VPX_CODEC_INVALID_PARAM,
|
||||
|
||||
/*!\brief An iterator reached the end of list.
|
||||
*
|
||||
*/
|
||||
- VPX_DEC_LIST_END = VPX_CODEC_LIST_END,
|
||||
+ VPX_DEC_LIST_END = VPX_CODEC_LIST_END
|
||||
|
||||
}
|
||||
vpx_dec_err_t;
|
||||
|
||||
/*! \brief Decoder capabilities bitfield
|
||||
*
|
||||
* Each decoder advertises the capabilities it supports as part of its
|
||||
* ::vpx_dec_iface_t interface structure. Capabilities are extra interfaces
|
||||
diff --git a/media/libvpx/vpx/vpx_encoder.h b/media/libvpx/vpx/vpx_encoder.h
|
||||
--- a/media/libvpx/vpx/vpx_encoder.h
|
||||
+++ b/media/libvpx/vpx/vpx_encoder.h
|
||||
@@ -166,17 +166,17 @@ extern "C" {
|
||||
} vpx_rational_t; /**< alias for struct vpx_rational */
|
||||
|
||||
|
||||
/*!\brief Multi-pass Encoding Pass */
|
||||
enum vpx_enc_pass
|
||||
{
|
||||
VPX_RC_ONE_PASS, /**< Single pass mode */
|
||||
VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
|
||||
- VPX_RC_LAST_PASS, /**< Final pass of multi-pass mode */
|
||||
+ VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */
|
||||
};
|
||||
|
||||
|
||||
/*!\brief Rate control mode */
|
||||
enum vpx_rc_mode
|
||||
{
|
||||
VPX_VBR, /**< Variable Bit Rate (VBR) mode */
|
||||
VPX_CBR /**< Constant Bit Rate (CBR) mode */
|
||||
diff --git a/media/libvpx/vpx/vpx_image.h b/media/libvpx/vpx/vpx_image.h
|
||||
--- a/media/libvpx/vpx/vpx_image.h
|
||||
+++ b/media/libvpx/vpx/vpx_image.h
|
||||
@@ -50,17 +50,17 @@ extern "C" {
|
||||
VPX_IMG_FMT_RGB32_LE, /**< 32 bit packed BGR0 */
|
||||
VPX_IMG_FMT_ARGB, /**< 32 bit packed ARGB, alpha=255 */
|
||||
VPX_IMG_FMT_ARGB_LE, /**< 32 bit packed BGRA, alpha=255 */
|
||||
VPX_IMG_FMT_RGB565_LE, /**< 16 bit per pixel, gggbbbbb rrrrrggg */
|
||||
VPX_IMG_FMT_RGB555_LE, /**< 16 bit per pixel, gggbbbbb 0rrrrrgg */
|
||||
VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
|
||||
VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2,
|
||||
VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
|
||||
- VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, /** < planar 4:2:0 format with vpx color space */
|
||||
+ VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /** < planar 4:2:0 format with vpx color space */
|
||||
}
|
||||
vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
|
||||
|
||||
#if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
|
||||
#define IMG_FMT_PLANAR VPX_IMG_FMT_PLANAR /**< \deprecated Use #VPX_IMG_FMT_PLANAR */
|
||||
#define IMG_FMT_UV_FLIP VPX_IMG_FMT_UV_FLIP /**< \deprecated Use #VPX_IMG_FMT_UV_FLIP */
|
||||
#define IMG_FMT_HAS_ALPHA VPX_IMG_FMT_HAS_ALPHA /**< \deprecated Use #VPX_IMG_FMT_HAS_ALPHA */
|
||||
|
|
@ -1,10 +1,7 @@
|
|||
diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/common/loopfilter_filters.c
|
||||
--- a/media/libvpx/vp8/common/loopfilter_filters.c
|
||||
+++ b/media/libvpx/vp8/common/loopfilter_filters.c
|
||||
@@ -8,16 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
@@ -11,10 +11,14 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "loopfilter.h"
|
||||
|
@ -13,9 +10,7 @@ diff --git a/media/libvpx/vp8/common/loopfilter_filters.c b/media/libvpx/vp8/com
|
|||
+#ifdef __SUNPRO_C
|
||||
+#define __inline inline
|
||||
+#endif
|
||||
|
||||
#define NEW_LOOPFILTER_MASK
|
||||
|
||||
+
|
||||
typedef unsigned char uc;
|
||||
|
||||
static __inline signed char vp8_signed_char_clamp(int t)
|
||||
|
@ -109,14 +104,14 @@ diff --git a/media/libvpx/vpx_ports/mem.h b/media/libvpx/vpx_ports/mem.h
|
|||
diff --git a/media/libvpx/vpx_ports/x86.h b/media/libvpx/vpx_ports/x86.h
|
||||
--- a/media/libvpx/vpx_ports/x86.h
|
||||
+++ b/media/libvpx/vpx_ports/x86.h
|
||||
@@ -26,16 +26,36 @@
|
||||
@@ -45,16 +45,36 @@
|
||||
#define cpuid(func,ax,bx,cx,dx)\
|
||||
__asm__ __volatile__ (\
|
||||
"pushl %%ebx \n\t" \
|
||||
"cpuid \n\t" \
|
||||
"movl %%ebx, %1 \n\t" \
|
||||
"popl %%ebx \n\t" \
|
||||
: "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
|
||||
: "a" (func));
|
||||
"mov %%ebx, %%edi \n\t" \
|
||||
"cpuid \n\t" \
|
||||
"xchg %%edi, %%ebx \n\t" \
|
||||
: "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
|
||||
: "a" (func));
|
||||
#endif
|
||||
+#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
|
||||
+#if ARCH_X86_64
|
||||
|
|
|
@ -1,22 +0,0 @@
|
|||
diff --git a/media/libvpx/vp8/common/x86/subpixel_sse2.asm b/media/libvpx/vp8/common/x86/subpixel_sse2.asm
|
||||
--- a/media/libvpx/vp8/common/x86/subpixel_sse2.asm
|
||||
+++ b/media/libvpx/vp8/common/x86/subpixel_sse2.asm
|
||||
@@ -1003,17 +1003,17 @@ next_row8x8:
|
||||
paddw xmm3, xmm7
|
||||
|
||||
movdqa xmm7, xmm4
|
||||
|
||||
paddw xmm3, [rd GLOBAL] ; xmm3 += round value
|
||||
psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128
|
||||
|
||||
packuswb xmm3, xmm0
|
||||
- movq [rdi], xmm3 ; store the results in the destination
|
||||
+ movq QWORD PTR [rdi], xmm3 ; store the results in the destination
|
||||
|
||||
add rsp, 16 ; next line
|
||||
add rdi, rdx
|
||||
|
||||
cmp rdi, rcx
|
||||
jne next_row8x8
|
||||
|
||||
;add rsp, 144
|
|
@ -79,19 +79,30 @@ commonFiles=(
|
|||
vp8/common/swapyv12buffer.c
|
||||
vp8/common/textblit.c
|
||||
vp8/common/treecoder.c
|
||||
vp8/common/arm/arm_systemdependent.c
|
||||
vp8/common/arm/bilinearfilter_arm.c
|
||||
vp8/common/arm/filter_arm.c
|
||||
vp8/common/arm/loopfilter_arm.c
|
||||
vp8/common/arm/reconintra_arm.c
|
||||
vp8/common/arm/vpx_asm_offsets.c
|
||||
vp8/common/arm/neon/recon_neon.c
|
||||
vp8/common/x86/loopfilter_x86.c
|
||||
vp8/common/x86/vp8_asm_stubs.c
|
||||
vp8/common/x86/x86_systemdependent.c
|
||||
vp8/decoder/dboolhuff.c
|
||||
vp8/decoder/decodemv.c
|
||||
vp8/decoder/decodframe.c
|
||||
vp8/decoder/demode.c
|
||||
vp8/decoder/dequantize.c
|
||||
vp8/decoder/detokenize.c
|
||||
vp8/decoder/reconintra_mt.c
|
||||
vp8/decoder/generic/dsystemdependent.c
|
||||
vp8/decoder/idct_blk.c
|
||||
vp8/decoder/onyxd_if.c
|
||||
vp8/decoder/threading.c
|
||||
vp8/decoder/arm/arm_dsystemdependent.c
|
||||
vp8/decoder/arm/dequantize_arm.c
|
||||
vp8/decoder/arm/armv6/idct_blk_v6.c
|
||||
vp8/decoder/arm/neon/idct_blk_neon.c
|
||||
vp8/decoder/x86/idct_blk_mmx.c
|
||||
vp8/decoder/x86/idct_blk_sse2.c
|
||||
vp8/decoder/x86/x86_dsystemdependent.c
|
||||
|
@ -138,7 +149,6 @@ commonFiles=(
|
|||
vp8/common/reconinter.h
|
||||
vp8/common/reconintra4x4.h
|
||||
vp8/common/reconintra.h
|
||||
vp8/common/segmentation_common.h
|
||||
vp8/common/setupintrarecon.h
|
||||
vp8/common/subpixel.h
|
||||
vp8/common/swapyv12buffer.h
|
||||
|
@ -147,6 +157,10 @@ commonFiles=(
|
|||
vp8/common/treecoder.h
|
||||
vp8/common/type_aliases.h
|
||||
vp8/common/vpxerrors.h
|
||||
vp8/common/arm/idct_arm.h
|
||||
vp8/common/arm/loopfilter_arm.h
|
||||
vp8/common/arm/recon_arm.h
|
||||
vp8/common/arm/subpixel_arm.h
|
||||
vp8/common/x86/idct_x86.h
|
||||
vp8/common/x86/loopfilter_x86.h
|
||||
vp8/common/x86/postproc_x86.h
|
||||
|
@ -155,11 +169,14 @@ commonFiles=(
|
|||
vp8/decoder/dboolhuff.h
|
||||
vp8/decoder/decodemv.h
|
||||
vp8/decoder/decoderthreading.h
|
||||
vp8/decoder/demode.h
|
||||
vp8/decoder/dequantize.h
|
||||
vp8/decoder/detokenize.h
|
||||
vp8/decoder/onyxd_int.h
|
||||
vp8/decoder/reconintra_mt.h
|
||||
vp8/decoder/treereader.h
|
||||
vp8/decoder/arm/dboolhuff_arm.h
|
||||
vp8/decoder/arm/dequantize_arm.h
|
||||
vp8/decoder/arm/detokenize_arm.h
|
||||
vp8/decoder/x86/dequantize_x86.h
|
||||
vpx/internal/vpx_codec_internal.h
|
||||
vpx/vp8cx.h
|
||||
|
@ -176,14 +193,63 @@ commonFiles=(
|
|||
vpx/vpx_integer.h
|
||||
vpx_mem/include/vpx_mem_intrnl.h
|
||||
vpx_mem/vpx_mem.h
|
||||
vpx_ports/arm_cpudetect.c
|
||||
vpx_ports/config.h
|
||||
vpx_ports/mem.h
|
||||
vpx_ports/vpx_timer.h
|
||||
vpx_ports/arm.h
|
||||
vpx_ports/x86.h
|
||||
vpx_scale/scale_mode.h
|
||||
vpx_scale/vpxscale.h
|
||||
vpx_scale/yv12config.h
|
||||
vpx_scale/yv12extend.h
|
||||
vp8/common/arm/armv6/bilinearfilter_v6.asm
|
||||
vp8/common/arm/armv6/copymem8x4_v6.asm
|
||||
vp8/common/arm/armv6/copymem8x8_v6.asm
|
||||
vp8/common/arm/armv6/copymem16x16_v6.asm
|
||||
vp8/common/arm/armv6/dc_only_idct_add_v6.asm
|
||||
vp8/common/arm/armv6/iwalsh_v6.asm
|
||||
vp8/common/arm/armv6/filter_v6.asm
|
||||
vp8/common/arm/armv6/idct_v6.asm
|
||||
vp8/common/arm/armv6/loopfilter_v6.asm
|
||||
vp8/common/arm/armv6/recon_v6.asm
|
||||
vp8/common/arm/armv6/simpleloopfilter_v6.asm
|
||||
vp8/common/arm/armv6/sixtappredict8x4_v6.asm
|
||||
vp8/common/arm/neon/bilinearpredict4x4_neon.asm
|
||||
vp8/common/arm/neon/bilinearpredict8x4_neon.asm
|
||||
vp8/common/arm/neon/bilinearpredict8x8_neon.asm
|
||||
vp8/common/arm/neon/bilinearpredict16x16_neon.asm
|
||||
vp8/common/arm/neon/copymem8x4_neon.asm
|
||||
vp8/common/arm/neon/copymem8x8_neon.asm
|
||||
vp8/common/arm/neon/copymem16x16_neon.asm
|
||||
vp8/common/arm/neon/dc_only_idct_add_neon.asm
|
||||
vp8/common/arm/neon/iwalsh_neon.asm
|
||||
vp8/common/arm/neon/loopfilter_neon.asm
|
||||
vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
|
||||
vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
|
||||
vp8/common/arm/neon/mbloopfilter_neon.asm
|
||||
vp8/common/arm/neon/recon2b_neon.asm
|
||||
vp8/common/arm/neon/recon4b_neon.asm
|
||||
vp8/common/arm/neon/reconb_neon.asm
|
||||
vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
|
||||
vp8/common/arm/neon/shortidct4x4llm_neon.asm
|
||||
vp8/common/arm/neon/sixtappredict4x4_neon.asm
|
||||
vp8/common/arm/neon/sixtappredict8x4_neon.asm
|
||||
vp8/common/arm/neon/sixtappredict8x8_neon.asm
|
||||
vp8/common/arm/neon/sixtappredict16x16_neon.asm
|
||||
vp8/common/arm/neon/recon16x16mb_neon.asm
|
||||
vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
|
||||
vp8/common/arm/neon/save_neon_reg.asm
|
||||
vp8/decoder/arm/detokenize.asm
|
||||
vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
|
||||
vp8/decoder/arm/armv6/dequant_idct_v6.asm
|
||||
vp8/decoder/arm/armv6/dequantize_v6.asm
|
||||
vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
|
||||
vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
|
||||
vp8/decoder/arm/neon/dequant_idct_neon.asm
|
||||
vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
|
||||
vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
|
||||
vp8/decoder/arm/neon/dequantizeb_neon.asm
|
||||
vp8/common/x86/idctllm_mmx.asm
|
||||
vp8/common/x86/idctllm_sse2.asm
|
||||
vp8/common/x86/iwalsh_mmx.asm
|
||||
|
@ -200,6 +266,8 @@ commonFiles=(
|
|||
vp8/decoder/x86/dequantize_mmx.asm
|
||||
vpx_ports/emms.asm
|
||||
vpx_ports/x86_abi_support.asm
|
||||
build/make/ads2gas.pl
|
||||
build/make/obj_int_extract.c
|
||||
LICENSE
|
||||
PATENTS
|
||||
)
|
||||
|
@ -218,9 +286,9 @@ cp $1/objdir/x86-linux-gcc/vpx_config.asm vpx_config_x86-linux-gcc.asm
|
|||
cp $1/objdir/x86-linux-gcc/vpx_config.h vpx_config_x86-linux-gcc.h
|
||||
|
||||
# Config files for x86_64-linux-gcc and Solaris x86_64
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.c vpx_config_x86-linux-gcc.c
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.asm vpx_config_x86-linux-gcc.asm
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.h vpx_config_x86-linux-gcc.h
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.c vpx_config_x86_64-linux-gcc.c
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.asm vpx_config_x86_64-linux-gcc.asm
|
||||
cp $1/objdir/x86_64-linux-gcc/vpx_config.h vpx_config_x86_64-linux-gcc.h
|
||||
|
||||
# Copy config files for mac...
|
||||
cp $1/objdir/x86-darwin9-gcc/vpx_config.c vpx_config_x86-darwin9-gcc.c
|
||||
|
@ -232,6 +300,10 @@ cp $1/objdir/x86_64-darwin9-gcc/vpx_config.c vpx_config_x86_64-darwin9-gcc.c
|
|||
cp $1/objdir/x86_64-darwin9-gcc/vpx_config.asm vpx_config_x86_64-darwin9-gcc.asm
|
||||
cp $1/objdir/x86_64-darwin9-gcc/vpx_config.h vpx_config_x86_64-darwin9-gcc.h
|
||||
|
||||
# Config files for arm-linux-gcc
|
||||
cp $1/objdir/armv7-linux-gcc/vpx_config.c vpx_config_arm-linux-gcc.c
|
||||
cp $1/objdir/armv7-linux-gcc/vpx_config.h vpx_config_arm-linux-gcc.h
|
||||
|
||||
# Config files for generic-gnu
|
||||
cp $1/objdir/generic-gnu/vpx_config.c vpx_config_generic-gnu.c
|
||||
cp $1/objdir/generic-gnu/vpx_config.h vpx_config_generic-gnu.h
|
||||
|
@ -243,11 +315,5 @@ do
|
|||
cp -v $1/$f $f
|
||||
done
|
||||
|
||||
# Patch to reduce compiler warnings, so we can compile with -Werror in mozilla.
|
||||
patch -p3 < reduce-warnings-1.patch
|
||||
patch -p3 < subpixel-qword.patch
|
||||
# Patch to compile with Sun Studio on Solaris
|
||||
patch -p3 < solaris.patch
|
||||
# Patch to fix frame buffer reference counting and parition length overflow
|
||||
# checks.
|
||||
patch -p3 < frame_buf_ref.patch
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -56,7 +56,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
|
|||
|
||||
vp8_de_alloc_frame_buffers(oci);
|
||||
|
||||
// our internal buffers are always multiples of 16
|
||||
/* our internal buffers are always multiples of 16 */
|
||||
if ((width & 0xf) != 0)
|
||||
width += 16 - (width & 0xf);
|
||||
|
||||
|
@ -153,7 +153,7 @@ void vp8_setup_version(VP8_COMMON *cm)
|
|||
cm->full_pixel = 1;
|
||||
break;
|
||||
default:
|
||||
//4,5,6,7 are reserved for future use
|
||||
/*4,5,6,7 are reserved for future use*/
|
||||
cm->no_lpf = 0;
|
||||
cm->simpler_lpf = 0;
|
||||
cm->use_bilinear_mc_filter = 0;
|
||||
|
@ -177,10 +177,10 @@ void vp8_create_common(VP8_COMMON *oci)
|
|||
oci->clr_type = REG_YUV;
|
||||
oci->clamp_type = RECON_CLAMP_REQUIRED;
|
||||
|
||||
// Initialise reference frame sign bias structure to defaults
|
||||
/* Initialise reference frame sign bias structure to defaults */
|
||||
vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
|
||||
|
||||
// Default disable buffer to buffer copying
|
||||
/* Default disable buffer to buffer copying */
|
||||
oci->copy_buffer_to_gf = 0;
|
||||
oci->copy_buffer_to_arf = 0;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "vpx_ports/arm.h"
|
||||
#include "g_common.h"
|
||||
#include "pragmas.h"
|
||||
#include "subpixel.h"
|
||||
#include "loopfilter.h"
|
||||
#include "recon.h"
|
||||
#include "idct.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
|
||||
|
||||
extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
|
||||
|
||||
/*
 * Install ARM-specific function pointers for the common VP8 code.
 *
 * ctx: common context whose rtcd dispatch table is filled in when
 *      CONFIG_RUNTIME_CPU_DETECT is enabled.
 *
 * With CONFIG_RUNTIME_CPU_DETECT, the result of arm_cpu_caps() is stored in
 * rtcd->flags and the subpix / idct / loopfilter / recon entries are
 * overridden per detected capability.  The ARMv6 block runs before the NEON
 * block, so NEON entries replace ARMv6 ones when both capabilities are
 * reported.
 *
 * The intra-predictor pointers are set afterwards.  When runtime detection is
 * compiled out, the `if` guards disappear and those blocks run
 * unconditionally for every HAVE_* option that is enabled.
 */
void vp8_arch_arm_common_init(VP8_COMMON *ctx)
{
#if CONFIG_RUNTIME_CPU_DETECT
    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
    int flags = arm_cpu_caps();
    int has_media = flags & HAS_MEDIA;
    int has_neon = flags & HAS_NEON;
    rtcd->flags = flags;

    /* Override default functions with fastest ones for this CPU. */
#if HAVE_ARMV6
    if (has_media)
    {
        rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6;
        rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6;
        rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6;
        rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6;
        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
        rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6;
        rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6;
        rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6;

        rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6;
        rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual;
        rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6;
        rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6;

        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
        rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6;
        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
        rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6;
        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
        rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6;
        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
        rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6;

        rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6;
        rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6;
        rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6;
        rtcd->recon.recon = vp8_recon_b_armv6;
        rtcd->recon.recon2 = vp8_recon2b_armv6;
        rtcd->recon.recon4 = vp8_recon4b_armv6;
    }
#endif

#if HAVE_ARMV7
    /* Must stay after the ARMv6 block so NEON entries take precedence. */
    if (has_neon)
    {
        rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon;
        rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon;
        rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon;
        rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon;
        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
        rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon;
        rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon;
        rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon;

        rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon;
        rtcd->idct.idct16 = vp8_short_idct4x4llm_neon;
        rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon;
        rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon;

        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
        rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon;
        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
        rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon;
        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
        rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon;
        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
        rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon;

        rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon;
        rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon;
        rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon;
        rtcd->recon.recon = vp8_recon_b_neon;
        rtcd->recon.recon2 = vp8_recon2b_neon;
        rtcd->recon.recon4 = vp8_recon4b_neon;
        rtcd->recon.recon_mb = vp8_recon_mb_neon;

    }
#endif

#endif

    /*
     * Intra-predictor entry points are plain global pointers rather than rtcd
     * members, so they are assigned separately.  The braces below are guarded
     * by an `if` only when runtime detection is compiled in.
     */
#if HAVE_ARMV6
#if CONFIG_RUNTIME_CPU_DETECT
    if (has_media)
#endif
    {
        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
    }
#endif

#if HAVE_ARMV7
#if CONFIG_RUNTIME_CPU_DETECT
    if (has_neon)
#endif
    {
        vp8_build_intra_predictors_mby_ptr =
            vp8_build_intra_predictors_mby_neon;
        vp8_build_intra_predictors_mby_s_ptr =
            vp8_build_intra_predictors_mby_s_neon;
    }
#endif
}
|
|
@ -0,0 +1,238 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is stored transposed in the output array to make second-pass filtering easier.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
|
||||
mov r12, r3 ; outer-loop counter
|
||||
sub r2, r2, r4 ; src increment for height loop
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
pld [r0]
|
||||
;;ENDIF
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
|
||||
mov r3, r3, lsl #1 ; output_height*2
|
||||
add r3, r3, #2 ; plus 2 to make output buffer 4-byte aligned since height is actually (height+1)
|
||||
|
||||
mov r11, r1 ; save output_ptr for each row
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_1st_filter
|
||||
|
||||
|bil_height_loop_1st_v6|
|
||||
ldrb r6, [r0] ; load source data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
mov lr, r4, lsr #2 ; 4-in-parallel loop counter
|
||||
|
||||
|bil_width_loop_1st_v6|
|
||||
ldrb r9, [r0, #3]
|
||||
ldrb r10, [r0, #4]
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; src[1] | src[0]
|
||||
pkhbt r7, r7, r8, lsl #16 ; src[2] | src[1]
|
||||
|
||||
smuad r6, r6, r5 ; apply the filter
|
||||
pkhbt r8, r8, r9, lsl #16 ; src[3] | src[2]
|
||||
smuad r7, r7, r5
|
||||
pkhbt r9, r9, r10, lsl #16 ; src[4] | src[3]
|
||||
|
||||
smuad r8, r8, r5
|
||||
smuad r9, r9, r5
|
||||
|
||||
add r0, r0, #4
|
||||
subs lr, lr, #1
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #16, r6, asr #7
|
||||
usat r7, #16, r7, asr #7
|
||||
|
||||
strh r6, [r1], r3 ; result is transposed and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strh r7, [r1], r3
|
||||
add r9, r9, #0x40
|
||||
usat r8, #16, r8, asr #7
|
||||
usat r9, #16, r9, asr #7
|
||||
|
||||
strh r8, [r1], r3 ; result is transposed and stored
|
||||
|
||||
ldrneb r6, [r0] ; load source data
|
||||
strh r9, [r1], r3
|
||||
|
||||
ldrneb r7, [r0, #1]
|
||||
ldrneb r8, [r0, #2]
|
||||
|
||||
bne bil_width_loop_1st_v6
|
||||
|
||||
add r0, r0, r2 ; move to next input row
|
||||
subs r12, r12, #1
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
pld [r0]
|
||||
;;ENDIF
|
||||
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_1st_v6
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_1st_filter|
|
||||
|bil_height_loop_null_1st|
|
||||
mov lr, r4, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_null_1st|
|
||||
ldrb r6, [r0] ; load data
|
||||
ldrb r7, [r0, #1]
|
||||
ldrb r8, [r0, #2]
|
||||
ldrb r9, [r0, #3]
|
||||
|
||||
strh r6, [r1], r3 ; store it to immediate buffer
|
||||
add r0, r0, #4
|
||||
strh r7, [r1], r3
|
||||
subs lr, lr, #1
|
||||
strh r8, [r1], r3
|
||||
strh r9, [r1], r3
|
||||
|
||||
bne bil_width_loop_null_1st
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r2 ; move to next input line
|
||||
add r11, r11, #2 ; move over to next column
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_1st
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP ; |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 int output_pitch,
|
||||
; r3 unsigned int output_height,
|
||||
; stack unsigned int output_width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r4, [sp, #36] ; output width
|
||||
|
||||
ldr r5, [r11] ; load up filter coefficients
|
||||
mov r12, r4 ; outer-loop counter = width, since we work on transposed data matrix
|
||||
mov r11, r1
|
||||
|
||||
cmp r5, #128 ; if filter coef = 128, then skip the filter
|
||||
beq bil_null_2nd_filter
|
||||
|
||||
|bil_height_loop_2nd|
|
||||
ldr r6, [r0] ; load the data
|
||||
ldr r8, [r0, #4]
|
||||
ldrh r10, [r0, #8]
|
||||
mov lr, r3, lsr #2 ; loop counter
|
||||
|
||||
|bil_width_loop_2nd|
|
||||
pkhtb r7, r6, r8 ; src[1] | src[2]
|
||||
pkhtb r9, r8, r10 ; src[3] | src[4]
|
||||
|
||||
smuad r6, r6, r5 ; apply filter
|
||||
smuad r8, r8, r5 ; apply filter
|
||||
|
||||
subs lr, lr, #1
|
||||
|
||||
smuadx r7, r7, r5 ; apply filter
|
||||
smuadx r9, r9, r5 ; apply filter
|
||||
|
||||
add r0, r0, #8
|
||||
|
||||
add r6, r6, #0x40 ; round_shift_and_clamp
|
||||
add r7, r7, #0x40
|
||||
usat r6, #8, r6, asr #7
|
||||
usat r7, #8, r7, asr #7
|
||||
strb r6, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
add r8, r8, #0x40 ; round_shift_and_clamp
|
||||
strb r7, [r1], r2
|
||||
add r9, r9, #0x40
|
||||
usat r8, #8, r8, asr #7
|
||||
usat r9, #8, r9, asr #7
|
||||
strb r8, [r1], r2 ; the result is transposed back and stored
|
||||
|
||||
ldrne r6, [r0] ; load data
|
||||
strb r9, [r1], r2
|
||||
ldrne r8, [r0, #4]
|
||||
ldrneh r10, [r0, #8]
|
||||
|
||||
bne bil_width_loop_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4 ; update src for next row
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_2nd
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
|bil_null_2nd_filter|
|
||||
|bil_height_loop_null_2nd|
|
||||
mov lr, r3, lsr #2
|
||||
|
||||
|bil_width_loop_null_2nd|
|
||||
ldr r6, [r0], #4 ; load data
|
||||
subs lr, lr, #1
|
||||
ldr r8, [r0], #4
|
||||
|
||||
strb r6, [r1], r2 ; store data
|
||||
mov r7, r6, lsr #16
|
||||
strb r7, [r1], r2
|
||||
mov r9, r8, lsr #16
|
||||
strb r8, [r1], r2
|
||||
strb r9, [r1], r2
|
||||
|
||||
bne bil_width_loop_null_2nd
|
||||
|
||||
subs r12, r12, #1
|
||||
add r0, r0, #4
|
||||
add r11, r11, #1
|
||||
mov r1, r11
|
||||
|
||||
bne bil_height_loop_null_2nd
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
END
|
|
@ -0,0 +1,182 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem16x16_v6| PROC
|
||||
stmdb sp!, {r4 - r7}
|
||||
;push {r4-r7}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #15
|
||||
beq copy_mem16x16_fast
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem16x16_8
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem16x16_4
|
||||
|
||||
;copy one byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
ldrb r6, [r0, #2]
|
||||
ldrb r7, [r0, #3]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
strb r6, [r2, #2]
|
||||
strb r7, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
ldrb r6, [r0, #6]
|
||||
ldrb r7, [r0, #7]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
strb r6, [r2, #6]
|
||||
strb r7, [r2, #7]
|
||||
|
||||
ldrb r4, [r0, #8]
|
||||
ldrb r5, [r0, #9]
|
||||
ldrb r6, [r0, #10]
|
||||
ldrb r7, [r0, #11]
|
||||
|
||||
strb r4, [r2, #8]
|
||||
strb r5, [r2, #9]
|
||||
strb r6, [r2, #10]
|
||||
strb r7, [r2, #11]
|
||||
|
||||
ldrb r4, [r0, #12]
|
||||
ldrb r5, [r0, #13]
|
||||
ldrb r6, [r0, #14]
|
||||
ldrb r7, [r0, #15]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #12]
|
||||
strb r5, [r2, #13]
|
||||
strb r6, [r2, #14]
|
||||
strb r7, [r2, #15]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
ldrneb r6, [r0, #2]
|
||||
ldrneb r7, [r0, #3]
|
||||
|
||||
bne copy_mem16x16_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem16x16_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
ldr r6, [r0, #8]
|
||||
ldr r7, [r0, #12]
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
str r6, [r2, #8]
|
||||
str r7, [r2, #12]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
ldrne r6, [r0, #8]
|
||||
ldrne r7, [r0, #12]
|
||||
|
||||
bne copy_mem16x16_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem16x16_8
|
||||
sub r1, r1, #16
|
||||
sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_8_loop
|
||||
ldmia r0!, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
ldmia r0!, {r6-r7}
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
stmia r2!, {r4-r5}
|
||||
subs r12, r12, #1
|
||||
;stm r2, {r4-r5}
|
||||
stmia r2!, {r6-r7}
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem16x16_8_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
;copy 16 bytes each time
|
||||
copy_mem16x16_fast
|
||||
;sub r1, r1, #16
|
||||
;sub r3, r3, #16
|
||||
|
||||
mov r12, #16
|
||||
|
||||
copy_mem16x16_fast_loop
|
||||
ldmia r0, {r4-r7}
|
||||
;ldm r0, {r4-r7}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r7}
|
||||
;stm r2, {r4-r7}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem16x16_fast_loop
|
||||
|
||||
ldmia sp!, {r4 - r7}
|
||||
;pop {r4-r7}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem16x16_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,128 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x4_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x4_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x4_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x4_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x4_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x4_4_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x4_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #4
|
||||
|
||||
copy_mem8x4_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x4_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x4_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,128 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x8_v6| PROC
|
||||
;push {r4-r5}
|
||||
stmdb sp!, {r4-r5}
|
||||
|
||||
;preload
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
ands r4, r0, #7
|
||||
beq copy_mem8x8_fast
|
||||
|
||||
ands r4, r0, #3
|
||||
beq copy_mem8x8_4
|
||||
|
||||
;copy 1 byte each time
|
||||
ldrb r4, [r0]
|
||||
ldrb r5, [r0, #1]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_1_loop
|
||||
strb r4, [r2]
|
||||
strb r5, [r2, #1]
|
||||
|
||||
ldrb r4, [r0, #2]
|
||||
ldrb r5, [r0, #3]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
strb r4, [r2, #2]
|
||||
strb r5, [r2, #3]
|
||||
|
||||
ldrb r4, [r0, #4]
|
||||
ldrb r5, [r0, #5]
|
||||
|
||||
strb r4, [r2, #4]
|
||||
strb r5, [r2, #5]
|
||||
|
||||
ldrb r4, [r0, #6]
|
||||
ldrb r5, [r0, #7]
|
||||
|
||||
add r0, r0, r1
|
||||
|
||||
strb r4, [r2, #6]
|
||||
strb r5, [r2, #7]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrneb r4, [r0]
|
||||
ldrneb r5, [r0, #1]
|
||||
|
||||
bne copy_mem8x8_1_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 4 bytes each time
|
||||
copy_mem8x8_4
|
||||
ldr r4, [r0]
|
||||
ldr r5, [r0, #4]
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_4_loop
|
||||
subs r12, r12, #1
|
||||
add r0, r0, r1
|
||||
|
||||
str r4, [r2]
|
||||
str r5, [r2, #4]
|
||||
|
||||
add r2, r2, r3
|
||||
|
||||
ldrne r4, [r0]
|
||||
ldrne r5, [r0, #4]
|
||||
|
||||
bne copy_mem8x8_4_loop
|
||||
|
||||
ldmia sp!, {r4 - r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
;copy 8 bytes each time
|
||||
copy_mem8x8_fast
|
||||
;sub r1, r1, #8
|
||||
;sub r3, r3, #8
|
||||
|
||||
mov r12, #8
|
||||
|
||||
copy_mem8x8_fast_loop
|
||||
ldmia r0, {r4-r5}
|
||||
;ldm r0, {r4-r5}
|
||||
add r0, r0, r1
|
||||
|
||||
subs r12, r12, #1
|
||||
stmia r2, {r4-r5}
|
||||
;stm r2, {r4-r5}
|
||||
add r2, r2, r3
|
||||
|
||||
bne copy_mem8x8_fast_loop
|
||||
|
||||
ldmia sp!, {r4-r5}
|
||||
;pop {r4-r5}
|
||||
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x8_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,67 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
|
||||
; unsigned char *dst_ptr, int pitch, int stride)
|
||||
; r0 input_dc
|
||||
; r1 pred_ptr
|
||||
; r2 dest_ptr
|
||||
; r3 pitch
|
||||
; sp stride
|
||||
|
||||
|vp8_dc_only_idct_add_v6| PROC
|
||||
stmdb sp!, {r4 - r7, lr} ; save r4-r7 and the return address (5 words = 20 bytes)
|
||||
|
||||
add r0, r0, #4 ; input_dc += 4
|
||||
ldr r12, c0x0000FFFF ; r12 = 0x0000FFFF halfword mask from the constant pool
|
||||
ldr r4, [r1], r3 ; r4 = 4 bytes at pred_ptr, then pred_ptr += pitch
|
||||
ldr r6, [r1], r3 ; r6 = next 4 bytes at pred_ptr, then pred_ptr += pitch
|
||||
and r0, r12, r0, asr #3 ; a1 = ((input_dc + 4) >> 3) & 0xFFFF
|
||||
ldr lr, [sp, #20] ; lr = stride (stack argument, located above the 20 bytes pushed)
|
||||
orr r0, r0, r0, lsl #16 ; a1 | a1
|
||||
|
||||
uxtab16 r5, r0, r4 ; a1+2 | a1+0
|
||||
uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1
|
||||
uxtab16 r7, r0, r6 ; same for the second word: a1 + bytes 2 and 0
|
||||
uxtab16 r6, r0, r6, ror #8 ; a1 + bytes 3 and 1
|
||||
usat16 r5, #8, r5 ; saturate both halfwords to the unsigned 8-bit range
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8 ; re-interleave even and odd bytes into one 4-byte word
|
||||
orr r7, r7, r6, lsl #8
|
||||
ldr r4, [r1], r3 ; load the third word from pred_ptr, then pred_ptr += pitch
|
||||
ldr r6, [r1] ; load the fourth word from pred_ptr
|
||||
str r5, [r2], lr ; store first result word at dst_ptr, then dst_ptr += stride
|
||||
str r7, [r2], lr ; store second result word, then dst_ptr += stride
|
||||
|
||||
uxtab16 r5, r0, r4 ; repeat add / saturate / re-pack for the third and fourth words
|
||||
uxtab16 r4, r0, r4, ror #8
|
||||
uxtab16 r7, r0, r6
|
||||
uxtab16 r6, r0, r6, ror #8
|
||||
usat16 r5, #8, r5
|
||||
usat16 r4, #8, r4
|
||||
usat16 r7, #8, r7
|
||||
usat16 r6, #8, r6
|
||||
orr r5, r5, r4, lsl #8
|
||||
orr r7, r7, r6, lsl #8
|
||||
str r5, [r2], lr ; store third result word, then dst_ptr += stride
|
||||
str r7, [r2] ; store fourth result word (no pointer update needed)
|
||||
|
||||
ldmia sp!, {r4 - r7, pc} ; restore r4-r7 and return by loading pc
|
||||
|
||||
ENDP ; |vp8_dc_only_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
c0x0000FFFF DCD 0x0000FFFF
|
||||
END
|
|
@ -0,0 +1,443 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter4_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 short *output_ptr
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int output_width
|
||||
; stack unsigned int output_height
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; vp8_filter the input and put in the output array. Apply the 6 tap FIR filter with
|
||||
; the output being a 2 byte value and the input being a 1 byte value.
|
||||
|vp8_filter_block2d_first_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; vp8_filter address
|
||||
ldr r7, [sp, #36] ; output height
|
||||
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
mov r3, r3, lsl #1 ; multiply width by 2 because using shorts
|
||||
add r12, r3, #16 ; square off the output
|
||||
sub sp, sp, #4
|
||||
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, #-2]
|
||||
;;pld [r0, #30]
|
||||
;;ENDIF
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r1, [sp] ; push destination to stack
|
||||
mov r7, r7, lsl #16 ; height is top part of counter
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_6|
|
||||
ldrb r8, [r0, #-2] ; load source data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
orr r7, r7, r3, lsr #2 ; construct loop counter
|
||||
|
||||
|width_loop_1st_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
smuad lr, lr, r4 ; apply the filter
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
sub r7, r7, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r11, r10, r6, r8
|
||||
|
||||
ands r10, r7, #0xff ; test loop counter
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r11, r11, #0x40
|
||||
ldrneb r9, [r0, #-1]
|
||||
usat r11, #8, r11, asr #7
|
||||
|
||||
strh lr, [r1], r12 ; result is transposed and stored, which
|
||||
; will make second pass filtering easier.
|
||||
ldrneb r10, [r0], #2
|
||||
strh r11, [r1], r12
|
||||
|
||||
bne width_loop_1st_6
|
||||
|
||||
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, r2]
|
||||
;;pld [r0, r9]
|
||||
;;ENDIF
|
||||
|
||||
ldr r1, [sp] ; load and update dst address
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, r2 ; move to next input line
|
||||
add r1, r1, #2 ; move over to next column
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_1st_6
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
sub sp, sp, #4
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
str r1, [sp] ; push destination to stack
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
|
||||
sub r0, r0, #4 ; offset input buffer
|
||||
|
||||
|height_loop_2nd|
|
||||
ldr r8, [r0] ; load the data
|
||||
ldr r9, [r0, #4]
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd|
|
||||
smuad lr, r4, r8 ; apply filter
|
||||
sub r7, r7, #1
|
||||
smulbt r8, r4, r8
|
||||
|
||||
ldr r10, [r0, #8]
|
||||
|
||||
smlad lr, r5, r9, lr
|
||||
smladx r8, r12, r9, r8
|
||||
|
||||
ldrh r9, [r0, #12]
|
||||
|
||||
smlad lr, r6, r10, lr
|
||||
smladx r8, r11, r10, r8
|
||||
|
||||
add r0, r0, #4
|
||||
smlatb r10, r6, r9, r8
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ands r8, r7, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r2 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrne r8, [r0] ; load data for next loop
|
||||
ldrne r9, [r0, #4]
|
||||
strb r10, [r1], r2
|
||||
|
||||
bne width_loop_2nd
|
||||
|
||||
ldr r1, [sp] ; update dst for next loop
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; update src for next loop
|
||||
add r1, r1, #1
|
||||
str r1, [sp]
|
||||
|
||||
bne height_loop_2nd
|
||||
|
||||
add sp, sp, #4
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;---------------------------------
|
||||
; r0 short *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int output_pitch,
|
||||
; r3 unsigned int cnt,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter4_block2d_second_pass_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #36] ; vp8_filter address
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
add lr, r1, r3 ; save final destination pointer
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
pkhbt r12, r5, r4 ; pack the filter differently
|
||||
pkhbt r11, r6, r5
|
||||
mov r4, #0x40 ; rounding factor (for smlad{x})
|
||||
|
||||
|height_loop_2nd_4|
|
||||
ldrd r8, [r0, #-4] ; load the data
|
||||
orr r7, r7, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_2nd_4|
|
||||
ldr r10, [r0, #4]!
|
||||
smladx r6, r9, r12, r4 ; apply filter
|
||||
pkhbt r8, r9, r8
|
||||
smlad r5, r8, r12, r4
|
||||
pkhbt r8, r10, r9
|
||||
smladx r6, r10, r11, r6
|
||||
sub r7, r7, #1
|
||||
smlad r5, r8, r11, r5
|
||||
|
||||
mov r8, r9 ; shift the data for the next loop
|
||||
mov r9, r10
|
||||
|
||||
usat r6, #8, r6, asr #7 ; shift and clamp
|
||||
usat r5, #8, r5, asr #7
|
||||
|
||||
strb r5, [r1], r2 ; the result is transposed back and stored
|
||||
tst r7, #0xff
|
||||
strb r6, [r1], r2
|
||||
|
||||
bne width_loop_2nd_4
|
||||
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #16 ; update src for next loop
|
||||
sub r1, lr, r7, lsr #16 ; update dst for next loop
|
||||
|
||||
bne height_loop_2nd_4
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_first_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r4, [sp, #36] ; output pitch
|
||||
ldr r11, [sp, #40] ; HFilter address
|
||||
sub sp, sp, #8
|
||||
|
||||
mov r7, r3
|
||||
sub r2, r2, r3 ; inside loop increments input array,
|
||||
; so the height loop only needs to add
|
||||
; r2 - width to the input pointer
|
||||
|
||||
sub r4, r4, r3
|
||||
str r4, [sp] ; save modified output pitch
|
||||
str r2, [sp, #4]
|
||||
|
||||
mov r2, #0x40
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
; six tap filter
|
||||
|height_loop_1st_only_6|
|
||||
ldrb r8, [r0, #-2] ; load data
|
||||
ldrb r9, [r0, #-1]
|
||||
ldrb r10, [r0], #2
|
||||
|
||||
mov r12, r3, lsr #1 ; loop counter
|
||||
|
||||
|width_loop_1st_only_6|
|
||||
ldrb r11, [r0, #-1]
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0]
|
||||
|
||||
;; smuad lr, lr, r4
|
||||
smlad lr, lr, r4, r2
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
;; smuad r8, r8, r4
|
||||
smlad r8, r8, r4, r2
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0, #1]
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0, #2]
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
;; add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0, #-2] ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
;; add r10, r10, #0x40
|
||||
strb lr, [r1], #1 ; store the result
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0, #-1]
|
||||
strb r10, [r1], #1
|
||||
ldrneb r10, [r0], #2
|
||||
|
||||
bne width_loop_1st_only_6
|
||||
|
||||
;;add r9, r2, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [r0, r2]
|
||||
;;pld [r0, r9]
|
||||
;;ENDIF
|
||||
|
||||
ldr lr, [sp] ; load back output pitch
|
||||
ldr r12, [sp, #4] ; load back src increment (src_pixels_per_line - cnt)
|
||||
subs r7, r7, #1
|
||||
add r0, r0, r12 ; update src for next loop
|
||||
add r1, r1, lr ; update dst for next loop
|
||||
|
||||
bne height_loop_1st_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_first_pass_only_armv6|
|
||||
|
||||
|
||||
;------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned char *output_ptr,
|
||||
; r2 unsigned int src_pixels_per_line
|
||||
; r3 unsigned int cnt,
|
||||
; stack unsigned int output_pitch,
|
||||
; stack const short *vp8_filter
|
||||
;------------------------------------
|
||||
|vp8_filter_block2d_second_pass_only_armv6| PROC
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r11, [sp, #40] ; VFilter address
|
||||
ldr r12, [sp, #36] ; output pitch
|
||||
|
||||
mov r7, r3, lsl #16 ; height is top part of counter
|
||||
sub r0, r0, r2, lsl #1 ; need 6 elements for filtering, 2 before, 3 after
|
||||
|
||||
sub sp, sp, #8
|
||||
|
||||
ldr r4, [r11] ; load up packed filter coefficients
|
||||
ldr r5, [r11, #4]
|
||||
ldr r6, [r11, #8]
|
||||
|
||||
str r0, [sp] ; save r0 to stack
|
||||
str r1, [sp, #4] ; save dst to stack
|
||||
|
||||
; six tap filter
|
||||
|width_loop_2nd_only_6|
|
||||
ldrb r8, [r0], r2 ; load data
|
||||
orr r7, r7, r3 ; loop counter
|
||||
ldrb r9, [r0], r2
|
||||
ldrb r10, [r0], r2
|
||||
|
||||
|height_loop_2nd_only_6|
|
||||
; filter first column in this inner loop, then move to next column.
|
||||
ldrb r11, [r0], r2
|
||||
|
||||
pkhbt lr, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r8, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
ldrb r9, [r0], r2
|
||||
|
||||
smuad lr, lr, r4
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
smuad r8, r8, r4
|
||||
pkhbt r11, r11, r9, lsl #16 ; r9 | r11
|
||||
|
||||
smlad lr, r10, r5, lr
|
||||
ldrb r10, [r0], r2
|
||||
smlad r8, r11, r5, r8
|
||||
ldrb r11, [r0]
|
||||
|
||||
sub r7, r7, #2
|
||||
sub r0, r0, r2, lsl #2
|
||||
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
pkhbt r10, r10, r11, lsl #16 ; r11 | r10
|
||||
|
||||
smlad lr, r9, r6, lr
|
||||
smlad r10, r10, r6, r8
|
||||
|
||||
ands r9, r7, #0xff
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
ldrneb r8, [r0], r2 ; load data for next loop
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r1], r12 ; store the result for the column
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
ldrneb r9, [r0], r2
|
||||
strb r10, [r1], r12
|
||||
ldrneb r10, [r0], r2
|
||||
|
||||
bne height_loop_2nd_only_6
|
||||
|
||||
ldr r0, [sp]
|
||||
ldr r1, [sp, #4]
|
||||
subs r7, r7, #0x10000
|
||||
add r0, r0, #1 ; move to filter next column
|
||||
str r0, [sp]
|
||||
add r1, r1, #1
|
||||
str r1, [sp, #4]
|
||||
|
||||
bne width_loop_2nd_only_6
|
||||
|
||||
add sp, sp, #8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
END
|
|
@ -0,0 +1,345 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
; r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r14
|
||||
EXPORT |vp8_short_idct4x4llm_1_v6|
|
||||
EXPORT |vp8_short_idct4x4llm_v6|
|
||||
EXPORT |vp8_short_idct4x4llm_v6_scott|
|
||||
EXPORT |vp8_short_idct4x4llm_v6_dual|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
;********************************************************************************
|
||||
;* void short_idct4x4llm_1_v6(INT16 * input, INT16 * output, INT32 pitch)
|
||||
;* r0 INT16 * input
|
||||
;* r1 INT16 * output
|
||||
;* r2 INT32 pitch
|
||||
;* bench: 3/5
|
||||
;********************************************************************************
|
||||
|
||||
; DC-only inverse DCT: reads only input[0], computes (input[0] + 4) >> 3 and
; writes that value to four rows of four 16-bit outputs.
;   In: r0 = input, r1 = output, r2 = pitch (added to the output byte address
;       between rows). r4 and r5 are saved and restored.
;   Each row is a single 8-byte strd of r4:r5, which together hold four copies
;   of the 16-bit result.
|vp8_short_idct4x4llm_1_v6| PROC ; cycles in out pit
|
||||
;
|
||||
ldrsh r0, [r0] ; load input[0] 1, r0 un 2
|
||||
add r0, r0, #4 ; rounding term: input[0] + 4
|
||||
stmdb sp!, {r4, r5, lr} ; save r4, r5 (needed as the strd register pair) and lr
|
||||
mov r0, r0, asr #3 ; (input[0] + 4) >> 3 1, r0 req`d ^1 >> 3
|
||||
pkhbt r4, r0, r0, lsl #16 ; pack r0 into both halfwords of r4
|
||||
mov r5, r4 ; duplicate the packed pair so r4:r5 holds four copies
|
||||
|
||||
strd r4, [r1], r2 ; row 0: store four values, then output += pitch
|
||||
strd r4, [r1], r2 ; row 1
|
||||
strd r4, [r1], r2 ; row 2
|
||||
strd r4, [r1] ; row 3 (no post-increment)
|
||||
;
|
||||
ldmia sp!, {r4, r5, pc} ; restore r4, r5 and return
|
||||
ENDP ; |vp8_short_idct4x4llm_1_v6|
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
|
||||
;********************************************************************************
|
||||
;* void short_idct4x4llm_v6(INT16 * input, INT16 * output, INT32 pitch)
|
||||
;* r0 INT16 * input
|
||||
;* r1 INT16 * output
|
||||
;* r2 INT32 pitch
|
||||
;* bench:
|
||||
;********************************************************************************
|
||||
|
||||
; Full 4x4 inverse DCT, done as two passes of four iterations each.
;   In: r0 = input (16-bit coefficients), r1 = output, r2 = pitch (byte offset
;       between output rows). r4-r11 are saved and restored.
;   Pass 1 (loop1): per iteration reads input[0], [4], [8], [12] (one column),
;       and writes the 1-D result to out[0], out[pitch], out[pitch*2] and
;       out[pitch*3]; input and output both advance by one halfword.
;   Pass 2 (loop2): re-reads one output row per iteration, applies the same
;       butterfly, rounds each value with (x + 4) >> 3 and stores the row back
;       in place; output advances by pitch.
;   Constants: r4 = 0x4E7B (cospi8sqrt2minus1), r5 = 0x8A8C (sinpi8sqrt2).
|vp8_short_idct4x4llm_v6| PROC ; cycles in out pit
|
||||
;
|
||||
stmdb sp!, {r4-r11, lr} ; backup registers 1 backup
|
||||
;
|
||||
mov r4, #0x00004E00 ; 1 cst
|
||||
orr r4, r4, #0x0000007B ; cospi8sqrt2minus1
|
||||
mov r5, #0x00008A00 ; 1 cst
|
||||
orr r5, r5, #0x0000008C ; sinpi8sqrt2
|
||||
;
|
||||
mov r6, #4 ; i=4 1 i
|
||||
loop1 ;
|
||||
ldrsh r12, [r0, #8] ; input[4] 1, r12 unavail 2 [4]
|
||||
ldrsh r3, [r0, #24] ; input[12] 1, r3 unavail 2 [12]
|
||||
ldrsh r8, [r0, #16] ; input[8] 1, r8 unavail 2 [8]
|
||||
ldrsh r7, [r0], #0x2 ; input[0] 1, r7 unavail 2 ++ [0]
|
||||
smulwb r10, r5, r12 ; ([4] * sinpi8sqrt2) >> 16 1, r10 un 2, r12/r5 ^1 t1
|
||||
smulwb r11, r4, r3 ; ([12] * cospi8sqrt2minus1) >> 16 1, r11 un 2, r3/r4 ^1 t2
|
||||
add r9, r7, r8 ; a1 = [0] + [8] 1 a1
|
||||
sub r7, r7, r8 ; b1 = [0] - [8] 1 b1
|
||||
add r11, r3, r11 ; temp2 1
|
||||
rsb r11, r11, r10 ; c1 = temp1 - temp2 1 c1
|
||||
smulwb r3, r5, r3 ; ([12] * sinpi8sqrt2) >> 16 1, r3 un 2, r3/r5 ^ 1 t2
|
||||
smulwb r10, r4, r12 ; ([4] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r12/r4 ^1 t1
|
||||
add r8, r7, r11 ; b1 + c1 1 b+c
|
||||
strh r8, [r1, r2] ; out[pitch] = b1+c1 1
|
||||
sub r7, r7, r11 ; b1 - c1 1 b-c
|
||||
add r10, r12, r10 ; temp1 1
|
||||
add r3, r10, r3 ; d1 = temp1 + temp2 1 d1
|
||||
add r10, r9, r3 ; a1 + d1 1 a+d
|
||||
sub r3, r9, r3 ; a1 - d1 1 a-d
|
||||
add r8, r2, r2 ; pitch * 2 1 p*2
|
||||
strh r7, [r1, r8] ; out[pitch*2] = b1-c1 1
|
||||
add r7, r2, r2, lsl #1 ; pitch * 3 1 p*3
|
||||
strh r3, [r1, r7] ; out[pitch*3] = a1-d1 1
|
||||
subs r6, r6, #1 ; i-- 1 --
|
||||
strh r10, [r1], #0x2 ; out[0] = a1+d1 1 ++
|
||||
bne loop1 ; if i>0, continue
|
||||
;
|
||||
sub r1, r1, #8 ; rewind out by the 4 halfwords (8 bytes) advanced in loop1
|
||||
; for this iteration, input=prev output
|
||||
mov r6, #4 ; i=4 1 i
|
||||
; b returnfull
|
||||
loop2 ;
|
||||
ldrsh r11, [r1, #2] ; input[1] 1, r11 un 2 [1]
|
||||
ldrsh r8, [r1, #6] ; input[3] 1, r8 un 2 [3]
|
||||
ldrsh r3, [r1, #4] ; input[2] 1, r3 un 2 [2]
|
||||
ldrsh r0, [r1] ; input[0] 1, r0 un 2 [0]
|
||||
smulwb r9, r5, r11 ; ([1] * sinpi8sqrt2) >> 16 1, r9 un 2, r5/r11 ^1 t1
|
||||
smulwb r10, r4, r8 ; ([3] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r8 ^1 t2
|
||||
add r7, r0, r3 ; a1 = [0] + [2] 1 a1
|
||||
sub r0, r0, r3 ; b1 = [0] - [2] 1 b1
|
||||
add r10, r8, r10 ; temp2 1
|
||||
rsb r9, r10, r9 ; c1 = temp1 - temp2 1 c1
|
||||
smulwb r8, r5, r8 ; ([3] * sinpi8sqrt2) >> 16 1, r8 un 2, r5/r8 ^1 t2
|
||||
smulwb r10, r4, r11 ; ([1] * cospi8sqrt2minus1) >> 16 1, r10 un 2, r4/r11 ^1 t1
|
||||
add r3, r0, r9 ; b1+c1 1 b+c
|
||||
add r3, r3, #4 ; b1+c1+4 1 +4
|
||||
add r10, r11, r10 ; temp1 1
|
||||
mov r3, r3, asr #3 ; b1+c1+4 >> 3 1, r3 ^1 >>3
|
||||
strh r3, [r1, #2] ; out[1] = b1+c1 1
|
||||
add r10, r10, r8 ; d1 = temp1 + temp2 1 d1
|
||||
add r3, r7, r10 ; a1+d1 1 a+d
|
||||
add r3, r3, #4 ; a1+d1+4 1 +4
|
||||
sub r7, r7, r10 ; a1-d1 1 a-d
|
||||
add r7, r7, #4 ; a1-d1+4 1 +4
|
||||
mov r3, r3, asr #3 ; a1+d1+4 >> 3 1, r3 ^1 >>3
|
||||
mov r7, r7, asr #3 ; a1-d1+4 >> 3 1, r7 ^1 >>3
|
||||
strh r7, [r1, #6] ; out[3] = a1-d1 1
|
||||
sub r0, r0, r9 ; b1-c1 1 b-c
|
||||
add r0, r0, #4 ; b1-c1+4 1 +4
|
||||
subs r6, r6, #1 ; i-- 1 --
|
||||
mov r0, r0, asr #3 ; b1-c1+4 >> 3 1, r0 ^1 >>3
|
||||
strh r0, [r1, #4] ; out[2] = b1-c1 1
|
||||
strh r3, [r1], r2 ; out[0] = a1+d1, then out += pitch
|
||||
; add r1, r1, r2 ; out += pitch 1 ++
|
||||
bne loop2 ; if i>0, continue
|
||||
returnfull ;
|
||||
ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
|
||||
ENDP ; |vp8_short_idct4x4llm_v6|
|
||||
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
|
||||
;********************************************************************************
|
||||
;* void short_idct4x4llm_v6_scott(INT16 * input, INT16 * output, INT32 pitch)
|
||||
;* r0 INT16 * input
|
||||
;* r1 INT16 * output
|
||||
;* r2 INT32 pitch
|
||||
;* bench:
|
||||
;********************************************************************************
|
||||
|
||||
; Experimental 4x4 inverse DCT variant that processes two columns per
; iteration using packed 16-bit arithmetic.
;   In: r0 = input, r1 = output, r2 = pitch. r4-r11 are saved and restored.
;   Constants: r3 = 0x4E7B (cospi8sqrt2minus1), r4 = 0x8A8C (sinpi8sqrt2).
; NOTE(review): as written this routine looks unfinished - confirm before use:
;   * loop1 stores only a1+d1 (at [r1]) and a1-d1 (at [r1, r2]); b1 (r9) and
;     c1 (r6) are computed but never stored.
;   * r1 is advanced by 4 in each of the two loop1 iterations (8 bytes) but is
;     then rewound by 16.
;   * loop2 contains only its counter decrement and branch.
|vp8_short_idct4x4llm_v6_scott| PROC ; cycles in out pit
|
||||
; mov r0, #0 ;
|
||||
; ldr r0, [r0] ;
|
||||
stmdb sp!, {r4 - r11, lr} ; backup registers 1 backup
|
||||
;
|
||||
mov r3, #0x00004E00 ; cos
|
||||
orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
|
||||
mov r4, #0x00008A00 ; sin
|
||||
orr r4, r4, #0x0000008C ; sinpi8sqrt2
|
||||
;
|
||||
mov r5, #0x2 ; i = 2 (two columns per iteration)
|
||||
;
|
||||
short_idct4x4llm_v6_scott_loop1 ;
|
||||
ldr r10, [r0, #(4*2)] ; i5 | i4 5,4
|
||||
ldr r11, [r0, #(12*2)] ; i13 | i12 13,12
|
||||
;
|
||||
smulwb r6, r4, r10 ; ((ip[4] * sinpi8sqrt2) >> 16) lt1
|
||||
smulwb r7, r3, r11 ; ((ip[12] * cospi8sqrt2minus1) >> 16) lt2
|
||||
;
|
||||
smulwb r12, r3, r10 ; ((ip[4] * cospi8sqrt2minus1) >> 16) l2t2
|
||||
smulwb r14, r4, r11 ; ((ip[12] * sinpi8sqrt2) >> 16) l2t1
|
||||
;
|
||||
add r6, r6, r7 ; partial c1 = lt1 + lt2 as coded (comment formerly read lt1-lt2; TODO confirm sign)
|
||||
add r12, r12, r14 ; partial d1 l2t2+l2t1
|
||||
;
|
||||
smulwt r14, r4, r10 ; ((ip[5] * sinpi8sqrt2) >> 16) ht1
|
||||
smulwt r7, r3, r11 ; ((ip[13] * cospi8sqrt2minus1) >> 16) ht2
|
||||
;
|
||||
smulwt r8, r3, r10 ; ((ip[5] * cospi8sqrt2minus1) >> 16) h2t1
|
||||
smulwt r9, r4, r11 ; ((ip[13] * sinpi8sqrt2) >> 16) h2t2
|
||||
;
|
||||
add r7, r14, r7 ; partial c1_2 ht1+ht2
|
||||
sub r8, r8, r9 ; partial d1_2 h2t1-h2t2
|
||||
;
|
||||
pkhbt r6, r6, r7, lsl #16 ; partial c1_2 | partial c1_1 pack
|
||||
pkhbt r12, r12, r8, lsl #16 ; partial d1_2 | partial d1_1 pack
|
||||
;
|
||||
usub16 r6, r6, r10 ; c1_2 | c1_1 c
|
||||
uadd16 r12, r12, r11 ; d1_2 | d1_1 d
|
||||
;
|
||||
ldr r10, [r0, #0] ; i1 | i0 1,0
|
||||
ldr r11, [r0, #(8*2)] ; i9 | i8 9,8
|
||||
;
|
||||
;;;;;; add r0, r0, #0x4 ; +4
|
||||
;;;;;; add r1, r1, #0x4 ; +4
|
||||
;
|
||||
uadd16 r8, r10, r11 ; i1 + i9 | i0 + i8 aka a1 a
|
||||
usub16 r9, r10, r11 ; i1 - i9 | i0 - i8 aka b1 b
|
||||
;
|
||||
uadd16 r7, r8, r12 ; a1 + d1 pair a+d
|
||||
usub16 r14, r8, r12 ; a1 - d1 pair a-d
|
||||
;
|
||||
str r7, [r1] ; op[0] = a1 + d1
|
||||
str r14, [r1, r2] ; a1 - d1, stored at offset pitch (r2); comment formerly read op[pitch*3] - TODO confirm
|
||||
;
|
||||
add r0, r0, #0x4 ; advance input by 4 bytes (two columns)
|
||||
add r1, r1, #0x4 ; advance output by 4 bytes (two columns)
|
||||
;
|
||||
subs r5, r5, #0x1 ; --
|
||||
bne short_idct4x4llm_v6_scott_loop1 ;
|
||||
;
|
||||
sub r1, r1, #16 ; rewind output ptr by 16 bytes (NOTE(review): loop1 advanced it by 2 x 4 = 8)
|
||||
mov r5, #0x4 ;
|
||||
mov r0, r1 ; input = output
|
||||
;
|
||||
short_idct4x4llm_v6_scott_loop2 ;
|
||||
;
|
||||
subs r5, r5, #0x1 ;
|
||||
bne short_idct4x4llm_v6_scott_loop2 ;
|
||||
;
|
||||
ldmia sp!, {r4 - r11, pc} ;
|
||||
ENDP ;
|
||||
;
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
;********************************************************************************
|
||||
|
||||
;********************************************************************************
|
||||
;* void short_idct4x4llm_v6_dual(INT16 * input, INT16 * output, INT32 pitch)
|
||||
;* r0 INT16 * input
|
||||
;* r1 INT16 * output
|
||||
;* r2 INT32 pitch
|
||||
;* bench:
|
||||
;********************************************************************************
|
||||
|
||||
; Full 4x4 inverse DCT that carries two 16-bit values in every register.
;   In: r0 = input, r1 = output, r2 = pitch (byte offset between output rows).
;       r4-r11 are saved and restored; r12 and r14 are used as scratch.
;   Constants: r3 = 0x4E7B (cospi8sqrt2minus1), r4 = 0x8A8C (sinpi8sqrt2).
;   loop1_dual (2 iterations): loads words at input byte offsets 0, 8, 16 and
;       24 (two adjacent columns), runs the butterfly on both halfwords at
;       once, and stores words at out, out+pitch, out+2*pitch, out+3*pitch;
;       input and output both advance by 4 bytes.
;   loop2_dual (2 iterations): loads two output rows (r0 and r0+pitch),
;       repacks them so each register holds the same element of both rows,
;       applies the butterfly, adds 4 to each halfword, shifts right by 3 and
;       writes both rows back in place.
;   In both loops the counter is decremented mid-loop; no instruction between
;   each subs and its bne is an S-suffixed or compare form.
|vp8_short_idct4x4llm_v6_dual| PROC ; cycles in out pit
|
||||
;
|
||||
stmdb sp!, {r4-r11, lr} ; backup registers 1 backup
|
||||
mov r3, #0x00004E00 ; cos
|
||||
orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
|
||||
mov r4, #0x00008A00 ; sin
|
||||
orr r4, r4, #0x0000008C ; sinpi8sqrt2
|
||||
mov r5, #0x2 ; i=2 i
|
||||
loop1_dual
|
||||
ldr r6, [r0, #(4*2)] ; i5 | i4 5|4
|
||||
ldr r12, [r0, #(12*2)] ; i13 | i12 13|12
|
||||
ldr r14, [r0, #(8*2)] ; i9 | i8 9|8
|
||||
|
||||
smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
|
||||
smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
|
||||
smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s
|
||||
pkhbt r7, r7, r9, lsl #16 ; 5c | 4c
|
||||
smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c
|
||||
pkhbt r8, r8, r10, lsl #16 ; 5s | 4s
|
||||
uadd16 r6, r6, r7 ; 5c+5 | 4c+4
|
||||
smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s
|
||||
smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c
|
||||
smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s
|
||||
subs r5, r5, #0x1 ; i-- (flags consumed by the bne at the end of the loop)
|
||||
pkhbt r9, r9, r11, lsl #16 ; 13c | 12c
|
||||
ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0
|
||||
pkhbt r10, r10, r7, lsl #16 ; 13s | 12s
|
||||
uadd16 r7, r12, r9 ; 13c+13 | 12c+12
|
||||
usub16 r7, r8, r7 ; c c
|
||||
uadd16 r6, r6, r10 ; d d
|
||||
uadd16 r10, r11, r14 ; a a
|
||||
usub16 r8, r11, r14 ; b b
|
||||
uadd16 r9, r10, r6 ; a+d a+d
|
||||
usub16 r10, r10, r6 ; a-d a-d
|
||||
uadd16 r6, r8, r7 ; b+c b+c
|
||||
usub16 r7, r8, r7 ; b-c b-c
|
||||
str r6, [r1, r2] ; o5 | o4
|
||||
add r6, r2, r2 ; pitch * 2 p2
|
||||
str r7, [r1, r6] ; o9 | o8
|
||||
add r6, r6, r2 ; pitch * 3 p3
|
||||
str r10, [r1, r6] ; o13 | o12
|
||||
str r9, [r1], #0x4 ; o1 | o0 ++
|
||||
bne loop1_dual ;
|
||||
mov r5, #0x2 ; i=2 i
|
||||
sub r0, r1, #8 ; r0 = start of output (r1 advanced 2 x 4 bytes in loop1) i/o
|
||||
loop2_dual
|
||||
ldr r6, [r0, r2] ; i5 | i4 5|4
|
||||
ldr r1, [r0] ; i1 | i0 1|0
|
||||
ldr r12, [r0, #0x4] ; i3 | i2 3|2
|
||||
add r14, r2, #0x4 ; pitch + 4 bytes (i.e. +2 shorts) p+2
|
||||
ldr r14, [r0, r14] ; i7 | i6 7|6
|
||||
smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c
|
||||
smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c
|
||||
smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s
|
||||
smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s
|
||||
pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4
|
||||
pkhbt r7, r9, r7, lsl #16 ; 1c | 5c
|
||||
pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 (c) tc1
|
||||
pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5
|
||||
uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2
|
||||
pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6
|
||||
uadd16 r10, r11, r9 ; a a
|
||||
usub16 r9, r11, r9 ; b b
|
||||
pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7
|
||||
subs r5, r5, #0x1 ; i-- (flags consumed by the bne at the end of the loop)
|
||||
smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c
|
||||
smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s
|
||||
smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c
|
||||
smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s
|
||||
|
||||
pkhbt r7, r12, r7, lsl #16 ; 3c | 7c
|
||||
pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1
|
||||
uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2
|
||||
usub16 r12, r8, r6 ; c (o1 | o5) c
|
||||
uadd16 r6, r11, r1 ; d (o3 | o7) d
|
||||
uadd16 r7, r10, r6 ; a+d a+d
|
||||
mov r8, #0x4 ; set up 4's 4
|
||||
orr r8, r8, #0x40000 ; 4|4
|
||||
usub16 r6, r10, r6 ; a-d a-d
|
||||
uadd16 r6, r6, r8 ; a-d+4 3|7
|
||||
uadd16 r7, r7, r8 ; a+d+4 0|4
|
||||
uadd16 r10, r9, r12 ; b+c b+c
|
||||
usub16 r1, r9, r12 ; b-c b-c
|
||||
uadd16 r10, r10, r8 ; b+c+4 1|5
|
||||
uadd16 r1, r1, r8 ; b-c+4 2|6
|
||||
mov r8, r10, asr #19 ; o1 >> 3 (top halfword: asr 16 + 3)
|
||||
strh r8, [r0, #2] ; o1
|
||||
mov r8, r1, asr #19 ; o2 >> 3
|
||||
strh r8, [r0, #4] ; o2
|
||||
mov r8, r6, asr #19 ; o3 >> 3
|
||||
strh r8, [r0, #6] ; o3
|
||||
mov r8, r7, asr #19 ; o0 >> 3
|
||||
strh r8, [r0], r2 ; o0 +p
|
||||
sxth r10, r10 ; sign-extend the bottom halfword (second row)
|
||||
mov r8, r10, asr #3 ; o5 >> 3
|
||||
strh r8, [r0, #2] ; o5
|
||||
sxth r1, r1 ;
|
||||
mov r8, r1, asr #3 ; o6 >> 3
|
||||
strh r8, [r0, #4] ; o6
|
||||
sxth r6, r6 ;
|
||||
mov r8, r6, asr #3 ; o7 >> 3
|
||||
strh r8, [r0, #6] ; o7
|
||||
sxth r7, r7 ;
|
||||
mov r8, r7, asr #3 ; o4 >> 3
|
||||
strh r8, [r0], r2 ; o4 +p
|
||||
;;;;; subs r5, r5, #0x1 ; i-- --
|
||||
bne loop2_dual ;
|
||||
;
|
||||
ldmia sp!, {r4 - r11, pc} ; replace vars, return restore
|
||||
ENDP ; |vp8_short_idct4x4llm_v6_dual|
|
||||
|
||||
END
|
|
@ -0,0 +1,152 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_short_inv_walsh4x4_v6|
|
||||
EXPORT |vp8_short_inv_walsh4x4_1_v6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
|
||||
; 4x4 inverse Walsh-Hadamard transform over 16 halfwords.
;   In: r0 = input (16 shorts, read as 8 consecutive words),
;       r1 = output (16 shorts, written as 8 consecutive words).
;       r4-r11 are saved and restored; r12 and lr are used as scratch.
;   First pass combines elements four apart in index (0/4/8/12, ...) with
;   saturating halfword adds/subs (qadd16/qsub16). Second pass combines the
;   four elements of each row using the exchange forms (qsubaddx/qaddsubx).
;   Every result then gets +3 (literal c0x00030003, defined outside this
;   routine) and an arithmetic shift right by 3 before repacking and storing.
|vp8_short_inv_walsh4x4_v6| PROC
|
||||
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r2, [r0], #4 ; [1 | 0]
|
||||
ldr r3, [r0], #4 ; [3 | 2]
|
||||
ldr r4, [r0], #4 ; [5 | 4]
|
||||
ldr r5, [r0], #4 ; [7 | 6]
|
||||
ldr r6, [r0], #4 ; [9 | 8]
|
||||
ldr r7, [r0], #4 ; [11 | 10]
|
||||
ldr r8, [r0], #4 ; [13 | 12]
|
||||
ldr r9, [r0] ; [15 | 14]
|
||||
|
||||
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
|
||||
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
|
||||
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
|
||||
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
|
||||
|
||||
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
|
||||
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
|
||||
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
|
||||
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
|
||||
|
||||
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
|
||||
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
|
||||
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
|
||||
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
|
||||
|
||||
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
|
||||
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
|
||||
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
|
||||
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
|
||||
|
||||
; first transform complete
|
||||
|
||||
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
|
||||
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
|
||||
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
|
||||
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
|
||||
|
||||
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
ldr r10, c0x00030003 ; rounding constant: 3 in both halfwords
|
||||
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r2, r2, r10 ; [b2+3|c2+3]
|
||||
qadd16 r3, r3, r10 ; [a2+3|d2+3]
|
||||
qadd16 r4, r4, r10 ; [b2+3|c2+3]
|
||||
qadd16 r5, r5, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r2, #3 ; [1 | x]
|
||||
pkhtb r12, r12, r3, asr #19; [1 | 0]
|
||||
lsl lr, r3, #16 ; [~3 | x]
|
||||
lsl r2, r2, #16 ; [~2 | x]
|
||||
asr lr, lr, #3 ; [3 | x]
|
||||
pkhtb lr, lr, r2, asr #19 ; [3 | 2]
|
||||
|
||||
asr r2, r4, #3 ; [5 | x]
|
||||
pkhtb r2, r2, r5, asr #19 ; [5 | 4]
|
||||
lsl r3, r5, #16 ; [~7 | x]
|
||||
lsl r4, r4, #16 ; [~6 | x]
|
||||
asr r3, r3, #3 ; [7 | x]
|
||||
pkhtb r3, r3, r4, asr #19 ; [7 | 6]
|
||||
|
||||
str r12, [r1], #4
|
||||
str lr, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r3, [r1], #4
|
||||
|
||||
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
|
||||
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
|
||||
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
|
||||
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
|
||||
|
||||
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r6, r6, r10 ; [b2+3|c2+3]
|
||||
qadd16 r7, r7, r10 ; [a2+3|d2+3]
|
||||
qadd16 r8, r8, r10 ; [b2+3|c2+3]
|
||||
qadd16 r9, r9, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r2, r6, #3 ; [9 | x]
|
||||
pkhtb r2, r2, r7, asr #19 ; [9 | 8]
|
||||
lsl r3, r7, #16 ; [~11| x]
|
||||
lsl r4, r6, #16 ; [~10| x]
|
||||
asr r3, r3, #3 ; [11 | x]
|
||||
pkhtb r3, r3, r4, asr #19 ; [11 | 10]
|
||||
|
||||
asr r4, r8, #3 ; [13 | x]
|
||||
pkhtb r4, r4, r9, asr #19 ; [13 | 12]
|
||||
lsl r5, r9, #16 ; [~15| x]
|
||||
lsl r6, r8, #16 ; [~14| x]
|
||||
asr r5, r5, #3 ; [15 | x]
|
||||
pkhtb r5, r5, r6, asr #19 ; [15 | 14]
|
||||
|
||||
str r2, [r1], #4
|
||||
str r3, [r1], #4
|
||||
str r4, [r1], #4
|
||||
str r5, [r1]
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
|
||||
;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
|
||||
; DC-only inverse Walsh-Hadamard: a1 = (input[0] + 3) >> 3 is replicated into
; both halfwords of r2 and written to all 16 output shorts (8 word stores).
;   In: r0 = input, r1 = output.
;   Nothing is pushed; only r1 and r2 are modified, and the routine returns
;   with bx lr.
|vp8_short_inv_walsh4x4_1_v6| PROC
|
||||
|
||||
ldrsh r2, [r0] ; [0]
|
||||
add r2, r2, #3 ; [0] + 3
|
||||
asr r2, r2, #3 ; a1 ([0]+3) >> 3
|
||||
lsl r2, r2, #16 ; [a1 | x]
|
||||
orr r2, r2, r2, lsr #16 ; [a1 | a1]
|
||||
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1], #4
|
||||
str r2, [r1]
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_short_inv_walsh4x4_1_v6|
|
||||
|
||||
; Constant Pool
|
||||
c0x00030003 DCD 0x00030003
|
||||
END
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,281 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_recon_b_armv6|
|
||||
EXPORT |vp8_recon2b_armv6|
|
||||
EXPORT |vp8_recon4b_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
prd RN r0
|
||||
dif RN r1
|
||||
dst RN r2
|
||||
stride RN r3
|
||||
|
||||
;void recon_b(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int stride)
|
||||
; R0 char* pred_ptr
|
||||
; R1 short * dif_ptr
|
||||
; R2 char * dst_ptr
|
||||
; R3 int stride
|
||||
|
||||
; Description:
|
||||
; Loop through the block adding the Pred and Diff together. Clamp and then
|
||||
; store back into the Dst.
|
||||
|
||||
; Restrictions :
|
||||
; all buffers are expected to be 4 byte aligned coming in and
|
||||
; going out.
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
;
|
||||
;
|
||||
;
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; Reconstructs four rows of four pixels: dst = saturate8(pred + diff).
;   In: prd (r0) = prediction bytes, dif (r1) = 16-bit residuals,
;       dst (r2) = destination, stride (r3) = byte step added to dst per row.
;       r4-r9 are saved and restored.
;   Per row: one word (4 prediction bytes) is read and prd advances by 16;
;   two words (4 residuals) are read from dif; the sums are saturated to
;   8 bits with usat16, merged into one word and stored, after which dst
;   advances by stride. dif advances by 32 bytes after each of the first
;   three rows.
|vp8_recon_b_armv6| PROC
|
||||
stmdb sp!, {r4 - r9, lr}
|
||||
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||||
ldr r6, [dif, #0] ; 1 | 0
|
||||
ldr r7, [dif, #4] ; 3 | 2
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||||
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||||
|
||||
uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2, 0
|
||||
uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3, 1
|
||||
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
add dif, dif, #32
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst], stride
|
||||
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||||
;; ldr r6, [dif, #8] ; 1 | 0
|
||||
;; ldr r7, [dif, #12] ; 3 | 2
|
||||
ldr r6, [dif, #0] ; 1 | 0
|
||||
ldr r7, [dif, #4] ; 3 | 2
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||||
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||||
|
||||
uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2, 0
|
||||
uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3, 1
|
||||
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
add dif, dif, #32
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst], stride
|
||||
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||||
;; ldr r6, [dif, #16] ; 1 | 0
|
||||
;; ldr r7, [dif, #20] ; 3 | 2
|
||||
ldr r6, [dif, #0] ; 1 | 0
|
||||
ldr r7, [dif, #4] ; 3 | 2
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||||
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||||
|
||||
uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2, 0
|
||||
uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3, 1
|
||||
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
add dif, dif, #32
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst], stride
|
||||
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #16 ; 3 | 2 | 1 | 0
|
||||
;; ldr r6, [dif, #24] ; 1 | 0
|
||||
;; ldr r7, [dif, #28] ; 3 | 2
|
||||
ldr r6, [dif, #0] ; 1 | 0
|
||||
ldr r7, [dif, #4] ; 3 | 2
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||||
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||||
|
||||
uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2, 0
|
||||
uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3, 1
|
||||
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst], stride
|
||||
|
||||
ldmia sp!, {r4 - r9, pc}
|
||||
|
||||
ENDP ; |vp8_recon_b_armv6|
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
;
|
||||
;
|
||||
;
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; R0 char *pred_ptr
|
||||
; R1 short *dif_ptr
|
||||
; R2 char *dst_ptr
|
||||
; R3 int stride
|
||||
; Reconstructs four rows of sixteen pixels: dst = saturate8(pred + diff).
;   In: prd (r0) = prediction bytes, dif (r1) = 16-bit residuals,
;       dst (r2) = destination, stride (r3) = byte step added to dst per row.
;       r4-r9 are saved and restored; lr is the row counter (4).
;   Per row (one recon4b_loop iteration): four groups of 4 pixels are handled
;   in turn. Each group reads one word from prd (post-incremented by 4) and
;   two words from dif at byte offsets 0/4, 8/12, 16/20, 24/28, and stores one
;   word at dst + 0, 4, 8, 12. Afterwards dst advances by stride and dif by
;   32 bytes.
|vp8_recon4b_armv6| PROC
|
||||
stmdb sp!, {r4 - r9, lr}
|
||||
|
||||
mov lr, #4
|
||||
|
||||
recon4b_loop
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #4 ; 3 | 2 | 1 | 0
|
||||
ldr r6, [dif, #0] ; 1 | 0
|
||||
ldr r7, [dif, #4] ; 3 | 2
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; 2 | 0
|
||||
pkhtb r9, r7, r6, asr #16 ; 3 | 1
|
||||
|
||||
uxtab16 r8, r8, r4 ; diff 2 | 0 + pred bytes 2, 0
|
||||
uxtab16 r9, r9, r4, ror #8 ; diff 3 | 1 + pred bytes 3, 1
|
||||
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst]
|
||||
|
||||
;4, 5, 6, 7
|
||||
ldr r4, [prd], #4
|
||||
;; ldr r6, [dif, #32]
|
||||
;; ldr r7, [dif, #36]
|
||||
ldr r6, [dif, #8]
|
||||
ldr r7, [dif, #12]
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16
|
||||
pkhtb r9, r7, r6, asr #16
|
||||
|
||||
uxtab16 r8, r8, r4
|
||||
uxtab16 r9, r9, r4, ror #8
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst, #4]
|
||||
|
||||
;8, 9, 10, 11
|
||||
ldr r4, [prd], #4
|
||||
;; ldr r6, [dif, #64]
|
||||
;; ldr r7, [dif, #68]
|
||||
ldr r6, [dif, #16]
|
||||
ldr r7, [dif, #20]
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16
|
||||
pkhtb r9, r7, r6, asr #16
|
||||
|
||||
uxtab16 r8, r8, r4
|
||||
uxtab16 r9, r9, r4, ror #8
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst, #8]
|
||||
|
||||
;12, 13, 14, 15
|
||||
ldr r4, [prd], #4
|
||||
;; ldr r6, [dif, #96]
|
||||
;; ldr r7, [dif, #100]
|
||||
ldr r6, [dif, #24]
|
||||
ldr r7, [dif, #28]
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16
|
||||
pkhtb r9, r7, r6, asr #16
|
||||
|
||||
uxtab16 r8, r8, r4
|
||||
uxtab16 r9, r9, r4, ror #8
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst, #12]
|
||||
|
||||
add dst, dst, stride
|
||||
;; add dif, dif, #8
|
||||
add dif, dif, #32
|
||||
|
||||
subs lr, lr, #1
|
||||
bne recon4b_loop
|
||||
|
||||
ldmia sp!, {r4 - r9, pc}
|
||||
|
||||
ENDP ; |vp8_recon4b_armv6|
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
;
|
||||
;
|
||||
;
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; R0 char *pred_ptr
|
||||
; R1 short *dif_ptr
|
||||
; R2 char *dst_ptr
|
||||
; R3 int stride
|
||||
; Reconstructs four rows of eight pixels: dst = saturate8(pred + diff).
;   In: prd (r0) = prediction bytes, dif (r1) = 16-bit residuals,
;       dst (r2) = destination, stride (r3) = byte step added to dst per row.
;       r4-r9 are saved and restored; lr is the row counter (4).
;   Per row (one recon2b_loop iteration): two groups of 4 pixels are handled.
;   Each group reads one word from prd (post-incremented by 4) and two words
;   from dif at byte offsets 0/4 and 8/12, and stores one word at dst + 0 and
;   dst + 4. Afterwards dst advances by stride and dif by 16 bytes.
|vp8_recon2b_armv6| PROC
|
||||
stmdb sp!, {r4 - r9, lr}
|
||||
|
||||
mov lr, #4
|
||||
|
||||
recon2b_loop
|
||||
;0, 1, 2, 3
|
||||
ldr r4, [prd], #4
|
||||
ldr r6, [dif, #0]
|
||||
ldr r7, [dif, #4]
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16
|
||||
pkhtb r9, r7, r6, asr #16
|
||||
|
||||
uxtab16 r8, r8, r4
|
||||
uxtab16 r9, r9, r4, ror #8
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst]
|
||||
|
||||
;4, 5, 6, 7
|
||||
ldr r4, [prd], #4
|
||||
;; ldr r6, [dif, #32]
|
||||
;; ldr r7, [dif, #36]
|
||||
ldr r6, [dif, #8]
|
||||
ldr r7, [dif, #12]
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16
|
||||
pkhtb r9, r7, r6, asr #16
|
||||
|
||||
uxtab16 r8, r8, r4
|
||||
uxtab16 r9, r9, r4, ror #8
|
||||
usat16 r8, #8, r8
|
||||
usat16 r9, #8, r9
|
||||
orr r8, r8, r9, lsl #8
|
||||
|
||||
str r8, [dst, #4]
|
||||
|
||||
add dst, dst, stride
|
||||
;; add dif, dif, #8
|
||||
add dif, dif, #16
|
||||
|
||||
subs lr, lr, #1
|
||||
bne recon2b_loop
|
||||
|
||||
ldmia sp!, {r4 - r9, pc}
|
||||
|
||||
ENDP ; |vp8_recon2b_armv6|
|
||||
|
||||
END
|
|
@ -0,0 +1,287 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
; TRANSPOSE_MATRIX: transposes a 4x4 byte matrix held in four registers.
;   a0-a3 (first four arguments): input rows, one byte per element. They are
;       overwritten as scratch during the second half of the macro.
;   b0-b3 (last four arguments): outputs; bN receives byte N of every input
;       row, with the byte taken from a0 in the lowest position.
; The rows are split into even and odd bytes with uxtb16, merged pairwise with
; orr, and the final words are assembled with pkhtb/pkhbt.
MACRO
|
||||
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
|
||||
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
|
||||
; a0: 03 02 01 00
|
||||
; a1: 13 12 11 10
|
||||
; a2: 23 22 21 20
|
||||
; a3: 33 32 31 30
|
||||
; b3 b2 b1 b0
|
||||
|
||||
uxtb16 $b1, $a1 ; xx 12 xx 10
|
||||
uxtb16 $b0, $a0 ; xx 02 xx 00
|
||||
uxtb16 $b3, $a3 ; xx 32 xx 30
|
||||
uxtb16 $b2, $a2 ; xx 22 xx 20
|
||||
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
|
||||
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
|
||||
|
||||
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
|
||||
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
|
||||
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
|
||||
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
|
||||
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
|
||||
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
|
||||
|
||||
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- p1
|
||||
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- p3
|
||||
|
||||
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- p0
|
||||
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- p2
|
||||
MEND
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *flimit,
|
||||
;r3 const char *limit,
|
||||
;stack const char *thresh,
|
||||
;stack int count
|
||||
|
||||
; All 16 elements in flimit are equal. So, in the code, only one load is needed
|
||||
; for flimit. The same applies to limit. thresh is not used in the simple loop filter.
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
; Simple loop filter across a horizontal edge, four pixels per loop iteration.
;   In: src (r0) = pointer to the q0 row, pstep (r1) = byte step between rows,
;       r2 = flimit pointer, r3 = limit pointer; count is read from
;       [sp, #40] after the nine-register push. r4-r11 are saved and restored.
;   Each iteration works on one word from each of the rows p1, p0, q0, q1
;   (src - 2*pstep, src - pstep, src, src + pstep). It builds a per-byte mask
;   by comparing abs(p0 - q0)*2 + abs(p1 - q1)/2 against flimit*2 + limit; if
;   any byte passes, p0 and q0 are adjusted and stored back.
;   count is doubled, so the loop runs 2*count times, advancing src by 4 bytes
;   each time.
;   r2 is reused for the 0x80808080 sign-flip constant once flimit is loaded.
;   Literals c0x80808080, c0x04040404 and c0x03030303 are defined outside this
;   routine.
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r3] ; limit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
ldr r7, [r2] ; flimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r9, [sp, #40] ; count for 8-in-parallel
|
||||
uadd8 r7, r7, r7 ; flimit * 2
|
||||
mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time
|
||||
uadd8 r12, r7, r12 ; flimit * 2 + limit
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
; vp8_simple_filter_mask()
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r10, r4, r5 ; p0 - q0
|
||||
uqsub8 r11, r5, r4 ; q0 - p0
|
||||
orr r8, r8, r7 ; abs(p1 - q1)
|
||||
orr r10, r10, r11 ; abs(p0 - q0)
|
||||
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
|
||||
uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1
|
||||
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r8, #0
|
||||
usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags
|
||||
sel r10, r8, lr ; filter mask: F or 0
|
||||
cmp r10, #0
|
||||
beq simple_hskip_filter ; skip filtering if all masks are 0x00
|
||||
|
||||
;vp8_simple_filter()
|
||||
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r8, c0x03030303
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, r10 ; vp8_filter &= mask
|
||||
|
||||
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
|
||||
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr ; Filter1 >>= 3
|
||||
shadd8 r8 , r8 , lr ; Filter2 >>= 3
|
||||
|
||||
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
|
||||
qadd8 r4, r4, r8 ; u = p0 + Filter2
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
str r5, [src] ; store oq0 result
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
str r4, [src, -pstep] ; store op0 result
|
||||
|
||||
|simple_hskip_filter|
|
||||
subs r9, r9, #1
|
||||
addne src, src, #4 ; advance 4 bytes along the edge (next 4 pixels)
|
||||
|
||||
ldrne r3, [src, -pstep, lsl #1] ; p1
|
||||
ldrne r4, [src, -pstep] ; p0
|
||||
ldrne r5, [src] ; q0
|
||||
ldrne r6, [src, pstep] ; q1
|
||||
|
||||
bne simple_hnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
; Simple loop filter applied across a vertical edge, four rows per iteration.
; On entry (as read by the code below):
;   src, pstep - register aliases defined outside this excerpt
;                (presumably r0 = pixel pointer, r1 = row stride; confirm)
;   r2         - address of the flimit word (r2 is then reused for 0x80808080)
;   r3         - address of the limit word
;   [sp, #40]  - count, read after the nine-register push below
; Each row contributes two bytes on either side of the edge; four rows are
; packed and transposed so that r3..r6 hold p1, p0, q0, q1 (one byte per row).
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldr r12, [r2] ; r12: flimit
|
||||
ldr r2, c0x80808080
|
||||
ldr r7, [r3] ; limit
|
||||
|
||||
; load source data; the halfword pairs are packed into r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
ldrh r4, [src], pstep
|
||||
uadd8 r12, r12, r12 ; flimit * 2
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
ldrh r6, [src], pstep
|
||||
uadd8 r12, r12, r7 ; flimit * 2 + limit
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
ldrh r4, [src], pstep
|
||||
ldr r11, [sp, #40] ; count (r11) for 8-in-parallel
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, r11, lsl #1 ; 4-in-parallel
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
pkhbt r9, r3, r4, lsl #16
|
||||
pkhbt r10, r5, r6, lsl #16
|
||||
|
||||
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
|
||||
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
|
||||
|
||||
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r9, r4, r5 ; p0 - q0
|
||||
uqsub8 r10, r5, r4 ; q0 - p0
|
||||
orr r7, r7, r8 ; abs(p1 - q1)
|
||||
orr r9, r9, r10 ; abs(p0 - q0)
|
||||
mov r8, #0 ; r8 == 0 (used by uhadd8, sel and the shadd8 shifts below)
|
||||
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
|
||||
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
|
||||
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r10, #0 ; r10 == -1
|
||||
|
||||
usub8 r7, r12, r7 ; compare to flimit * 2 + limit; usub8 sets the per-byte GE flags
|
||||
sel lr, r10, r8 ; filter mask: r10 (all ones) bytes where GE is set, r8 (zero) elsewhere
|
||||
|
||||
cmp lr, #0
|
||||
beq simple_vskip_filter ; skip filtering when the mask is zero for all four rows
|
||||
|
||||
;vp8_simple_filter() function
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r9, c0x03030303 ; r9 = 3
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, lr ; vp8_filter &= mask
|
||||
|
||||
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
|
||||
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
|
||||
; r8 is zero, so each shadd8 below halves the signed bytes (>> 1)
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
|
||||
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
sub src, src, pstep, lsl #2 ; step back over the four rows loaded for this iteration
|
||||
|
||||
qadd8 r4, r4, r9 ; u = p0 + Filter2
|
||||
qsub8 r5, r5, r3 ; u = q0 - Filter1
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
|
||||
strb r4, [src, #-1] ; store the result: p0 byte at src-1
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep ; q0 byte at src, then advance one row
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
strb r5, [src], pstep
|
||||
|
||||
|simple_vskip_filter|
|
||||
subs r11, r11, #1
|
||||
|
||||
; load source data for the next iteration (only when the count is not exhausted)
|
||||
ldrneh r3, [src, #-2]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrneh r3, [src, #-2]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
bne simple_vnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
; Constant Pool
|
||||
c0x80808080 DCD 0x80808080
|
||||
c0x03030303 DCD 0x03030303
|
||||
c0x04040404 DCD 0x04040404
|
||||
|
||||
END
|
|
@ -0,0 +1,271 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x4_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack unsigned char *dst_ptr,
|
||||
; stack int dst_pitch
|
||||
;-------------------------------------
|
||||
;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184.
|
||||
;Line width is 20 that is 9 short data plus 2 to make it 4bytes aligned. In second pass, load data from stack,
|
||||
;and the result is stored in transpose.
|
||||
|vp8_sixtap_predict8x4_armv6| PROC
|
||||
; Stack frame used below: nine registers are pushed (36 bytes), then sp is
; lowered by a further 184 bytes. [sp] holds yoffset and the intermediate
; buffer starts at sp+4 (kept in lr). Each 20-byte line of that buffer holds
; nine 16-bit values for one output column. After the yoffset word is popped,
; the stacked arguments dst_ptr and dst_pitch are at sp+216 and sp+220.
stmdb sp!, {r4 - r11, lr}
|
||||
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
add lr, sp, #4 ;point to temporary buffer
|
||||
beq skip_firstpass_filter
|
||||
|
||||
;first-pass filter
|
||||
ldr r12, _filter8_coeff_
|
||||
sub r0, r0, r1, lsl #1 ; start two source lines above the block
|
||||
|
||||
add r2, r12, r2, lsl #4 ;calculate filter location (16 bytes per filter entry)
|
||||
add r0, r0, #3 ;adjust src only for loading convenience
|
||||
|
||||
ldr r3, [r2] ; load up packed filter coefficients
|
||||
ldr r4, [r2, #4]
|
||||
ldr r5, [r2, #8]
|
||||
|
||||
mov r2, #0x90000 ; height=9 is top part of counter
|
||||
|
||||
sub r1, r1, #8 ; r0 advances by 8 inside the width loop, so r1 becomes the end-of-line step
|
||||
|
||||
|first_pass_hloop_v6|
|
||||
ldrb r6, [r0, #-5] ; load source data
|
||||
ldrb r7, [r0, #-4]
|
||||
ldrb r8, [r0, #-3]
|
||||
ldrb r9, [r0, #-2]
|
||||
ldrb r10, [r0, #-1]
|
||||
|
||||
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
|
||||
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
|
||||
|
||||
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
|first_pass_wloop_v6|
|
||||
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
|
||||
smuad r12, r7, r3
|
||||
|
||||
ldrb r6, [r0], #1
|
||||
|
||||
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
|
||||
ldrb r7, [r0], #1
|
||||
smlad r12, r9, r4, r12
|
||||
|
||||
pkhbt r10, r10, r6, lsl #16 ; r10 | r9
|
||||
pkhbt r6, r6, r7, lsl #16 ; r11 | r10
|
||||
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
|
||||
smlad r12, r6, r5, r12
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add r11, r11, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff ; test loop counter
|
||||
usat r11, #8, r11, asr #7
|
||||
add r12, r12, #0x40
|
||||
strh r11, [lr], #20 ; result is stored transposed (20-byte step) for the second pass
|
||||
usat r12, #8, r12, asr #7
|
||||
|
||||
strh r12, [lr], #20
|
||||
|
||||
movne r11, r6
|
||||
movne r12, r7
|
||||
|
||||
movne r6, r8
|
||||
movne r7, r9
|
||||
movne r8, r10
|
||||
movne r9, r11
|
||||
movne r10, r12
|
||||
|
||||
bne first_pass_wloop_v6
|
||||
|
||||
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [src, ppl]
|
||||
;;pld [src, r9]
|
||||
;;ENDIF
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
|
||||
sub lr, lr, #158 ; 8 stores * 20 bytes = 160; back up to the next 16-bit slot of the first line
|
||||
|
||||
add r0, r0, r1 ; move to next input line
|
||||
|
||||
bne first_pass_hloop_v6
|
||||
|
||||
;second pass filter
|
||||
secondpass_filter
|
||||
ldr r3, [sp], #4 ; load back yoffset
|
||||
ldr r0, [sp, #216] ; load dst address from stack 180+36
|
||||
ldr r1, [sp, #220] ; load dst stride from stack 180+40
|
||||
|
||||
cmp r3, #0
|
||||
beq skip_secondpass_filter
|
||||
|
||||
ldr r12, _filter8_coeff_
|
||||
add lr, r12, r3, lsl #4 ;calculate filter location
|
||||
|
||||
mov r2, #0x00080000 ; 8 in the top half of the counter: one pass per buffer line
|
||||
|
||||
ldr r3, [lr] ; load up packed filter coefficients
|
||||
ldr r4, [lr, #4]
|
||||
ldr r5, [lr, #8]
|
||||
|
||||
pkhbt r12, r4, r3 ; pack the filter differently
|
||||
pkhbt r11, r5, r4
|
||||
|
||||
second_pass_hloop_v6
|
||||
ldr r6, [sp] ; load the data
|
||||
ldr r7, [sp, #4]
|
||||
|
||||
orr r2, r2, #2 ; loop counter
|
||||
|
||||
second_pass_wloop_v6
|
||||
smuad lr, r3, r6 ; apply filter
|
||||
smulbt r10, r3, r6
|
||||
|
||||
ldr r8, [sp, #8]
|
||||
|
||||
smlad lr, r4, r7, lr
|
||||
smladx r10, r12, r7, r10
|
||||
|
||||
ldrh r9, [sp, #12]
|
||||
|
||||
smlad lr, r5, r8, lr
|
||||
smladx r10, r11, r8, r10
|
||||
|
||||
add sp, sp, #4
|
||||
smlatb r10, r5, r9, r10
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r0], r1 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
strb r10, [r0],r1
|
||||
|
||||
movne r6, r7
|
||||
movne r7, r8
|
||||
|
||||
bne second_pass_wloop_v6
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
add sp, sp, #12 ; update src for next loop (20-8)
|
||||
sub r0, r0, r1, lsl #2 ; back up the four rows just written
|
||||
add r0, r0, #1 ; and move to the next output column
|
||||
|
||||
bne second_pass_hloop_v6
|
||||
|
||||
add sp, sp, #20 ; release the rest of the 184-byte temporary area
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
;--------------------
|
||||
skip_firstpass_filter
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r1, r1, #8
|
||||
mov r2, #9
|
||||
|
||||
skip_firstpass_hloop
|
||||
ldrb r4, [r0], #1 ; load data
|
||||
subs r2, r2, #1
|
||||
ldrb r5, [r0], #1
|
||||
strh r4, [lr], #20 ; store it to intermediate buffer
|
||||
ldrb r6, [r0], #1 ; load data
|
||||
strh r5, [lr], #20
|
||||
ldrb r7, [r0], #1
|
||||
strh r6, [lr], #20
|
||||
ldrb r8, [r0], #1
|
||||
strh r7, [lr], #20
|
||||
ldrb r9, [r0], #1
|
||||
strh r8, [lr], #20
|
||||
ldrb r10, [r0], #1
|
||||
strh r9, [lr], #20
|
||||
ldrb r11, [r0], #1
|
||||
strh r10, [lr], #20
|
||||
add r0, r0, r1 ; move to next input line
|
||||
strh r11, [lr], #20
|
||||
|
||||
sub lr, lr, #158 ; move over to next column
|
||||
bne skip_firstpass_hloop
|
||||
|
||||
b secondpass_filter
|
||||
|
||||
;--------------------
|
||||
skip_secondpass_filter
|
||||
mov r2, #8
|
||||
add sp, sp, #4 ;start from src[0] instead of src[-2]
|
||||
|
||||
skip_secondpass_hloop
|
||||
ldr r6, [sp], #4
|
||||
subs r2, r2, #1
|
||||
ldr r8, [sp], #4
|
||||
|
||||
mov r7, r6, lsr #16 ; unpack
|
||||
strb r6, [r0], r1
|
||||
mov r9, r8, lsr #16
|
||||
strb r7, [r0], r1
|
||||
add sp, sp, #12 ; 20-8
|
||||
strb r8, [r0], r1
|
||||
strb r9, [r0], r1
|
||||
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne skip_secondpass_hloop
|
||||
|
||||
add sp, sp, #16 ; 180 - (160 +4)
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named subpelfilters8_dat. DCD reserves 32 words here: 8 filter entries of 4 words each.
|
||||
;Each entry is 3 words of packed 16-bit coefficients plus one zero word. Label filter8_coeff addresses the table.
|
||||
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
|
||||
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
|
||||
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
|
||||
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
|
||||
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
|
||||
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
|
||||
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
|
||||
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
|
||||
|
||||
;DCD 0, 0, 128, 0, 0, 0
|
||||
;DCD 0, -6, 123, 12, -1, 0
|
||||
;DCD 2, -11, 108, 36, -8, 1
|
||||
;DCD 0, -9, 93, 50, -6, 0
|
||||
;DCD 3, -16, 77, 77, -16, 3
|
||||
;DCD 0, -6, 50, 93, -9, 0
|
||||
;DCD 1, -8, 36, 108, -11, 2
|
||||
;DCD 0, -1, 12, 123, -6, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
static const short bilinear_filters[8][2] =
|
||||
{
|
||||
{ 128, 0 },
|
||||
{ 112, 16 },
|
||||
{ 96, 32 },
|
||||
{ 80, 48 },
|
||||
{ 64, 64 },
|
||||
{ 48, 80 },
|
||||
{ 32, 96 },
|
||||
{ 16, 112 }
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if 0
|
||||
void vp8_filter_block2d_bil_first_pass_6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i, j;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = ( ( (int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[1] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_filter_block2d_bil_second_pass_6
|
||||
(
|
||||
unsigned short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
int output_pitch,
|
||||
unsigned int output_height,
|
||||
unsigned int output_width,
|
||||
const short *vp8_filter
|
||||
)
|
||||
{
|
||||
unsigned int i,j;
|
||||
int Temp;
|
||||
|
||||
for ( i=0; i<output_height; i++ )
|
||||
{
|
||||
for ( j=0; j<output_width; j++ )
|
||||
{
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[output_width] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT/2);
|
||||
output_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
/* Next row... */
|
||||
/*src_ptr += src_pixels_per_line - output_width;*/
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Two-pass bilinear filter driver for the ARMv6 assembly kernels.
 *
 * The first (horizontal) pass is asked for Height + 1 rows of Width values,
 * written to a stack scratch buffer; the second (vertical) pass consumes that
 * buffer and writes a Width x Height block to output_ptr with a row stride of
 * dst_pitch.
 */
void vp8_filter_block2d_bil_armv6(unsigned char *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int dst_pitch,
                                  const short *HFilter,
                                  const short *VFilter,
                                  int Width,
                                  int Height)
{
    /* Intermediate buffer shared by the two passes. */
    unsigned short first_pass_out[36 * 16];

    /* Horizontal 1-D pass. */
    vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass_out, src_pixels_per_line,
                                            Height + 1, Width, HFilter);

    /* Vertical 1-D pass. */
    vp8_filter_block2d_bil_second_pass_armv6(first_pass_out, output_ptr, dst_pitch,
                                             Height, Width, VFilter);
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = bilinear_filters[xoffset];
|
||||
VFilter = bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@ -0,0 +1,256 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "subpixel.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#define BLOCK_HEIGHT_WIDTH 4
|
||||
#define VP8_FILTER_WEIGHT 128
|
||||
#define VP8_FILTER_SHIFT 7
|
||||
|
||||
DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
|
||||
{
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
};
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter4_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if HAVE_ARMV6
|
||||
void vp8_sixtap_predict_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
|
||||
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Vfilter is a 4 tap filter */
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
/* Vfilter is 6 tap filter */
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
void vp8_sixtap_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
|
||||
/*if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
|
||||
}*/
|
||||
/* Hfilter is null. Second pass only */
|
||||
/*else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
|
||||
else*/
|
||||
|
||||
vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
|
||||
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
|
||||
/*}*/
|
||||
}
|
||||
#endif
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef IDCT_ARM_H
|
||||
#define IDCT_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
extern prototype_idct(vp8_short_idct4x4llm_1_v6);
|
||||
extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
|
||||
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_idct_idct1
|
||||
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
|
||||
|
||||
#undef vp8_idct_idct16
|
||||
#define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
|
||||
|
||||
#undef vp8_idct_idct1_scalar_add
|
||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
|
||||
|
||||
#undef vp8_idct_iwalsh1
|
||||
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
|
||||
|
||||
#undef vp8_idct_iwalsh16
|
||||
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern prototype_idct(vp8_short_idct4x4llm_1_neon);
|
||||
extern prototype_idct(vp8_short_idct4x4llm_neon);
|
||||
extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
|
||||
extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
|
||||
|
||||
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_idct_idct1
|
||||
#define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
|
||||
|
||||
#undef vp8_idct_idct16
|
||||
#define vp8_idct_idct16 vp8_short_idct4x4llm_neon
|
||||
|
||||
#undef vp8_idct_idct1_scalar_add
|
||||
#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
|
||||
|
||||
#undef vp8_idct_iwalsh1
|
||||
#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
|
||||
|
||||
#undef vp8_idct_iwalsh16
|
||||
#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,237 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <math.h>
|
||||
#include "loopfilter.h"
|
||||
#include "onyxc_int.h"
|
||||
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_armv6);
|
||||
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_y_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_neon);
|
||||
extern prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_neon);
|
||||
|
||||
extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/*ARMV6 loopfilter functions*/
|
||||
/* Horizontal MB filtering */
|
||||
/*
 * Macroblock horizontal-edge filter: always runs on the Y plane (last
 * argument 2) and on the U / V planes (last argument 1) when their pointers
 * are non-null.  simpler_lpf is accepted but not used.
 */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    (void) simpler_lpf;

    vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride,
                                            lfi->mbflim, lfi->lim, lfi->mbthr, 2);

    if (u_ptr) {
        vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride,
                                                lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
    }

    if (v_ptr) {
        vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride,
                                                lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
    }
}
|
||||
|
||||
/*
 * Macroblock horizontal-edge filter, simple variant: only the Y plane is
 * filtered.  The chroma pointers, uv_stride and simpler_lpf are accepted but
 * not used.
 */
void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    /* Silence unused-parameter warnings. */
    (void) u_ptr;
    (void) v_ptr;
    (void) uv_stride;
    (void) simpler_lpf;

    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride,
                                                 lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
/*
 * ARMv6 normal loop filter for the vertical macroblock edge.
 *
 * The luma plane at y_ptr is always filtered; the U and V planes are each
 * filtered only when their pointer is non-NULL.  All calls use the
 * macroblock-edge limits/thresholds stored in lfi.  simpler_lpf is unused.
 */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    unsigned char *chroma[2];
    int plane;

    (void) simpler_lpf;

    vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);

    /* U plane first, then V; a NULL plane is skipped. */
    chroma[0] = u_ptr;
    chroma[1] = v_ptr;

    for (plane = 0; plane < 2; plane++)
    {
        if (chroma[plane])
        {
            vp8_mbloop_filter_vertical_edge_armv6(chroma[plane], uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
        }
    }
}
|
||||
|
||||
/*
 * ARMv6 simple loop filter for the vertical macroblock edge.
 *
 * Only the luma plane is touched; the chroma pointers, uv_stride and
 * simpler_lpf are accepted for signature compatibility and ignored.
 */
void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride,
                                               lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
/*
 * ARMv6 normal loop filter for the horizontal block edges inside a
 * macroblock.
 *
 * Luma edges at rows 4, 8 and 12 are filtered with the block-level
 * limits/thresholds from lfi.  Each non-NULL chroma plane gets a single
 * edge at row 4.  simpler_lpf is unused.
 */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    (void) simpler_lpf;

    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_horizontal_edge_armv6(y_ptr + row * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }

    if (u_ptr)
    {
        vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
    }

    if (v_ptr)
    {
        vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
    }
}
|
||||
|
||||
/*
 * ARMv6 simple loop filter for the horizontal block edges inside a
 * macroblock.
 *
 * Filters the luma edges at rows 4, 8 and 12 only; the chroma pointers,
 * uv_stride and simpler_lpf are ignored.
 */
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + row * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
/*
 * ARMv6 normal loop filter for the vertical block edges inside a
 * macroblock.
 *
 * Luma edges at columns 4, 8 and 12 are filtered with the block-level
 * limits/thresholds from lfi.  Each non-NULL chroma plane gets a single
 * edge at column 4.  simpler_lpf is unused.
 */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    (void) simpler_lpf;

    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_vertical_edge_armv6(y_ptr + col, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }

    if (u_ptr)
    {
        vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
    }

    if (v_ptr)
    {
        vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
    }
}
|
||||
|
||||
/*
 * ARMv6 simple loop filter for the vertical block edges inside a
 * macroblock.
 *
 * Filters the luma edges at columns 4, 8 and 12 only; the chroma pointers,
 * uv_stride and simpler_lpf are ignored.
 */
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + col, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }
}
|
||||
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
/* NEON loopfilter functions */
|
||||
/* Horizontal MB filtering */
|
||||
/*
 * NEON normal loop filter for the horizontal macroblock edge.
 *
 * Luma is always filtered.  When u_ptr is non-NULL a single call filters the
 * chroma edge; that call receives v_ptr as its last argument, so v_ptr is
 * not checked separately here.  simpler_lpf is unused.
 */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    (void) simpler_lpf;

    vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);

    /* No chroma to filter. */
    if (!u_ptr)
        return;

    vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
|
||||
|
||||
/*
 * NEON simple loop filter for the horizontal macroblock edge.
 *
 * Only the luma plane is touched; the chroma pointers, uv_stride and
 * simpler_lpf are accepted for signature compatibility and ignored.
 */
void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride,
                                                lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
/*
 * NEON normal loop filter for the vertical macroblock edge.
 *
 * Luma is always filtered.  When u_ptr is non-NULL a single call filters the
 * chroma edge; that call receives v_ptr as its last argument, so v_ptr is
 * not checked separately here.  simpler_lpf is unused.
 */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    (void) simpler_lpf;

    vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);

    /* No chroma to filter. */
    if (!u_ptr)
        return;

    vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
}
|
||||
|
||||
/*
 * NEON simple loop filter for the vertical macroblock edge.
 *
 * Only the luma plane is touched; the chroma pointers, uv_stride and
 * simpler_lpf are accepted for signature compatibility and ignored.
 */
void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride,
                                              lfi->mbflim, lfi->lim, lfi->mbthr, 2);
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
/*
 * NEON normal loop filter for the horizontal block edges inside a
 * macroblock.
 *
 * Luma edges at rows 4, 8 and 12 are filtered with the block-level
 * limits/thresholds from lfi.  When u_ptr is non-NULL, one call filters the
 * chroma edge at row 4, taking the U pointer first and the V pointer as its
 * last argument.  simpler_lpf is unused.
 */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    (void) simpler_lpf;

    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_horizontal_edge_y_neon(y_ptr + row * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }

    /* No chroma to filter. */
    if (!u_ptr)
        return;

    vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
}
|
||||
|
||||
/*
 * NEON simple loop filter for the horizontal block edges inside a
 * macroblock.
 *
 * Filters the luma edges at rows 4, 8 and 12 only; the chroma pointers,
 * uv_stride and simpler_lpf are ignored.
 */
void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int row;

    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + row * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
/*
 * NEON normal loop filter for the vertical block edges inside a macroblock.
 *
 * Luma edges at columns 4, 8 and 12 are filtered with the block-level
 * limits/thresholds from lfi.  When u_ptr is non-NULL, one call filters the
 * chroma edge at column 4, taking the U pointer first and the V pointer as
 * its last argument.  simpler_lpf is unused.
 */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    (void) simpler_lpf;

    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_vertical_edge_y_neon(y_ptr + col, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }

    /* No chroma to filter. */
    if (!u_ptr)
        return;

    vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
}
|
||||
|
||||
/*
 * NEON simple loop filter for the vertical block edges inside a macroblock.
 *
 * Filters the luma edges at columns 4, 8 and 12 only; the chroma pointers,
 * uv_stride and simpler_lpf are ignored.
 */
void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
{
    int col;

    /* Unused parameters. */
    (void) simpler_lpf;
    (void) uv_stride;
    (void) v_ptr;
    (void) u_ptr;

    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_simple_vertical_edge_neon(y_ptr + col, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
    }
}
|
||||
#endif
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef LOOPFILTER_ARM_H
#define LOOPFILTER_ARM_H

/*
 * Declarations of the ARM loop filter entry points, one set per instruction
 * set extension.  When CONFIG_RUNTIME_CPU_DETECT is off, the generic
 * vp8_lf_* hooks are rebound at compile time to the functions declared here.
 * The ARMv7 section follows the ARMv6 one, so if both are enabled the NEON
 * bindings are the ones left in effect.
 */

#if HAVE_ARMV6
/* Normal filter: macroblock edges, then inner block edges. */
extern prototype_loopfilter_block(vp8_loop_filter_mbv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bv_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bh_armv6);
/* Simple filter: macroblock edges, then inner block edges. */
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);

#if !CONFIG_RUNTIME_CPU_DETECT
#undef  vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
#undef  vp8_lf_normal_mb_h
#define vp8_lf_normal_mb_h vp8_loop_filter_mbh_armv6
#undef  vp8_lf_normal_b_v
#define vp8_lf_normal_b_v vp8_loop_filter_bv_armv6
#undef  vp8_lf_normal_b_h
#define vp8_lf_normal_b_h vp8_loop_filter_bh_armv6

#undef  vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_armv6
#undef  vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_armv6
#undef  vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_armv6
#undef  vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif /* HAVE_ARMV6 */

#if HAVE_ARMV7
/* Normal filter: macroblock edges, then inner block edges. */
extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbh_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bv_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bh_neon);
/* Simple filter: macroblock edges, then inner block edges. */
extern prototype_loopfilter_block(vp8_loop_filter_mbvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);

#if !CONFIG_RUNTIME_CPU_DETECT
#undef  vp8_lf_normal_mb_v
#define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
#undef  vp8_lf_normal_mb_h
#define vp8_lf_normal_mb_h vp8_loop_filter_mbh_neon
#undef  vp8_lf_normal_b_v
#define vp8_lf_normal_b_v vp8_loop_filter_bv_neon
#undef  vp8_lf_normal_b_h
#define vp8_lf_normal_b_h vp8_loop_filter_bh_neon

#undef  vp8_lf_simple_mb_v
#define vp8_lf_simple_mb_v vp8_loop_filter_mbvs_neon
#undef  vp8_lf_simple_mb_h
#define vp8_lf_simple_mb_h vp8_loop_filter_mbhs_neon
#undef  vp8_lf_simple_b_v
#define vp8_lf_simple_b_v vp8_loop_filter_bvs_neon
#undef  vp8_lf_simple_b_h
#define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
#endif /* !CONFIG_RUNTIME_CPU_DETECT */
#endif /* HAVE_ARMV7 */

#endif /* LOOPFILTER_ARM_H */
|
|
@ -0,0 +1,362 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict16x16_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; Two-pass bilinear filter producing a 16x16 block.
; The horizontal first pass is selected by xoffset and the vertical second
; pass by yoffset; a pass is skipped when its offset is 0.
;
; r0            unsigned char  *src_ptr,
; r1            int  src_pixels_per_line,
; r2            int  xoffset,
; r3            int  yoffset,
; stack(r4)     unsigned char *dst_ptr,
; stack(r5)     int  dst_pitch
;
; d8-d15 are callee-saved under the AAPCS and are used as scratch below, so
; they are saved on entry and restored on every exit path.

|vp8_bilinear_predict16x16_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;preserve callee-saved NEON registers

    ldr             r12, _bifilter16_coeff_
    ldr             r4, [sp, #76]           ;load parameters from stack (12 bytes push + 64 bytes vpush)
    ldr             r5, [sp, #80]           ;load parameters from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_bfilter16x16_only

    add             r2, r12, r2, lsl #3     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {d31}, [r2]             ;load first_pass filter

    beq             firstpass_bfilter16x16_only

    sub             sp, sp, #272            ;reserve space on stack for temporary storage
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    mov             lr, sp
    vld1.u8         {d5, d6, d7}, [r0], r1

    mov             r2, #3                  ;loop counter
    vld1.u8         {d8, d9, d10}, [r0], r1

    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {d11, d12, d13}, [r0], r1

    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (17x16)
filt_blk2d_fp16x16_loop_neon
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7

    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    vqrshrn.u16     d21, q14, #7
    vld1.u8         {d5, d6, d7}, [r0], r1

    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
    vld1.u8         {d8, d9, d10}, [r0], r1
    vst1.u8         {d18, d19, d20, d21}, [lr]!
    vld1.u8         {d11, d12, d13}, [r0], r1

    bne             filt_blk2d_fp16x16_loop_neon

;First-pass filtering for rest 5 lines
    vld1.u8         {d14, d15, d16}, [r0], r1

    vmull.u8        q9, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q10, d3, d0
    vmull.u8        q11, d5, d0
    vmull.u8        q12, d6, d0
    vmull.u8        q13, d8, d0
    vmull.u8        q14, d9, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1

    vmlal.u8        q9, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q11, d5, d1
    vmlal.u8        q13, d8, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1

    vmlal.u8        q10, d3, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q12, d6, d1
    vmlal.u8        q14, d9, d1

    vmull.u8        q1, d11, d0
    vmull.u8        q2, d12, d0
    vmull.u8        q3, d14, d0
    vmull.u8        q4, d15, d0

    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
    vext.8          d14, d14, d15, #1

    vmlal.u8        q1, d11, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q3, d14, d1

    vext.8          d12, d12, d13, #1
    vext.8          d15, d15, d16, #1

    vmlal.u8        q2, d12, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q4, d15, d1

    vqrshrn.u16     d10, q9, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d11, q10, #7
    vqrshrn.u16     d12, q11, #7
    vqrshrn.u16     d13, q12, #7
    vqrshrn.u16     d14, q13, #7
    vqrshrn.u16     d15, q14, #7
    vqrshrn.u16     d16, q1, #7
    vqrshrn.u16     d17, q2, #7
    vqrshrn.u16     d18, q3, #7
    vqrshrn.u16     d19, q4, #7

    vst1.u8         {d10, d11, d12, d13}, [lr]!     ;store result
    vst1.u8         {d14, d15, d16, d17}, [lr]!
    vst1.u8         {d18, d19}, [lr]!

;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    sub             lr, lr, #272            ;rewind to the start of the temporary buffer

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vld1.u8         {d22, d23}, [lr]!       ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]
    mov             r12, #4                 ;loop counter

filt_blk2d_sp16x16_loop_neon
    vld1.u8         {d24, d25}, [lr]!
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [lr]!
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [lr]!
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [lr]!

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    subs            r12, r12, #1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    vst1.u8         {d4, d5}, [r4], r5
    vst1.u8         {d6, d7}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the first row of the next iteration
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_sp16x16_loop_neon

    add             sp, sp, #272

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;--------------------
firstpass_bfilter16x16_only
    mov             r2, #4                  ;loop counter
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
    vld1.u8         {d2, d3, d4}, [r0], r1      ;load src data
    vld1.u8         {d5, d6, d7}, [r0], r1
    vld1.u8         {d8, d9, d10}, [r0], r1
    vld1.u8         {d11, d12, d13}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7
    vst1.u8         {d14, d15}, [r4], r5    ;store result
    vqrshrn.u16     d21, q14, #7

    vst1.u8         {d16, d17}, [r4], r5
    vst1.u8         {d18, d19}, [r4], r5
    vst1.u8         {d20, d21}, [r4], r5

    bne             filt_blk2d_fpo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;---------------------
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    mov             r12, #4                 ;loop counter
    vld1.u32        {d31}, [r3]             ;load second_pass filter
    vld1.u8         {d22, d23}, [r0], r1    ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

filt_blk2d_spo16x16_loop_neon
    vld1.u8         {d24, d25}, [r0], r1
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [r0], r1
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [r0], r1
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [r0], r1

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    subs            r12, r12, #1
    vst1.u8         {d4, d5}, [r4], r5
    vmov            q11, q15                ;last loaded row becomes the first row of the next iteration
    vst1.u8         {d6, d7}, [r4], r5
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_spo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section bifilters16_dat: one address word followed by a table of 16 coefficient words.
|
||||
;_bifilter16_coeff_ holds the address of the table; bifilter16_coeff labels the table itself.
|
||||
;Each filter is a pair of words (8 bytes), addressed as bifilter16_coeff + 8*offset.
|
||||
_bifilter16_coeff_
|
||||
DCD bifilter16_coeff
|
||||
bifilter16_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,135 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict4x4_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; Two-pass bilinear filter producing a 4x4 block.
; The horizontal first pass is skipped when xoffset is 0 and the vertical
; second pass is skipped when yoffset is 0.
;
; r0            unsigned char  *src_ptr,
; r1            int  src_pixels_per_line,
; r2            int  xoffset,
; r3            int  yoffset,
; stack(r4)     unsigned char *dst_ptr,
; stack(lr)     int  dst_pitch
;
; d8-d15 are callee-saved under the AAPCS and the first pass uses several of
; them as scratch, so they are saved on entry and restored on every exit path.

|vp8_bilinear_predict4x4_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;preserve callee-saved NEON registers

    ldr             r12, _bifilter4_coeff_
    ldr             r4, [sp, #72]           ;load parameters from stack (8 bytes push + 64 bytes vpush)
    ldr             lr, [sp, #76]           ;load parameters from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (5x4)
    vld1.u8         {d2}, [r0], r1          ;load src data
    add             r2, r12, r2, lsl #3     ;calculate Hfilter location (2coeffsx4bytes=8bytes)

    vld1.u8         {d3}, [r0], r1
    vld1.u32        {d31}, [r2]             ;first_pass filter

    vld1.u8         {d4}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0-d1)
    vld1.u8         {d5}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {d6}, [r0], r1

    vshr.u64        q4, q1, #8              ;construct src_ptr[1]
    vshr.u64        q5, q2, #8
    vshr.u64        d12, d6, #8

    vzip.32         d2, d3                  ;put 2-line data in 1 register (src_ptr[0])
    vzip.32         d4, d5
    vzip.32         d8, d9                  ;put 2-line data in 1 register (src_ptr[1])
    vzip.32         d10, d11

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d4, d0
    vmull.u8        q9, d6, d0

    vmlal.u8        q7, d8, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q8, d10, d1
    vmlal.u8        q9, d12, d1

    vqrshrn.u16     d28, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d29, q8, #7
    vqrshrn.u16     d30, q9, #7

;Second pass: 4x4
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3     ;calculate Vfilter location
    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0-d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d28, d0
    vmull.u8        q2, d29, d0

    vext.8          d26, d28, d29, #4       ;construct src_ptr[pixel_step]
    vext.8          d27, d29, d30, #4

    vmlal.u8        q1, d26, d1
    vmlal.u8        q2, d27, d1

    add             r0, r4, lr              ;r0, r1, r2 = addresses of output rows 1, 2, 3
    add             r1, r0, lr
    add             r2, r1, lr

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7

    vst1.32         {d2[0]}, [r4]           ;store result
    vst1.32         {d2[1]}, [r0]
    vst1.32         {d3[0]}, [r1]
    vst1.32         {d3[1]}, [r2]

    vpop            {d8-d15}
    pop             {r4, pc}

;--------------------
skip_firstpass_filter

    vld1.32         {d28[0]}, [r0], r1      ;load src data
    vld1.32         {d28[1]}, [r0], r1
    vld1.32         {d29[0]}, [r0], r1
    vld1.32         {d29[1]}, [r0], r1
    vld1.32         {d30[0]}, [r0], r1

    b               secondpass_filter

;---------------------
skip_secondpass_filter
    vst1.32         {d28[0]}, [r4], lr      ;store result
    vst1.32         {d28[1]}, [r4], lr
    vst1.32         {d29[0]}, [r4], lr
    vst1.32         {d29[1]}, [r4], lr

    vpop            {d8-d15}
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bilinearfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section bilinearfilters4_dat: one address word followed by a table of 16 coefficient words.
|
||||
;_bifilter4_coeff_ holds the address of the table; bifilter4_coeff labels the table itself.
|
||||
;Each filter is a pair of words (8 bytes), addressed as bifilter4_coeff + 8*offset.
|
||||
_bifilter4_coeff_
|
||||
DCD bifilter4_coeff
|
||||
bifilter4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,140 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict8x4_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; Two-pass bilinear filter producing an 8-wide, 4-high block.
; The horizontal first pass is skipped when xoffset is 0 and the vertical
; second pass is skipped when yoffset is 0.
;
; r0            unsigned char  *src_ptr,
; r1            int  src_pixels_per_line,
; r2            int  xoffset,
; r3            int  yoffset,
; stack(r4)     unsigned char *dst_ptr,
; stack(lr)     int  dst_pitch
;
; d8-d15 are callee-saved under the AAPCS and are used as scratch below, so
; they are saved on entry and restored on every exit path.

|vp8_bilinear_predict8x4_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;preserve callee-saved NEON registers

    ldr             r12, _bifilter8x4_coeff_
    ldr             r4, [sp, #72]           ;load parameters from stack (8 bytes push + 64 bytes vpush)
    ldr             lr, [sp, #76]           ;load parameters from stack

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (5x8)
    add             r2, r12, r2, lsl #3     ;calculate filter location

    vld1.u8         {q1}, [r0], r1          ;load src data
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {q5}, [r0], r1
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d23, q7, #7
    vqrshrn.u16     d24, q8, #7
    vqrshrn.u16     d25, q9, #7
    vqrshrn.u16     d26, q10, #7

;Second pass: 4x8
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3
    add             r0, r4, lr              ;r0, r1, r2 = addresses of output rows 1, 2, 3

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1

    add             r2, r1, lr

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7

    vst1.u8         {d2}, [r4]              ;store result
    vst1.u8         {d3}, [r0]
    vst1.u8         {d4}, [r1]
    vst1.u8         {d5}, [r2]

    vpop            {d8-d15}
    pop             {r4, pc}

;--------------------
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1

    b               secondpass_filter

;---------------------
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr

    vpop            {d8-d15}
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section bifilters8x4_dat: one address word followed by a table of 16 coefficient words.
|
||||
;_bifilter8x4_coeff_ holds the address of the table; bifilter8x4_coeff labels the table itself.
|
||||
;Each filter is a pair of words (8 bytes), addressed as bifilter8x4_coeff + 8*offset.
|
||||
_bifilter8x4_coeff_
|
||||
DCD bifilter8x4_coeff
|
||||
bifilter8x4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,188 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict8x8_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; vp8_bilinear_predict8x8_neon
; Two-pass (horizontal, then vertical) 2-tap bilinear prediction of an 8x8 block.
; Either pass is skipped when its offset is zero.
;
; r0    unsigned char  *src_ptr
; r1    int  src_pixels_per_line
; r2    int  xoffset   (index into bifilter8_coeff, 8 bytes per entry)
; r3    int  yoffset   (index into bifilter8_coeff, 8 bytes per entry)
; stack unsigned char *dst_ptr   (loaded into r4)
; stack int  dst_pitch           (loaded into lr)
;
; Register use: d22-d30 hold the nine first-pass rows, d0/d1 the two filter
; taps, d31 the raw filter entry.  q4-q7 (d8-d15) are used as scratch and are
; callee-saved under the AAPCS, so they are saved/restored with vpush/vpop.

|vp8_bilinear_predict8x8_neon| PROC
    push            {r4, lr}

    ldr             r12, _bifilter8_coeff_
    ldr             r4, [sp, #8]            ;dst_ptr   (first stack argument)
    ldr             lr, [sp, #12]           ;dst_pitch (second stack argument)

    ; Save callee-saved NEON registers.  Done after the sp-relative argument
    ; loads above so their offsets are unaffected; 64 bytes keeps sp 8-byte
    ; aligned.
    vpush           {d8-d15}

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (9x8)
    add             r2, r12, r2, lsl #3     ;calculate filter location

    vld1.u8         {q1}, [r0], r1          ;load src data (16 bytes per row)
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1

    vld1.u8         {q1}, [r0], r1          ;load src data
    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vld1.u8         {q2}, [r0], r1
    vqrshrn.u16     d23, q7, #7
    vld1.u8         {q3}, [r0], r1
    vqrshrn.u16     d24, q8, #7
    vld1.u8         {q4}, [r0], r1
    vqrshrn.u16     d25, q9, #7

    ;first_pass filtering on the remaining 5 lines
    vld1.u8         {q5}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d26, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d27, q7, #7
    vqrshrn.u16     d28, q8, #7
    vqrshrn.u16     d29, q9, #7
    vqrshrn.u16     d30, q10, #7

;Second pass: 8x8
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3     ;calculate filter location
    add             r0, r4, lr              ;r0 = address of dst row 1

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr              ;r1 = address of dst row 2

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1
    vmlal.u8        q5, d27, d1
    vmlal.u8        q6, d28, d1
    vmlal.u8        q7, d29, d1
    vmlal.u8        q8, d30, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2}, [r4]              ;store result
    vst1.u8         {d3}, [r0]
    vst1.u8         {d4}, [r1], lr
    vst1.u8         {d5}, [r1], lr
    vst1.u8         {d6}, [r1], lr
    vst1.u8         {d7}, [r1], lr
    vst1.u8         {d8}, [r1], lr
    vst1.u8         {d9}, [r1], lr

    vpop            {d8-d15}                ;restore callee-saved NEON registers
    pop             {r4, pc}

;--------------------
; xoffset == 0: no horizontal filtering, load the nine source rows directly.
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1
    vld1.u8         {d27}, [r0], r1
    vld1.u8         {d28}, [r0], r1
    vld1.u8         {d29}, [r0], r1
    vld1.u8         {d30}, [r0], r1

    b               secondpass_filter

;---------------------
; yoffset == 0: no vertical filtering, store the first eight rows as they are.
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr
    vst1.u8         {d26}, [r4], lr
    vst1.u8         {d27}, [r4], lr
    vst1.u8         {d28}, [r4], lr
    vst1.u8         {d29}, [r4], lr

    vpop            {d8-d15}                ;restore callee-saved NEON registers
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
AREA bifilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data area bifilters8_dat holding the bilinear filter coefficient table.
|
||||
;_bifilter8_coeff_ is a one-word cell containing the address of bifilter8_coeff; the predictor loads it into r12.
|
||||
;bifilter8_coeff is 16 words laid out as 8 two-word pairs; the code indexes it with (offset << 3). Every pair sums to 128.
|
||||
_bifilter8_coeff_
|
||||
DCD bifilter8_coeff
|
||||
bifilter8_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,584 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_build_intra_predictors_mby_neon_func|
|
||||
EXPORT |vp8_build_intra_predictors_mby_s_neon_func|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; vp8_build_intra_predictors_mby_neon_func
; Builds a 16x16 intra prediction and writes it as 16 contiguous 16-byte rows
; starting at ypred_ptr.
;
; r0    unsigned char *y_buffer
; r1    unsigned char *ypred_ptr
; r2    int y_stride
; r3    int mode      (0 = DC, 1 = V, 2 = H, 3 = TM; any other value falls
;                      through into the DC case)
; stack int Up        (non-zero when the row above is to be used for DC)
; stack int Left      (non-zero when the left column is to be used for DC)
;
; Only caller-saved NEON registers are used except in the TM case, which needs
; q4/q5/q7 and therefore saves/restores d8-d15 as the AAPCS requires.

|vp8_build_intra_predictors_mby_neon_func| PROC
    push            {r4-r8, lr}

    cmp             r3, #0
    beq             case_dc_pred
    cmp             r3, #1
    beq             case_v_pred
    cmp             r3, #2
    beq             case_h_pred
    cmp             r3, #3
    beq             case_tm_pred

case_dc_pred
    ldr             r4, [sp, #24]       ; Up   (24 = size of the push above)
    ldr             r5, [sp, #28]       ; Left

    ; Default the DC average to 128
    mov             r12, #128
    vdup.u8         q0, r12

    ; Zero out running sum
    mov             r12, #0

    ; compute shift and jump
    ; NOTE(review): the shift below is 3 + Up + Left, which assumes each flag
    ; is 0 or 1 -- confirm against the caller.
    adds            r7, r4, r5
    beq             skip_dc_pred_up_left

    ; Load above row, if it exists
    cmp             r4, #0
    beq             skip_dc_pred_up

    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1
    vpaddl.u16      q3, q2
    vpaddl.u32      q8, q3              ; q8 (caller-saved) instead of q4

    ; Move back to integer registers
    vmov.32         r4, d16[0]
    vmov.32         r6, d17[0]

    add             r12, r4, r6

skip_dc_pred_up

    cmp             r5, #0
    beq             skip_dc_pred_left

    sub             r0, r0, #1

    ; Load left row, if it exists
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0]

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

skip_dc_pred_left
    add             r7, r7, #3          ; Shift
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; add rounding term 1 << (shift - 1)
    mov             r5, r12, lsr r7     ; expected_dc

    vdup.u8         q0, r5

skip_dc_pred_up_left
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!

    pop             {r4-r8,pc}

case_v_pred
    ; Copy down above row
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    pop             {r4-r8,pc}

case_h_pred
    ; Load 4x yleft_col, replicate each byte across a row, four rows at a time
    sub             r0, r0, #1

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!


    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    pop             {r4-r8,pc}

case_tm_pred
    ; q4, q5 and q7 are used below; they are callee-saved, so preserve them.
    ; 64 bytes keeps sp 8-byte aligned; no sp-relative loads follow.
    vpush           {d8-d15}

    ; Load yabove_row
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]

    ; Load ytop_left
    sub             r3, r3, #1
    ldrb            r7, [r3]

    vdup.u16        q7, r7

    ; Compute yabove_row - ytop_left
    ; (multiplying by 1 widens the above-row bytes to 16 bits)
    mov             r3, #1
    vdup.u8         q0, r3

    vmull.u8        q4, d16, d0
    vmull.u8        q5, d17, d0

    vsub.s16        q4, q4, q7
    vsub.s16        q5, q5, q7

    ; Load 4x yleft_col
    sub             r0, r0, #1
    mov             r12, #4             ; 4 iterations x 4 rows

case_tm_pred_loop
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    vqadd.s16       q8, q0, q4
    vqadd.s16       q9, q0, q5

    vqadd.s16       q10, q1, q4
    vqadd.s16       q11, q1, q5

    vqadd.s16       q12, q2, q4
    vqadd.s16       q13, q2, q5

    vqadd.s16       q14, q3, q4
    vqadd.s16       q15, q3, q5

    ; saturate the signed 16-bit sums to unsigned bytes
    vqshrun.s16     d0, q8, #0
    vqshrun.s16     d1, q9, #0

    vqshrun.s16     d2, q10, #0
    vqshrun.s16     d3, q11, #0

    vqshrun.s16     d4, q12, #0
    vqshrun.s16     d5, q13, #0

    vqshrun.s16     d6, q14, #0
    vqshrun.s16     d7, q15, #0

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    subs            r12, r12, #1
    bne             case_tm_pred_loop

    vpop            {d8-d15}            ; restore callee-saved NEON registers
    pop             {r4-r8,pc}

    ENDP
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; vp8_build_intra_predictors_mby_s_neon_func
; Same prediction as vp8_build_intra_predictors_mby_neon_func, but the result
; is written in place: r1 is overwritten with y_buffer and each 16-byte row is
; stored y_stride bytes apart.
;
; r0    unsigned char *y_buffer
; r1    unsigned char *ypred_ptr   (ignored; replaced by y_buffer)
; r2    int y_stride
; r3    int mode      (0 = DC, 1 = V, 2 = H, 3 = TM; any other value falls
;                      through into the DC case)
; stack int Up        (non-zero when the row above is to be used for DC)
; stack int Left      (non-zero when the left column is to be used for DC)
;
; Only caller-saved NEON registers are used except in the TM case, which needs
; q4/q5/q7 and therefore saves/restores d8-d15 as the AAPCS requires.

|vp8_build_intra_predictors_mby_s_neon_func| PROC
    push            {r4-r8, lr}

    mov             r1, r0              ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;

    cmp             r3, #0
    beq             case_dc_pred_s
    cmp             r3, #1
    beq             case_v_pred_s
    cmp             r3, #2
    beq             case_h_pred_s
    cmp             r3, #3
    beq             case_tm_pred_s

case_dc_pred_s
    ldr             r4, [sp, #24]       ; Up   (24 = size of the push above)
    ldr             r5, [sp, #28]       ; Left

    ; Default the DC average to 128
    mov             r12, #128
    vdup.u8         q0, r12

    ; Zero out running sum
    mov             r12, #0

    ; compute shift and jump
    ; NOTE(review): the shift below is 3 + Up + Left, which assumes each flag
    ; is 0 or 1 -- confirm against the caller.
    adds            r7, r4, r5
    beq             skip_dc_pred_up_left_s

    ; Load above row, if it exists
    cmp             r4, #0
    beq             skip_dc_pred_up_s

    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1
    vpaddl.u16      q3, q2
    vpaddl.u32      q8, q3              ; q8 (caller-saved) instead of q4

    ; Move back to integer registers
    vmov.32         r4, d16[0]
    vmov.32         r6, d17[0]

    add             r12, r4, r6

skip_dc_pred_up_s

    cmp             r5, #0
    beq             skip_dc_pred_left_s

    sub             r0, r0, #1

    ; Load left row, if it exists
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0]

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

skip_dc_pred_left_s
    add             r7, r7, #3          ; Shift
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; add rounding term 1 << (shift - 1)
    mov             r5, r12, lsr r7     ; expected_dc

    vdup.u8         q0, r5

skip_dc_pred_up_left_s
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2

    pop             {r4-r8,pc}

case_v_pred_s
    ; Copy down above row
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    pop             {r4-r8,pc}

case_h_pred_s
    ; Load 4x yleft_col, replicate each byte across a row, four rows at a time
    sub             r0, r0, #1

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2


    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    pop             {r4-r8,pc}

case_tm_pred_s
    ; q4, q5 and q7 are used below; they are callee-saved, so preserve them.
    ; 64 bytes keeps sp 8-byte aligned; no sp-relative loads follow.
    vpush           {d8-d15}

    ; Load yabove_row
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]

    ; Load ytop_left
    sub             r3, r3, #1
    ldrb            r7, [r3]

    vdup.u16        q7, r7

    ; Compute yabove_row - ytop_left
    ; (multiplying by 1 widens the above-row bytes to 16 bits)
    mov             r3, #1
    vdup.u8         q0, r3

    vmull.u8        q4, d16, d0
    vmull.u8        q5, d17, d0

    vsub.s16        q4, q4, q7
    vsub.s16        q5, q5, q7

    ; Load 4x yleft_col
    sub             r0, r0, #1
    mov             r12, #4             ; 4 iterations x 4 rows

case_tm_pred_loop_s
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    vqadd.s16       q8, q0, q4
    vqadd.s16       q9, q0, q5

    vqadd.s16       q10, q1, q4
    vqadd.s16       q11, q1, q5

    vqadd.s16       q12, q2, q4
    vqadd.s16       q13, q2, q5

    vqadd.s16       q14, q3, q4
    vqadd.s16       q15, q3, q5

    ; saturate the signed 16-bit sums to unsigned bytes
    vqshrun.s16     d0, q8, #0
    vqshrun.s16     d1, q9, #0

    vqshrun.s16     d2, q10, #0
    vqshrun.s16     d3, q11, #0

    vqshrun.s16     d4, q12, #0
    vqshrun.s16     d5, q13, #0

    vqshrun.s16     d6, q14, #0
    vqshrun.s16     d7, q15, #0

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    subs            r12, r12, #1
    bne             case_tm_pred_loop_s

    vpop            {d8-d15}            ; restore callee-saved NEON registers
    pop             {r4-r8,pc}

    ENDP
|
||||
|
||||
|
||||
END
|
|
@ -0,0 +1,59 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem16x16_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; Copies 16 rows of 16 bytes from src (r0, row step r1) to dst (r2, row step r3).
; Loads run two rows ahead of the stores.  Rows 4-7 reuse q0-q3 (already stored
; by the time they are reloaded) instead of q4-q7, because q4-q7 (d8-d15) are
; callee-saved under the AAPCS and this leaf routine does not save them.
; The order of memory accesses is unchanged.
|vp8_copy_mem16x16_neon| PROC

    vld1.u8         {q0}, [r0], r1      ; row 0
    vld1.u8         {q1}, [r0], r1      ; row 1
    vld1.u8         {q2}, [r0], r1      ; row 2
    vst1.u8         {q0}, [r2], r3
    vld1.u8         {q3}, [r0], r1      ; row 3
    vst1.u8         {q1}, [r2], r3
    vld1.u8         {q0}, [r0], r1      ; row 4 (q0 free: row 0 already stored)
    vst1.u8         {q2}, [r2], r3
    vld1.u8         {q1}, [r0], r1      ; row 5
    vst1.u8         {q3}, [r2], r3
    vld1.u8         {q2}, [r0], r1      ; row 6
    vst1.u8         {q0}, [r2], r3      ; row 4
    vld1.u8         {q3}, [r0], r1      ; row 7
    vst1.u8         {q1}, [r2], r3      ; row 5
    vld1.u8         {q8}, [r0], r1      ; row 8
    vst1.u8         {q2}, [r2], r3      ; row 6
    vld1.u8         {q9}, [r0], r1
    vst1.u8         {q3}, [r2], r3      ; row 7
    vld1.u8         {q10}, [r0], r1
    vst1.u8         {q8}, [r2], r3
    vld1.u8         {q11}, [r0], r1
    vst1.u8         {q9}, [r2], r3
    vld1.u8         {q12}, [r0], r1
    vst1.u8         {q10}, [r2], r3
    vld1.u8         {q13}, [r0], r1
    vst1.u8         {q11}, [r2], r3
    vld1.u8         {q14}, [r0], r1
    vst1.u8         {q12}, [r2], r3
    vld1.u8         {q15}, [r0], r1     ; row 15
    vst1.u8         {q13}, [r2], r3
    vst1.u8         {q14}, [r2], r3
    vst1.u8         {q15}, [r2], r3

    mov             pc, lr

    ENDP  ; |vp8_copy_mem16x16_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,34 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem8x4_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; Copies 4 rows of 8 bytes from src (r0, row step r1) to dst (r2, row step r3).
; Each row travels through one D register (d0-d3); loads are issued ahead of
; the matching stores.  Both pointers are post-incremented by their step.
|vp8_copy_mem8x4_neon| PROC
|
||||
vld1.u8 {d0}, [r0], r1
|
||||
vld1.u8 {d1}, [r0], r1
|
||||
vst1.u8 {d0}, [r2], r3
|
||||
vld1.u8 {d2}, [r0], r1
|
||||
vst1.u8 {d1}, [r2], r3
|
||||
vld1.u8 {d3}, [r0], r1
|
||||
vst1.u8 {d2}, [r2], r3
|
||||
vst1.u8 {d3}, [r2], r3
|
||||
|
||||
; return to caller (leaf routine, nothing pushed)
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x4_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,43 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void copy_mem8x8_neon( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
; Copies 8 rows of 8 bytes from src (r0, row step r1) to dst (r2, row step r3).
; Each row travels through one D register (d0-d7); loads are issued ahead of
; the matching stores.  Both pointers are post-incremented by their step.
|vp8_copy_mem8x8_neon| PROC
|
||||
|
||||
vld1.u8 {d0}, [r0], r1
|
||||
vld1.u8 {d1}, [r0], r1
|
||||
vst1.u8 {d0}, [r2], r3
|
||||
vld1.u8 {d2}, [r0], r1
|
||||
vst1.u8 {d1}, [r2], r3
|
||||
vld1.u8 {d3}, [r0], r1
|
||||
vst1.u8 {d2}, [r2], r3
|
||||
vld1.u8 {d4}, [r0], r1
|
||||
vst1.u8 {d3}, [r2], r3
|
||||
vld1.u8 {d5}, [r0], r1
|
||||
vst1.u8 {d4}, [r2], r3
|
||||
vld1.u8 {d6}, [r0], r1
|
||||
vst1.u8 {d5}, [r2], r3
|
||||
vld1.u8 {d7}, [r0], r1
|
||||
vst1.u8 {d6}, [r2], r3
|
||||
vst1.u8 {d7}, [r2], r3
|
||||
|
||||
; return to caller (leaf routine, nothing pushed)
mov pc, lr
|
||||
|
||||
ENDP ; |vp8_copy_mem8x8_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,49 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
|
||||
; unsigned char *dst_ptr, int pitch, int stride)
|
||||
; r0 input_dc
|
||||
; r1 pred_ptr
|
||||
; r2 dst_ptr
|
||||
; r3 pitch
|
||||
; sp stride
|
||||
; Adds the rounded DC value (input_dc + 4) >> 3 to a 4x4 block of bytes read
; from pred_ptr (row step = pitch) and writes the result, saturated to
; unsigned 8 bits, to dst_ptr (row step = stride).
|vp8_dc_only_idct_add_neon| PROC
|
||||
; r0 = (input_dc + 4) >> 3, arithmetic shift
add r0, r0, #4
|
||||
asr r0, r0, #3
|
||||
; fifth argument (stride) is at [sp]; nothing has been pushed
ldr r12, [sp]
|
||||
; replicate the DC value into all eight 16-bit lanes of q0
vdup.16 q0, r0
|
||||
|
||||
; load four rows of 4 bytes: rows 0/1 into d2, rows 2/3 into d4
vld1.32 {d2[0]}, [r1], r3
|
||||
vld1.32 {d2[1]}, [r1], r3
|
||||
vld1.32 {d4[0]}, [r1], r3
|
||||
vld1.32 {d4[1]}, [r1]
|
||||
|
||||
; widen the prediction bytes to 16 bits and add the DC value
vaddw.u8 q1, q0, d2
|
||||
vaddw.u8 q2, q0, d4
|
||||
|
||||
; narrow signed 16-bit sums to unsigned bytes with saturation
vqmovun.s16 d2, q1
|
||||
vqmovun.s16 d4, q2
|
||||
|
||||
; store the four rows of 4 bytes
vst1.32 {d2[0]}, [r2], r12
|
||||
vst1.32 {d2[1]}, [r2], r12
|
||||
vst1.32 {d4[0]}, [r2], r12
|
||||
vst1.32 {d4[1]}, [r2]
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,96 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
EXPORT |vp8_short_inv_walsh4x4_neon|
|
||||
EXPORT |vp8_short_inv_walsh4x4_1_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;short vp8_short_inv_walsh4x4_neon(short *input, short *output)
|
||||
; Inverse 4x4 Walsh-Hadamard transform on 16 halfwords.
; r0 = input (32 bytes read), r1 = output (32 bytes written; r1 is advanced).
; Two butterfly passes are separated by a 4x4 transpose; the second pass adds
; 3 and arithmetic-shifts right by 3 before transposing again and storing.
; Uses only d0-d7 (q0-q3).
|vp8_short_inv_walsh4x4_neon| PROC
|
||||
|
||||
; read in all four lines of values: d0->d3
|
||||
vldm.64 r0, {q0, q1}
|
||||
|
||||
; first for loop
|
||||
|
||||
vadd.s16 d4, d0, d3 ;a = [0] + [12]
|
||||
vadd.s16 d5, d1, d2 ;b = [4] + [8]
|
||||
vsub.s16 d6, d1, d2 ;c = [4] - [8]
|
||||
vsub.s16 d7, d0, d3 ;d = [0] - [12]
|
||||
|
||||
vadd.s16 d0, d4, d5 ;a + b
|
||||
vadd.s16 d1, d6, d7 ;c + d
|
||||
vsub.s16 d2, d4, d5 ;a - b
|
||||
vsub.s16 d3, d7, d6 ;d - c
|
||||
|
||||
; 4x4 transpose of the 16-bit elements (vtrn.32 pairs, then vtrn.16 pairs)
vtrn.32 d0, d2 ;d0: 0 1 8 9
|
||||
;d2: 2 3 10 11
|
||||
vtrn.32 d1, d3 ;d1: 4 5 12 13
|
||||
;d3: 6 7 14 15
|
||||
|
||||
vtrn.16 d0, d1 ;d0: 0 4 8 12
|
||||
;d1: 1 5 9 13
|
||||
vtrn.16 d2, d3 ;d2: 2 6 10 14
|
||||
;d3: 3 7 11 15
|
||||
|
||||
; second for loop
|
||||
|
||||
vadd.s16 d4, d0, d3 ;a = [0] + [3]
|
||||
vadd.s16 d5, d1, d2 ;b = [1] + [2]
|
||||
vsub.s16 d6, d1, d2 ;c = [1] - [2]
|
||||
vsub.s16 d7, d0, d3 ;d = [0] - [3]
|
||||
|
||||
vadd.s16 d0, d4, d5 ;e = a + b
|
||||
vadd.s16 d1, d6, d7 ;f = c + d
|
||||
vsub.s16 d2, d4, d5 ;g = a - b
|
||||
vsub.s16 d3, d7, d6 ;h = d - c
|
||||
|
||||
; rounding: (x + 3) >> 3 on all 16 results
vmov.i16 q2, #3
|
||||
vadd.i16 q0, q0, q2 ;e/f += 3
|
||||
vadd.i16 q1, q1, q2 ;g/h += 3
|
||||
|
||||
vshr.s16 q0, q0, #3 ;e/f >> 3
|
||||
vshr.s16 q1, q1, #3 ;g/h >> 3
|
||||
|
||||
; same vtrn sequence as after the first loop: a second 4x4 transpose
vtrn.32 d0, d2
|
||||
vtrn.32 d1, d3
|
||||
vtrn.16 d0, d1
|
||||
vtrn.16 d2, d3
|
||||
|
||||
; store 2 x 16 bytes, advancing r1
vstmia.16 r1!, {q0}
|
||||
vstmia.16 r1!, {q1}
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_short_inv_walsh4x4_neon|
|
||||
|
||||
|
||||
;short vp8_short_inv_walsh4x4_1_neon(short *input, short *output)
|
||||
; DC-only variant: fills all 16 output halfwords with (input[0] + 3) >> 3.
; r0 = input (16 bytes are read, only element 0 is used),
; r1 = output (32 bytes written; r1 is advanced).  Uses only q0-q3.
|vp8_short_inv_walsh4x4_1_neon| PROC
|
||||
; load a full line into a neon register
|
||||
vld1.16 {q0}, [r0]
|
||||
; extract first element and replicate
|
||||
vdup.16 q1, d0[0]
|
||||
; add 3 to all values
|
||||
vmov.i16 q2, #3
|
||||
vadd.i16 q3, q1, q2
|
||||
; right shift
|
||||
vshr.s16 q3, q3, #3
|
||||
; write it back
|
||||
; (the same 8 halfwords are stored twice to cover all 16 outputs)
vstmia.16 r1!, {q3}
|
||||
vstmia.16 r1!, {q3}
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_short_inv_walsh4x4_1_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,409 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src
|
||||
; r1 int pitch
|
||||
; r2 const signed char *flimit
|
||||
; r3 const signed char *limit
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
; The stack offsets above are relative to sp on entry; lr is pushed first, so
; thresh is read from [sp, #4] in the body.
; Loads the eight 16-byte rows src - 4*pitch .. src + 3*pitch into q3-q10,
; calls vp8_loop_filter_neon, then stores q5-q8 to the four rows starting at
; src - 2*pitch.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_loop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit (one byte replicated to all 16 lanes)
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit (one byte replicated to all 16 lanes)
|
||||
sub r2, r0, r1, lsl #2 ; r2 = src - 4 * pitch (row p3)
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
vld1.u8 {q3}, [r2], r1 ; p3
|
||||
vld1.u8 {q4}, [r2], r1 ; p2
|
||||
vld1.u8 {q5}, [r2], r1 ; p1
|
||||
vld1.u8 {q6}, [r2], r1 ; p0
|
||||
vld1.u8 {q7}, [r2], r1 ; q0
|
||||
vld1.u8 {q8}, [r2], r1 ; q1
|
||||
vld1.u8 {q9}, [r2], r1 ; q2
|
||||
vld1.u8 {q10}, [r2] ; q3
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
sub r0, r0, r1, lsl #1 ; r0 = src - 2 * pitch (row p1, first row stored)
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
vst1.u8 {q5}, [r0], r1 ; store op1
|
||||
vst1.u8 {q6}, [r0], r1 ; store op0
|
||||
vst1.u8 {q7}, [r0], r1 ; store oq0
|
||||
vst1.u8 {q8}, [r0], r1 ; store oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
; The stack offsets above are relative to sp on entry; after the lr push the
; body reads thresh from [sp, #4] and v from [sp, #8].
; Eight 8-byte rows of u go into the even d registers (d6..d20) and eight
; 8-byte rows of v into the odd ones (d7..d21), so each of q3-q10 holds one
; row of u in its low half and the same row of v in its high half.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
|
||||
sub r3, r0, r1, lsl #2 ; r3 = u - 4 * pitch (row p3)
|
||||
vld1.u8 {d6}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r3] ; q3
|
||||
|
||||
ldr r3, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r12, r2, r1, lsl #2 ; r12 = v - 4 * pitch (row p3)
|
||||
vld1.u8 {d7}, [r12], r1 ; p3
|
||||
vld1.u8 {d9}, [r12], r1 ; p2
|
||||
vld1.u8 {d11}, [r12], r1 ; p1
|
||||
vld1.u8 {d13}, [r12], r1 ; p0
|
||||
vld1.u8 {d15}, [r12], r1 ; q0
|
||||
vld1.u8 {d17}, [r12], r1 ; q1
|
||||
vld1.u8 {d19}, [r12], r1 ; q2
|
||||
vld1.u8 {d21}, [r12] ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r3] ; thresh
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #1 ; r0 = u - 2 * pitch (row p1)
|
||||
sub r2, r2, r1, lsl #1 ; r2 = v - 2 * pitch (row p1)
|
||||
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r2], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r2], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r2], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0] ; store u oq1
|
||||
vst1.u8 {d17}, [r2] ; store v oq1
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
; The stack offsets above are relative to sp on entry; lr is pushed first, so
; thresh is read from [sp, #4] in the body.
; Reads 8 bytes (src - 4 .. src + 3) from each of 16 consecutive rows,
; transposes them so that q3-q10 each hold one column for all 16 rows, calls
; vp8_loop_filter_neon, and writes 4 bytes (src - 2 .. src + 1) back to each
; of the 16 rows.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_loop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
sub r2, r0, #4 ; src ptr down by 4 columns
|
||||
sub r0, r0, #2 ; dst ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
vld1.u8 {d6}, [r2], r1 ; load first 8-line src data
|
||||
vld1.u8 {d8}, [r2], r1
|
||||
vld1.u8 {d10}, [r2], r1
|
||||
vld1.u8 {d12}, [r2], r1
|
||||
vld1.u8 {d14}, [r2], r1
|
||||
vld1.u8 {d16}, [r2], r1
|
||||
vld1.u8 {d18}, [r2], r1
|
||||
vld1.u8 {d20}, [r2], r1
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
|
||||
vld1.u8 {d7}, [r2], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r2], r1
|
||||
vld1.u8 {d11}, [r2], r1
|
||||
vld1.u8 {d13}, [r2], r1
|
||||
vld1.u8 {d15}, [r2], r1
|
||||
vld1.u8 {d17}, [r2], r1
|
||||
vld1.u8 {d19}, [r2], r1
|
||||
vld1.u8 {d21}, [r2]
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
; regroup so d10-d13 = op1, op0, oq0, oq1 for rows 0-7
; and d14-d17 = op1, op0, oq0, oq1 for rows 8-15
vswp d12, d11
|
||||
vswp d16, d13
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
;store op1, op0, oq0, oq1 (one 4-byte lane store per row)
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
; The stack offsets above are relative to sp on entry; after the lr push the
; body reads thresh from [sp, #4] and v from [sp, #8].
; Reads 8 bytes (ptr - 4 .. ptr + 3) from 8 rows of u (even d registers) and
; 8 rows of v (odd d registers), transposes, calls vp8_loop_filter_neon, and
; writes 4 bytes (ptr - 2 .. ptr + 1) back to each of the 8 rows of u and v.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_loop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r12, r0, #4 ; move u pointer down by 4 columns
|
||||
vld1.s8 {d0[], d1[]}, [r2] ; flimit
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
|
||||
ldr r2, [sp, #8] ; load v ptr
|
||||
|
||||
vld1.u8 {d6}, [r12], r1 ;load u data
|
||||
vld1.u8 {d8}, [r12], r1
|
||||
vld1.u8 {d10}, [r12], r1
|
||||
vld1.u8 {d12}, [r12], r1
|
||||
vld1.u8 {d14}, [r12], r1
|
||||
vld1.u8 {d16}, [r12], r1
|
||||
vld1.u8 {d18}, [r12], r1
|
||||
vld1.u8 {d20}, [r12]
|
||||
|
||||
sub r3, r2, #4 ; move v pointer down by 4 columns
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d21}, [r3]
|
||||
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
|
||||
bl vp8_loop_filter_neon
|
||||
|
||||
sub r0, r0, #2 ; u dst ptr
|
||||
sub r2, r2, #2 ; v dst ptr
|
||||
|
||||
; regroup so d10-d13 = op1, op0, oq0, oq1 for u
; and d14-d17 = op1, op0, oq0, oq1 for v
vswp d12, d11
|
||||
vswp d16, d13
|
||||
vswp d14, d12
|
||||
vswp d16, d15
|
||||
|
||||
;store op1, op0, oq0, oq1 (u through r0, v through r2)
|
||||
vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
|
||||
vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
|
||||
vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
|
||||
vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
|
||||
vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
|
||||
vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
|
||||
vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
|
||||
vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0]
|
||||
vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_loop_filter_neon();
|
||||
; This is a helper function for the loopfilters. The individual functions do the
|
||||
; necessary load, transpose (if necessary) and store.
; On return q5, q6, q7, q8 hold op1, op0, oq0, oq1. r12 and every one of
; q0-q15 are written by this routine.
|
||||
|
||||
; r0-r3 PRESERVE
|
||||
; q0 flimit
|
||||
; q1 limit
|
||||
; q2 thresh
|
||||
; q3 p3
|
||||
; q4 p2
|
||||
; q5 p1
|
||||
; q6 p0
|
||||
; q7 q0
|
||||
; q8 q1
|
||||
; q9 q2
|
||||
; q10 q3
|
||||
|vp8_loop_filter_neon| PROC
|
||||
ldr r12, _lf_coeff_
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
||||
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q4, q10, q9 ; abs(q3 - q2)
|
||||
vabd.u8 q9, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q4
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1
|
||||
vmax.u8 q15, q15, q3 ; q15 = max of the six neighbour differences
|
||||
|
||||
vadd.u8 q0, q0, q0 ; flimit * 2
|
||||
vadd.u8 q0, q0, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (limit >= max difference) * -1
|
||||
|
||||
vabd.u8 q2, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q2, q2, #1 ; a = a / 2
|
||||
vqadd.u8 q9, q9, q2 ; a = b + a
|
||||
vcge.u8 q9, q0, q9 ; (flimit * 2 + limit >= a) * -1
|
||||
|
||||
vld1.u8 {q0}, [r12]! ; q0 = 0x80 in every byte
|
||||
|
||||
; vp8_filter() function
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
veor q8, q8, q0 ; qs1
|
||||
|
||||
vld1.u8 {q10}, [r12]! ; q10 = 0x03 in every byte
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q11, d15, d13
|
||||
|
||||
vmovl.u8 q4, d20 ; q4 = 3 in every 16-bit lane
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
vorr q14, q13, q14 ; vp8_hevmask
|
||||
|
||||
vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0)
|
||||
vmul.i16 q11, q11, q4
|
||||
|
||||
vand q1, q1, q14 ; vp8_filter &= hev
|
||||
vand q15, q15, q9 ; vp8_filter_mask
|
||||
|
||||
vaddw.s8 q2, q2, d2
|
||||
vaddw.s8 q11, q11, d3
|
||||
|
||||
vld1.u8 {q9}, [r12]! ; q9 = 0x04 in every byte
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
vqmovn.s16 d3, q11
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q1, q10 ; Filter2 = clamp(vp8_filter+3)
|
||||
vqadd.s8 q1, q1, q9 ; Filter1 = clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
|
||||
vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1)
|
||||
|
||||
; outer tap adjustments: ++vp8_filter >> 1
|
||||
vrshr.s8 q1, q1, #1
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
|
||||
vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter)
|
||||
vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter)
|
||||
|
||||
veor q5, q13, q0 ; *op1 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q8, q12, q0 ; *oq1 = u^0x80
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_neon|
|
||||
|
||||
; Constant table for vp8_loop_filter_neon: _lf_coeff_ holds the address of
; lf_coeff, which is followed by four 16-byte rows of a repeated byte value.
AREA loopfilter_dat, DATA, READONLY
|
||||
_lf_coeff_
|
||||
DCD lf_coeff
|
||||
lf_coeff
|
||||
; 0x80: XORed with pixels to convert between unsigned and signed
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
; 0x03: added to form Filter2, also widened to multiply (qs0 - ps0) by 3
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
; 0x04: added to form Filter1
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
; 0x01: not loaded by vp8_loop_filter_neon (it reads only the first three rows)
DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101
|
||||
|
||||
END
|
|
@ -0,0 +1,118 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; //stack(r5) int count --unused
; Neither stack argument is read by this routine (thresh is never loaded).
; Loads the four 16-byte rows s - 2*p .. s + p (p1, p0, q0, q1) and writes
; back only rows s - p (op0) and s (oq0).
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|
||||
|vp8_loop_filter_simple_horizontal_edge_neon| PROC
|
||||
sub r0, r0, r1, lsl #1 ; move src pointer down by 2 lines
|
||||
|
||||
ldr r12, _lfhy_coeff_
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q10}, [r12]! ; 0x03
|
||||
vld1.u8 {q8}, [r0] ; q1
|
||||
|
||||
;vp8_filter_mask() function
|
||||
vabd.u8 q15, q6, q7 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q5, q8 ; abs(p1 - q1)
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
;vp8_filter() function
|
||||
veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value
|
||||
veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1-q1)/2)*-1
|
||||
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d14, d12 ; ( qs0 - ps0)
|
||||
vsubl.s8 q3, d15, d13
|
||||
|
||||
vqsub.s8 q4, q5, q8 ; q4: vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q11, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q12, q3, q3
|
||||
|
||||
vld1.u8 {q9}, [r12]! ; 0x04
|
||||
|
||||
vadd.s16 q2, q2, q11
|
||||
vadd.s16 q3, q3, q12
|
||||
|
||||
vaddw.s8 q2, q2, d8 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q3, q3, d9
|
||||
|
||||
;vqadd.s8 q4, q4, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d8, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d9, q3
|
||||
;;;;;;;;;;;;;
|
||||
|
||||
vand q4, q4, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q4, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q4, q4, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q4, q4, #3 ; Filter1 >>= 3
|
||||
|
||||
sub r0, r0, r1, lsl #1 ; r0 = s - p (row p0)
|
||||
|
||||
;calculate output
|
||||
vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
vqsub.s8 q10, q7, q4 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
|
||||
add r3, r0, r1 ; r3 = s (row q0)
|
||||
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
|
||||
vst1.u8 {q6}, [r0] ; store op0
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named hloopfiltery_dat. _lfhy_coeff_ holds the address of lfhy_coeff,
|
||||
;which is followed by 12 words: three 16-byte rows of a repeated byte value.
|
||||
;The routine in this file loads them in order: 0x80, 0x03, 0x04.
|
||||
_lfhy_coeff_
|
||||
DCD lfhy_coeff
|
||||
lfhy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
|
||||
END
|
|
@ -0,0 +1,159 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;Note: flimit, limit, and thresh should be positive numbers. All 16 elements in flimit
|
||||
;are equal. So, in the code, only one load is needed
|
||||
;for flimit. Same way applies to limit and thresh.
|
||||
; r0 unsigned char *s,
|
||||
; r1 int p, //pitch
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; stack(r4) const signed char *thresh,
|
||||
; //stack(r5) int count --unused
; Neither stack argument is read by this routine (thresh is never loaded).
; Reads 4 bytes (s - 2 .. s + 1 = p1, p0, q0, q1) from each of 16 rows with
; lane loads and writes 2 bytes (s - 1, s = op0, oq0) back to each row.
; r2, r3 and r12 are reused as row pointers once flimit/limit/coefficients
; have been loaded.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|
||||
|vp8_loop_filter_simple_vertical_edge_neon| PROC
|
||||
sub r0, r0, #2 ; move src pointer down by 2 columns
|
||||
|
||||
vld4.8 {d6[0], d7[0], d8[0], d9[0]}, [r0], r1
|
||||
vld1.s8 {d2[], d3[]}, [r2] ; flimit
|
||||
vld1.s8 {d26[], d27[]}, [r3] ; limit -> q13
|
||||
vld4.8 {d6[1], d7[1], d8[1], d9[1]}, [r0], r1
|
||||
ldr r12, _vlfy_coeff_
|
||||
vld4.8 {d6[2], d7[2], d8[2], d9[2]}, [r0], r1
|
||||
vld4.8 {d6[3], d7[3], d8[3], d9[3]}, [r0], r1
|
||||
vld4.8 {d6[4], d7[4], d8[4], d9[4]}, [r0], r1
|
||||
vld4.8 {d6[5], d7[5], d8[5], d9[5]}, [r0], r1
|
||||
vld4.8 {d6[6], d7[6], d8[6], d9[6]}, [r0], r1
|
||||
vld4.8 {d6[7], d7[7], d8[7], d9[7]}, [r0], r1
|
||||
|
||||
vld4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
|
||||
vld1.u8 {q0}, [r12]! ; 0x80
|
||||
vld4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
|
||||
vld1.u8 {q11}, [r12]! ; 0x03
|
||||
vld4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
|
||||
vld1.u8 {q12}, [r12]! ; 0x04
|
||||
vld4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
|
||||
vld4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
|
||||
vld4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
|
||||
vld4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
|
||||
vld4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
|
||||
|
||||
; after these two swaps: q3 = p1, q5 = p0, q4 = q0, q6 = q1 (all 16 rows)
vswp d7, d10
|
||||
vswp d12, d9
|
||||
;vswp q4, q5 ; p1:q3, p0:q5, q0:q4, q1:q6
|
||||
|
||||
;vp8_filter_mask() function
|
||||
;(no hev mask is computed by this routine)
|
||||
sub r0, r0, r1, lsl #4 ; rewind the 16 rows: r0 = s - 2
|
||||
vabd.u8 q15, q5, q4 ; abs(p0 - q0)
|
||||
vabd.u8 q14, q3, q6 ; abs(p1 - q1)
|
||||
vqadd.u8 q15, q15, q15 ; abs(p0 - q0) * 2
|
||||
vshr.u8 q14, q14, #1 ; abs(p1 - q1) / 2
|
||||
vqadd.u8 q15, q15, q14 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
|
||||
|
||||
veor q4, q4, q0 ; qs0: q0 offset to convert to a signed value
|
||||
veor q5, q5, q0 ; ps0: p0 offset to convert to a signed value
|
||||
veor q3, q3, q0 ; ps1: p1 offset to convert to a signed value
|
||||
veor q6, q6, q0 ; qs1: q1 offset to convert to a signed value
|
||||
|
||||
vadd.u8 q1, q1, q1 ; flimit * 2
|
||||
vadd.u8 q1, q1, q13 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (flimit*2 + limit >= abs(p0 - q0)*2 + abs(p1-q1)/2)*-1
|
||||
|
||||
;vp8_filter() function
|
||||
;;;;;;;;;;
|
||||
;vqsub.s8 q2, q5, q4 ; ( qs0 - ps0)
|
||||
vsubl.s8 q2, d8, d10 ; ( qs0 - ps0)
|
||||
vsubl.s8 q13, d9, d11
|
||||
|
||||
vqsub.s8 q1, q3, q6 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
|
||||
|
||||
;vmul.i8 q2, q2, q11 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0)
|
||||
vadd.s16 q14, q13, q13
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q13, q13, q14
|
||||
|
||||
;vqadd.s8 q1, q1, q2
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
|
||||
vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d3, q13
|
||||
|
||||
add r0, r0, #1 ; r0 = s - 1 (column p0, row 0)
|
||||
add r2, r0, r1 ; r2 = row 1
|
||||
;;;;;;;;;;;
|
||||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
|
||||
vqadd.s8 q2, q1, q11 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
|
||||
vqadd.s8 q1, q1, q12 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
|
||||
vshr.s8 q2, q2, #3 ; Filter2 >>= 3
|
||||
vshr.s8 q1, q1, #3 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
vqsub.s8 q10, q4, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1)
|
||||
vqadd.s8 q11, q5, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2)
|
||||
|
||||
veor q7, q10, q0 ; *oq0 = u^0x80
|
||||
veor q6, q11, q0 ; *op0 = u^0x80
|
||||
|
||||
add r3, r2, r1
|
||||
vswp d13, d14 ; d12/d13 = op0/oq0 rows 0-7, d14/d15 = op0/oq0 rows 8-15
|
||||
add r12, r3, r1
|
||||
|
||||
;store op0, oq0 (2 bytes per row; r0, r2, r3, r12 rotate as row pointers)
|
||||
vst2.8 {d12[0], d13[0]}, [r0]
|
||||
vst2.8 {d12[1], d13[1]}, [r2]
|
||||
vst2.8 {d12[2], d13[2]}, [r3]
|
||||
vst2.8 {d12[3], d13[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d12[4], d13[4]}, [r12]
|
||||
vst2.8 {d12[5], d13[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d12[6], d13[6]}, [r0]
|
||||
vst2.8 {d12[7], d13[7]}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst2.8 {d14[0], d15[0]}, [r2]
|
||||
vst2.8 {d14[1], d15[1]}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst2.8 {d14[2], d15[2]}, [r3]
|
||||
vst2.8 {d14[3], d15[3]}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst2.8 {d14[4], d15[4]}, [r12]
|
||||
vst2.8 {d14[5], d15[5]}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst2.8 {d14[6], d15[6]}, [r0]
|
||||
vst2.8 {d14[7], d15[7]}, [r2]
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_neon|
|
||||
|
||||
;-----------------
|
||||
AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named vloopfiltery_dat. _vlfy_coeff_ holds the address of vlfy_coeff,
|
||||
;which is followed by 12 words: three 16-byte rows of a repeated byte value.
|
||||
;The routine in this file loads them in order: 0x80, 0x03, 0x04.
|
||||
_vlfy_coeff_
|
||||
DCD vlfy_coeff
|
||||
vlfy_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404
|
||||
|
||||
END
|
|
@ -0,0 +1,519 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon|
|
||||
EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
||||
EXPORT |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; flimit, limit, and thresh should be positive numbers.
|
||||
; All 16 elements in these variables are equal.
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
; The stack offsets above are relative to sp on entry; lr is pushed first, so
; thresh is read from [sp, #4] in the body.
; Loads the eight 16-byte rows src - 4*pitch .. src + 3*pitch into q3-q10,
; calls vp8_mbloop_filter_neon, then stores q4-q9 to the six rows
; src - 3*pitch .. src + 2*pitch.
; flimit is not loaded here and r2 is left untouched until after the call;
; presumably vp8_mbloop_filter_neon reads it through r2 -- TODO confirm.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ; p3
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
vld1.u8 {q4}, [r0], r1 ; p2
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.u8 {q5}, [r0], r1 ; p1
|
||||
vld1.u8 {q6}, [r0], r1 ; p0
|
||||
vld1.u8 {q7}, [r0], r1 ; q0
|
||||
vld1.u8 {q8}, [r0], r1 ; q1
|
||||
vld1.u8 {q9}, [r0], r1 ; q2
|
||||
vld1.u8 {q10}, [r0], r1 ; q3
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3 ; rewind the 8 rows: r0 = row p3
|
||||
add r0, r0, r1 ; r0 = row p2
|
||||
add r2, r0, r1 ; r2 = row p1
|
||||
add r3, r2, r1 ; r3 = row p0
|
||||
|
||||
vst1.u8 {q4}, [r0] ; store op2
|
||||
vst1.u8 {q5}, [r2] ; store op1
|
||||
vst1.u8 {q6}, [r3], r1 ; store op0
|
||||
add r12, r3, r1
|
||||
vst1.u8 {q7}, [r3] ; store oq0
|
||||
vst1.u8 {q8}, [r12], r1 ; store oq1
|
||||
vst1.u8 {q9}, [r12] ; store oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
; The stack offsets above are relative to sp on entry; after the lr push the
; body reads thresh from [sp, #4] and v from [sp, #8].
; Eight 8-byte rows of u go into the even d registers (d6..d20) and eight
; 8-byte rows of v into the odd ones (d7..d21); six rows (op2..oq2) are
; written back to each plane. r3 is reused as the v pointer after limit has
; been loaded.
; flimit is not loaded here and r2 is left untouched;
; presumably vp8_mbloop_filter_neon reads it through r2 -- TODO confirm.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; p3
|
||||
vld1.u8 {d7}, [r3], r1 ; p3
|
||||
vld1.u8 {d8}, [r0], r1 ; p2
|
||||
vld1.u8 {d9}, [r3], r1 ; p2
|
||||
vld1.u8 {d10}, [r0], r1 ; p1
|
||||
vld1.u8 {d11}, [r3], r1 ; p1
|
||||
vld1.u8 {d12}, [r0], r1 ; p0
|
||||
vld1.u8 {d13}, [r3], r1 ; p0
|
||||
vld1.u8 {d14}, [r0], r1 ; q0
|
||||
vld1.u8 {d15}, [r3], r1 ; q0
|
||||
vld1.u8 {d16}, [r0], r1 ; q1
|
||||
vld1.u8 {d17}, [r3], r1 ; q1
|
||||
vld1.u8 {d18}, [r0], r1 ; q2
|
||||
vld1.u8 {d19}, [r3], r1 ; q2
|
||||
vld1.u8 {d20}, [r0], r1 ; q3
|
||||
vld1.u8 {d21}, [r3], r1 ; q3
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #3 ; rewind the 8 rows of u: r0 = row p3
|
||||
sub r3, r3, r1, lsl #3 ; rewind the 8 rows of v: r3 = row p3
|
||||
|
||||
add r0, r0, r1 ; r0 = u row p2
|
||||
add r3, r3, r1 ; r3 = v row p2
|
||||
|
||||
vst1.u8 {d8}, [r0], r1 ; store u op2
|
||||
vst1.u8 {d9}, [r3], r1 ; store v op2
|
||||
vst1.u8 {d10}, [r0], r1 ; store u op1
|
||||
vst1.u8 {d11}, [r3], r1 ; store v op1
|
||||
vst1.u8 {d12}, [r0], r1 ; store u op0
|
||||
vst1.u8 {d13}, [r3], r1 ; store v op0
|
||||
vst1.u8 {d14}, [r0], r1 ; store u oq0
|
||||
vst1.u8 {d15}, [r3], r1 ; store v oq0
|
||||
vst1.u8 {d16}, [r0], r1 ; store u oq1
|
||||
vst1.u8 {d17}, [r3], r1 ; store v oq1
|
||||
vst1.u8 {d18}, [r0], r1 ; store u oq2
|
||||
vst1.u8 {d19}, [r3], r1 ; store v oq2
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; int count)
|
||||
; r0 unsigned char *src,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 int count (unused)
; The stack offsets above are relative to sp on entry; lr is pushed first and
; thresh is read from [sp, #4] before sp is lowered for the scratch area.
; Reads 8 bytes (src - 4 .. src + 3) from each of 16 rows, transposes so that
; q3-q10 each hold one column, calls vp8_mbloop_filter_neon, transposes back
; and rewrites all 8 bytes of each of the 16 rows.
; A 32-byte stack scratch area keeps copies of q3 (p3) and q10 (q3) across
; the call; presumably the helper overwrites them -- TODO confirm.
; flimit is not loaded here and r2 is left untouched until after the call;
; presumably vp8_mbloop_filter_neon reads it through r2 -- TODO confirm.
; NOTE(review): q4-q7 (d8-d15) are overwritten without being saved; the AAPCS
; treats d8-d15 as callee-saved -- confirm callers tolerate this.
|
||||
|vp8_mbloop_filter_vertical_edge_y_neon| PROC
|
||||
stmdb sp!, {lr}
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ; load first 8-line src data
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
sub sp, sp, #32 ; reserve 32 bytes of scratch for q3 and q10
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
|
||||
vld1.u8 {d7}, [r0], r1 ; load second 8-line src data
|
||||
vld1.u8 {d9}, [r0], r1
|
||||
vld1.u8 {d11}, [r0], r1
|
||||
vld1.u8 {d13}, [r0], r1
|
||||
vld1.u8 {d15}, [r0], r1
|
||||
vld1.u8 {d17}, [r0], r1
|
||||
vld1.u8 {d19}, [r0], r1
|
||||
vld1.u8 {d21}, [r0], r1
|
||||
|
||||
;transpose to 8x16 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]! ; save p3 column
|
||||
vst1.u8 {q10}, [r12]! ; save q3 column
|
||||
|
||||
bl vp8_mbloop_filter_neon
|
||||
|
||||
sub r0, r0, r1, lsl #4 ; rewind the 16 rows: r0 = src - 4, row 0
|
||||
|
||||
add r2, r0, r1
|
||||
|
||||
add r3, r2, r1
|
||||
|
||||
vld1.u8 {q3}, [sp]! ; reload p3 column; the two post-increments
|
||||
vld1.u8 {q10}, [sp]! ; reload q3 column; release the 32-byte scratch
|
||||
|
||||
;transpose to 16x8 matrix
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
add r12, r3, r1
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store op2, op1, op0, oq0, oq1, oq2 (each store writes one full 8-byte row;
;r0, r2, r3, r12 rotate as row pointers)
|
||||
vst1.8 {d6}, [r0]
|
||||
vst1.8 {d8}, [r2]
|
||||
vst1.8 {d10}, [r3]
|
||||
vst1.8 {d12}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d14}, [r12]
|
||||
vst1.8 {d16}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d18}, [r0]
|
||||
vst1.8 {d20}, [r2], r1
|
||||
add r3, r2, r1
|
||||
vst1.8 {d7}, [r2]
|
||||
vst1.8 {d9}, [r3], r1
|
||||
add r12, r3, r1
|
||||
vst1.8 {d11}, [r3]
|
||||
vst1.8 {d13}, [r12], r1
|
||||
add r0, r12, r1
|
||||
vst1.8 {d15}, [r12]
|
||||
vst1.8 {d17}, [r0], r1
|
||||
add r2, r0, r1
|
||||
vst1.8 {d19}, [r0]
|
||||
vst1.8 {d21}, [r2]
|
||||
|
||||
ldmia sp!, {pc}
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon|
|
||||
|
||||
; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
|
||||
; const signed char *flimit,
|
||||
; const signed char *limit,
|
||||
; const signed char *thresh,
|
||||
; unsigned char *v)
|
||||
; r0 unsigned char *u,
|
||||
; r1 int pitch,
|
||||
; r2 const signed char *flimit,
|
||||
; r3 const signed char *limit,
|
||||
; sp const signed char *thresh,
|
||||
; sp+4 unsigned char *v
|
||||
|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
|
||||
stmdb sp!, {lr} ; save return address; the two stack arguments are now at sp+4 and sp+8
|
||||
sub r0, r0, #4 ; move src pointer down by 4 columns
|
||||
vld1.s8 {d2[], d3[]}, [r3] ; limit, replicated into every byte of q1
|
||||
ldr r3, [sp, #8] ; load v ptr
|
||||
ldr r12, [sp, #4] ; load thresh pointer
|
||||
|
||||
sub r3, r3, #4 ; move v pointer down by 4 columns
|
||||
|
||||
vld1.u8 {d6}, [r0], r1 ;load u data (8 bytes per row, 8 rows, into the low halves of q3-q10)
|
||||
vld1.u8 {d7}, [r3], r1 ;load v data (8 bytes per row, 8 rows, into the high halves of q3-q10)
|
||||
vld1.u8 {d8}, [r0], r1
|
||||
vld1.u8 {d9}, [r3], r1
|
||||
vld1.u8 {d10}, [r0], r1
|
||||
vld1.u8 {d11}, [r3], r1
|
||||
vld1.u8 {d12}, [r0], r1
|
||||
vld1.u8 {d13}, [r3], r1
|
||||
vld1.u8 {d14}, [r0], r1
|
||||
vld1.u8 {d15}, [r3], r1
|
||||
vld1.u8 {d16}, [r0], r1
|
||||
vld1.u8 {d17}, [r3], r1
|
||||
vld1.u8 {d18}, [r0], r1
|
||||
vld1.u8 {d19}, [r3], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r3], r1
|
||||
|
||||
;transpose to 8x16 matrix (afterwards q3..q10 are the columns p3,p2,p1,p0,q0,q1,q2,q3 expected by vp8_mbloop_filter_neon)
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
sub sp, sp, #32 ; reserve 32 bytes of stack to preserve q3 and q10
|
||||
vld1.s8 {d4[], d5[]}, [r12] ; thresh
|
||||
mov r12, sp
|
||||
vst1.u8 {q3}, [r12]! ; spill p3: the helper overwrites q3
|
||||
vst1.u8 {q10}, [r12]! ; spill q3: the helper overwrites q10
|
||||
|
||||
bl vp8_mbloop_filter_neon ; filters q4-q9 in place; r0, r1 and r3 are preserved by the helper
|
||||
|
||||
sub r0, r0, r1, lsl #3 ; rewind u pointer by the 8 rows loaded above
|
||||
sub r3, r3, r1, lsl #3 ; rewind v pointer by the 8 rows loaded above
|
||||
|
||||
vld1.u8 {q3}, [sp]! ; reload p3 and release its 16 bytes of stack
|
||||
vld1.u8 {q10}, [sp]! ; reload q3 and release its 16 bytes of stack
|
||||
|
||||
;transpose to 16x8 matrix (back to row order for the stores)
|
||||
vtrn.32 q3, q7
|
||||
vtrn.32 q4, q8
|
||||
vtrn.32 q5, q9
|
||||
vtrn.32 q6, q10
|
||||
|
||||
vtrn.16 q3, q5
|
||||
vtrn.16 q4, q6
|
||||
vtrn.16 q7, q9
|
||||
vtrn.16 q8, q10
|
||||
|
||||
vtrn.8 q3, q4
|
||||
vtrn.8 q5, q6
|
||||
vtrn.8 q7, q8
|
||||
vtrn.8 q9, q10
|
||||
|
||||
;store all 8 rows of 8 bytes: filtered op2, op1, op0, oq0, oq1, oq2 plus the restored p3/q3 columns
|
||||
vst1.8 {d6}, [r0], r1
|
||||
vst1.8 {d7}, [r3], r1
|
||||
vst1.8 {d8}, [r0], r1
|
||||
vst1.8 {d9}, [r3], r1
|
||||
vst1.8 {d10}, [r0], r1
|
||||
vst1.8 {d11}, [r3], r1
|
||||
vst1.8 {d12}, [r0], r1
|
||||
vst1.8 {d13}, [r3], r1
|
||||
vst1.8 {d14}, [r0], r1
|
||||
vst1.8 {d15}, [r3], r1
|
||||
vst1.8 {d16}, [r0], r1
|
||||
vst1.8 {d17}, [r3], r1
|
||||
vst1.8 {d18}, [r0], r1
|
||||
vst1.8 {d19}, [r3], r1
|
||||
vst1.8 {d20}, [r0], r1
|
||||
vst1.8 {d21}, [r3], r1
|
||||
|
||||
ldmia sp!, {pc} ; return by popping the saved lr into pc
|
||||
ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon|
|
||||
|
||||
; void vp8_mbloop_filter_neon()
|
||||
; This is a helper function for the macroblock loopfilters. The individual
|
||||
; functions do the necessary load, transpose (if necessary), preserve (if
|
||||
; necessary) and store.
|
||||
|
||||
; TODO:
|
||||
; The vertical filter writes p3/q3 back out because two 4 element writes are
|
||||
; much simpler than ordering and writing two 3 element sets (or three 2 elements
|
||||
; sets, or whichever other combinations are possible).
|
||||
; If we can preserve q3 and q10, the vertical filter will be able to avoid
|
||||
; storing those values on the stack and reading them back after the filter.
|
||||
|
||||
; r0,r1 PRESERVE
|
||||
; r2 flimit
|
||||
; r3 PRESERVE
|
||||
; q1 limit
|
||||
; q2 thresh
|
||||
; q3 p3
|
||||
; q4 p2
|
||||
; q5 p1
|
||||
; q6 p0
|
||||
; q7 q0
|
||||
; q8 q1
|
||||
; q9 q2
|
||||
; q10 q3
|
||||
|
||||
|vp8_mbloop_filter_neon| PROC
|
||||
ldr r12, _mblf_coeff_ ; r12 = address of the constant table mblf_coeff
|
||||
|
||||
; vp8_filter_mask
|
||||
vabd.u8 q11, q3, q4 ; abs(p3 - p2)
|
||||
vabd.u8 q12, q4, q5 ; abs(p2 - p1)
|
||||
vabd.u8 q13, q5, q6 ; abs(p1 - p0)
|
||||
vabd.u8 q14, q8, q7 ; abs(q1 - q0)
|
||||
vabd.u8 q3, q9, q8 ; abs(q2 - q1)
|
||||
vabd.u8 q0, q10, q9 ; abs(q3 - q2)
|
||||
|
||||
vmax.u8 q11, q11, q12
|
||||
vmax.u8 q12, q13, q14
|
||||
vmax.u8 q3, q3, q0
|
||||
vmax.u8 q15, q11, q12
|
||||
|
||||
vabd.u8 q12, q6, q7 ; abs(p0 - q0)
|
||||
|
||||
; vp8_hevmask
|
||||
vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1
|
||||
vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1
|
||||
vmax.u8 q15, q15, q3 ; q15 = largest of the six neighbouring abs differences
|
||||
|
||||
vld1.s8 {d4[], d5[]}, [r2] ; flimit
|
||||
|
||||
vld1.u8 {q0}, [r12]! ; first table row: 0x80 in every byte
|
||||
|
||||
vadd.u8 q2, q2, q2 ; flimit * 2
|
||||
vadd.u8 q2, q2, q1 ; flimit * 2 + limit
|
||||
vcge.u8 q15, q1, q15 ; (limit >= largest abs difference) * -1
|
||||
|
||||
vabd.u8 q1, q5, q8 ; a = abs(p1 - q1)
|
||||
vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2
|
||||
vshr.u8 q1, q1, #1 ; a = a / 2
|
||||
vqadd.u8 q12, q12, q1 ; a = b + a
|
||||
vcge.u8 q12, q2, q12 ; (flimit * 2 + limit >= a) * -1
|
||||
|
||||
; vp8_filter
|
||||
; convert to signed
|
||||
veor q7, q7, q0 ; qs0
|
||||
veor q6, q6, q0 ; ps0
|
||||
veor q5, q5, q0 ; ps1
|
||||
veor q8, q8, q0 ; qs1
|
||||
veor q4, q4, q0 ; ps2
|
||||
veor q9, q9, q0 ; qs2
|
||||
|
||||
vorr q14, q13, q14 ; vp8_hevmask
|
||||
|
||||
vsubl.s8 q2, d14, d12 ; qs0 - ps0
|
||||
vsubl.s8 q13, d15, d13
|
||||
|
||||
vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1)
|
||||
|
||||
vadd.s16 q10, q2, q2 ; 2 * (qs0 - ps0)
|
||||
vadd.s16 q11, q13, q13
|
||||
vand q15, q15, q12 ; vp8_filter_mask
|
||||
|
||||
vadd.s16 q2, q2, q10 ; 3 * (qs0 - ps0)
|
||||
vadd.s16 q13, q13, q11
|
||||
|
||||
vld1.u8 {q12}, [r12]! ; #3
|
||||
|
||||
vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0)
|
||||
vaddw.s8 q13, q13, d3
|
||||
|
||||
vld1.u8 {q11}, [r12]! ; #4
|
||||
|
||||
; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
|
||||
vqmovn.s16 d2, q2
|
||||
vqmovn.s16 d3, q13
|
||||
|
||||
vand q1, q1, q15 ; vp8_filter &= mask
|
||||
|
||||
vld1.u8 {q15}, [r12]! ; #63 (in each 16-bit lane)
|
||||
;
|
||||
vand q13, q1, q14 ; Filter2 &= hev
|
||||
|
||||
vld1.u8 {d7}, [r12]! ; #9
|
||||
|
||||
vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4)
|
||||
vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3)
|
||||
|
||||
vld1.u8 {d6}, [r12]! ; #18
|
||||
|
||||
vshr.s8 q2, q2, #3 ; Filter1 >>= 3
|
||||
vshr.s8 q13, q13, #3 ; Filter2 >>= 3
|
||||
|
||||
vmov q10, q15 ; seed the 16-bit accumulators with the rounding constant 63
|
||||
vmov q12, q15
|
||||
|
||||
vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1)
|
||||
|
||||
vld1.u8 {d5}, [r12]! ; #27
|
||||
|
||||
vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2)
|
||||
|
||||
vbic q1, q1, q14 ; vp8_filter &= ~hev
|
||||
|
||||
; roughly 1/7th difference across boundary
|
||||
; roughly 2/7th difference across boundary
|
||||
; roughly 3/7th difference across boundary
|
||||
vmov q11, q15
|
||||
vmov q13, q15
|
||||
vmov q14, q15
|
||||
|
||||
vmlal.s8 q10, d2, d7 ; Filter2 * 9
|
||||
vmlal.s8 q11, d3, d7
|
||||
vmlal.s8 q12, d2, d6 ; Filter2 * 18
|
||||
vmlal.s8 q13, d3, d6
|
||||
vmlal.s8 q14, d2, d5 ; Filter2 * 27
|
||||
vmlal.s8 q15, d3, d5
|
||||
vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7)
|
||||
vqshrn.s16 d21, q11, #7
|
||||
vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7)
|
||||
vqshrn.s16 d25, q13, #7
|
||||
vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7)
|
||||
vqshrn.s16 d29, q15, #7
|
||||
|
||||
vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u)
|
||||
vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u)
|
||||
vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u)
|
||||
vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u)
|
||||
vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u)
|
||||
vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u)
|
||||
veor q9, q11, q0 ; *oq2 = s^0x80
|
||||
veor q4, q10, q0 ; *op2 = s^0x80
|
||||
veor q8, q13, q0 ; *oq1 = s^0x80
|
||||
veor q5, q12, q0 ; *op1 = s^0x80
|
||||
veor q7, q15, q0 ; *oq0 = s^0x80
|
||||
veor q6, q14, q0 ; *op0 = s^0x80
|
||||
|
||||
bx lr ; results are left in q4-q9; q3 and q10 no longer hold p3/q3
|
||||
ENDP ; |vp8_mbloop_filter_neon|
|
||||
|
||||
AREA mbloopfilter_dat, DATA, READONLY ; constant table read sequentially by vp8_mbloop_filter_neon
|
||||
_mblf_coeff_
|
||||
DCD mblf_coeff ; address of the table, fetched with "ldr r12, _mblf_coeff_"
|
||||
mblf_coeff
|
||||
DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 ; 0x80 per byte: XOR mask for unsigned <-> signed conversion
|
||||
DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 ; 3 per byte: added for Filter2
|
||||
DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 ; 4 per byte: added for Filter1
|
||||
DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f ; 63 per 16-bit lane: rounding term before the >>7
|
||||
DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 ; 9 per byte (8 bytes), then 18 per byte (8 bytes)
|
||||
DCD 0x1b1b1b1b, 0x1b1b1b1b ; 27 per byte (8 bytes)
|
||||
|
||||
END
|
|
@ -0,0 +1,131 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_recon16x16mb_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *pred_ptr,
|
||||
; r1 short *diff_ptr,
|
||||
; r2 unsigned char *dst_ptr,
|
||||
; r3 int ystride,
|
||||
; stack unsigned char *udst_ptr,
|
||||
; stack unsigned char *vdst_ptr
|
||||
|
||||
|vp8_recon16x16mb_neon| PROC
|
||||
mov r12, #4 ;loop counter for Y loop (4 iterations x 4 rows of 16 pixels)
|
||||
|
||||
recon16x16mb_loop_y
|
||||
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr (64 bytes per iteration)
|
||||
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr (64 shorts per iteration)
|
||||
vld1.u8 {q14, q15}, [r0]!
|
||||
vld1.16 {q10, q11}, [r1]!
|
||||
|
||||
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
|
||||
vmovl.u8 q1, d25
|
||||
vmovl.u8 q2, d26
|
||||
vmovl.u8 q3, d27
|
||||
vmovl.u8 q4, d28
|
||||
vmovl.u8 q5, d29
|
||||
vmovl.u8 q6, d30
|
||||
vld1.16 {q12, q13}, [r1]! ;q12/q13 are free once d24-d27 have been widened
|
||||
vmovl.u8 q7, d31
|
||||
vld1.16 {q14, q15}, [r1]!
|
||||
|
||||
pld [r0] ;prefetch the next pred/diff data
|
||||
pld [r1]
|
||||
pld [r1, #64]
|
||||
|
||||
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
|
||||
vadd.s16 q1, q1, q9
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q3, q3, q11
|
||||
vadd.s16 q4, q4, q12
|
||||
vadd.s16 q5, q5, q13
|
||||
vadd.s16 q6, q6, q14
|
||||
vadd.s16 q7, q7, q15
|
||||
|
||||
vqmovun.s16 d0, q0 ;CLAMP() saturation
|
||||
vqmovun.s16 d1, q1
|
||||
vqmovun.s16 d2, q2
|
||||
vqmovun.s16 d3, q3
|
||||
vqmovun.s16 d4, q4
|
||||
vqmovun.s16 d5, q5
|
||||
vst1.u8 {q0}, [r2], r3 ;store result (one 16-byte row, then advance by ystride)
|
||||
vqmovun.s16 d6, q6
|
||||
vst1.u8 {q1}, [r2], r3
|
||||
vqmovun.s16 d7, q7
|
||||
vst1.u8 {q2}, [r2], r3
|
||||
subs r12, r12, #1
|
||||
|
||||
moveq r12, #2 ;loop counter for UV loop (set only when the Y counter reaches zero)
|
||||
|
||||
vst1.u8 {q3}, [r2], r3
|
||||
bne recon16x16mb_loop_y ;flags still come from the subs above
|
||||
|
||||
mov r3, r3, lsr #1 ;uv_stride = ystride>>1
|
||||
ldr r2, [sp] ;load udst_ptr
|
||||
|
||||
recon16x16mb_loop_uv
|
||||
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr
|
||||
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr
|
||||
vld1.u8 {q14, q15}, [r0]!
|
||||
vld1.16 {q10, q11}, [r1]!
|
||||
|
||||
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
|
||||
vmovl.u8 q1, d25
|
||||
vmovl.u8 q2, d26
|
||||
vmovl.u8 q3, d27
|
||||
vmovl.u8 q4, d28
|
||||
vmovl.u8 q5, d29
|
||||
vmovl.u8 q6, d30
|
||||
vld1.16 {q12, q13}, [r1]!
|
||||
vmovl.u8 q7, d31
|
||||
vld1.16 {q14, q15}, [r1]!
|
||||
|
||||
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
|
||||
vadd.s16 q1, q1, q9
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q3, q3, q11
|
||||
vadd.s16 q4, q4, q12
|
||||
vadd.s16 q5, q5, q13
|
||||
vadd.s16 q6, q6, q14
|
||||
|
||||
vqmovun.s16 d0, q0 ;CLAMP() saturation
|
||||
vadd.s16 q7, q7, q15
|
||||
vqmovun.s16 d1, q1
|
||||
vqmovun.s16 d2, q2
|
||||
vqmovun.s16 d3, q3
|
||||
vst1.u8 {d0}, [r2], r3 ;store result (one 8-byte row, then advance by uv_stride)
|
||||
vqmovun.s16 d4, q4
|
||||
vst1.u8 {d1}, [r2], r3
|
||||
vqmovun.s16 d5, q5
|
||||
vst1.u8 {d2}, [r2], r3
|
||||
vqmovun.s16 d6, q6
|
||||
vst1.u8 {d3}, [r2], r3
|
||||
vqmovun.s16 d7, q7
|
||||
vst1.u8 {d4}, [r2], r3
|
||||
subs r12, r12, #1
|
||||
|
||||
vst1.u8 {d5}, [r2], r3
|
||||
vst1.u8 {d6}, [r2], r3
|
||||
vst1.u8 {d7}, [r2], r3
|
||||
|
||||
ldrne r2, [sp, #4] ;load vdst_ptr (only when a second pass remains)
|
||||
bne recon16x16mb_loop_uv
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,54 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_recon2b_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *pred_ptr,
|
||||
; r1 short *diff_ptr,
|
||||
; r2 unsigned char *dst_ptr,
|
||||
; r3 int stride
|
||||
|
||||
|vp8_recon2b_neon| PROC
|
||||
vld1.u8 {q8, q9}, [r0] ;load data from pred_ptr (32 bytes, no pointer writeback)
|
||||
vld1.16 {q4, q5}, [r1]! ;load data from diff_ptr (first 16 of 32 shorts)
|
||||
|
||||
vmovl.u8 q0, d16 ;modify Pred data from 8 bits to 16 bits
|
||||
vld1.16 {q6, q7}, [r1]! ;remaining 16 shorts of diff data
|
||||
vmovl.u8 q1, d17
|
||||
vmovl.u8 q2, d18
|
||||
vmovl.u8 q3, d19
|
||||
|
||||
vadd.s16 q0, q0, q4 ;add Diff data and Pred data together
|
||||
vadd.s16 q1, q1, q5
|
||||
vadd.s16 q2, q2, q6
|
||||
vadd.s16 q3, q3, q7
|
||||
|
||||
vqmovun.s16 d0, q0 ;CLAMP() saturation
|
||||
vqmovun.s16 d1, q1
|
||||
vqmovun.s16 d2, q2
|
||||
vqmovun.s16 d3, q3
|
||||
add r0, r2, r3 ;r0 = dst_ptr + stride (row 1)
|
||||
|
||||
vst1.u8 {d0}, [r2] ;store result (row 0, 8 bytes)
|
||||
vst1.u8 {d1}, [r0], r3 ;row 1, then r0 advances to row 2
|
||||
add r2, r0, r3 ;r2 = row 3
|
||||
vst1.u8 {d2}, [r0] ;row 2
|
||||
vst1.u8 {d3}, [r2], r3 ;row 3
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,69 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_recon4b_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *pred_ptr,
|
||||
; r1 short *diff_ptr,
|
||||
; r2 unsigned char *dst_ptr,
|
||||
; r3 int stride
|
||||
|
||||
|vp8_recon4b_neon| PROC
|
||||
vld1.u8 {q12, q13}, [r0]! ;load data from pred_ptr (64 bytes in total)
|
||||
vld1.16 {q8, q9}, [r1]! ;load data from diff_ptr (64 shorts in total)
|
||||
vld1.u8 {q14, q15}, [r0]
|
||||
vld1.16 {q10, q11}, [r1]!
|
||||
|
||||
vmovl.u8 q0, d24 ;modify Pred data from 8 bits to 16 bits
|
||||
vmovl.u8 q1, d25
|
||||
vmovl.u8 q2, d26
|
||||
vmovl.u8 q3, d27
|
||||
vmovl.u8 q4, d28
|
||||
vmovl.u8 q5, d29
|
||||
vmovl.u8 q6, d30
|
||||
vld1.16 {q12, q13}, [r1]! ;q12/q13 are free once d24-d27 have been widened
|
||||
vmovl.u8 q7, d31
|
||||
vld1.16 {q14, q15}, [r1]
|
||||
|
||||
vadd.s16 q0, q0, q8 ;add Diff data and Pred data together
|
||||
vadd.s16 q1, q1, q9
|
||||
vadd.s16 q2, q2, q10
|
||||
vadd.s16 q3, q3, q11
|
||||
vadd.s16 q4, q4, q12
|
||||
vadd.s16 q5, q5, q13
|
||||
vadd.s16 q6, q6, q14
|
||||
vadd.s16 q7, q7, q15
|
||||
|
||||
vqmovun.s16 d0, q0 ;CLAMP() saturation
|
||||
vqmovun.s16 d1, q1
|
||||
vqmovun.s16 d2, q2
|
||||
vqmovun.s16 d3, q3
|
||||
vqmovun.s16 d4, q4
|
||||
vqmovun.s16 d5, q5
|
||||
vqmovun.s16 d6, q6
|
||||
vqmovun.s16 d7, q7
|
||||
add r0, r2, r3 ;r0 = dst_ptr + stride (row 1)
|
||||
|
||||
vst1.u8 {q0}, [r2] ;store result (row 0, 16 bytes)
|
||||
vst1.u8 {q1}, [r0], r3 ;row 1, then r0 advances to row 2
|
||||
add r2, r0, r3 ;r2 = row 3
|
||||
vst1.u8 {q2}, [r0] ;row 2
|
||||
vst1.u8 {q3}, [r2], r3 ;row 3
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "recon.h"
|
||||
#include "blockd.h"
|
||||
|
||||
extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
|
||||
|
||||
/*
 * Reconstruct a whole macroblock by handing the predictor, the residual and
 * the three destination planes of `x` to the NEON routine
 * vp8_recon16x16mb_neon().
 *
 * rtcd is part of the signature but is not referenced here: the NEON
 * implementation is called directly.
 *
 * Only the luma stride is passed on. x->dst.uv_stride is never read; the
 * assembly routine derives the chroma stride itself as ystride >> 1.
 */
void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
{
    vp8_recon16x16mb_neon(&x->predictor[0], &x->diff[0],
                          x->dst.y_buffer, x->dst.y_stride,
                          x->dst.u_buffer, x->dst.v_buffer);
}
|
|
@ -0,0 +1,61 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_recon_b_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *pred_ptr,
|
||||
; r1 short *diff_ptr,
|
||||
; r2 unsigned char *dst_ptr,
|
||||
; r3 int stride
|
||||
|
||||
|vp8_recon_b_neon| PROC
|
||||
mov r12, #16 ;pred_ptr is advanced by 16 bytes per row
|
||||
|
||||
vld1.u8 {d28}, [r0], r12 ;load 4 data/line from pred_ptr (8 bytes are read, only the first 4 are stored later)
|
||||
vld1.16 {q10, q11}, [r1]! ;load data from diff_ptr (each 32-byte load steps to the next row of diffs)
|
||||
vld1.u8 {d29}, [r0], r12
|
||||
vld1.16 {q11, q12}, [r1]! ;overwrites q11; row 1 diffs are used from d22
|
||||
vld1.u8 {d30}, [r0], r12
|
||||
vld1.16 {q12, q13}, [r1]! ;overwrites q12; row 2 diffs are used from d24
|
||||
vld1.u8 {d31}, [r0], r12
|
||||
vld1.16 {q13}, [r1] ;row 3 diffs are used from d26
|
||||
|
||||
vmovl.u8 q0, d28 ;modify Pred data from 8 bits to 16 bits
|
||||
vmovl.u8 q1, d29 ;Pred data in d0, d2, d4, d6
|
||||
vmovl.u8 q2, d30
|
||||
vmovl.u8 q3, d31
|
||||
|
||||
vadd.s16 d0, d0, d20 ;add Diff data and Pred data together
|
||||
vadd.s16 d2, d2, d22
|
||||
vadd.s16 d4, d4, d24
|
||||
vadd.s16 d6, d6, d26
|
||||
|
||||
vqmovun.s16 d0, q0 ;CLAMP() saturation
|
||||
vqmovun.s16 d1, q1
|
||||
vqmovun.s16 d2, q2
|
||||
vqmovun.s16 d3, q3
|
||||
add r1, r2, r3 ;r1 = dst_ptr + stride (row 1); diff_ptr is no longer needed
|
||||
|
||||
vst1.32 {d0[0]}, [r2] ;store result (4 bytes per row)
|
||||
vst1.32 {d1[0]}, [r1], r3
|
||||
add r2, r1, r3 ;r2 = row 3
|
||||
vst1.32 {d2[0]}, [r1]
|
||||
vst1.32 {d3[0]}, [r2], r3
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,36 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_push_neon|
|
||||
EXPORT |vp8_pop_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|vp8_push_neon| PROC
|
||||
vst1.i64 {d8, d9, d10, d11}, [r0]! ; save d8-d11 to the buffer at r0 (32 bytes), advancing r0
|
||||
vst1.i64 {d12, d13, d14, d15}, [r0]! ; save d12-d15 to the next 32 bytes (64 bytes in total)
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
|vp8_pop_neon| PROC
|
||||
vld1.i64 {d8, d9, d10, d11}, [r0]! ; restore d8-d11 from the buffer at r0 (32 bytes), advancing r0
|
||||
vld1.i64 {d12, d13, d14, d15}, [r0]! ; restore d12-d15 from the next 32 bytes (64 bytes in total)
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_short_idct4x4llm_1_neon|
|
||||
EXPORT |vp8_dc_only_idct_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch);
|
||||
; r0 short *input;
|
||||
; r1 short *output;
|
||||
; r2 int pitch;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|vp8_short_idct4x4llm_1_neon| PROC
|
||||
vld1.16 {d0[]}, [r0] ;load input[0] and replicate it into all four 16-bit lanes of d0
|
||||
|
||||
add r3, r1, r2 ;r3 = output + pitch (pitch is added to the pointer as a byte offset)
|
||||
add r12, r3, r2 ;r12 = output + 2*pitch
|
||||
|
||||
vrshr.s16 d0, d0, #3 ;rounding shift: (input[0] + 4) >> 3
|
||||
|
||||
add r0, r12, r2 ;r0 = output + 3*pitch
|
||||
|
||||
vst1.16 {d0}, [r1] ;write the same four shorts to each of the 4 output rows
|
||||
vst1.16 {d0}, [r3]
|
||||
vst1.16 {d0}, [r12]
|
||||
vst1.16 {d0}, [r0]
|
||||
|
||||
bx lr
|
||||
ENDP
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;void vp8_dc_only_idct_c(short input_dc, short *output, int pitch);
|
||||
; r0 short input_dc;
|
||||
; r1 short *output;
|
||||
; r2 int pitch;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|vp8_dc_only_idct_neon| PROC
|
||||
vdup.16 d0, r0 ;replicate input_dc into all four 16-bit lanes of d0
|
||||
|
||||
add r3, r1, r2 ;r3 = output + pitch (pitch is added to the pointer as a byte offset)
|
||||
add r12, r3, r2 ;r12 = output + 2*pitch
|
||||
|
||||
vrshr.s16 d0, d0, #3 ;rounding shift: (input_dc + 4) >> 3
|
||||
|
||||
add r0, r12, r2 ;r0 = output + 3*pitch
|
||||
|
||||
vst1.16 {d0}, [r1] ;write the same four shorts to each of the 4 output rows
|
||||
vst1.16 {d0}, [r3]
|
||||
vst1.16 {d0}, [r12]
|
||||
vst1.16 {d0}, [r0]
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
END
|
|
@ -0,0 +1,127 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_short_idct4x4llm_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;*************************************************************
|
||||
;void vp8_short_idct4x4llm_c(short *input, short *output, int pitch)
|
||||
;r0 short * input
|
||||
;r1 short * output
|
||||
;r2 int pitch
|
||||
;*************************************************************
|
||||
;static const int cospi8sqrt2minus1=20091;
|
||||
;static const int sinpi8sqrt2 =35468;
|
||||
;static const int rounding = 0;
|
||||
;Optimization note: The data resulting from dequantization is signed 13-bit data
|
||||
;in the range [-4096, 4095]. This allows the "vqdmulh" (NEON) instruction to be used, since
|
||||
;the product won't go out of range (13+16+1=30 bits < 32 bits). This instruction gives the high half
|
||||
;of the multiplication result, which is what the IDCT needs.
|
||||
|
||||
|vp8_short_idct4x4llm_neon| PROC
|
||||
ldr r12, _idct_coeff_ ;r12 = address of idct_coeff
|
||||
vld1.16 {q1, q2}, [r0] ;load all 16 input coefficients
|
||||
vld1.16 {d0}, [r12] ;d0 = {20091, 20091, 35468, 35468}
|
||||
|
||||
vswp d3, d4 ;q2(vp[4] vp[12])
|
||||
|
||||
vqdmulh.s16 q3, q2, d0[2] ;doubling multiply, high half, by the 35468 entry (sinpi8sqrt2)
|
||||
vqdmulh.s16 q4, q2, d0[0] ;doubling multiply, high half, by the 20091 entry (cospi8sqrt2minus1)
|
||||
|
||||
vqadd.s16 d12, d2, d3 ;a1
|
||||
vqsub.s16 d13, d2, d3 ;b1
|
||||
|
||||
vshr.s16 q3, q3, #1 ;undo the doubling done by vqdmulh
|
||||
vshr.s16 q4, q4, #1
|
||||
|
||||
vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
|
||||
vqadd.s16 q4, q4, q2 ;add x back: the coefficient is stored "minus 1"
|
||||
|
||||
;d6 - c1:temp1
|
||||
;d7 - d1:temp2
|
||||
;d8 - d1:temp1
|
||||
;d9 - c1:temp2
|
||||
|
||||
vqsub.s16 d10, d6, d9 ;c1
|
||||
vqadd.s16 d11, d7, d8 ;d1
|
||||
|
||||
vqadd.s16 d2, d12, d11 ;a1 + d1
|
||||
vqadd.s16 d3, d13, d10 ;b1 + c1
|
||||
vqsub.s16 d4, d13, d10 ;b1 - c1
|
||||
vqsub.s16 d5, d12, d11 ;a1 - d1
|
||||
|
||||
vtrn.32 d2, d4 ;transpose the 4x4 block before the second pass
|
||||
vtrn.32 d3, d5
|
||||
vtrn.16 d2, d3
|
||||
vtrn.16 d4, d5
|
||||
|
||||
vswp d3, d4 ;same regrouping as before the first pass
|
||||
|
||||
vqdmulh.s16 q3, q2, d0[2]
|
||||
vqdmulh.s16 q4, q2, d0[0]
|
||||
|
||||
vqadd.s16 d12, d2, d3 ;a1
|
||||
vqsub.s16 d13, d2, d3 ;b1
|
||||
|
||||
vshr.s16 q3, q3, #1
|
||||
vshr.s16 q4, q4, #1
|
||||
|
||||
vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negative number)
|
||||
vqadd.s16 q4, q4, q2
|
||||
|
||||
vqsub.s16 d10, d6, d9 ;c1
|
||||
vqadd.s16 d11, d7, d8 ;d1
|
||||
|
||||
vqadd.s16 d2, d12, d11
|
||||
vqadd.s16 d3, d13, d10
|
||||
vqsub.s16 d4, d13, d10
|
||||
vqsub.s16 d5, d12, d11
|
||||
|
||||
vrshr.s16 d2, d2, #3 ;final rounding shift: (x + 4) >> 3
|
||||
vrshr.s16 d3, d3, #3
|
||||
vrshr.s16 d4, d4, #3
|
||||
vrshr.s16 d5, d5, #3
|
||||
|
||||
add r3, r1, r2 ;r3 = output + pitch (pitch is added to the pointer as a byte offset)
|
||||
add r12, r3, r2 ;r12 = output + 2*pitch
|
||||
add r0, r12, r2 ;r0 = output + 3*pitch
|
||||
|
||||
vtrn.32 d2, d4 ;transpose back so each d register is one output row
|
||||
vtrn.32 d3, d5
|
||||
vtrn.16 d2, d3
|
||||
vtrn.16 d4, d5
|
||||
|
||||
vst1.16 {d2}, [r1]
|
||||
vst1.16 {d3}, [r3]
|
||||
vst1.16 {d4}, [r12]
|
||||
vst1.16 {d5}, [r0]
|
||||
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA idct4x4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named idct4x4_dat. The DCD directives below reserve three 32-bit words:
|
||||
;_idct_coeff_ holds the address of idct_coeff, and idct_coeff holds the two coefficient words.
|
||||
;Data addresses: idct_coeff, idct_coeff+4
|
||||
_idct_coeff_
|
||||
DCD idct_coeff
|
||||
idct_coeff
|
||||
DCD 0x4e7b4e7b, 0x8a8c8a8c
|
||||
|
||||
;As 16-bit values: 20091, 20091, 35468, 35468 (cospi8sqrt2minus1 twice, then sinpi8sqrt2 twice)
|
||||
|
||||
END
|
|
@ -0,0 +1,495 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict16x16_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack(r4) unsigned char *dst_ptr,
|
||||
; stack(r5) int dst_pitch
|
||||
|
||||
;Note: To take advantage of the 8-bit multiplication instructions in NEON, first apply abs() to
|
||||
; the filter coeffs to make them u8. Then use vmlsl for the negative coeffs. After multiplication,
|
||||
; the result can be negative, so the result is treated as s16. However, it is also possible
|
||||
; for the result to be a large positive number (> 2^15-1), which could be mistaken for a
|
||||
; negative number. To avoid that error, apply the filter coeffs in the order 0, 1, 4, 5, 2,
|
||||
; which ensures that the result stays in s16 range. Finally, add the product of the 3rd filter
|
||||
; coeff with a saturating add. The same applies to the other filter functions.
|
||||
|
||||
|vp8_sixtap_predict16x16_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
ldr r12, _filter16_coeff_
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
beq secondpass_filter16x16_only
|
||||
|
||||
add r2, r12, r2, lsl #5 ;calculate filter location
|
||||
|
||||
cmp r3, #0 ;skip second_pass filter if yoffset=0
|
||||
|
||||
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
|
||||
|
||||
beq firstpass_filter16x16_only
|
||||
|
||||
sub sp, sp, #336 ;reserve space on stack for temporary storage
|
||||
mov lr, sp
|
||||
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
mov r2, #7 ;loop counter
|
||||
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vdup.8 d1, d24[4]
|
||||
vdup.8 d2, d25[0]
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
;First Pass: output_height lines x output_width columns (21x16)
|
||||
filt_blk2d_fp16x16_loop_neon
|
||||
vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
|
||||
vld1.u8 {d9, d10, d11}, [r0], r1
|
||||
vld1.u8 {d12, d13, d14}, [r0], r1
|
||||
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q9, d7, d0
|
||||
vmull.u8 q10, d9, d0
|
||||
vmull.u8 q11, d10, d0
|
||||
vmull.u8 q12, d12, d0
|
||||
vmull.u8 q13, d13, d0
|
||||
|
||||
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d29, d9, d10, #1
|
||||
vext.8 d30, d12, d13, #1
|
||||
|
||||
vmlsl.u8 q8, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q10, d29, d1
|
||||
vmlsl.u8 q12, d30, d1
|
||||
|
||||
vext.8 d28, d7, d8, #1
|
||||
vext.8 d29, d10, d11, #1
|
||||
vext.8 d30, d13, d14, #1
|
||||
|
||||
vmlsl.u8 q9, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q11, d29, d1
|
||||
vmlsl.u8 q13, d30, d1
|
||||
|
||||
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d29, d9, d10, #4
|
||||
vext.8 d30, d12, d13, #4
|
||||
|
||||
vmlsl.u8 q8, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q10, d29, d4
|
||||
vmlsl.u8 q12, d30, d4
|
||||
|
||||
vext.8 d28, d7, d8, #4
|
||||
vext.8 d29, d10, d11, #4
|
||||
vext.8 d30, d13, d14, #4
|
||||
|
||||
vmlsl.u8 q9, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q11, d29, d4
|
||||
vmlsl.u8 q13, d30, d4
|
||||
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d9, d10, #5
|
||||
vext.8 d30, d12, d13, #5
|
||||
|
||||
vmlal.u8 q8, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q10, d29, d5
|
||||
vmlal.u8 q12, d30, d5
|
||||
|
||||
vext.8 d28, d7, d8, #5
|
||||
vext.8 d29, d10, d11, #5
|
||||
vext.8 d30, d13, d14, #5
|
||||
|
||||
vmlal.u8 q9, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q11, d29, d5
|
||||
vmlal.u8 q13, d30, d5
|
||||
|
||||
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d29, d9, d10, #2
|
||||
vext.8 d30, d12, d13, #2
|
||||
|
||||
vmlal.u8 q8, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q10, d29, d2
|
||||
vmlal.u8 q12, d30, d2
|
||||
|
||||
vext.8 d28, d7, d8, #2
|
||||
vext.8 d29, d10, d11, #2
|
||||
vext.8 d30, d13, d14, #2
|
||||
|
||||
vmlal.u8 q9, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q11, d29, d2
|
||||
vmlal.u8 q13, d30, d2
|
||||
|
||||
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d29, d9, d10, #3
|
||||
vext.8 d30, d12, d13, #3
|
||||
|
||||
vext.8 d15, d7, d8, #3
|
||||
vext.8 d31, d10, d11, #3
|
||||
vext.8 d6, d13, d14, #3
|
||||
|
||||
vmull.u8 q4, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q5, d29, d3
|
||||
vmull.u8 q6, d30, d3
|
||||
|
||||
vqadd.s16 q8, q4 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q10, q5
|
||||
vqadd.s16 q12, q6
|
||||
|
||||
vmull.u8 q6, d15, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q7, d31, d3
|
||||
vmull.u8 q3, d6, d3
|
||||
|
||||
subs r2, r2, #1
|
||||
|
||||
vqadd.s16 q9, q6
|
||||
vqadd.s16 q11, q7
|
||||
vqadd.s16 q13, q3
|
||||
|
||||
vqrshrun.s16 d6, q8, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q9, #7
|
||||
vqrshrun.s16 d8, q10, #7
|
||||
vqrshrun.s16 d9, q11, #7
|
||||
vqrshrun.s16 d10, q12, #7
|
||||
vqrshrun.s16 d11, q13, #7
|
||||
|
||||
vst1.u8 {d6, d7, d8}, [lr]! ;store result
|
||||
vst1.u8 {d9, d10, d11}, [lr]!
|
||||
|
||||
bne filt_blk2d_fp16x16_loop_neon
|
||||
|
||||
;Second pass: 16x16
|
||||
;secondpass_filter - do first 8-columns and then second 8-columns
|
||||
add r3, r12, r3, lsl #5
|
||||
sub lr, lr, #336
|
||||
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
mov r3, #2 ;loop counter
|
||||
|
||||
vabs.s32 q7, q5
|
||||
vabs.s32 q8, q6
|
||||
|
||||
mov r2, #16
|
||||
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vdup.8 d1, d14[4]
|
||||
vdup.8 d2, d15[0]
|
||||
vdup.8 d3, d15[4]
|
||||
vdup.8 d4, d16[0]
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
filt_blk2d_sp16x16_outloop_neon
|
||||
vld1.u8 {d18}, [lr], r2 ;load src data
|
||||
vld1.u8 {d19}, [lr], r2
|
||||
vld1.u8 {d20}, [lr], r2
|
||||
vld1.u8 {d21}, [lr], r2
|
||||
mov r12, #4 ;loop counter
|
||||
vld1.u8 {d22}, [lr], r2
|
||||
|
||||
secondpass_inner_loop_neon
|
||||
vld1.u8 {d23}, [lr], r2 ;load src data
|
||||
vld1.u8 {d24}, [lr], r2
|
||||
vld1.u8 {d25}, [lr], r2
|
||||
vld1.u8 {d26}, [lr], r2
|
||||
|
||||
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d19, d0
|
||||
vmull.u8 q5, d20, d0
|
||||
vmull.u8 q6, d21, d0
|
||||
|
||||
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d20, d1
|
||||
vmlsl.u8 q5, d21, d1
|
||||
vmlsl.u8 q6, d22, d1
|
||||
|
||||
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d23, d4
|
||||
vmlsl.u8 q5, d24, d4
|
||||
vmlsl.u8 q6, d25, d4
|
||||
|
||||
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d21, d2
|
||||
vmlal.u8 q5, d22, d2
|
||||
vmlal.u8 q6, d23, d2
|
||||
|
||||
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d24, d5
|
||||
vmlal.u8 q5, d25, d5
|
||||
vmlal.u8 q6, d26, d5
|
||||
|
||||
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d22, d3
|
||||
vmull.u8 q9, d23, d3
|
||||
vmull.u8 q10, d24, d3
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vmov q9, q11
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vmov q10, q12
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vmov d22, d26
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
|
||||
bne secondpass_inner_loop_neon
|
||||
|
||||
subs r3, r3, #1
|
||||
sub lr, lr, #336
|
||||
add lr, lr, #8
|
||||
|
||||
sub r4, r4, r5, lsl #4
|
||||
add r4, r4, #8
|
||||
|
||||
bne filt_blk2d_sp16x16_outloop_neon
|
||||
|
||||
add sp, sp, #336
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;--------------------
|
||||
firstpass_filter16x16_only
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
mov r2, #8 ;loop counter
|
||||
sub r0, r0, #2 ;move srcptr back to (column-2)
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vdup.8 d1, d24[4]
|
||||
vdup.8 d2, d25[0]
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
;First Pass: output_height lines x output_width columns (16x16)
|
||||
filt_blk2d_fpo16x16_loop_neon
|
||||
vld1.u8 {d6, d7, d8}, [r0], r1 ;load src data
|
||||
vld1.u8 {d9, d10, d11}, [r0], r1
|
||||
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
|
||||
vmull.u8 q6, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q7, d7, d0
|
||||
vmull.u8 q8, d9, d0
|
||||
vmull.u8 q9, d10, d0
|
||||
|
||||
vext.8 d20, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d21, d9, d10, #1
|
||||
vext.8 d22, d7, d8, #1
|
||||
vext.8 d23, d10, d11, #1
|
||||
vext.8 d24, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d25, d9, d10, #4
|
||||
vext.8 d26, d7, d8, #4
|
||||
vext.8 d27, d10, d11, #4
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d9, d10, #5
|
||||
|
||||
vmlsl.u8 q6, d20, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d21, d1
|
||||
vmlsl.u8 q7, d22, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q9, d23, d1
|
||||
vmlsl.u8 q6, d24, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d25, d4
|
||||
vmlsl.u8 q7, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q9, d27, d4
|
||||
vmlal.u8 q6, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q8, d29, d5
|
||||
|
||||
vext.8 d20, d7, d8, #5
|
||||
vext.8 d21, d10, d11, #5
|
||||
vext.8 d22, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d23, d9, d10, #2
|
||||
vext.8 d24, d7, d8, #2
|
||||
vext.8 d25, d10, d11, #2
|
||||
|
||||
vext.8 d26, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d27, d9, d10, #3
|
||||
vext.8 d28, d7, d8, #3
|
||||
vext.8 d29, d10, d11, #3
|
||||
|
||||
vmlal.u8 q7, d20, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q9, d21, d5
|
||||
vmlal.u8 q6, d22, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d23, d2
|
||||
vmlal.u8 q7, d24, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q9, d25, d2
|
||||
|
||||
vmull.u8 q10, d26, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q11, d27, d3
|
||||
vmull.u8 q12, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q15, d29, d3
|
||||
|
||||
vqadd.s16 q6, q10 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q11
|
||||
vqadd.s16 q7, q12
|
||||
vqadd.s16 q9, q15
|
||||
|
||||
subs r2, r2, #1
|
||||
|
||||
vqrshrun.s16 d6, q6, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q7, #7
|
||||
vqrshrun.s16 d8, q8, #7
|
||||
vqrshrun.s16 d9, q9, #7
|
||||
|
||||
vst1.u8 {q3}, [r4], r5 ;store result
|
||||
vst1.u8 {q4}, [r4], r5
|
||||
|
||||
bne filt_blk2d_fpo16x16_loop_neon
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;--------------------
|
||||
secondpass_filter16x16_only
|
||||
;Second pass: 16x16
|
||||
add r3, r12, r3, lsl #5
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
mov r3, #2 ;loop counter
|
||||
|
||||
vabs.s32 q7, q5
|
||||
vabs.s32 q8, q6
|
||||
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vdup.8 d1, d14[4]
|
||||
vdup.8 d2, d15[0]
|
||||
vdup.8 d3, d15[4]
|
||||
vdup.8 d4, d16[0]
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
filt_blk2d_spo16x16_outloop_neon
|
||||
vld1.u8 {d18}, [r0], r1 ;load src data
|
||||
vld1.u8 {d19}, [r0], r1
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vld1.u8 {d21}, [r0], r1
|
||||
mov r12, #4 ;loop counter
|
||||
vld1.u8 {d22}, [r0], r1
|
||||
|
||||
secondpass_only_inner_loop_neon
|
||||
vld1.u8 {d23}, [r0], r1 ;load src data
|
||||
vld1.u8 {d24}, [r0], r1
|
||||
vld1.u8 {d25}, [r0], r1
|
||||
vld1.u8 {d26}, [r0], r1
|
||||
|
||||
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d19, d0
|
||||
vmull.u8 q5, d20, d0
|
||||
vmull.u8 q6, d21, d0
|
||||
|
||||
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d20, d1
|
||||
vmlsl.u8 q5, d21, d1
|
||||
vmlsl.u8 q6, d22, d1
|
||||
|
||||
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d23, d4
|
||||
vmlsl.u8 q5, d24, d4
|
||||
vmlsl.u8 q6, d25, d4
|
||||
|
||||
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d21, d2
|
||||
vmlal.u8 q5, d22, d2
|
||||
vmlal.u8 q6, d23, d2
|
||||
|
||||
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d24, d5
|
||||
vmlal.u8 q5, d25, d5
|
||||
vmlal.u8 q6, d26, d5
|
||||
|
||||
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d22, d3
|
||||
vmull.u8 q9, d23, d3
|
||||
vmull.u8 q10, d24, d3
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vmov q9, q11
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vmov q10, q12
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vmov d22, d26
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
|
||||
bne secondpass_only_inner_loop_neon
|
||||
|
||||
subs r3, r3, #1
|
||||
sub r0, r0, r1, lsl #4
|
||||
sub r0, r0, r1, lsl #2
|
||||
sub r0, r0, r1
|
||||
add r0, r0, #8
|
||||
|
||||
sub r4, r4, r5, lsl #4
|
||||
add r4, r4, #8
|
||||
|
||||
bne filt_blk2d_spo16x16_outloop_neon
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters16_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named subpelfilters16_dat: one pointer word followed by a table of 64 words (8 rows of 8 words).
|
||||
;Label _filter16_coeff_ marks the pointer word, which holds the address of the table; label filter16_coeff marks the table itself.
|
||||
;Each row is 32 bytes: six signed taps that sum to 128, then two zero words. Row addresses: filter16_coeff, filter16_coeff+32, filter16_coeff+64 ...
|
||||
_filter16_coeff_
|
||||
DCD filter16_coeff
|
||||
filter16_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,426 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack(r4) unsigned char *dst_ptr,
|
||||
; stack(lr) int dst_pitch
|
||||
|
||||
;4x4 six-tap sub-pixel prediction. A horizontal (first) pass is selected by xoffset and a vertical (second) pass by yoffset;
;a pass is skipped when its offset is 0. This section is the entry point plus the path that runs both passes.
|vp8_sixtap_predict_neon| PROC
|
||||
push {r4, lr} ;two words pushed, so the stack arguments are now at sp+8 and sp+12
|
||||
|
||||
;r12 = address of the filter4_coeff table, read through the pointer word at _filter4_coeff_
ldr r12, _filter4_coeff_
|
||||
ldr r4, [sp, #8] ;load dst_ptr from stack
|
||||
ldr lr, [sp, #12] ;load dst_pitch from stack
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
beq secondpass_filter4x4_only
|
||||
|
||||
add r2, r12, r2, lsl #5 ;calculate filter location: table base + xoffset * 32 bytes (one 8-word row per offset)
|
||||
|
||||
cmp r3, #0 ;skip second_pass filter if yoffset=0
|
||||
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
|
||||
|
||||
beq firstpass_filter4x4_only ;uses the flags from the cmp r3 above
|
||||
|
||||
vabs.s32 q12, q14 ;get abs(filter_parameters); the negative taps are applied with vmlsl below
|
||||
vabs.s32 q13, q15
|
||||
|
||||
sub r0, r0, #2 ;go back 2 columns of src data
|
||||
sub r0, r0, r1, lsl #1 ;go back 2 lines of src data
|
||||
|
||||
;First pass: output_height lines x output_width columns (9x4)
|
||||
vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5): low byte of each 32-bit tap broadcast to all 8 lanes
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vdup.8 d1, d24[4]
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vdup.8 d2, d25[0]
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
pld [r0] ;prefetch the next three source lines
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d19, d8, d9, #5
|
||||
vext.8 d20, d10, d11, #5
|
||||
vext.8 d21, d12, d13, #5
|
||||
|
||||
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
|
||||
vswp d11, d12 ;q3 and q5 now hold the first 8 bytes of two lines each
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
|
||||
vzip.32 d20, d21
|
||||
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmull.u8 q8, d20, d5
|
||||
|
||||
vmov q4, q3 ;keep original src data in q4 q6
|
||||
vmov q6, q5
|
||||
|
||||
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
|
||||
vshr.u64 q10, q6, #8
|
||||
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
|
||||
vmlal.u8 q8, d10, d0
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
|
||||
vshr.u64 q5, q6, #32
|
||||
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d20, d1
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
|
||||
vshr.u64 q10, q6, #16
|
||||
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d10, d4
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
|
||||
vshr.u64 q5, q6, #24
|
||||
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d20, d2
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
|
||||
vzip.32 d10, d11
|
||||
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3]), kept separate and merged with a saturating add
|
||||
vmull.u8 q10, d10, d3
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ;load rest 5-line src data
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
|
||||
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q10
|
||||
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
|
||||
vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8; d27 = first-pass lines 0 and 1 (4 bytes each)
|
||||
vqrshrun.s16 d28, q8, #7 ;d28 = first-pass lines 2 and 3
|
||||
|
||||
;First Pass on rest 5-line data
|
||||
vld1.u8 {q11}, [r0], r1 ;9th source line, filtered on its own through d31/q12
|
||||
|
||||
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d19, d8, d9, #5
|
||||
vext.8 d20, d10, d11, #5
|
||||
vext.8 d21, d12, d13, #5
|
||||
|
||||
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
|
||||
vswp d11, d12
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
|
||||
vzip.32 d20, d21
|
||||
vext.8 d31, d22, d23, #5 ;construct src_ptr[3]
|
||||
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmull.u8 q8, d20, d5
|
||||
vmull.u8 q12, d31, d5 ;(src_ptr[3] * vp8_filter[5]); q12 is reused as an accumulator now that d0-d5 hold the taps
|
||||
|
||||
vmov q4, q3 ;keep original src data in q4 q6
|
||||
vmov q6, q5
|
||||
|
||||
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
|
||||
vshr.u64 q10, q6, #8
|
||||
|
||||
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
|
||||
vmlal.u8 q8, d10, d0
|
||||
vmlal.u8 q12, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
|
||||
vshr.u64 q5, q6, #32
|
||||
vext.8 d31, d22, d23, #1 ;construct src_ptr[-1]
|
||||
|
||||
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d20, d1
|
||||
vmlsl.u8 q12, d31, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
|
||||
vshr.u64 q10, q6, #16
|
||||
vext.8 d31, d22, d23, #4 ;construct src_ptr[2]
|
||||
|
||||
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d10, d4
|
||||
vmlsl.u8 q12, d31, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
|
||||
vshr.u64 q5, q6, #24
|
||||
vext.8 d31, d22, d23, #2 ;construct src_ptr[0]
|
||||
|
||||
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d20, d2
|
||||
vmlal.u8 q12, d31, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
|
||||
vzip.32 d10, d11
|
||||
vext.8 d31, d22, d23, #3 ;construct src_ptr[1]
|
||||
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q10, d10, d3
|
||||
vmull.u8 q11, d31, d3 ;(src_ptr[1] * vp8_filter[3]); overwrites the 9th-line source in q11, which is no longer needed
|
||||
|
||||
add r3, r12, r3, lsl #5 ;second_pass filter location: table base + yoffset * 32 bytes
|
||||
|
||||
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q10
|
||||
vqadd.s16 q12, q11
|
||||
|
||||
vext.8 d23, d27, d28, #4 ;d23 = first-pass lines 1 and 2
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
|
||||
vqrshrun.s16 d29, q7, #7 ;shift/round/saturate to u8; d29 = first-pass lines 4 and 5
|
||||
vqrshrun.s16 d30, q8, #7 ;d30 = first-pass lines 6 and 7
|
||||
vqrshrun.s16 d31, q12, #7 ;low half of d31 = first-pass line 8
|
||||
|
||||
;Second pass: 4x4
|
||||
vabs.s32 q7, q5 ;abs of second_pass taps; negative taps are applied with vmlsl below
|
||||
vabs.s32 q8, q6
|
||||
|
||||
vext.8 d24, d28, d29, #4 ;d24 = lines 3 and 4
|
||||
vext.8 d25, d29, d30, #4 ;d25 = lines 5 and 6
|
||||
vext.8 d26, d30, d31, #4 ;d26 = lines 7 and 8
|
||||
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vdup.8 d1, d14[4]
|
||||
vdup.8 d2, d15[0]
|
||||
vdup.8 d3, d15[4]
|
||||
vdup.8 d4, d16[0]
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0]); q3/q5 build output lines 0-1, q4/q6 build output lines 2-3
|
||||
vmull.u8 q4, d28, d0
|
||||
|
||||
vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmull.u8 q6, d26, d5
|
||||
|
||||
vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d30, d4
|
||||
|
||||
vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q6, d24, d1
|
||||
|
||||
vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d29, d2
|
||||
|
||||
vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmlal.u8 q6, d25, d3
|
||||
|
||||
add r0, r4, lr ;r0, r1, r2 = addresses of output lines 1, 2, 3 (dst_ptr + n * dst_pitch)
|
||||
add r1, r0, lr
|
||||
add r2, r1, lr
|
||||
|
||||
vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q6, q4
|
||||
|
||||
vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d4, q6, #7
|
||||
|
||||
vst1.32 {d3[0]}, [r4] ;store result: 4 bytes per output line
|
||||
vst1.32 {d3[1]}, [r0]
|
||||
vst1.32 {d4[0]}, [r1]
|
||||
vst1.32 {d4[1]}, [r2]
|
||||
|
||||
pop {r4, pc} ;restore r4 and return
|
||||
|
||||
|
||||
;---------------------
|
||||
;Horizontal-only path of vp8_sixtap_predict_neon, taken when yoffset is 0: filters 4 source lines and writes them straight to dst.
;On entry q14/q15 hold the first_pass filter row, r4 = dst_ptr and lr = dst_pitch (all set up before the branch to this label).
firstpass_filter4x4_only
|
||||
vabs.s32 q12, q14 ;get abs(filter_parameters); the negative taps are applied with vmlsl below
|
||||
vabs.s32 q13, q15
|
||||
|
||||
sub r0, r0, #2 ;go back 2 columns of src data
|
||||
|
||||
;First pass: output_height lines x output_width columns (4x4)
|
||||
vld1.u8 {q3}, [r0], r1 ;load first 4-line src data
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5): low byte of each 32-bit tap broadcast to all 8 lanes
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vdup.8 d1, d24[4]
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vdup.8 d2, d25[0]
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
vext.8 d18, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d19, d8, d9, #5
|
||||
vext.8 d20, d10, d11, #5
|
||||
vext.8 d21, d12, d13, #5
|
||||
|
||||
vswp d7, d8 ;discard 2nd half data after src_ptr[3] is done
|
||||
vswp d11, d12 ;q3 and q5 now hold the first 8 bytes of two lines each
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[3])
|
||||
vzip.32 d20, d21
|
||||
vmull.u8 q7, d18, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmull.u8 q8, d20, d5
|
||||
|
||||
vmov q4, q3 ;keep original src data in q4 q6
|
||||
vmov q6, q5
|
||||
|
||||
vzip.32 d6, d7 ;construct src_ptr[-2], and put 2-line data together
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #8 ;construct src_ptr[-1]
|
||||
vshr.u64 q10, q6, #8
|
||||
vmlal.u8 q7, d6, d0 ;+(src_ptr[-2] * vp8_filter[0])
|
||||
vmlal.u8 q8, d10, d0
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[-1])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #32 ;construct src_ptr[2]
|
||||
vshr.u64 q5, q6, #32
|
||||
vmlsl.u8 q7, d18, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d20, d1
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[2])
|
||||
vzip.32 d10, d11
|
||||
vshr.u64 q9, q4, #16 ;construct src_ptr[0]
|
||||
vshr.u64 q10, q6, #16
|
||||
vmlsl.u8 q7, d6, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d10, d4
|
||||
|
||||
vzip.32 d18, d19 ;put 2-line data in 1 register (src_ptr[0])
|
||||
vzip.32 d20, d21
|
||||
vshr.u64 q3, q4, #24 ;construct src_ptr[1]
|
||||
vshr.u64 q5, q6, #24
|
||||
vmlal.u8 q7, d18, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d20, d2
|
||||
|
||||
vzip.32 d6, d7 ;put 2-line data in 1 register (src_ptr[1])
|
||||
vzip.32 d10, d11
|
||||
vmull.u8 q9, d6, d3 ;(src_ptr[1] * vp8_filter[3]), kept separate and merged with a saturating add
|
||||
vmull.u8 q10, d10, d3
|
||||
|
||||
add r0, r4, lr ;r0, r1, r2 = addresses of output lines 1, 2, 3 (dst_ptr + n * dst_pitch)
|
||||
add r1, r0, lr
|
||||
add r2, r1, lr
|
||||
|
||||
vqadd.s16 q7, q9 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q10
|
||||
|
||||
vqrshrun.s16 d27, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d28, q8, #7
|
||||
|
||||
vst1.32 {d27[0]}, [r4] ;store result: 4 bytes per output line
|
||||
vst1.32 {d27[1]}, [r0]
|
||||
vst1.32 {d28[0]}, [r1]
|
||||
vst1.32 {d28[1]}, [r2]
|
||||
|
||||
pop {r4, pc} ;restore r4 and return
|
||||
|
||||
|
||||
;---------------------
|
||||
;Vertical-only path of vp8_sixtap_predict_neon, taken when xoffset is 0: reads 9 source lines of 4 bytes and filters down the columns.
;On entry r12 = filter table base, r3 = yoffset, r4 = dst_ptr and lr = dst_pitch (all set up before the branch to this label).
secondpass_filter4x4_only
|
||||
sub r0, r0, r1, lsl #1 ;go back 2 lines of src data
|
||||
add r3, r12, r3, lsl #5 ;second_pass filter location: table base + yoffset * 32 bytes
|
||||
|
||||
vld1.32 {d27[0]}, [r0], r1 ;load src data: 4 bytes per line, two lines per d register
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
vld1.32 {d27[1]}, [r0], r1
|
||||
vabs.s32 q7, q5 ;abs of second_pass taps; negative taps are applied with vmlsl below
|
||||
vld1.32 {d28[0]}, [r0], r1
|
||||
vabs.s32 q8, q6
|
||||
vld1.32 {d28[1]}, [r0], r1
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vld1.32 {d29[0]}, [r0], r1
|
||||
vdup.8 d1, d14[4]
|
||||
vld1.32 {d29[1]}, [r0], r1
|
||||
vdup.8 d2, d15[0]
|
||||
vld1.32 {d30[0]}, [r0], r1
|
||||
vdup.8 d3, d15[4]
|
||||
vld1.32 {d30[1]}, [r0], r1
|
||||
vdup.8 d4, d16[0]
|
||||
vld1.32 {d31[0]}, [r0], r1 ;9th line; only the low half of d31 is loaded and used
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
vext.8 d23, d27, d28, #4 ;d23 = lines 1 and 2
|
||||
vext.8 d24, d28, d29, #4 ;d24 = lines 3 and 4
|
||||
vext.8 d25, d29, d30, #4 ;d25 = lines 5 and 6
|
||||
vext.8 d26, d30, d31, #4 ;d26 = lines 7 and 8
|
||||
|
||||
vmull.u8 q3, d27, d0 ;(src_ptr[-2] * vp8_filter[0]); q3/q5 build output lines 0-1, q4/q6 build output lines 2-3
|
||||
vmull.u8 q4, d28, d0
|
||||
|
||||
vmull.u8 q5, d25, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmull.u8 q6, d26, d5
|
||||
|
||||
vmlsl.u8 q3, d29, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d30, d4
|
||||
|
||||
vmlsl.u8 q5, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q6, d24, d1
|
||||
|
||||
vmlal.u8 q3, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d29, d2
|
||||
|
||||
vmlal.u8 q5, d24, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmlal.u8 q6, d25, d3
|
||||
|
||||
add r0, r4, lr ;r0, r1, r2 = addresses of output lines 1, 2, 3 (dst_ptr + n * dst_pitch)
|
||||
add r1, r0, lr
|
||||
add r2, r1, lr
|
||||
|
||||
vqadd.s16 q5, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q6, q4
|
||||
|
||||
vqrshrun.s16 d3, q5, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d4, q6, #7
|
||||
|
||||
vst1.32 {d3[0]}, [r4] ;store result: 4 bytes per output line
|
||||
vst1.32 {d3[1]}, [r0]
|
||||
vst1.32 {d4[0]}, [r1]
|
||||
vst1.32 {d4[1]}, [r2]
|
||||
|
||||
pop {r4, pc} ;restore r4 and return
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters4_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named subpelfilters4_dat: one pointer word followed by a table of 64 words (8 rows of 8 words).
|
||||
;Label _filter4_coeff_ marks the pointer word, which holds the address of the table; label filter4_coeff marks the table itself.
|
||||
;Each row is 32 bytes: six signed taps that sum to 128, then two zero words. Row addresses: filter4_coeff, filter4_coeff+32, filter4_coeff+64 ...
|
||||
_filter4_coeff_
|
||||
DCD filter4_coeff
|
||||
filter4_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,477 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x4_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; r4 unsigned char *dst_ptr,
|
||||
; stack(r5) int dst_pitch
|
||||
|
||||
|vp8_sixtap_predict8x4_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
ldr r12, _filter8_coeff_
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
beq secondpass_filter8x4_only
|
||||
|
||||
add r2, r12, r2, lsl #5 ;calculate filter location
|
||||
|
||||
cmp r3, #0 ;skip second_pass filter if yoffset=0
|
||||
|
||||
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
|
||||
|
||||
beq firstpass_filter8x4_only
|
||||
|
||||
sub sp, sp, #32 ;reserve space on stack for temporary storage
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
|
||||
mov lr, sp
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vdup.8 d1, d24[4]
|
||||
vdup.8 d2, d25[0]
|
||||
|
||||
;First pass: output_height lines x output_width columns (9x8)
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
vdup.8 d3, d25[4]
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vdup.8 d4, d26[0]
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vdup.8 d5, d26[4]
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q8, d8, d0
|
||||
vmull.u8 q9, d10, d0
|
||||
vmull.u8 q10, d12, d0
|
||||
|
||||
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d29, d8, d9, #1
|
||||
vext.8 d30, d10, d11, #1
|
||||
vext.8 d31, d12, d13, #1
|
||||
|
||||
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d29, d1
|
||||
vmlsl.u8 q9, d30, d1
|
||||
vmlsl.u8 q10, d31, d1
|
||||
|
||||
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d29, d8, d9, #4
|
||||
vext.8 d30, d10, d11, #4
|
||||
vext.8 d31, d12, d13, #4
|
||||
|
||||
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d29, d4
|
||||
vmlsl.u8 q9, d30, d4
|
||||
vmlsl.u8 q10, d31, d4
|
||||
|
||||
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d29, d8, d9, #2
|
||||
vext.8 d30, d10, d11, #2
|
||||
vext.8 d31, d12, d13, #2
|
||||
|
||||
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d29, d2
|
||||
vmlal.u8 q9, d30, d2
|
||||
vmlal.u8 q10, d31, d2
|
||||
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d8, d9, #5
|
||||
vext.8 d30, d10, d11, #5
|
||||
vext.8 d31, d12, d13, #5
|
||||
|
||||
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q8, d29, d5
|
||||
vmlal.u8 q9, d30, d5
|
||||
vmlal.u8 q10, d31, d5
|
||||
|
||||
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d29, d8, d9, #3
|
||||
vext.8 d30, d10, d11, #3
|
||||
vext.8 d31, d12, d13, #3
|
||||
|
||||
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d29, d3
|
||||
vmull.u8 q5, d30, d3
|
||||
vmull.u8 q6, d31, d3
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
|
||||
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d23, q8, #7
|
||||
vqrshrun.s16 d24, q9, #7
|
||||
vqrshrun.s16 d25, q10, #7
|
||||
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vst1.u8 {d22}, [lr]! ;store result
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vst1.u8 {d23}, [lr]!
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
vst1.u8 {d24}, [lr]!
|
||||
vld1.u8 {q7}, [r0], r1
|
||||
vst1.u8 {d25}, [lr]!
|
||||
|
||||
;first_pass filtering on the rest 5-line data
|
||||
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q9, d8, d0
|
||||
vmull.u8 q10, d10, d0
|
||||
vmull.u8 q11, d12, d0
|
||||
vmull.u8 q12, d14, d0
|
||||
|
||||
vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d28, d8, d9, #1
|
||||
vext.8 d29, d10, d11, #1
|
||||
vext.8 d30, d12, d13, #1
|
||||
vext.8 d31, d14, d15, #1
|
||||
|
||||
vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q9, d28, d1
|
||||
vmlsl.u8 q10, d29, d1
|
||||
vmlsl.u8 q11, d30, d1
|
||||
vmlsl.u8 q12, d31, d1
|
||||
|
||||
vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d28, d8, d9, #4
|
||||
vext.8 d29, d10, d11, #4
|
||||
vext.8 d30, d12, d13, #4
|
||||
vext.8 d31, d14, d15, #4
|
||||
|
||||
vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q9, d28, d4
|
||||
vmlsl.u8 q10, d29, d4
|
||||
vmlsl.u8 q11, d30, d4
|
||||
vmlsl.u8 q12, d31, d4
|
||||
|
||||
vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d28, d8, d9, #2
|
||||
vext.8 d29, d10, d11, #2
|
||||
vext.8 d30, d12, d13, #2
|
||||
vext.8 d31, d14, d15, #2
|
||||
|
||||
vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q9, d28, d2
|
||||
vmlal.u8 q10, d29, d2
|
||||
vmlal.u8 q11, d30, d2
|
||||
vmlal.u8 q12, d31, d2
|
||||
|
||||
vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d28, d8, d9, #5
|
||||
vext.8 d29, d10, d11, #5
|
||||
vext.8 d30, d12, d13, #5
|
||||
vext.8 d31, d14, d15, #5
|
||||
|
||||
vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q9, d28, d5
|
||||
vmlal.u8 q10, d29, d5
|
||||
vmlal.u8 q11, d30, d5
|
||||
vmlal.u8 q12, d31, d5
|
||||
|
||||
vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d28, d8, d9, #3
|
||||
vext.8 d29, d10, d11, #3
|
||||
vext.8 d30, d12, d13, #3
|
||||
vext.8 d31, d14, d15, #3
|
||||
|
||||
vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d28, d3
|
||||
vmull.u8 q5, d29, d3
|
||||
vmull.u8 q6, d30, d3
|
||||
vmull.u8 q7, d31, d3
|
||||
|
||||
vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q9, q4
|
||||
vqadd.s16 q10, q5
|
||||
vqadd.s16 q11, q6
|
||||
vqadd.s16 q12, q7
|
||||
|
||||
vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d27, q9, #7
|
||||
vqrshrun.s16 d28, q10, #7
|
||||
vqrshrun.s16 d29, q11, #7 ;load intermediate data from stack
|
||||
vqrshrun.s16 d30, q12, #7
|
||||
|
||||
;Second pass: 8x4
|
||||
;secondpass_filter
|
||||
add r3, r12, r3, lsl #5
|
||||
sub lr, lr, #32
|
||||
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
vld1.u8 {q11}, [lr]!
|
||||
|
||||
vabs.s32 q7, q5
|
||||
vabs.s32 q8, q6
|
||||
|
||||
vld1.u8 {q12}, [lr]!
|
||||
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vdup.8 d1, d14[4]
|
||||
vdup.8 d2, d15[0]
|
||||
vdup.8 d3, d15[4]
|
||||
vdup.8 d4, d16[0]
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d23, d0
|
||||
vmull.u8 q5, d24, d0
|
||||
vmull.u8 q6, d25, d0
|
||||
|
||||
vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d24, d1
|
||||
vmlsl.u8 q5, d25, d1
|
||||
vmlsl.u8 q6, d26, d1
|
||||
|
||||
vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d27, d4
|
||||
vmlsl.u8 q5, d28, d4
|
||||
vmlsl.u8 q6, d29, d4
|
||||
|
||||
vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d25, d2
|
||||
vmlal.u8 q5, d26, d2
|
||||
vmlal.u8 q6, d27, d2
|
||||
|
||||
vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d28, d5
|
||||
vmlal.u8 q5, d29, d5
|
||||
vmlal.u8 q6, d30, d5
|
||||
|
||||
vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d26, d3
|
||||
vmull.u8 q9, d27, d3
|
||||
vmull.u8 q10, d28, d3
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
|
||||
add sp, sp, #32
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;--------------------
|
||||
firstpass_filter8x4_only
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vdup.8 d1, d24[4]
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vdup.8 d2, d25[0]
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
;First pass: output_height lines x output_width columns (4x8)
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q8, d8, d0
|
||||
vmull.u8 q9, d10, d0
|
||||
vmull.u8 q10, d12, d0
|
||||
|
||||
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d29, d8, d9, #1
|
||||
vext.8 d30, d10, d11, #1
|
||||
vext.8 d31, d12, d13, #1
|
||||
|
||||
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d29, d1
|
||||
vmlsl.u8 q9, d30, d1
|
||||
vmlsl.u8 q10, d31, d1
|
||||
|
||||
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d29, d8, d9, #4
|
||||
vext.8 d30, d10, d11, #4
|
||||
vext.8 d31, d12, d13, #4
|
||||
|
||||
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d29, d4
|
||||
vmlsl.u8 q9, d30, d4
|
||||
vmlsl.u8 q10, d31, d4
|
||||
|
||||
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d29, d8, d9, #2
|
||||
vext.8 d30, d10, d11, #2
|
||||
vext.8 d31, d12, d13, #2
|
||||
|
||||
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d29, d2
|
||||
vmlal.u8 q9, d30, d2
|
||||
vmlal.u8 q10, d31, d2
|
||||
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d8, d9, #5
|
||||
vext.8 d30, d10, d11, #5
|
||||
vext.8 d31, d12, d13, #5
|
||||
|
||||
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q8, d29, d5
|
||||
vmlal.u8 q9, d30, d5
|
||||
vmlal.u8 q10, d31, d5
|
||||
|
||||
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d29, d8, d9, #3
|
||||
vext.8 d30, d10, d11, #3
|
||||
vext.8 d31, d12, d13, #3
|
||||
|
||||
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d29, d3
|
||||
vmull.u8 q5, d30, d3
|
||||
vmull.u8 q6, d31, d3
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d23, q8, #7
|
||||
vqrshrun.s16 d24, q9, #7
|
||||
vqrshrun.s16 d25, q10, #7
|
||||
|
||||
vst1.u8 {d22}, [r4], r5 ;store result
|
||||
vst1.u8 {d23}, [r4], r5
|
||||
vst1.u8 {d24}, [r4], r5
|
||||
vst1.u8 {d25}, [r4], r5
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;---------------------
|
||||
secondpass_filter8x4_only
|
||||
;Second pass: 8x4
|
||||
add r3, r12, r3, lsl #5
|
||||
sub r0, r0, r1, lsl #1
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
vabs.s32 q7, q5
|
||||
vabs.s32 q8, q6
|
||||
|
||||
vld1.u8 {d22}, [r0], r1
|
||||
vld1.u8 {d23}, [r0], r1
|
||||
vld1.u8 {d24}, [r0], r1
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vld1.u8 {d25}, [r0], r1
|
||||
vdup.8 d1, d14[4]
|
||||
vld1.u8 {d26}, [r0], r1
|
||||
vdup.8 d2, d15[0]
|
||||
vld1.u8 {d27}, [r0], r1
|
||||
vdup.8 d3, d15[4]
|
||||
vld1.u8 {d28}, [r0], r1
|
||||
vdup.8 d4, d16[0]
|
||||
vld1.u8 {d29}, [r0], r1
|
||||
vdup.8 d5, d16[4]
|
||||
vld1.u8 {d30}, [r0], r1
|
||||
|
||||
vmull.u8 q3, d22, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d23, d0
|
||||
vmull.u8 q5, d24, d0
|
||||
vmull.u8 q6, d25, d0
|
||||
|
||||
vmlsl.u8 q3, d23, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d24, d1
|
||||
vmlsl.u8 q5, d25, d1
|
||||
vmlsl.u8 q6, d26, d1
|
||||
|
||||
vmlsl.u8 q3, d26, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d27, d4
|
||||
vmlsl.u8 q5, d28, d4
|
||||
vmlsl.u8 q6, d29, d4
|
||||
|
||||
vmlal.u8 q3, d24, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d25, d2
|
||||
vmlal.u8 q5, d26, d2
|
||||
vmlal.u8 q6, d27, d2
|
||||
|
||||
vmlal.u8 q3, d27, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d28, d5
|
||||
vmlal.u8 q5, d29, d5
|
||||
vmlal.u8 q6, d30, d5
|
||||
|
||||
vmull.u8 q7, d25, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d26, d3
|
||||
vmull.u8 q9, d27, d3
|
||||
vmull.u8 q10, d28, d3
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named subpelfilters8_dat. The DCD directives below reserve one pointer word plus 64 filter words (8 filters x 8 words).
|
||||
;Each value occupies one word. Label filter8_coeff can be used to access the data.
|
||||
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,528 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x8_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack(r4) unsigned char *dst_ptr,
|
||||
; stack(r5) int dst_pitch
|
||||
|
||||
|vp8_sixtap_predict8x8_neon| PROC
|
||||
push {r4-r5, lr}
|
||||
|
||||
ldr r12, _filter8_coeff_
|
||||
|
||||
ldr r4, [sp, #12] ;load parameters from stack
|
||||
ldr r5, [sp, #16] ;load parameters from stack
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
beq secondpass_filter8x8_only
|
||||
|
||||
add r2, r12, r2, lsl #5 ;calculate filter location
|
||||
|
||||
cmp r3, #0 ;skip second_pass filter if yoffset=0
|
||||
|
||||
vld1.s32 {q14, q15}, [r2] ;load first_pass filter
|
||||
|
||||
beq firstpass_filter8x8_only
|
||||
|
||||
sub sp, sp, #64 ;reserve space on stack for temporary storage
|
||||
mov lr, sp
|
||||
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
mov r2, #2 ;loop counter
|
||||
sub r0, r0, #2 ;move srcptr back to (line-2) and (column-2)
|
||||
sub r0, r0, r1, lsl #1
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vdup.8 d1, d24[4]
|
||||
vdup.8 d2, d25[0]
|
||||
|
||||
;First pass: output_height lines x output_width columns (13x8)
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
vdup.8 d3, d25[4]
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vdup.8 d4, d26[0]
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vdup.8 d5, d26[4]
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
|
||||
filt_blk2d_fp8x8_loop_neon
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q8, d8, d0
|
||||
vmull.u8 q9, d10, d0
|
||||
vmull.u8 q10, d12, d0
|
||||
|
||||
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d29, d8, d9, #1
|
||||
vext.8 d30, d10, d11, #1
|
||||
vext.8 d31, d12, d13, #1
|
||||
|
||||
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d29, d1
|
||||
vmlsl.u8 q9, d30, d1
|
||||
vmlsl.u8 q10, d31, d1
|
||||
|
||||
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d29, d8, d9, #4
|
||||
vext.8 d30, d10, d11, #4
|
||||
vext.8 d31, d12, d13, #4
|
||||
|
||||
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d29, d4
|
||||
vmlsl.u8 q9, d30, d4
|
||||
vmlsl.u8 q10, d31, d4
|
||||
|
||||
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d29, d8, d9, #2
|
||||
vext.8 d30, d10, d11, #2
|
||||
vext.8 d31, d12, d13, #2
|
||||
|
||||
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d29, d2
|
||||
vmlal.u8 q9, d30, d2
|
||||
vmlal.u8 q10, d31, d2
|
||||
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d8, d9, #5
|
||||
vext.8 d30, d10, d11, #5
|
||||
vext.8 d31, d12, d13, #5
|
||||
|
||||
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q8, d29, d5
|
||||
vmlal.u8 q9, d30, d5
|
||||
vmlal.u8 q10, d31, d5
|
||||
|
||||
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d29, d8, d9, #3
|
||||
vext.8 d30, d10, d11, #3
|
||||
vext.8 d31, d12, d13, #3
|
||||
|
||||
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d29, d3
|
||||
vmull.u8 q5, d30, d3
|
||||
vmull.u8 q6, d31, d3
|
||||
|
||||
subs r2, r2, #1
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
|
||||
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d23, q8, #7
|
||||
vqrshrun.s16 d24, q9, #7
|
||||
vqrshrun.s16 d25, q10, #7
|
||||
|
||||
vst1.u8 {d22}, [lr]! ;store result
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vst1.u8 {d23}, [lr]!
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vst1.u8 {d24}, [lr]!
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
vst1.u8 {d25}, [lr]!
|
||||
|
||||
bne filt_blk2d_fp8x8_loop_neon
|
||||
|
||||
;first_pass filtering on the remaining 5 lines of data
|
||||
;vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
;vld1.u8 {q4}, [r0], r1
|
||||
;vld1.u8 {q5}, [r0], r1
|
||||
;vld1.u8 {q6}, [r0], r1
|
||||
vld1.u8 {q7}, [r0], r1
|
||||
|
||||
vmull.u8 q8, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q9, d8, d0
|
||||
vmull.u8 q10, d10, d0
|
||||
vmull.u8 q11, d12, d0
|
||||
vmull.u8 q12, d14, d0
|
||||
|
||||
vext.8 d27, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d28, d8, d9, #1
|
||||
vext.8 d29, d10, d11, #1
|
||||
vext.8 d30, d12, d13, #1
|
||||
vext.8 d31, d14, d15, #1
|
||||
|
||||
vmlsl.u8 q8, d27, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q9, d28, d1
|
||||
vmlsl.u8 q10, d29, d1
|
||||
vmlsl.u8 q11, d30, d1
|
||||
vmlsl.u8 q12, d31, d1
|
||||
|
||||
vext.8 d27, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d28, d8, d9, #4
|
||||
vext.8 d29, d10, d11, #4
|
||||
vext.8 d30, d12, d13, #4
|
||||
vext.8 d31, d14, d15, #4
|
||||
|
||||
vmlsl.u8 q8, d27, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q9, d28, d4
|
||||
vmlsl.u8 q10, d29, d4
|
||||
vmlsl.u8 q11, d30, d4
|
||||
vmlsl.u8 q12, d31, d4
|
||||
|
||||
vext.8 d27, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d28, d8, d9, #2
|
||||
vext.8 d29, d10, d11, #2
|
||||
vext.8 d30, d12, d13, #2
|
||||
vext.8 d31, d14, d15, #2
|
||||
|
||||
vmlal.u8 q8, d27, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q9, d28, d2
|
||||
vmlal.u8 q10, d29, d2
|
||||
vmlal.u8 q11, d30, d2
|
||||
vmlal.u8 q12, d31, d2
|
||||
|
||||
vext.8 d27, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d28, d8, d9, #5
|
||||
vext.8 d29, d10, d11, #5
|
||||
vext.8 d30, d12, d13, #5
|
||||
vext.8 d31, d14, d15, #5
|
||||
|
||||
vmlal.u8 q8, d27, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q9, d28, d5
|
||||
vmlal.u8 q10, d29, d5
|
||||
vmlal.u8 q11, d30, d5
|
||||
vmlal.u8 q12, d31, d5
|
||||
|
||||
vext.8 d27, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d28, d8, d9, #3
|
||||
vext.8 d29, d10, d11, #3
|
||||
vext.8 d30, d12, d13, #3
|
||||
vext.8 d31, d14, d15, #3
|
||||
|
||||
vmull.u8 q3, d27, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d28, d3
|
||||
vmull.u8 q5, d29, d3
|
||||
vmull.u8 q6, d30, d3
|
||||
vmull.u8 q7, d31, d3
|
||||
|
||||
vqadd.s16 q8, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q9, q4
|
||||
vqadd.s16 q10, q5
|
||||
vqadd.s16 q11, q6
|
||||
vqadd.s16 q12, q7
|
||||
|
||||
add r3, r12, r3, lsl #5
|
||||
|
||||
vqrshrun.s16 d26, q8, #7 ;shift/round/saturate to u8
|
||||
sub lr, lr, #64
|
||||
vqrshrun.s16 d27, q9, #7
|
||||
vld1.u8 {q9}, [lr]! ;load intermediate data from stack
|
||||
vqrshrun.s16 d28, q10, #7
|
||||
vld1.u8 {q10}, [lr]!
|
||||
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
|
||||
vqrshrun.s16 d29, q11, #7
|
||||
vld1.u8 {q11}, [lr]!
|
||||
|
||||
vabs.s32 q7, q5
|
||||
vabs.s32 q8, q6
|
||||
|
||||
vqrshrun.s16 d30, q12, #7
|
||||
vld1.u8 {q12}, [lr]!
|
||||
|
||||
;Second pass: 8x8
|
||||
mov r3, #2 ;loop counter
|
||||
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vdup.8 d1, d14[4]
|
||||
vdup.8 d2, d15[0]
|
||||
vdup.8 d3, d15[4]
|
||||
vdup.8 d4, d16[0]
|
||||
vdup.8 d5, d16[4]
|
||||
|
||||
filt_blk2d_sp8x8_loop_neon
|
||||
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d19, d0
|
||||
vmull.u8 q5, d20, d0
|
||||
vmull.u8 q6, d21, d0
|
||||
|
||||
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d20, d1
|
||||
vmlsl.u8 q5, d21, d1
|
||||
vmlsl.u8 q6, d22, d1
|
||||
|
||||
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d23, d4
|
||||
vmlsl.u8 q5, d24, d4
|
||||
vmlsl.u8 q6, d25, d4
|
||||
|
||||
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d21, d2
|
||||
vmlal.u8 q5, d22, d2
|
||||
vmlal.u8 q6, d23, d2
|
||||
|
||||
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d24, d5
|
||||
vmlal.u8 q5, d25, d5
|
||||
vmlal.u8 q6, d26, d5
|
||||
|
||||
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d22, d3
|
||||
vmull.u8 q9, d23, d3
|
||||
vmull.u8 q10, d24, d3
|
||||
|
||||
subs r3, r3, #1
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vmov q9, q11
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vmov q10, q12
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vmov q11, q13
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vmov q12, q14
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
vmov d26, d30
|
||||
|
||||
bne filt_blk2d_sp8x8_loop_neon
|
||||
|
||||
add sp, sp, #64
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;---------------------
|
||||
firstpass_filter8x8_only
|
||||
;add r2, r12, r2, lsl #5 ;calculate filter location
|
||||
;vld1.s32 {q14, q15}, [r2] ;load first_pass filter
|
||||
vabs.s32 q12, q14
|
||||
vabs.s32 q13, q15
|
||||
|
||||
mov r2, #2 ;loop counter
|
||||
sub r0, r0, #2 ;move srcptr back to (column-2); first-pass-only path applies no row offset
|
||||
|
||||
vdup.8 d0, d24[0] ;first_pass filter (d0-d5)
|
||||
vdup.8 d1, d24[4]
|
||||
vdup.8 d2, d25[0]
|
||||
vdup.8 d3, d25[4]
|
||||
vdup.8 d4, d26[0]
|
||||
vdup.8 d5, d26[4]
|
||||
|
||||
;First pass: output_height lines x output_width columns (8x8)
|
||||
filt_blk2d_fpo8x8_loop_neon
|
||||
vld1.u8 {q3}, [r0], r1 ;load src data
|
||||
vld1.u8 {q4}, [r0], r1
|
||||
vld1.u8 {q5}, [r0], r1
|
||||
vld1.u8 {q6}, [r0], r1
|
||||
|
||||
pld [r0]
|
||||
pld [r0, r1]
|
||||
pld [r0, r1, lsl #1]
|
||||
|
||||
vmull.u8 q7, d6, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q8, d8, d0
|
||||
vmull.u8 q9, d10, d0
|
||||
vmull.u8 q10, d12, d0
|
||||
|
||||
vext.8 d28, d6, d7, #1 ;construct src_ptr[-1]
|
||||
vext.8 d29, d8, d9, #1
|
||||
vext.8 d30, d10, d11, #1
|
||||
vext.8 d31, d12, d13, #1
|
||||
|
||||
vmlsl.u8 q7, d28, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q8, d29, d1
|
||||
vmlsl.u8 q9, d30, d1
|
||||
vmlsl.u8 q10, d31, d1
|
||||
|
||||
vext.8 d28, d6, d7, #4 ;construct src_ptr[2]
|
||||
vext.8 d29, d8, d9, #4
|
||||
vext.8 d30, d10, d11, #4
|
||||
vext.8 d31, d12, d13, #4
|
||||
|
||||
vmlsl.u8 q7, d28, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q8, d29, d4
|
||||
vmlsl.u8 q9, d30, d4
|
||||
vmlsl.u8 q10, d31, d4
|
||||
|
||||
vext.8 d28, d6, d7, #2 ;construct src_ptr[0]
|
||||
vext.8 d29, d8, d9, #2
|
||||
vext.8 d30, d10, d11, #2
|
||||
vext.8 d31, d12, d13, #2
|
||||
|
||||
vmlal.u8 q7, d28, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q8, d29, d2
|
||||
vmlal.u8 q9, d30, d2
|
||||
vmlal.u8 q10, d31, d2
|
||||
|
||||
vext.8 d28, d6, d7, #5 ;construct src_ptr[3]
|
||||
vext.8 d29, d8, d9, #5
|
||||
vext.8 d30, d10, d11, #5
|
||||
vext.8 d31, d12, d13, #5
|
||||
|
||||
vmlal.u8 q7, d28, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q8, d29, d5
|
||||
vmlal.u8 q9, d30, d5
|
||||
vmlal.u8 q10, d31, d5
|
||||
|
||||
vext.8 d28, d6, d7, #3 ;construct src_ptr[1]
|
||||
vext.8 d29, d8, d9, #3
|
||||
vext.8 d30, d10, d11, #3
|
||||
vext.8 d31, d12, d13, #3
|
||||
|
||||
vmull.u8 q3, d28, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q4, d29, d3
|
||||
vmull.u8 q5, d30, d3
|
||||
vmull.u8 q6, d31, d3
|
||||
;
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
subs r2, r2, #1
|
||||
|
||||
vqrshrun.s16 d22, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d23, q8, #7
|
||||
vqrshrun.s16 d24, q9, #7
|
||||
vqrshrun.s16 d25, q10, #7
|
||||
|
||||
vst1.u8 {d22}, [r4], r5 ;store result
|
||||
vst1.u8 {d23}, [r4], r5
|
||||
vst1.u8 {d24}, [r4], r5
|
||||
vst1.u8 {d25}, [r4], r5
|
||||
|
||||
bne filt_blk2d_fpo8x8_loop_neon
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
;---------------------
|
||||
secondpass_filter8x8_only
|
||||
sub r0, r0, r1, lsl #1
|
||||
add r3, r12, r3, lsl #5
|
||||
|
||||
vld1.u8 {d18}, [r0], r1 ;load src data
|
||||
vld1.s32 {q5, q6}, [r3] ;load second_pass filter
|
||||
vld1.u8 {d19}, [r0], r1
|
||||
vabs.s32 q7, q5
|
||||
vld1.u8 {d20}, [r0], r1
|
||||
vabs.s32 q8, q6
|
||||
vld1.u8 {d21}, [r0], r1
|
||||
mov r3, #2 ;loop counter
|
||||
vld1.u8 {d22}, [r0], r1
|
||||
vdup.8 d0, d14[0] ;second_pass filter parameters (d0-d5)
|
||||
vld1.u8 {d23}, [r0], r1
|
||||
vdup.8 d1, d14[4]
|
||||
vld1.u8 {d24}, [r0], r1
|
||||
vdup.8 d2, d15[0]
|
||||
vld1.u8 {d25}, [r0], r1
|
||||
vdup.8 d3, d15[4]
|
||||
vld1.u8 {d26}, [r0], r1
|
||||
vdup.8 d4, d16[0]
|
||||
vld1.u8 {d27}, [r0], r1
|
||||
vdup.8 d5, d16[4]
|
||||
vld1.u8 {d28}, [r0], r1
|
||||
vld1.u8 {d29}, [r0], r1
|
||||
vld1.u8 {d30}, [r0], r1
|
||||
|
||||
;Second pass: 8x8
|
||||
filt_blk2d_spo8x8_loop_neon
|
||||
vmull.u8 q3, d18, d0 ;(src_ptr[-2] * vp8_filter[0])
|
||||
vmull.u8 q4, d19, d0
|
||||
vmull.u8 q5, d20, d0
|
||||
vmull.u8 q6, d21, d0
|
||||
|
||||
vmlsl.u8 q3, d19, d1 ;-(src_ptr[-1] * vp8_filter[1])
|
||||
vmlsl.u8 q4, d20, d1
|
||||
vmlsl.u8 q5, d21, d1
|
||||
vmlsl.u8 q6, d22, d1
|
||||
|
||||
vmlsl.u8 q3, d22, d4 ;-(src_ptr[2] * vp8_filter[4])
|
||||
vmlsl.u8 q4, d23, d4
|
||||
vmlsl.u8 q5, d24, d4
|
||||
vmlsl.u8 q6, d25, d4
|
||||
|
||||
vmlal.u8 q3, d20, d2 ;(src_ptr[0] * vp8_filter[2])
|
||||
vmlal.u8 q4, d21, d2
|
||||
vmlal.u8 q5, d22, d2
|
||||
vmlal.u8 q6, d23, d2
|
||||
|
||||
vmlal.u8 q3, d23, d5 ;(src_ptr[3] * vp8_filter[5])
|
||||
vmlal.u8 q4, d24, d5
|
||||
vmlal.u8 q5, d25, d5
|
||||
vmlal.u8 q6, d26, d5
|
||||
|
||||
vmull.u8 q7, d21, d3 ;(src_ptr[1] * vp8_filter[3])
|
||||
vmull.u8 q8, d22, d3
|
||||
vmull.u8 q9, d23, d3
|
||||
vmull.u8 q10, d24, d3
|
||||
|
||||
subs r3, r3, #1
|
||||
|
||||
vqadd.s16 q7, q3 ;sum of all (src_data*filter_parameters)
|
||||
vqadd.s16 q8, q4
|
||||
vqadd.s16 q9, q5
|
||||
vqadd.s16 q10, q6
|
||||
|
||||
vqrshrun.s16 d6, q7, #7 ;shift/round/saturate to u8
|
||||
vqrshrun.s16 d7, q8, #7
|
||||
vqrshrun.s16 d8, q9, #7
|
||||
vqrshrun.s16 d9, q10, #7
|
||||
|
||||
vmov q9, q11
|
||||
vst1.u8 {d6}, [r4], r5 ;store result
|
||||
vmov q10, q12
|
||||
vst1.u8 {d7}, [r4], r5
|
||||
vmov q11, q13
|
||||
vst1.u8 {d8}, [r4], r5
|
||||
vmov q12, q14
|
||||
vst1.u8 {d9}, [r4], r5
|
||||
vmov d26, d30
|
||||
|
||||
bne filt_blk2d_spo8x8_loop_neon
|
||||
|
||||
pop {r4-r5,pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
AREA subpelfilters8_dat, DATA, READWRITE ;read/write by default
|
||||
;Data section named subpelfilters8_dat. The DCD directives below reserve one pointer word plus 64 filter words (8 filters x 8 words).
|
||||
;Each value occupies one word. Label filter8_coeff can be used to access the data.
|
||||
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
|
||||
_filter8_coeff_
|
||||
DCD filter8_coeff
|
||||
filter8_coeff
|
||||
DCD 0, 0, 128, 0, 0, 0, 0, 0
|
||||
DCD 0, -6, 123, 12, -1, 0, 0, 0
|
||||
DCD 2, -11, 108, 36, -8, 1, 0, 0
|
||||
DCD 0, -9, 93, 50, -6, 0, 0, 0
|
||||
DCD 3, -16, 77, 77, -16, 3, 0, 0
|
||||
DCD 0, -6, 50, 93, -9, 0, 0, 0
|
||||
DCD 1, -8, 36, 108, -11, 2, 0, 0
|
||||
DCD 0, -1, 12, 123, -6, 0, 0, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef RECON_ARM_H
|
||||
#define RECON_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/* ARMv6 reconstruction routines, declared through the prototype_recon_block
 * macro (the macro itself is defined outside this header). */
extern prototype_recon_block(vp8_recon_b_armv6);
|
||||
extern prototype_recon_block(vp8_recon2b_armv6);
|
||||
extern prototype_recon_block(vp8_recon4b_armv6);
|
||||
|
||||
/* ARMv6 block-copy routines (8x8, 8x4 and 16x16), declared through
 * prototype_copy_block. */
extern prototype_copy_block(vp8_copy_mem8x8_v6);
|
||||
extern prototype_copy_block(vp8_copy_mem8x4_v6);
|
||||
extern prototype_copy_block(vp8_copy_mem16x16_v6);
|
||||
|
||||
/* When runtime CPU detection is disabled, rebind each vp8_recon_* name
 * directly to its ARMv6 routine; any earlier definition is dropped first. */
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon
|
||||
#define vp8_recon_recon vp8_recon_b_armv6
|
||||
|
||||
#undef vp8_recon_recon2
|
||||
#define vp8_recon_recon2 vp8_recon2b_armv6
|
||||
|
||||
#undef vp8_recon_recon4
|
||||
#define vp8_recon_recon4 vp8_recon4b_armv6
|
||||
|
||||
#undef vp8_recon_copy8x8
|
||||
#define vp8_recon_copy8x8 vp8_copy_mem8x8_v6
|
||||
|
||||
#undef vp8_recon_copy8x4
|
||||
#define vp8_recon_copy8x4 vp8_copy_mem8x4_v6
|
||||
|
||||
#undef vp8_recon_copy16x16
|
||||
#define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
|
||||
/* closes !CONFIG_RUNTIME_CPU_DETECT */
#endif
|
||||
/* closes HAVE_ARMV6 */
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
/* NEON reconstruction routines, declared through the prototype_recon_block
 * macro (the macro itself is defined outside this header). */
extern prototype_recon_block(vp8_recon_b_neon);
|
||||
extern prototype_recon_block(vp8_recon2b_neon);
|
||||
extern prototype_recon_block(vp8_recon4b_neon);
|
||||
|
||||
/* NEON block-copy routines (8x8, 8x4 and 16x16). */
extern prototype_copy_block(vp8_copy_mem8x8_neon);
|
||||
extern prototype_copy_block(vp8_copy_mem8x4_neon);
|
||||
extern prototype_copy_block(vp8_copy_mem16x16_neon);
|
||||
|
||||
/* Whole-macroblock reconstruction; the ARMv6 section of this header declares
 * no counterpart for it. */
extern prototype_recon_macroblock(vp8_recon_mb_neon);
|
||||
|
||||
/* When runtime CPU detection is disabled, rebind each vp8_recon_* name
 * directly to its NEON routine. This section comes after the ARMv6 one, so
 * with both HAVE_ARMV6 and HAVE_ARMV7 set these definitions are the ones
 * left in effect. */
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_recon_recon
|
||||
#define vp8_recon_recon vp8_recon_b_neon
|
||||
|
||||
#undef vp8_recon_recon2
|
||||
#define vp8_recon_recon2 vp8_recon2b_neon
|
||||
|
||||
#undef vp8_recon_recon4
|
||||
#define vp8_recon_recon4 vp8_recon4b_neon
|
||||
|
||||
#undef vp8_recon_copy8x8
|
||||
#define vp8_recon_copy8x8 vp8_copy_mem8x8_neon
|
||||
|
||||
#undef vp8_recon_copy8x4
|
||||
#define vp8_recon_copy8x4 vp8_copy_mem8x4_neon
|
||||
|
||||
#undef vp8_recon_copy16x16
|
||||
#define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
|
||||
|
||||
#undef vp8_recon_recon_mb
|
||||
#define vp8_recon_recon_mb vp8_recon_mb_neon
|
||||
/* closes !CONFIG_RUNTIME_CPU_DETECT */
#endif
|
||||
/* closes HAVE_ARMV7 */
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include "blockd.h"
|
||||
#include "reconintra.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "recon.h"
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern void vp8_build_intra_predictors_mby_neon_func(
|
||||
unsigned char *y_buffer,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride,
|
||||
int mode,
|
||||
int Up,
|
||||
int Left);
|
||||
|
||||
void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *y_buffer = x->dst.y_buffer;
|
||||
unsigned char *ypred_ptr = x->predictor;
|
||||
int y_stride = x->dst.y_stride;
|
||||
int mode = x->mode_info_context->mbmi.mode;
|
||||
int Up = x->up_available;
|
||||
int Left = x->left_available;
|
||||
|
||||
vp8_build_intra_predictors_mby_neon_func(y_buffer, ypred_ptr, y_stride, mode, Up, Left);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_ARMV7
|
||||
extern void vp8_build_intra_predictors_mby_s_neon_func(
|
||||
unsigned char *y_buffer,
|
||||
unsigned char *ypred_ptr,
|
||||
int y_stride,
|
||||
int mode,
|
||||
int Up,
|
||||
int Left);
|
||||
|
||||
void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x)
|
||||
{
|
||||
unsigned char *y_buffer = x->dst.y_buffer;
|
||||
unsigned char *ypred_ptr = x->predictor;
|
||||
int y_stride = x->dst.y_stride;
|
||||
int mode = x->mode_info_context->mbmi.mode;
|
||||
int Up = x->up_available;
|
||||
int Left = x->left_available;
|
||||
|
||||
vp8_build_intra_predictors_mby_s_neon_func(y_buffer, ypred_ptr, y_stride, mode, Up, Left);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,89 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef SUBPIXEL_ARM_H
|
||||
#define SUBPIXEL_ARM_H
|
||||
|
||||
#if HAVE_ARMV6
|
||||
/* ARMv6 six-tap sub-pixel predictors, declared through the
 * prototype_subpixel_predict macro (defined outside this header). The routine
 * bound to the 4x4 slot below, vp8_sixtap_predict_armv6, has no size suffix. */
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_armv6);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_armv6);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_armv6);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict_armv6);
|
||||
/* ARMv6 bilinear sub-pixel predictors. */
extern prototype_subpixel_predict(vp8_bilinear_predict16x16_armv6);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
|
||||
|
||||
/* When runtime CPU detection is disabled, rebind each vp8_subpix_* name
 * directly to its ARMv6 routine; any earlier definition is dropped first. */
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_subpix_sixtap16x16
|
||||
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
|
||||
|
||||
#undef vp8_subpix_sixtap8x8
|
||||
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_armv6
|
||||
|
||||
#undef vp8_subpix_sixtap8x4
|
||||
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_armv6
|
||||
|
||||
#undef vp8_subpix_sixtap4x4
|
||||
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict_armv6
|
||||
|
||||
#undef vp8_subpix_bilinear16x16
|
||||
#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_armv6
|
||||
|
||||
#undef vp8_subpix_bilinear8x8
|
||||
#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_armv6
|
||||
|
||||
#undef vp8_subpix_bilinear8x4
|
||||
#define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_armv6
|
||||
|
||||
#undef vp8_subpix_bilinear4x4
|
||||
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
|
||||
/* closes !CONFIG_RUNTIME_CPU_DETECT */
#endif
|
||||
/* closes HAVE_ARMV6 */
#endif
|
||||
|
||||
#if HAVE_ARMV7
|
||||
/* NEON six-tap sub-pixel predictors, declared through the
 * prototype_subpixel_predict macro (defined outside this header). The routine
 * bound to the 4x4 slot below, vp8_sixtap_predict_neon, has no size suffix. */
extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x8_neon);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict8x4_neon);
|
||||
extern prototype_subpixel_predict(vp8_sixtap_predict_neon);
|
||||
/* NEON bilinear sub-pixel predictors. */
extern prototype_subpixel_predict(vp8_bilinear_predict16x16_neon);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
|
||||
extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
|
||||
|
||||
/* When runtime CPU detection is disabled, rebind each vp8_subpix_* name
 * directly to its NEON routine. This section comes after the ARMv6 one, so
 * with both HAVE_ARMV6 and HAVE_ARMV7 set these definitions are the ones
 * left in effect. */
#if !CONFIG_RUNTIME_CPU_DETECT
|
||||
#undef vp8_subpix_sixtap16x16
|
||||
#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
|
||||
|
||||
#undef vp8_subpix_sixtap8x8
|
||||
#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_neon
|
||||
|
||||
#undef vp8_subpix_sixtap8x4
|
||||
#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_neon
|
||||
|
||||
#undef vp8_subpix_sixtap4x4
|
||||
#define vp8_subpix_sixtap4x4 vp8_sixtap_predict_neon
|
||||
|
||||
#undef vp8_subpix_bilinear16x16
|
||||
#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_neon
|
||||
|
||||
#undef vp8_subpix_bilinear8x8
|
||||
#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_neon
|
||||
|
||||
#undef vp8_subpix_bilinear8x4
|
||||
#define vp8_subpix_bilinear8x4 vp8_bilinear_predict8x4_neon
|
||||
|
||||
#undef vp8_subpix_bilinear4x4
|
||||
#define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
|
||||
/* closes !CONFIG_RUNTIME_CPU_DETECT */
#endif
|
||||
/* closes HAVE_ARMV7 */
#endif
|
||||
|
||||
#endif
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_ports/config.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#if CONFIG_VP8_ENCODER
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
#include "onyxd_int.h"
|
||||
#endif
|
||||
|
||||
/*
 * DEFINE(sym, val): define an int object named `sym` initialised to `val`.
 *
 * The expansion intentionally carries no trailing semicolon: every use site
 * in this file already writes `DEFINE(...);`, so embedding a second `;` in
 * the macro would leave a stray empty declaration at file scope, which ISO C
 * does not allow. `val` is parenthesised so any expression can be passed.
 */
#define DEFINE(sym, val) int sym = (val)
|
||||
|
||||
/*
|
||||
#define BLANK() asm volatile("\n->" : : )
|
||||
*/
|
||||
|
||||
/*
|
||||
* int main(void)
|
||||
* {
|
||||
*/
|
||||
|
||||
#if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
|
||||
/* Byte offsets (offsetof) of YV12_BUFFER_CONFIG members, each emitted as an
 * int object by DEFINE. Presumably consumed by the assembly sources - confirm
 * against the build.
 * NOTE(review): this file includes vpx_scale/yv12config.h directly only under
 * CONFIG_VP8_ENCODER; a decoder-only build presumably gets YV12_BUFFER_CONFIG
 * through onyxd_int.h - confirm. */
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
#endif
|
||||
|
||||
#if CONFIG_VP8_DECODER
|
||||
/* Decoder-only table: byte offsets (offsetof) of selected struct members,
 * each emitted as an int object by DEFINE. */
/* MACROBLOCKD members, including the nested dst.* fields. */
DEFINE(mb_diff, offsetof(MACROBLOCKD, diff));
|
||||
DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor));
|
||||
DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride));
|
||||
DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer));
|
||||
DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer));
|
||||
DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer));
|
||||
DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available));
|
||||
DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available));
|
||||
|
||||
/* DETOK members. Symbol names mirror the member names, except
 * detok_coef_tree_ptr, which refers to the member vp8_coef_tree_ptr. */
DEFINE(detok_scan, offsetof(DETOK, scan));
|
||||
DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove));
|
||||
DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr));
|
||||
DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr));
|
||||
DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr));
|
||||
DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x));
|
||||
|
||||
DEFINE(detok_A, offsetof(DETOK, A));
|
||||
DEFINE(detok_L, offsetof(DETOK, L));
|
||||
|
||||
DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr));
|
||||
DEFINE(detok_current_bc, offsetof(DETOK, current_bc));
|
||||
DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs));
|
||||
DEFINE(detok_eob, offsetof(DETOK, eob));
|
||||
|
||||
/* BOOL_DECODER members. */
DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end));
|
||||
DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer));
|
||||
DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value));
|
||||
DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count));
|
||||
DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range));
|
||||
|
||||
/* TOKENEXTRABITS members; note the member is spelled `Length` (capital L)
 * while the emitted symbol is tokenextrabits_length. */
DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val));
|
||||
DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length));
|
||||
#endif
|
||||
|
||||
//add asserts for any offset that is not supported by assembly code
|
||||
//add asserts for any size that is not supported by assembly code
|
||||
/*
|
||||
* return 0;
|
||||
* }
|
||||
*/
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -24,7 +24,7 @@ void vpx_log(const char *format, ...);
|
|||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
|
||||
//#define DCPRED 1
|
||||
/*#define DCPRED 1*/
|
||||
#define DCPREDSIMTHRESH 0
|
||||
#define DCPREDCNTTHRESH 3
|
||||
|
||||
|
@ -39,7 +39,7 @@ void vpx_log(const char *format, ...);
|
|||
#define MAX_REF_LF_DELTAS 4
|
||||
#define MAX_MODE_LF_DELTAS 4
|
||||
|
||||
// Segment Feature Masks
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_DELTADATA 0
|
||||
#define SEGMENT_ABSDATA 1
|
||||
|
||||
|
@ -75,11 +75,11 @@ typedef enum
|
|||
|
||||
typedef enum
|
||||
{
|
||||
DC_PRED, // average of above and left pixels
|
||||
V_PRED, // vertical prediction
|
||||
H_PRED, // horizontal prediction
|
||||
TM_PRED, // Truemotion prediction
|
||||
B_PRED, // block based prediction, each block has its own prediction mode
|
||||
DC_PRED, /* average of above and left pixels */
|
||||
V_PRED, /* vertical prediction */
|
||||
H_PRED, /* horizontal prediction */
|
||||
TM_PRED, /* Truemotion prediction */
|
||||
B_PRED, /* block based prediction, each block has its own prediction mode */
|
||||
|
||||
NEARESTMV,
|
||||
NEARMV,
|
||||
|
@ -90,16 +90,16 @@ typedef enum
|
|||
MB_MODE_COUNT
|
||||
} MB_PREDICTION_MODE;
|
||||
|
||||
// Macroblock level features
|
||||
/* Macroblock level features */
|
||||
typedef enum
|
||||
{
|
||||
MB_LVL_ALT_Q = 0, // Use alternate Quantizer ....
|
||||
MB_LVL_ALT_LF = 1, // Use alternate loop filter value...
|
||||
MB_LVL_MAX = 2 // Number of MB level features supported
|
||||
MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */
|
||||
MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */
|
||||
MB_LVL_MAX = 2 /* Number of MB level features supported */
|
||||
|
||||
} MB_LVL_FEATURES;
|
||||
|
||||
// Segment Feature Masks
|
||||
/* Segment Feature Masks */
|
||||
#define SEGMENT_ALTQ 0x01
|
||||
#define SEGMENT_ALT_LF 0x02
|
||||
|
||||
|
@ -110,11 +110,11 @@ typedef enum
|
|||
|
||||
typedef enum
|
||||
{
|
||||
B_DC_PRED, // average of above and left pixels
|
||||
B_DC_PRED, /* average of above and left pixels */
|
||||
B_TM_PRED,
|
||||
|
||||
B_VE_PRED, // vertical prediction
|
||||
B_HE_PRED, // horizontal prediction
|
||||
B_VE_PRED, /* vertical prediction */
|
||||
B_HE_PRED, /* horizontal prediction */
|
||||
|
||||
B_LD_PRED,
|
||||
B_RD_PRED,
|
||||
|
@ -168,14 +168,15 @@ typedef struct
|
|||
int as_int;
|
||||
MV as_mv;
|
||||
} mv;
|
||||
int partitioning;
|
||||
int partition_count;
|
||||
int mb_skip_coeff; //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
|
||||
int dc_diff;
|
||||
unsigned char segment_id; // Which set of segmentation parameters should be used for this MB
|
||||
int force_no_skip;
|
||||
int need_to_clamp_mvs;
|
||||
B_MODE_INFO partition_bmi[16];
|
||||
|
||||
unsigned char partitioning;
|
||||
unsigned char mb_skip_coeff; /* does this mb have coefficients at all, 1=no coefficients, 0=need decode tokens */
|
||||
unsigned char dc_diff;
|
||||
unsigned char need_to_clamp_mvs;
|
||||
|
||||
unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */
|
||||
|
||||
unsigned char force_no_skip; /* encoder only */
|
||||
} MB_MODE_INFO;
|
||||
|
||||
|
||||
|
@ -194,9 +195,9 @@ typedef struct
|
|||
short *diff;
|
||||
short *reference;
|
||||
|
||||
short(*dequant)[4];
|
||||
short *dequant;
|
||||
|
||||
// 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
|
||||
/* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
|
||||
unsigned char **base_pre;
|
||||
int pre;
|
||||
int pre_stride;
|
||||
|
@ -213,22 +214,20 @@ typedef struct
|
|||
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(16, short, diff[400]); // from idct diff
|
||||
DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */
|
||||
DECLARE_ALIGNED(16, unsigned char, predictor[384]);
|
||||
//not used DECLARE_ALIGNED(16, short, reference[384]);
|
||||
/* not used DECLARE_ALIGNED(16, short, reference[384]); */
|
||||
DECLARE_ALIGNED(16, short, qcoeff[400]);
|
||||
DECLARE_ALIGNED(16, short, dqcoeff[400]);
|
||||
DECLARE_ALIGNED(16, char, eobs[25]);
|
||||
|
||||
// 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
|
||||
/* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
|
||||
BLOCKD block[25];
|
||||
|
||||
YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
|
||||
YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
|
||||
YV12_BUFFER_CONFIG dst;
|
||||
|
||||
MODE_INFO *mode_info_context;
|
||||
MODE_INFO *mode_info;
|
||||
|
||||
int mode_info_stride;
|
||||
|
||||
FRAME_TYPE frame_type;
|
||||
|
@ -236,39 +235,39 @@ typedef struct
|
|||
int up_available;
|
||||
int left_available;
|
||||
|
||||
// Y,U,V,Y2
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context;
|
||||
ENTROPY_CONTEXT_PLANES *left_context;
|
||||
|
||||
// 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
|
||||
/* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
|
||||
unsigned char segmentation_enabled;
|
||||
|
||||
// 0 (do not update) 1 (update) the macroblock segmentation map.
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation map. */
|
||||
unsigned char update_mb_segmentation_map;
|
||||
|
||||
// 0 (do not update) 1 (update) the macroblock segmentation feature data.
|
||||
/* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
|
||||
unsigned char update_mb_segmentation_data;
|
||||
|
||||
// 0 (do not update) 1 (update) the macroblock segmentation feature data.
|
||||
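/* NOTE(review): this comment previously duplicated the one on update_mb_segmentation_data; presumably this flag selects delta (SEGMENT_DELTADATA) vs. absolute (SEGMENT_ABSDATA) segment feature data -- confirm. */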
|
||||
unsigned char mb_segement_abs_delta;
|
||||
|
||||
// Per frame flags that define which MB level features (such as quantizer or loop filter level)
|
||||
// are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; // Probability Tree used to code Segment number
|
||||
/* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
|
||||
/* are enabled and when enabled the probabilities used to decode the per MB flags in MB_MODE_INFO */
|
||||
vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Probability Tree used to code Segment number */
|
||||
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters
|
||||
signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* Segment parameters */
|
||||
|
||||
// mode_based Loop filter adjustment
|
||||
/* mode_based Loop filter adjustment */
|
||||
unsigned char mode_ref_lf_delta_enabled;
|
||||
unsigned char mode_ref_lf_delta_update;
|
||||
|
||||
// Delta values have the range +/- MAX_LOOP_FILTER
|
||||
//char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
|
||||
//char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF
|
||||
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT
|
||||
/* Delta values have the range +/- MAX_LOOP_FILTER */
|
||||
signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */
|
||||
signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */
|
||||
|
||||
// Distance of MB away from frame edges
|
||||
/* Distance of MB away from frame edges */
|
||||
int mb_to_left_edge;
|
||||
int mb_to_right_edge;
|
||||
int mb_to_top_edge;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -21,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
int mb_index = 0;
|
||||
FILE *mvs = fopen("mvs.stt", "a");
|
||||
|
||||
// print out the macroblock Y modes
|
||||
/* print out the macroblock Y modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mb Modes for Frame %d\n", frame);
|
||||
|
||||
|
@ -60,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
// print out the macroblock UV modes
|
||||
/* print out the macroblock UV modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "UV Modes for Frame %d\n", frame);
|
||||
|
||||
|
@ -80,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
// print out the block modes
|
||||
/* print out the block modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "Mbs for Frame %d\n", frame);
|
||||
{
|
||||
|
@ -108,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
}
|
||||
fprintf(mvs, "\n");
|
||||
|
||||
// print out the macroblock mvs
|
||||
/* print out the macroblock mvs */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
|
||||
|
@ -128,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
|
|||
fprintf(mvs, "\n");
|
||||
|
||||
|
||||
// print out the block modes
|
||||
/* print out the block modes */
|
||||
mb_index = 0;
|
||||
fprintf(mvs, "MVs for Frame %d\n", frame);
|
||||
{
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -15,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
|
|||
{
|
||||
|
||||
{
|
||||
// Block Type ( 0 )
|
||||
/* Block Type ( 0 ) */
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
/* Coeff Band ( 1 ) */
|
||||
{30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,},
|
||||
{26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,},
|
||||
{10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
/* Coeff Band ( 2 ) */
|
||||
{25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,},
|
||||
{9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,},
|
||||
{1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
/* Coeff Band ( 3 ) */
|
||||
{26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,},
|
||||
{ 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
/* Coeff Band ( 4 ) */
|
||||
{10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,},
|
||||
{ 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
/* Coeff Band ( 5 ) */
|
||||
{10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,},
|
||||
{ 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
/* Coeff Band ( 6 ) */
|
||||
{40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,},
|
||||
{ 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 1 )
|
||||
/* Block Type ( 1 ) */
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
/* Coeff Band ( 0 ) */
|
||||
{3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,},
|
||||
{8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,},
|
||||
{9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
/* Coeff Band ( 1 ) */
|
||||
{12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,},
|
||||
{7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
/* Coeff Band ( 2 ) */
|
||||
{15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,},
|
||||
{7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,},
|
||||
{1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
/* Coeff Band ( 3 ) */
|
||||
{19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,},
|
||||
{1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
/* Coeff Band ( 4 ) */
|
||||
{12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,},
|
||||
{4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,},
|
||||
{ 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
/* Coeff Band ( 5 ) */
|
||||
{12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,},
|
||||
{ 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
/* Coeff Band ( 6 ) */
|
||||
{61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,},
|
||||
{15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,},
|
||||
{ 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,},
|
||||
{ 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 2 )
|
||||
/* Block Type ( 2 ) */
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
/* Coeff Band ( 0 ) */
|
||||
{ 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,},
|
||||
{1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,},
|
||||
{1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
/* Coeff Band ( 1 ) */
|
||||
{1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,},
|
||||
{1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,},
|
||||
{1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
/* Coeff Band ( 2 ) */
|
||||
{ 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,},
|
||||
{ 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,},
|
||||
{ 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
/* Coeff Band ( 3 ) */
|
||||
{ 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,},
|
||||
{ 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
/* Coeff Band ( 4 ) */
|
||||
{ 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,},
|
||||
{ 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
/* Coeff Band ( 5 ) */
|
||||
{ 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,},
|
||||
{ 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
/* Coeff Band ( 6 ) */
|
||||
{ 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,},
|
||||
{ 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
},
|
||||
},
|
||||
{
|
||||
// Block Type ( 3 )
|
||||
/* Block Type ( 3 ) */
|
||||
{
|
||||
// Coeff Band ( 0 )
|
||||
/* Coeff Band ( 0 ) */
|
||||
{2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,},
|
||||
{8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,},
|
||||
{11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 1 )
|
||||
/* Coeff Band ( 1 ) */
|
||||
{9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,},
|
||||
{12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,},
|
||||
{10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 2 )
|
||||
/* Coeff Band ( 2 ) */
|
||||
{6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,},
|
||||
{6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,},
|
||||
{3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 3 )
|
||||
/* Coeff Band ( 3 ) */
|
||||
{11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,},
|
||||
{9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,},
|
||||
{4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 4 )
|
||||
/* Coeff Band ( 4 ) */
|
||||
{4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,},
|
||||
{3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,},
|
||||
{1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 5 )
|
||||
/* Coeff Band ( 5 ) */
|
||||
{8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,},
|
||||
{1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 6 )
|
||||
/* Coeff Band ( 6 ) */
|
||||
{27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,},
|
||||
{5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,},
|
||||
{1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,},
|
||||
},
|
||||
{
|
||||
// Coeff Band ( 7 )
|
||||
/* Coeff Band ( 7 ) */
|
||||
{ 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
|
||||
{ 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,},
|
||||
{ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,},
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -17,18 +17,18 @@
|
|||
|
||||
/* Coefficient token alphabet */
|
||||
|
||||
#define ZERO_TOKEN 0 //0 Extra Bits 0+0
|
||||
#define ONE_TOKEN 1 //1 Extra Bits 0+1
|
||||
#define TWO_TOKEN 2 //2 Extra Bits 0+1
|
||||
#define THREE_TOKEN 3 //3 Extra Bits 0+1
|
||||
#define FOUR_TOKEN 4 //4 Extra Bits 0+1
|
||||
#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1
|
||||
#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1
|
||||
#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1
|
||||
#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1
|
||||
#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1
|
||||
#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1
|
||||
#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0
|
||||
#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */
|
||||
#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */
|
||||
#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */
|
||||
#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */
|
||||
#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */
|
||||
#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */
|
||||
#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */
|
||||
#define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */
|
||||
#define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */
|
||||
#define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */
|
||||
#define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */
|
||||
#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */
|
||||
|
||||
#define vp8_coef_tokens 12
|
||||
#define MAX_ENTROPY_TOKENS vp8_coef_tokens
|
||||
|
@ -83,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
|
|||
coefficient band (and since zigzag positions 0, 1, and 2 are in
|
||||
distinct bands). */
|
||||
|
||||
/*# define DC_TOKEN_CONTEXTS 3 // 00, 0!0, !0!0 */
|
||||
/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */
|
||||
# define PREV_COEF_CONTEXTS 3
|
||||
|
||||
extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -29,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
|
|||
const MV_CONTEXT vp8_default_mv_context[2] =
|
||||
{
|
||||
{{
|
||||
// row
|
||||
162, // is short
|
||||
128, // sign
|
||||
225, 146, 172, 147, 214, 39, 156, // short tree
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 // long bits
|
||||
/* row */
|
||||
162, /* is short */
|
||||
128, /* sign */
|
||||
225, 146, 172, 147, 214, 39, 156, /* short tree */
|
||||
128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */
|
||||
}},
|
||||
|
||||
|
||||
|
||||
{{
|
||||
// same for column
|
||||
164, // is short
|
||||
/* same for column */
|
||||
164, /* is short */
|
||||
128,
|
||||
204, 170, 119, 235, 140, 230, 228,
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 // long bits
|
||||
128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */
|
||||
|
||||
}}
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -15,14 +15,14 @@
|
|||
|
||||
static void extend_plane_borders
|
||||
(
|
||||
unsigned char *s, // source
|
||||
int sp, // pitch
|
||||
int h, // height
|
||||
int w, // width
|
||||
int et, // extend top border
|
||||
int el, // extend left border
|
||||
int eb, // extend bottom border
|
||||
int er // extend right border
|
||||
unsigned char *s, /* source */
|
||||
int sp, /* pitch */
|
||||
int h, /* height */
|
||||
int w, /* width */
|
||||
int et, /* extend top border */
|
||||
int el, /* extend left border */
|
||||
int eb, /* extend bottom border */
|
||||
int er /* extend right border */
|
||||
)
|
||||
{
|
||||
|
||||
|
@ -31,7 +31,7 @@ static void extend_plane_borders
|
|||
unsigned char *dest_ptr1, *dest_ptr2;
|
||||
int linesize;
|
||||
|
||||
// copy the left and right most columns out
|
||||
/* copy the left and right most columns out */
|
||||
src_ptr1 = s;
|
||||
src_ptr2 = s + w - 1;
|
||||
dest_ptr1 = s - el;
|
||||
|
@ -39,8 +39,9 @@ static void extend_plane_borders
|
|||
|
||||
for (i = 0; i < h - 0 + 1; i++)
|
||||
{
|
||||
// Some linkers will complain if we call vpx_memset with el set to a
|
||||
// constant 0.
|
||||
/* Some linkers will complain if we call vpx_memset with el set to a
|
||||
* constant 0.
|
||||
*/
|
||||
if (el)
|
||||
vpx_memset(dest_ptr1, src_ptr1[0], el);
|
||||
vpx_memset(dest_ptr2, src_ptr2[0], er);
|
||||
|
@ -50,7 +51,7 @@ static void extend_plane_borders
|
|||
dest_ptr2 += sp;
|
||||
}
|
||||
|
||||
// Now copy the top and bottom source lines into each line of the respective borders
|
||||
/* Now copy the top and bottom source lines into each line of the respective borders */
|
||||
src_ptr1 = s - el;
|
||||
src_ptr2 = s + sp * (h - 1) - el;
|
||||
dest_ptr1 = s + sp * (-et) - el;
|
||||
|
@ -76,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
|
|||
int er = 0xf & (16 - (width & 0xf));
|
||||
int eb = 0xf & (16 - (height & 0xf));
|
||||
|
||||
// check for non multiples of 16
|
||||
/* check for non multiples of 16 */
|
||||
if (er != 0 || eb != 0)
|
||||
{
|
||||
extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
|
||||
|
||||
//adjust for uv
|
||||
/* adjust for uv */
|
||||
height = (height + 1) >> 1;
|
||||
width = (width + 1) >> 1;
|
||||
er = 0x7 & (8 - (width & 0x7));
|
||||
|
@ -95,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
|
|||
}
|
||||
}
|
||||
|
||||
// note the extension is only for the last row, for intra prediction purpose
|
||||
/* note the extension is only for the last row, for intra prediction purpose */
|
||||
void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
|
||||
{
|
||||
int i;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -32,13 +32,13 @@ static const int bilinear_filters[8][2] =
|
|||
static const short sub_pel_filters[8][6] =
|
||||
{
|
||||
|
||||
{ 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic
|
||||
{ 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
|
||||
{ 0, -6, 123, 12, -1, 0 },
|
||||
{ 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter
|
||||
{ 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -9, 93, 50, -6, 0 },
|
||||
{ 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter
|
||||
{ 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
|
||||
{ 0, -6, 50, 93, -9, 0 },
|
||||
{ 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter
|
||||
{ 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
|
||||
{ 0, -1, 12, 123, -6, 0 },
|
||||
|
||||
|
||||
|
@ -69,9 +69,9 @@ void vp8_filter_block2d_first_pass
|
|||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); // Rounding
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
// Normalize back to 0-255
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
|
@ -83,7 +83,7 @@ void vp8_filter_block2d_first_pass
|
|||
src_ptr++;
|
||||
}
|
||||
|
||||
// Next row...
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
|
@ -108,16 +108,16 @@ void vp8_filter_block2d_second_pass
|
|||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
// Apply filter
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
|
||||
((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
|
||||
((int)src_ptr[0] * vp8_filter[2]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[3]) +
|
||||
((int)src_ptr[2*pixel_step] * vp8_filter[4]) +
|
||||
((int)src_ptr[3*pixel_step] * vp8_filter[5]) +
|
||||
(VP8_FILTER_WEIGHT >> 1); // Rounding
|
||||
(VP8_FILTER_WEIGHT >> 1); /* Rounding */
|
||||
|
||||
// Normalize back to 0-255
|
||||
/* Normalize back to 0-255 */
|
||||
Temp = Temp >> VP8_FILTER_SHIFT;
|
||||
|
||||
if (Temp < 0)
|
||||
|
@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
|
|||
src_ptr++;
|
||||
}
|
||||
|
||||
// Start next row
|
||||
/* Start next row */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
|
@ -146,12 +146,12 @@ void vp8_filter_block2d
|
|||
const short *VFilter
|
||||
)
|
||||
{
|
||||
int FData[9*4]; // Temp data bufffer used in filtering
|
||||
int FData[9*4]; /* Temp data buffer used in filtering */
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
|
||||
|
||||
// then filter verticaly...
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
|
||||
}
|
||||
|
||||
|
@ -195,8 +195,8 @@ void vp8_sixtap_predict_c
|
|||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; // 6 tap
|
||||
VFilter = sub_pel_filters[yoffset]; // 6 tap
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
|
||||
}
|
||||
|
@ -212,16 +212,16 @@ void vp8_sixtap_predict8x8_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; // Temp data bufffer used in filtering
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; // 6 tap
|
||||
VFilter = sub_pel_filters[yoffset]; // 6 tap
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
|
||||
|
||||
|
||||
// then filter verticaly...
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
|
||||
|
||||
}
|
||||
|
@ -238,16 +238,16 @@ void vp8_sixtap_predict8x4_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[13*16]; // Temp data bufffer used in filtering
|
||||
int FData[13*16]; /* Temp data bufffer used in filtering */
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; // 6 tap
|
||||
VFilter = sub_pel_filters[yoffset]; // 6 tap
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
|
||||
|
||||
|
||||
// then filter verticaly...
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
|
||||
|
||||
}
|
||||
|
@ -264,16 +264,16 @@ void vp8_sixtap_predict16x16_c
|
|||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
int FData[21*24]; // Temp data bufffer used in filtering
|
||||
int FData[21*24]; /* Temp data bufffer used in filtering */
|
||||
|
||||
|
||||
HFilter = sub_pel_filters[xoffset]; // 6 tap
|
||||
VFilter = sub_pel_filters[yoffset]; // 6 tap
|
||||
HFilter = sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
|
||||
|
||||
// then filter verticaly...
|
||||
/* then filter verticaly... */
|
||||
vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
|
||||
|
||||
}
|
||||
|
@ -324,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
|
|||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
// Apply bilinear filter
|
||||
/* Apply bilinear filter */
|
||||
output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
|
||||
src_ptr++;
|
||||
}
|
||||
|
||||
// Next row...
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_width;
|
||||
}
|
||||
|
@ -384,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
|
|||
{
|
||||
for (j = 0; j < output_width; j++)
|
||||
{
|
||||
// Apply filter
|
||||
/* Apply filter */
|
||||
Temp = ((int)src_ptr[0] * vp8_filter[0]) +
|
||||
((int)src_ptr[pixel_step] * vp8_filter[1]) +
|
||||
(VP8_FILTER_WEIGHT / 2);
|
||||
|
@ -392,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
|
|||
src_ptr++;
|
||||
}
|
||||
|
||||
// Next row...
|
||||
/* Next row... */
|
||||
src_ptr += src_pixels_per_line - output_width;
|
||||
output_ptr += output_pitch;
|
||||
}
|
||||
|
@ -432,12 +432,12 @@ void vp8_filter_block2d_bil
|
|||
)
|
||||
{
|
||||
|
||||
unsigned short FData[17*16]; // Temp data bufffer used in filtering
|
||||
unsigned short FData[17*16]; /* Temp data bufffer used in filtering */
|
||||
|
||||
// First filter 1-D horizontally...
|
||||
/* First filter 1-D horizontally... */
|
||||
vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
|
||||
|
||||
// then 1-D vertically...
|
||||
/* then 1-D vertically... */
|
||||
vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -168,7 +168,7 @@ void vp8_find_near_mvs
|
|||
|
||||
vp8_clamp_mv(nearest, xd);
|
||||
vp8_clamp_mv(nearby, xd);
|
||||
vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
|
||||
vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
|
||||
}
|
||||
|
||||
vp8_prob *vp8_mv_ref_probs(
|
||||
|
@ -179,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
|
|||
p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
|
||||
p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
|
||||
p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
|
||||
//p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
|
||||
/*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
|
||||
return p;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -18,6 +18,7 @@
|
|||
#include "onyxc_int.h"
|
||||
|
||||
extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
|
||||
extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
|
||||
|
||||
void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
|
||||
extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
|
||||
|
@ -39,9 +40,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
rtcd->recon.copy16x16 = vp8_copy_mem16x16_c;
|
||||
rtcd->recon.copy8x8 = vp8_copy_mem8x8_c;
|
||||
rtcd->recon.copy8x4 = vp8_copy_mem8x4_c;
|
||||
rtcd->recon.recon = vp8_recon_b_c;
|
||||
rtcd->recon.recon = vp8_recon_b_c;
|
||||
rtcd->recon.recon2 = vp8_recon2b_c;
|
||||
rtcd->recon.recon4 = vp8_recon4b_c;
|
||||
rtcd->recon.recon4 = vp8_recon4b_c;
|
||||
rtcd->recon.recon_mb = vp8_recon_mb_c;
|
||||
rtcd->recon.recon_mby = vp8_recon_mby_c;
|
||||
|
||||
rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c;
|
||||
rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c;
|
||||
|
@ -66,10 +69,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;
|
||||
rtcd->postproc.downacross = vp8_post_proc_down_and_across_c;
|
||||
rtcd->postproc.addnoise = vp8_plane_add_noise_c;
|
||||
rtcd->postproc.blend_mb = vp8_blend_mb_c;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
// Pure C:
|
||||
/* Pure C: */
|
||||
vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
|
||||
vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
|
||||
|
||||
|
@ -77,4 +81,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
|
|||
vp8_arch_x86_common_init(ctx);
|
||||
#endif
|
||||
|
||||
#if ARCH_ARM
|
||||
vp8_arch_arm_common_init(ctx);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -38,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
|
|||
{
|
||||
int i;
|
||||
|
||||
// do 2nd order transform on the dc block
|
||||
/* do 2nd order transform on the dc block */
|
||||
IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);
|
||||
|
||||
recon_dcblock(x);
|
||||
|
@ -68,7 +68,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
|
|||
if (x->mode_info_context->mbmi.mode != B_PRED &&
|
||||
x->mode_info_context->mbmi.mode != SPLITMV)
|
||||
{
|
||||
// do 2nd order transform on the dc block
|
||||
/* do 2nd order transform on the dc block */
|
||||
|
||||
IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
|
||||
recon_dcblock(x);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -23,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
|
|||
prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
|
||||
prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
|
||||
|
||||
// Horizontal MB filtering
|
||||
/* Horizontal MB filtering */
|
||||
void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
|
@ -47,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
|||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
}
|
||||
|
||||
// Vertical MB Filtering
|
||||
/* Vertical MB Filtering */
|
||||
void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
|
@ -71,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
|||
vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
|
||||
}
|
||||
|
||||
// Horizontal B Filtering
|
||||
/* Horizontal B Filtering */
|
||||
void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
|
@ -99,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
|
|||
vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
|
||||
}
|
||||
|
||||
// Vertical B Filtering
|
||||
/* Vertical B Filtering */
|
||||
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
|
||||
int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
|
||||
{
|
||||
|
@ -140,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||
const int yhedge_boost = 2;
|
||||
const int uvhedge_boost = 2;
|
||||
|
||||
// For each possible value for the loop filter fill out a "loop_filter_info" entry.
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
@ -166,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||
HEVThresh = 0;
|
||||
}
|
||||
|
||||
// Set loop filter paramaeters that control sharpness.
|
||||
/* Set loop filter paramaeters that control sharpness. */
|
||||
block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
|
||||
block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
|
||||
|
||||
|
@ -195,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||
|
||||
}
|
||||
|
||||
// Set up the function pointers depending on the type of loop filtering selected
|
||||
/* Set up the function pointers depending on the type of loop filtering selected */
|
||||
if (lft == NORMAL_LOOPFILTER)
|
||||
{
|
||||
cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
|
||||
|
@ -212,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
|
|||
}
|
||||
}
|
||||
|
||||
// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
||||
// each frame. Check last_frame_type to skip the function most of times.
|
||||
/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
|
||||
* each frame. Check last_frame_type to skip the function most of times.
|
||||
*/
|
||||
void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
||||
{
|
||||
int HEVThresh;
|
||||
int i, j;
|
||||
|
||||
// For each possible value for the loop filter fill out a "loop_filter_info" entry.
|
||||
/* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
|
||||
for (i = 0; i <= MAX_LOOP_FILTER; i++)
|
||||
{
|
||||
int filt_lvl = i;
|
||||
|
@ -247,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
|
|||
|
||||
for (j = 0; j < 16; j++)
|
||||
{
|
||||
//lfi[i].lim[j] = block_inside_limit;
|
||||
//lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
|
||||
/*lfi[i].lim[j] = block_inside_limit;
|
||||
lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
|
||||
lfi[i].mbthr[j] = HEVThresh;
|
||||
//lfi[i].flim[j] = filt_lvl;
|
||||
/*lfi[i].flim[j] = filt_lvl;*/
|
||||
lfi[i].thr[j] = HEVThresh;
|
||||
//lfi[i].uvlim[j] = block_inside_limit;
|
||||
//lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
|
||||
/*lfi[i].uvlim[j] = block_inside_limit;
|
||||
lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
|
||||
lfi[i].uvmbthr[j] = HEVThresh;
|
||||
//lfi[i].uvflim[j] = filt_lvl;
|
||||
/*lfi[i].uvflim[j] = filt_lvl;*/
|
||||
lfi[i].uvthr[j] = HEVThresh;
|
||||
}
|
||||
}
|
||||
|
@ -268,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
|
|||
|
||||
if (mbd->mode_ref_lf_delta_enabled)
|
||||
{
|
||||
// Aplly delta for reference frame
|
||||
/* Apply delta for reference frame */
|
||||
*filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
|
||||
|
||||
// Apply delta for mode
|
||||
/* Apply delta for mode */
|
||||
if (mbmi->ref_frame == INTRA_FRAME)
|
||||
{
|
||||
// Only the split mode BPRED has a further special case
|
||||
/* Only the split mode BPRED has a further special case */
|
||||
if (mbmi->mode == B_PRED)
|
||||
*filter_level += mbd->mode_lf_deltas[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
// Zero motion mode
|
||||
/* Zero motion mode */
|
||||
if (mbmi->mode == ZEROMV)
|
||||
*filter_level += mbd->mode_lf_deltas[1];
|
||||
|
||||
// Split MB motion mode
|
||||
/* Split MB motion mode */
|
||||
else if (mbmi->mode == SPLITMV)
|
||||
*filter_level += mbd->mode_lf_deltas[3];
|
||||
|
||||
// All other inter motion modes (Nearest, Near, New)
|
||||
/* All other inter motion modes (Nearest, Near, New) */
|
||||
else
|
||||
*filter_level += mbd->mode_lf_deltas[2];
|
||||
}
|
||||
|
||||
// Range check
|
||||
/* Range check */
|
||||
if (*filter_level > MAX_LOOP_FILTER)
|
||||
*filter_level = MAX_LOOP_FILTER;
|
||||
else if (*filter_level < 0)
|
||||
|
@ -311,7 +312,7 @@ void vp8_loop_filter_frame
|
|||
{
|
||||
YV12_BUFFER_CONFIG *post = cm->frame_to_show;
|
||||
loop_filter_info *lfi = cm->lf_info;
|
||||
int frame_type = cm->frame_type;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
|
@ -324,21 +325,21 @@ void vp8_loop_filter_frame
|
|||
int i;
|
||||
unsigned char *y_ptr, *u_ptr, *v_ptr;
|
||||
|
||||
mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
// Note the baseline filter values for each segment
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
// Abs value
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
// Delta Value
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -348,18 +349,18 @@ void vp8_loop_filter_frame
|
|||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
|
||||
// Initialize the loop filter for this frame.
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
|
||||
// Set up the buffer pointers
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
u_ptr = post->u_buffer;
|
||||
v_ptr = post->v_buffer;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
|
@ -368,9 +369,10 @@ void vp8_loop_filter_frame
|
|||
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
|
||||
// Distance of Mb to the various image edges.
|
||||
// These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
// Apply any context driven MB level adjustment
|
||||
/* Distance of Mb to the various image edges.
|
||||
* These specified to 8th pel as they are always compared to values that are in 1/8th pel units
|
||||
* Apply any context driven MB level adjustment
|
||||
*/
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
|
||||
if (filter_level)
|
||||
|
@ -381,7 +383,7 @@ void vp8_loop_filter_frame
|
|||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
|
||||
// don't apply across umv border
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
|
||||
|
||||
|
@ -393,14 +395,14 @@ void vp8_loop_filter_frame
|
|||
u_ptr += 8;
|
||||
v_ptr += 8;
|
||||
|
||||
mbd->mode_info_context++; // step to next MB
|
||||
mbd->mode_info_context++; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
u_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
v_ptr += post->uv_stride * 8 - post->uv_width;
|
||||
|
||||
mbd->mode_info_context++; // Skip border mb
|
||||
mbd->mode_info_context++; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -424,26 +426,26 @@ void vp8_loop_filter_frame_yonly
|
|||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
int frame_type = cm->frame_type;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
|
||||
//MODE_INFO * this_mb_mode_info = cm->mi; // Point at base of Mb MODE_INFO list
|
||||
mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
// Note the baseline filter values for each segment
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
// Abs value
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
// Delta Value
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -453,16 +455,16 @@ void vp8_loop_filter_frame_yonly
|
|||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
|
||||
// Initialize the loop filter for this frame.
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
|
||||
// Set up the buffer pointers
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
|
||||
|
@ -470,7 +472,7 @@ void vp8_loop_filter_frame_yonly
|
|||
int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
|
||||
filter_level = baseline_filter_level[Segment];
|
||||
|
||||
// Apply any context driven MB level adjustment
|
||||
/* Apply any context driven MB level adjustment */
|
||||
vp8_adjust_mb_lf_value(mbd, &filter_level);
|
||||
|
||||
if (filter_level)
|
||||
|
@ -481,7 +483,7 @@ void vp8_loop_filter_frame_yonly
|
|||
if (mbd->mode_info_context->mbmi.dc_diff > 0)
|
||||
cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
|
||||
// don't apply across umv border
|
||||
/* don't apply across umv border */
|
||||
if (mb_row > 0)
|
||||
cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
|
||||
|
||||
|
@ -490,12 +492,12 @@ void vp8_loop_filter_frame_yonly
|
|||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context ++; // step to next MB
|
||||
mbd->mode_info_context ++; /* step to next MB */
|
||||
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context ++; // Skip border mb
|
||||
mbd->mode_info_context ++; /* Skip border mb */
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -516,7 +518,7 @@ void vp8_loop_filter_partial_frame
|
|||
unsigned char *y_ptr;
|
||||
int mb_row;
|
||||
int mb_col;
|
||||
//int mb_rows = post->y_height >> 4;
|
||||
/*int mb_rows = post->y_height >> 4;*/
|
||||
int mb_cols = post->y_width >> 4;
|
||||
|
||||
int linestocopy;
|
||||
|
@ -525,12 +527,12 @@ void vp8_loop_filter_partial_frame
|
|||
int baseline_filter_level[MAX_MB_SEGMENTS];
|
||||
int filter_level;
|
||||
int alt_flt_enabled = mbd->segmentation_enabled;
|
||||
int frame_type = cm->frame_type;
|
||||
FRAME_TYPE frame_type = cm->frame_type;
|
||||
|
||||
(void) sharpness_lvl;
|
||||
|
||||
//MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
|
||||
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list
|
||||
/*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
|
||||
mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */
|
||||
|
||||
linestocopy = (post->y_height >> (4 + Fraction));
|
||||
|
||||
|
@ -539,19 +541,19 @@ void vp8_loop_filter_partial_frame
|
|||
|
||||
linestocopy <<= 4;
|
||||
|
||||
// Note the baseline filter values for each segment
|
||||
/* Note the baseline filter values for each segment */
|
||||
if (alt_flt_enabled)
|
||||
{
|
||||
for (i = 0; i < MAX_MB_SEGMENTS; i++)
|
||||
{
|
||||
// Abs value
|
||||
/* Abs value */
|
||||
if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
|
||||
baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
// Delta Value
|
||||
/* Delta Value */
|
||||
else
|
||||
{
|
||||
baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range
|
||||
baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -561,16 +563,16 @@ void vp8_loop_filter_partial_frame
|
|||
baseline_filter_level[i] = default_filt_lvl;
|
||||
}
|
||||
|
||||
// Initialize the loop filter for this frame.
|
||||
/* Initialize the loop filter for this frame. */
|
||||
if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
|
||||
vp8_init_loop_filter(cm);
|
||||
else if (frame_type != cm->last_frame_type)
|
||||
vp8_frame_init_loop_filter(lfi, frame_type);
|
||||
|
||||
// Set up the buffer pointers
|
||||
/* Set up the buffer pointers */
|
||||
y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
|
||||
{
|
||||
for (mb_col = 0; mb_col < mb_cols; mb_col++)
|
||||
|
@ -593,10 +595,10 @@ void vp8_loop_filter_partial_frame
|
|||
}
|
||||
|
||||
y_ptr += 16;
|
||||
mbd->mode_info_context += 1; // step to next MB
|
||||
mbd->mode_info_context += 1; /* step to next MB */
|
||||
}
|
||||
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
mbd->mode_info_context += 1; // Skip border mb
|
||||
mbd->mode_info_context += 1; /* Skip border mb */
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -22,10 +22,10 @@ typedef enum
|
|||
SIMPLE_LOOPFILTER = 1
|
||||
} LOOPFILTERTYPE;
|
||||
|
||||
// FRK
|
||||
// Need to align this structure so when it is declared and
|
||||
// passed it can be loaded into vector registers.
|
||||
// FRK
|
||||
/* FRK
|
||||
* Need to align this structure so when it is declared and
|
||||
* passed it can be loaded into vector registers.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
DECLARE_ALIGNED(16, signed char, lim[16]);
|
||||
|
@ -119,8 +119,8 @@ typedef struct
|
|||
|
||||
typedef void loop_filter_uvfunction
|
||||
(
|
||||
unsigned char *u, // source pointer
|
||||
int p, // pitch
|
||||
unsigned char *u, /* source pointer */
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -17,8 +17,6 @@
|
|||
#define __inline inline
|
||||
#endif
|
||||
|
||||
#define NEW_LOOPFILTER_MASK
|
||||
|
||||
typedef unsigned char uc;
|
||||
|
||||
static __inline signed char vp8_signed_char_clamp(int t)
|
||||
|
@ -29,7 +27,7 @@ static __inline signed char vp8_signed_char_clamp(int t)
|
|||
}
|
||||
|
||||
|
||||
// should we apply any filter at all ( 11111111 yes, 00000000 no)
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
|
||||
uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
|
||||
{
|
||||
|
@ -40,16 +38,12 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
|
|||
mask |= (abs(q1 - q0) > limit) * -1;
|
||||
mask |= (abs(q2 - q1) > limit) * -1;
|
||||
mask |= (abs(q3 - q2) > limit) * -1;
|
||||
#ifndef NEW_LOOPFILTER_MASK
|
||||
mask |= (abs(p0 - q0) > flimit) * -1;
|
||||
#else
|
||||
mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1;
|
||||
#endif
|
||||
mask = ~mask;
|
||||
return mask;
|
||||
}
|
||||
|
||||
// is there high variance internal edge ( 11111111 yes, 00000000 no)
|
||||
/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
signed char hev = 0;
|
||||
|
@ -71,17 +65,18 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
|
|||
qs0 = (signed char) * oq0 ^ 0x80;
|
||||
qs1 = (signed char) * oq1 ^ 0x80;
|
||||
|
||||
// add outer taps if we have high edge variance
|
||||
/* add outer taps if we have high edge variance */
|
||||
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
|
||||
vp8_filter &= hev;
|
||||
|
||||
// inner taps
|
||||
/* inner taps */
|
||||
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
|
||||
vp8_filter &= mask;
|
||||
|
||||
// save bottom 3 bits so that we round one side +4 and the other +3
|
||||
// if it equals 4 we'll set to adjust by -1 to account for the fact
|
||||
// we'd round 3 the other way
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3
|
||||
* if it equals 4 we'll set to adjust by -1 to account for the fact
|
||||
* we'd round 3 the other way
|
||||
*/
|
||||
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
|
||||
Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
|
||||
Filter1 >>= 3;
|
||||
|
@ -92,7 +87,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
|
|||
*op0 = u ^ 0x80;
|
||||
vp8_filter = Filter1;
|
||||
|
||||
// outer tap adjustments
|
||||
/* outer tap adjustments */
|
||||
vp8_filter += 1;
|
||||
vp8_filter >>= 1;
|
||||
vp8_filter &= ~hev;
|
||||
|
@ -106,19 +101,20 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
|
|||
void vp8_loop_filter_horizontal_edge_c
|
||||
(
|
||||
unsigned char *s,
|
||||
int p, //pitch
|
||||
int p, /* pitch */
|
||||
const signed char *flimit,
|
||||
const signed char *limit,
|
||||
const signed char *thresh,
|
||||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; // high edge variance
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
// loop filter designed to work using chars so that we can make maximum use
|
||||
// of 8 bit simd instructions.
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
|
@ -144,12 +140,13 @@ void vp8_loop_filter_vertical_edge_c
|
|||
int count
|
||||
)
|
||||
{
|
||||
int hev = 0; // high edge variance
|
||||
int hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
// loop filter designed to work using chars so that we can make maximum use
|
||||
// of 8 bit simd instructions.
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
mask = vp8_filter_mask(limit[i], flimit[i],
|
||||
|
@ -176,7 +173,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
|||
signed char qs1 = (signed char) * oq1 ^ 0x80;
|
||||
signed char qs2 = (signed char) * oq2 ^ 0x80;
|
||||
|
||||
// add outer taps if we have high edge variance
|
||||
/* add outer taps if we have high edge variance */
|
||||
vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
|
||||
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
|
||||
vp8_filter &= mask;
|
||||
|
@ -184,7 +181,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
|||
Filter2 = vp8_filter;
|
||||
Filter2 &= hev;
|
||||
|
||||
// save bottom 3 bits so that we round one side +4 and the other +3
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(Filter2 + 4);
|
||||
Filter2 = vp8_signed_char_clamp(Filter2 + 3);
|
||||
Filter1 >>= 3;
|
||||
|
@ -193,25 +190,25 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
|
|||
ps0 = vp8_signed_char_clamp(ps0 + Filter2);
|
||||
|
||||
|
||||
// only apply wider filter if not high edge variance
|
||||
/* only apply wider filter if not high edge variance */
|
||||
vp8_filter &= ~hev;
|
||||
Filter2 = vp8_filter;
|
||||
|
||||
// roughly 3/7th difference across boundary
|
||||
/* roughly 3/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
|
||||
s = vp8_signed_char_clamp(qs0 - u);
|
||||
*oq0 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps0 + u);
|
||||
*op0 = s ^ 0x80;
|
||||
|
||||
// roughly 2/7th difference across boundary
|
||||
/* roughly 2/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
|
||||
s = vp8_signed_char_clamp(qs1 - u);
|
||||
*oq1 = s ^ 0x80;
|
||||
s = vp8_signed_char_clamp(ps1 + u);
|
||||
*op1 = s ^ 0x80;
|
||||
|
||||
// roughly 1/7th difference across boundary
|
||||
/* roughly 1/7th difference across boundary */
|
||||
u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
|
||||
s = vp8_signed_char_clamp(qs2 - u);
|
||||
*oq2 = s ^ 0x80;
|
||||
|
@ -229,12 +226,13 @@ void vp8_mbloop_filter_horizontal_edge_c
|
|||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; // high edge variance
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
// loop filter designed to work using chars so that we can make maximum use
|
||||
// of 8 bit simd instructions.
|
||||
/* loop filter designed to work using chars so that we can make maximum use
|
||||
* of 8 bit simd instructions.
|
||||
*/
|
||||
do
|
||||
{
|
||||
|
||||
|
@ -263,7 +261,7 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
int count
|
||||
)
|
||||
{
|
||||
signed char hev = 0; // high edge variance
|
||||
signed char hev = 0; /* high edge variance */
|
||||
signed char mask = 0;
|
||||
int i = 0;
|
||||
|
||||
|
@ -283,17 +281,14 @@ void vp8_mbloop_filter_vertical_edge_c
|
|||
|
||||
}
|
||||
|
||||
// should we apply any filter at all ( 11111111 yes, 00000000 no)
|
||||
/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
|
||||
static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
|
||||
{
|
||||
// Why does this cause problems for win32?
|
||||
// error C2143: syntax error : missing ';' before 'type'
|
||||
// (void) limit;
|
||||
#ifndef NEW_LOOPFILTER_MASK
|
||||
signed char mask = (abs(p0 - q0) <= flimit) * -1;
|
||||
#else
|
||||
/* Why does this cause problems for win32?
|
||||
* error C2143: syntax error : missing ';' before 'type'
|
||||
* (void) limit;
|
||||
*/
|
||||
signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1;
|
||||
#endif
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
@ -310,7 +305,7 @@ static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *o
|
|||
vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
|
||||
vp8_filter &= mask;
|
||||
|
||||
// save bottom 3 bits so that we round one side +4 and the other +3
|
||||
/* save bottom 3 bits so that we round one side +4 and the other +3 */
|
||||
Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
|
||||
Filter1 >>= 3;
|
||||
u = vp8_signed_char_clamp(q0 - Filter1);
|
||||
|
@ -338,7 +333,7 @@ void vp8_loop_filter_simple_horizontal_edge_c
|
|||
|
||||
do
|
||||
{
|
||||
//mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
|
||||
vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
|
||||
++s;
|
||||
|
@ -362,7 +357,7 @@ void vp8_loop_filter_simple_vertical_edge_c
|
|||
|
||||
do
|
||||
{
|
||||
//mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
|
||||
/*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
|
||||
mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
|
||||
vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
|
||||
s += p;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -14,7 +14,7 @@
|
|||
typedef enum
|
||||
{
|
||||
PRED = 0,
|
||||
DEST = 1,
|
||||
DEST = 1
|
||||
} BLOCKSET;
|
||||
|
||||
void vp8_setup_block
|
||||
|
@ -62,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
|
|||
v = &x->pre.v_buffer;
|
||||
}
|
||||
|
||||
for (block = 0; block < 16; block++) // y blocks
|
||||
for (block = 0; block < 16; block++) /* y blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
|
||||
(block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
|
||||
}
|
||||
|
||||
for (block = 16; block < 20; block++) // U and V blocks
|
||||
for (block = 16; block < 20; block++) /* U and V blocks */
|
||||
{
|
||||
vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
|
||||
((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
|
||||
|
@ -123,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
|
|||
void vp8_build_block_doffsets(MACROBLOCKD *x)
|
||||
{
|
||||
|
||||
// handle the destination pitch features
|
||||
/* handle the destination pitch features */
|
||||
vp8_setup_macroblock(x, DEST);
|
||||
vp8_setup_macroblock(x, PRED);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -14,27 +14,27 @@
|
|||
const int vp8_mode_contexts[6][4] =
|
||||
{
|
||||
{
|
||||
// 0
|
||||
/* 0 */
|
||||
7, 1, 1, 143,
|
||||
},
|
||||
{
|
||||
// 1
|
||||
/* 1 */
|
||||
14, 18, 14, 107,
|
||||
},
|
||||
{
|
||||
// 2
|
||||
/* 2 */
|
||||
135, 64, 57, 68,
|
||||
},
|
||||
{
|
||||
// 3
|
||||
/* 3 */
|
||||
60, 56, 128, 65,
|
||||
},
|
||||
{
|
||||
// 4
|
||||
/* 4 */
|
||||
159, 134, 128, 34,
|
||||
},
|
||||
{
|
||||
// 5
|
||||
/* 5 */
|
||||
234, 188, 128, 28,
|
||||
},
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -14,133 +14,133 @@
|
|||
const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
|
||||
{
|
||||
{
|
||||
//Above Mode : 0
|
||||
{ 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, // left_mode 0
|
||||
{ 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, // left_mode 1
|
||||
{ 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, // left_mode 2
|
||||
{ 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, // left_mode 3
|
||||
{ 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, // left_mode 4
|
||||
{ 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, // left_mode 5
|
||||
{ 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, // left_mode 6
|
||||
{ 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, // left_mode 7
|
||||
{ 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, // left_mode 8
|
||||
{ 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, // left_mode 9
|
||||
/*Above Mode : 0*/
|
||||
{ 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */
|
||||
{ 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */
|
||||
{ 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */
|
||||
{ 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */
|
||||
{ 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */
|
||||
{ 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */
|
||||
{ 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */
|
||||
{ 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */
|
||||
{ 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */
|
||||
{ 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 1
|
||||
{ 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, // left_mode 0
|
||||
{ 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, // left_mode 1
|
||||
{ 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, // left_mode 2
|
||||
{ 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, // left_mode 3
|
||||
{ 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, // left_mode 4
|
||||
{ 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, // left_mode 5
|
||||
{ 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, // left_mode 6
|
||||
{ 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, // left_mode 7
|
||||
{ 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, // left_mode 8
|
||||
{ 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, // left_mode 9
|
||||
/*Above Mode : 1*/
|
||||
{ 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */
|
||||
{ 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */
|
||||
{ 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */
|
||||
{ 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */
|
||||
{ 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */
|
||||
{ 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */
|
||||
{ 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */
|
||||
{ 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */
|
||||
{ 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */
|
||||
{ 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 2
|
||||
{ 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, // left_mode 0
|
||||
{ 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, // left_mode 1
|
||||
{ 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, // left_mode 2
|
||||
{ 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, // left_mode 3
|
||||
{ 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, // left_mode 4
|
||||
{ 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, // left_mode 5
|
||||
{ 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, // left_mode 6
|
||||
{ 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, // left_mode 7
|
||||
{ 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, // left_mode 8
|
||||
{ 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, // left_mode 9
|
||||
/*Above Mode : 2*/
|
||||
{ 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */
|
||||
{ 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */
|
||||
{ 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */
|
||||
{ 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */
|
||||
{ 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */
|
||||
{ 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, /* left_mode 5 */
|
||||
{ 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */
|
||||
{ 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */
|
||||
{ 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */
|
||||
{ 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 3
|
||||
{ 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, // left_mode 0
|
||||
{ 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, // left_mode 1
|
||||
{ 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, // left_mode 2
|
||||
{ 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, // left_mode 3
|
||||
{ 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, // left_mode 4
|
||||
{ 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, // left_mode 5
|
||||
{ 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, // left_mode 6
|
||||
{ 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, // left_mode 7
|
||||
{ 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, // left_mode 8
|
||||
{ 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, // left_mode 9
|
||||
/*Above Mode : 3*/
|
||||
{ 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */
|
||||
{ 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */
|
||||
{ 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */
|
||||
{ 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */
|
||||
{ 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */
|
||||
{ 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */
|
||||
{ 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */
|
||||
{ 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */
|
||||
{ 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */
|
||||
{ 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 4
|
||||
{ 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, // left_mode 0
|
||||
{ 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, // left_mode 1
|
||||
{ 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, // left_mode 2
|
||||
{ 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, // left_mode 3
|
||||
{ 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, // left_mode 4
|
||||
{ 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, // left_mode 5
|
||||
{ 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, // left_mode 6
|
||||
{ 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, // left_mode 7
|
||||
{ 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, // left_mode 8
|
||||
{ 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, // left_mode 9
|
||||
/*Above Mode : 4*/
|
||||
{ 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */
|
||||
{ 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */
|
||||
{ 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */
|
||||
{ 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */
|
||||
{ 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */
|
||||
{ 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */
|
||||
{ 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */
|
||||
{ 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */
|
||||
{ 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */
|
||||
{ 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 5
|
||||
{ 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, // left_mode 0
|
||||
{ 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, // left_mode 1
|
||||
{ 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, // left_mode 2
|
||||
{ 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, // left_mode 3
|
||||
{ 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, // left_mode 4
|
||||
{ 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, // left_mode 5
|
||||
{ 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, // left_mode 6
|
||||
{ 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, // left_mode 7
|
||||
{ 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, // left_mode 8
|
||||
{ 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, // left_mode 9
|
||||
/*Above Mode : 5*/
|
||||
{ 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */
|
||||
{ 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */
|
||||
{ 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */
|
||||
{ 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */
|
||||
{ 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */
|
||||
{ 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */
|
||||
{ 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */
|
||||
{ 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */
|
||||
{ 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */
|
||||
{ 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 6
|
||||
{ 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, // left_mode 0
|
||||
{ 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, // left_mode 1
|
||||
{ 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, // left_mode 2
|
||||
{ 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, // left_mode 3
|
||||
{ 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, // left_mode 4
|
||||
{ 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, // left_mode 5
|
||||
{ 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, // left_mode 6
|
||||
{ 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, // left_mode 7
|
||||
{ 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, // left_mode 8
|
||||
{ 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, // left_mode 9
|
||||
/*Above Mode : 6*/
|
||||
{ 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */
|
||||
{ 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */
|
||||
{ 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */
|
||||
{ 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */
|
||||
{ 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */
|
||||
{ 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */
|
||||
{ 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */
|
||||
{ 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */
|
||||
{ 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */
|
||||
{ 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 7
|
||||
{ 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, // left_mode 0
|
||||
{ 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, // left_mode 1
|
||||
{ 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, // left_mode 2
|
||||
{ 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, // left_mode 3
|
||||
{ 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, // left_mode 4
|
||||
{ 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, // left_mode 5
|
||||
{ 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, // left_mode 6
|
||||
{ 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, // left_mode 7
|
||||
{ 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, // left_mode 8
|
||||
{ 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, // left_mode 9
|
||||
/*Above Mode : 7*/
|
||||
{ 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */
|
||||
{ 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */
|
||||
{ 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */
|
||||
{ 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */
|
||||
{ 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, /* left_mode 4 */
|
||||
{ 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */
|
||||
{ 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */
|
||||
{ 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */
|
||||
{ 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */
|
||||
{ 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 8
|
||||
{ 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, // left_mode 0
|
||||
{ 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, // left_mode 1
|
||||
{ 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, // left_mode 2
|
||||
{ 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, // left_mode 3
|
||||
{ 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, // left_mode 4
|
||||
{ 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, // left_mode 5
|
||||
{ 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, // left_mode 6
|
||||
{ 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, // left_mode 7
|
||||
{ 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, // left_mode 8
|
||||
{ 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, // left_mode 9
|
||||
/*Above Mode : 8*/
|
||||
{ 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */
|
||||
{ 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */
|
||||
{ 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */
|
||||
{ 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */
|
||||
{ 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */
|
||||
{ 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */
|
||||
{ 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */
|
||||
{ 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */
|
||||
{ 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */
|
||||
{ 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */
|
||||
},
|
||||
{
|
||||
//Above Mode : 9
|
||||
{ 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, // left_mode 0
|
||||
{ 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, // left_mode 1
|
||||
{ 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, // left_mode 2
|
||||
{ 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, // left_mode 3
|
||||
{ 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, // left_mode 4
|
||||
{ 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, // left_mode 5
|
||||
{ 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, // left_mode 6
|
||||
{ 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, // left_mode 7
|
||||
{ 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, // left_mode 8
|
||||
{ 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, // left_mode 9
|
||||
/*Above Mode : 9*/
|
||||
{ 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */
|
||||
{ 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */
|
||||
{ 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */
|
||||
{ 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */
|
||||
{ 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */
|
||||
{ 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */
|
||||
{ 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */
|
||||
{ 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */
|
||||
{ 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */
|
||||
{ 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */
|
||||
},
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -21,9 +21,9 @@
|
|||
#include "recon.h"
|
||||
#include "postproc.h"
|
||||
|
||||
//#ifdef PACKET_TESTING
|
||||
/*#ifdef PACKET_TESTING*/
|
||||
#include "header.h"
|
||||
//#endif
|
||||
/*#endif*/
|
||||
|
||||
/* Create/destroy static data structures. */
|
||||
|
||||
|
@ -43,7 +43,7 @@ typedef struct frame_contexts
|
|||
vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
|
||||
vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
|
||||
MV_CONTEXT mvc[2];
|
||||
MV_CONTEXT pre_mvc[2]; //not to caculate the mvcost for the frame if mvc doesn't change.
|
||||
MV_CONTEXT pre_mvc[2]; /* not to calculate the mvcost for the frame if mvc doesn't change. */
|
||||
} FRAME_CONTEXT;
|
||||
|
||||
typedef enum
|
||||
|
@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
|
|||
vp8_subpix_rtcd_vtable_t subpix;
|
||||
vp8_loopfilter_rtcd_vtable_t loopfilter;
|
||||
vp8_postproc_rtcd_vtable_t postproc;
|
||||
int flags;
|
||||
#else
|
||||
int unused;
|
||||
#endif
|
||||
|
@ -83,9 +84,9 @@ typedef struct VP8Common
|
|||
{
|
||||
struct vpx_internal_error_info error;
|
||||
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
|
||||
DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
|
||||
DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
|
||||
|
||||
int Width;
|
||||
int Height;
|
||||
|
@ -104,7 +105,7 @@ typedef struct VP8Common
|
|||
YV12_BUFFER_CONFIG post_proc_buffer;
|
||||
YV12_BUFFER_CONFIG temp_scale_frame;
|
||||
|
||||
FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skipped.
|
||||
FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
|
||||
FRAME_TYPE frame_type;
|
||||
|
||||
int show_frame;
|
||||
|
@ -115,7 +116,7 @@ typedef struct VP8Common
|
|||
int mb_cols;
|
||||
int mode_info_stride;
|
||||
|
||||
// prfile settings
|
||||
/* profile settings */
|
||||
int mb_no_coeff_skip;
|
||||
int no_lpf;
|
||||
int simpler_lpf;
|
||||
|
@ -123,7 +124,7 @@ typedef struct VP8Common
|
|||
int full_pixel;
|
||||
|
||||
int base_qindex;
|
||||
int last_kf_gf_q; // Q used on the last GF or KF
|
||||
int last_kf_gf_q; /* Q used on the last GF or KF */
|
||||
|
||||
int y1dc_delta_q;
|
||||
int y2dc_delta_q;
|
||||
|
@ -153,31 +154,31 @@ typedef struct VP8Common
|
|||
int last_sharpness_level;
|
||||
int sharpness_level;
|
||||
|
||||
int refresh_last_frame; // Two state 0 = NO, 1 = YES
|
||||
int refresh_golden_frame; // Two state 0 = NO, 1 = YES
|
||||
int refresh_alt_ref_frame; // Two state 0 = NO, 1 = YES
|
||||
int refresh_last_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */
|
||||
int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int copy_buffer_to_gf; // 0 none, 1 Last to GF, 2 ARF to GF
|
||||
int copy_buffer_to_arf; // 0 none, 1 Last to ARF, 2 GF to ARF
|
||||
int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */
|
||||
int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */
|
||||
|
||||
int refresh_entropy_probs; // Two state 0 = NO, 1 = YES
|
||||
int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */
|
||||
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1
|
||||
int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */
|
||||
|
||||
// Y,U,V,Y2
|
||||
ENTROPY_CONTEXT_PLANES *above_context; // row of context for each plane
|
||||
ENTROPY_CONTEXT_PLANES left_context; // (up to) 4 contexts ""
|
||||
/* Y,U,V,Y2 */
|
||||
ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */
|
||||
ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */
|
||||
|
||||
|
||||
// keyframe block modes are predicted by their above, left neighbors
|
||||
/* keyframe block modes are predicted by their above, left neighbors */
|
||||
|
||||
vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
|
||||
vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */
|
||||
vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
|
||||
|
||||
|
||||
FRAME_CONTEXT lfc; // last frame entropy
|
||||
FRAME_CONTEXT fc; // this frame entropy
|
||||
FRAME_CONTEXT lfc; /* last frame entropy */
|
||||
FRAME_CONTEXT fc; /* this frame entropy */
|
||||
|
||||
unsigned int current_video_frame;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -19,7 +19,35 @@
|
|||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
// global constants
|
||||
|
||||
/* RGB_TO_YUV(t): expands to three comma-separated floating-point expressions
 * computed from t, which is read as a packed 24-bit value: t>>16 is the top
 * byte, t>>8&0xff the middle byte and t&0xff the low byte (0xRRGGBB).
 * The expansion is meant to be dropped into a brace initializer to fill one
 * three-element row, as the colour tables below do.
 * NOTE(review): the coefficients and the +16/+128 offsets look like the
 * BT.601 studio-range RGB -> Y, Cb, Cr matrix -- confirm.
 * NOTE(review): t is not parenthesized inside the expansion, so only pass a
 * plain constant or identifier, never an expression with operators.
 */
#define RGB_TO_YUV(t) \
|
||||
( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16), \
|
||||
(-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
|
||||
( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
|
||||
|
||||
/* global constants */
|
||||
|
||||
/* Colour table: one three-byte row per entry, each row produced by
 * RGB_TO_YUV from a 0xRRGGBB constant (the trailing comment names the RGB
 * colour). Ten rows are initialised here.
 * NOTE(review): presumably indexed by macroblock prediction mode, since the
 * first dimension is MB_MODE_COUNT -- confirm against the code that reads it.
 */
static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x98FB98) }, /* PaleGreen */
|
||||
{ RGB_TO_YUV(0x00FF00) }, /* Green */
|
||||
{ RGB_TO_YUV(0xADFF2F) }, /* GreenYellow */
|
||||
{ RGB_TO_YUV(0x228B22) }, /* ForestGreen */
|
||||
{ RGB_TO_YUV(0x006400) }, /* DarkGreen */
|
||||
{ RGB_TO_YUV(0x98F5FF) }, /* Cadet Blue */
|
||||
{ RGB_TO_YUV(0x6CA6CD) }, /* Sky Blue */
|
||||
{ RGB_TO_YUV(0x00008B) }, /* Dark blue */
|
||||
{ RGB_TO_YUV(0x551A8B) }, /* Purple */
|
||||
{ RGB_TO_YUV(0xFF0000) } /* Red */
|
||||
};
|
||||
|
||||
/* Overlay colors for the reference-frame debug view: one { Y, U, V } triple
 * per reference frame. vp8_post_proc_frame indexes this table with
 * mi->mbmi.ref_frame and passes the triple to the blend_mb hook.
 * Each row is written as a packed 0xRRGGBB value and converted by
 * RGB_TO_YUV; the trailing comment names the RGB color.
 *
 * NOTE(review): the first dimension is MB_MODE_COUNT although only four
 * rows are initialized and the index is a reference frame, not a mode;
 * the remaining rows are zero-filled. Confirm whether a reference-frame
 * count was intended.
 */
static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
|
||||
{
|
||||
{ RGB_TO_YUV(0x00ff00) }, /* Green */
|
||||
{ RGB_TO_YUV(0x0000ff) }, /* Blue */
|
||||
{ RGB_TO_YUV(0xffff00) }, /* Yellow */
|
||||
{ RGB_TO_YUV(0xff0000) }, /* Red */
|
||||
};
|
||||
|
||||
static const short kernel5[] =
|
||||
{
|
||||
|
@ -76,7 +104,7 @@ const short vp8_rv[] =
|
|||
|
||||
|
||||
extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
|
||||
|
||||
extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
|
||||
/***********************************************************************************************************
|
||||
*/
|
||||
void vp8_post_proc_down_and_across_c
|
||||
|
@ -101,7 +129,7 @@ void vp8_post_proc_down_and_across_c
|
|||
|
||||
for (row = 0; row < rows; row++)
|
||||
{
|
||||
// post_proc_down for one row
|
||||
/* post_proc_down for one row */
|
||||
p_src = src_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
|
@ -124,7 +152,7 @@ void vp8_post_proc_down_and_across_c
|
|||
p_dst[col] = v;
|
||||
}
|
||||
|
||||
// now post_proc_across
|
||||
/* now post_proc_across */
|
||||
p_src = dst_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
|
@ -153,12 +181,12 @@ void vp8_post_proc_down_and_across_c
|
|||
p_dst[col-2] = d[(col-2)&7];
|
||||
}
|
||||
|
||||
//handle the last two pixels
|
||||
/* handle the last two pixels */
|
||||
p_dst[col-2] = d[(col-2)&7];
|
||||
p_dst[col-1] = d[(col-1)&7];
|
||||
|
||||
|
||||
//next row
|
||||
/* next row */
|
||||
src_ptr += pitch;
|
||||
dst_ptr += pitch;
|
||||
}
|
||||
|
@ -351,9 +379,9 @@ static void fillrd(struct postproc_state *state, int q, int a)
|
|||
|
||||
sigma = ai + .5 + .6 * (63 - qi) / 63.0;
|
||||
|
||||
// set up a lookup table of 256 entries that matches
|
||||
// a gaussian distribution with sigma determined by q.
|
||||
//
|
||||
/* set up a lookup table of 256 entries that matches
|
||||
* a gaussian distribution with sigma determined by q.
|
||||
*/
|
||||
{
|
||||
double i;
|
||||
int next, j;
|
||||
|
@ -444,6 +472,89 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
|||
}
|
||||
}
|
||||
|
||||
/* Tint one macroblock with a solid Y/U/V color.
 *
 * alpha is a 16-bit fixed-point weight applied to the existing pixel;
 * the solid color receives the complementary weight (1<<16) - alpha.
 * Only an interior window is touched so that neighbouring macroblocks
 * painted with the same color remain visually separated:
 *   - luma:   14x14 pixels starting one row down and two columns in;
 *   - chroma:  6x6 pixels starting one row down and one column in,
 *              addressed with half the luma stride.
 *
 * y, u, v  point at the top-left pixel of the macroblock in each plane.
 * y1/u1/v1 are the solid color components.
 * stride   is the luma plane stride.
 */
void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
                     int y1, int u1, int v1, int alpha, int stride)
{
    /* Pre-scale the solid color once; it is constant for every pixel. */
    const int color_weight = (1<<16) - alpha;
    const int y_term = y1 * color_weight;
    const int u_term = u1 * color_weight;
    const int v_term = v1 * color_weight;
    const int uv_stride = stride >> 1;

    unsigned char *y_row = y + stride + 2;
    unsigned char *u_row = u + uv_stride + 1;
    unsigned char *v_row = v + uv_stride + 1;
    int row, col;

    /* Luma window. */
    for (row = 0; row < 14; row++, y_row += stride)
    {
        for (col = 0; col < 14; col++)
        {
            y_row[col] = (y_row[col]*alpha + y_term) >> 16;
        }
    }

    /* Chroma windows, processed pixel by pixel in U-then-V order. */
    for (row = 0; row < 6; row++, u_row += uv_stride, v_row += uv_stride)
    {
        for (col = 0; col < 6; col++)
        {
            u_row[col] = (u_row[col]*alpha + u_term) >> 16;
            v_row[col] = (v_row[col]*alpha + v_term) >> 16;
        }
    }
}
|
||||
|
||||
/* Clamp the end point (*x1, *y1) of the line starting at (x0, y0) to the
 * rectangle [0, width] x [0, height], moving the other coordinate along
 * the line so the direction is kept.
 *
 * The four clamps are applied in sequence (right, left, bottom, top), each
 * one recomputing the deltas from the possibly already-adjusted end point.
 * The start point is never modified.
 *
 * Each interpolation divides by the delta along the axis being clamped, so
 * that delta is what must be non-zero. When it is zero the line is parallel
 * to the boundary being clamped against and the other coordinate is left
 * unchanged. (When the other delta is zero the interpolation evaluates to
 * the unchanged coordinate anyway, so no separate guard is needed.)
 */
static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
{
    int dx;
    int dy;

    if (*x1 > width)
    {
        dx = *x1 - x0;
        dy = *y1 - y0;

        *x1 = width;
        if (dx)
            *y1 = ((width-x0)*dy)/dx + y0;
    }
    if (*x1 < 0)
    {
        dx = *x1 - x0;
        dy = *y1 - y0;

        *x1 = 0;
        if (dx)
            *y1 = ((0-x0)*dy)/dx + y0;
    }
    if (*y1 > height)
    {
        dx = *x1 - x0;
        dy = *y1 - y0;

        *y1 = height;
        if (dy)
            *x1 = ((height-y0)*dx)/dy + x0;
    }
    if (*y1 < 0)
    {
        dx = *x1 - x0;
        dy = *y1 - y0;

        *y1 = 0;
        if (dy)
            *x1 = ((0-y0)*dx)/dy + x0;
    }
}
|
||||
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
|
||||
#else
|
||||
|
@ -465,7 +576,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
{
|
||||
*dest = *oci->frame_to_show;
|
||||
|
||||
// handle problem with extending borders
|
||||
/* handle problem with extending borders */
|
||||
dest->y_width = oci->Width;
|
||||
dest->y_height = oci->Height;
|
||||
dest->uv_height = dest->y_height / 2;
|
||||
|
@ -521,7 +632,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
oci->mb_cols, oci->mb_rows);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
}
|
||||
else if (flags & VP8D_DEBUG_LEVEL2)
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL2)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
|
@ -533,7 +645,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
|
@ -547,12 +659,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; //border
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
}
|
||||
else if (flags & VP8D_DEBUG_LEVEL3)
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL3)
|
||||
{
|
||||
int i, j;
|
||||
unsigned char *y_ptr;
|
||||
|
@ -564,7 +677,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
|
@ -581,12 +694,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; //border
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
}
|
||||
else if (flags & VP8D_DEBUG_LEVEL4)
|
||||
|
||||
if (flags & VP8D_DEBUG_LEVEL4)
|
||||
{
|
||||
sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
|
||||
vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
|
||||
|
@ -601,7 +715,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
|
||||
y_ptr = post->y_buffer + 4 * post->y_stride + 4;
|
||||
|
||||
// vp8_filter each macro block
|
||||
/* vp8_filter each macro block */
|
||||
for (i = 0; i < mb_rows; i++)
|
||||
{
|
||||
for (j = 0; j < mb_cols; j++)
|
||||
|
@ -614,7 +728,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
y_ptr += 16;
|
||||
}
|
||||
|
||||
mb_index ++; //border
|
||||
mb_index ++; /* border */
|
||||
y_ptr += post->y_stride * 16 - post->y_width;
|
||||
|
||||
}
|
||||
|
@ -623,11 +737,122 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
|
|||
|
||||
}
|
||||
|
||||
/* Draw motion vectors */
|
||||
if (flags & VP8D_DEBUG_LEVEL5)
|
||||
{
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
int mb_cols = width >> 4;
|
||||
unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
int x0, y0;
|
||||
|
||||
for (y0 = 8; y0 < (height + 8); y0 += 16)
|
||||
{
|
||||
for (x0 = 8; x0 < (width + 8); x0 += 16)
|
||||
{
|
||||
int x1, y1;
|
||||
if (mi->mbmi.mode >= NEARESTMV)
|
||||
{
|
||||
MV *mv = &mi->mbmi.mv.as_mv;
|
||||
|
||||
x1 = x0 + (mv->col >> 3);
|
||||
y1 = y0 + (mv->row >> 3);
|
||||
|
||||
if (x1 != x0 && y1 != y0)
|
||||
{
|
||||
constrain_line (x0, &x1, y0-1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0-1, y1, y_buffer, y_stride);
|
||||
|
||||
constrain_line (x0, &x1, y0+1, &y1, width, height);
|
||||
vp8_blit_line (x0, x1, y0+1, y1, y_buffer, y_stride);
|
||||
}
|
||||
else
|
||||
vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
|
||||
}
|
||||
mi++;
|
||||
}
|
||||
mi++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Color in block modes */
|
||||
if (flags & VP8D_DEBUG_LEVEL6)
|
||||
{
|
||||
int i, j;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
|
||||
unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
|
||||
unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
{
|
||||
for (j = 0; j < width; j += 16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
|
||||
U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
|
||||
V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
|
||||
mi++;
|
||||
}
|
||||
y_ptr += y_stride*16;
|
||||
u_ptr += y_stride*4;
|
||||
v_ptr += y_stride*4;
|
||||
|
||||
mi++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Color in frame reference blocks */
|
||||
if (flags & VP8D_DEBUG_LEVEL7)
|
||||
{
|
||||
int i, j;
|
||||
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
|
||||
int width = post->y_width;
|
||||
int height = post->y_height;
|
||||
unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
|
||||
unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
|
||||
unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
|
||||
int y_stride = oci->post_proc_buffer.y_stride;
|
||||
MODE_INFO *mi = oci->mi;
|
||||
|
||||
for (i = 0; i < height; i += 16)
|
||||
{
|
||||
for (j = 0; j < width; j +=16)
|
||||
{
|
||||
int Y = 0, U = 0, V = 0;
|
||||
|
||||
Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
|
||||
U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
|
||||
V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
|
||||
|
||||
POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
|
||||
(&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
|
||||
|
||||
mi++;
|
||||
}
|
||||
y_ptr += y_stride*16;
|
||||
u_ptr += y_stride*4;
|
||||
v_ptr += y_stride*4;
|
||||
|
||||
mi++;
|
||||
}
|
||||
}
|
||||
|
||||
*dest = oci->post_proc_buffer;
|
||||
|
||||
// handle problem with extending borders
|
||||
/* handle problem with extending borders */
|
||||
dest->y_width = oci->Width;
|
||||
dest->y_height = oci->Height;
|
||||
dest->uv_height = dest->y_height / 2;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -24,6 +24,10 @@
|
|||
char whiteclamp[16], char bothclamp[16],\
|
||||
unsigned int w, unsigned int h, int pitch)
|
||||
|
||||
/* Declares the signature shared by every macroblock-blend implementation.
 * sym is the function (or function-pointer declarator) being declared;
 * y/u/v are the three plane pointers, y1/u1/v1 the solid color components,
 * alpha the blend weight and stride the luma stride.
 */
#define prototype_postproc_blend_mb(sym)\
|
||||
void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
|
||||
int y1, int u1, int v1, int alpha, int stride)
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
#include "x86/postproc_x86.h"
|
||||
#endif
|
||||
|
@ -48,16 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
|
|||
#endif
|
||||
extern prototype_postproc_addnoise(vp8_postproc_addnoise);
|
||||
|
||||
#ifndef vp8_postproc_blend_mb
|
||||
#define vp8_postproc_blend_mb vp8_blend_mb_c
|
||||
#endif
|
||||
extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
|
||||
|
||||
typedef prototype_postproc((*vp8_postproc_fn_t));
|
||||
typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
|
||||
typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
|
||||
typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
|
||||
/* Table of post-processing entry points, one function pointer per
 * operation. vp8_post_proc_frame reaches blend_mb through
 * POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb).
 */
typedef struct
|
||||
{
|
||||
vp8_postproc_inplace_fn_t down;
|
||||
vp8_postproc_inplace_fn_t across;
|
||||
vp8_postproc_fn_t downacross;
|
||||
vp8_postproc_addnoise_fn_t addnoise;
|
||||
/* Blends a solid color into one macroblock (debug overlays). */
vp8_postproc_blend_mb_fn_t blend_mb;
|
||||
} vp8_postproc_rtcd_vtable_t;
|
||||
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
|
@ -14,13 +14,16 @@
|
|||
enum
|
||||
{
|
||||
VP8D_NOFILTERING = 0,
|
||||
VP8D_DEBLOCK = 1,
|
||||
VP8D_DEMACROBLOCK = 2,
|
||||
VP8D_ADDNOISE = 4,
|
||||
VP8D_DEBUG_LEVEL1 = 8,
|
||||
VP8D_DEBUG_LEVEL2 = 16,
|
||||
VP8D_DEBUG_LEVEL3 = 32,
|
||||
VP8D_DEBUG_LEVEL4 = 64
|
||||
VP8D_DEBLOCK = 1<<0,
|
||||
VP8D_DEMACROBLOCK = 1<<1,
|
||||
VP8D_ADDNOISE = 1<<2,
|
||||
VP8D_DEBUG_LEVEL1 = 1<<3,
|
||||
VP8D_DEBUG_LEVEL2 = 1<<4,
|
||||
VP8D_DEBUG_LEVEL3 = 1<<5,
|
||||
VP8D_DEBUG_LEVEL4 = 1<<6,
|
||||
VP8D_DEBUG_LEVEL5 = 1<<7,
|
||||
VP8D_DEBUG_LEVEL6 = 1<<8,
|
||||
VP8D_DEBUG_LEVEL7 = 1<<9
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче