Rough merge of master into experimental
Creates a merge between the master and experimental branches. Fixes a number of conflicts in the build system to allow *either* VP8 or VP9 to be built. Specifically, either of the following now works: `$ configure --disable-vp9` or `$ configure --disable-vp8 --disable-unit-tests`. VP9 still exports its symbols and files as VP8; that will be resolved in the next commit. Unit tests are broken in VP9, but this isn't a new issue. They are fixed upstream on origin/experimental as of this writing, but rebasing this merge proved difficult, so that will be tackled in a second merge commit. Change-Id: I2b7d852c18efd58d1ebc621b8041fe0260442c21
This commit is contained in:
Коммит
7b8dfcb5a2
|
@ -32,6 +32,8 @@
|
|||
/ivfdec.dox
|
||||
/ivfenc
|
||||
/ivfenc.dox
|
||||
/libvpx.so*
|
||||
/libvpx.ver
|
||||
/obj_int_extract
|
||||
/postproc
|
||||
/postproc.c
|
||||
|
@ -43,12 +45,12 @@
|
|||
/simple_encoder
|
||||
/simple_encoder.c
|
||||
/simple_encoder.dox
|
||||
/test_libvpx
|
||||
/twopass_encoder
|
||||
/twopass_encoder.c
|
||||
/twopass_encoder.dox
|
||||
/vp8_api1_migration.dox
|
||||
/vp8_scalable_patterns
|
||||
/vp8_scalable_patterns.c
|
||||
/vp8_scalable_patterns.dox
|
||||
/vp8_set_maps
|
||||
/vp8_set_maps.c
|
||||
|
@ -56,7 +58,14 @@
|
|||
/vp8cx_set_ref
|
||||
/vp8cx_set_ref.c
|
||||
/vp8cx_set_ref.dox
|
||||
/vpx.pc
|
||||
/vpx_config.c
|
||||
/vpx_config.h
|
||||
/vpx_rtcd.h
|
||||
/vpx_version.h
|
||||
/vpxdec
|
||||
/vpxenc
|
||||
TAGS
|
||||
.cproject
|
||||
.project
|
||||
.settings
|
||||
|
|
3
.mailmap
3
.mailmap
|
@ -3,3 +3,6 @@ Johann Koenig <johannkoenig@google.com>
|
|||
Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
|
||||
Tom Finegan <tomfinegan@google.com>
|
||||
Ralph Giles <giles@xiph.org> <giles@entropywave.com>
|
||||
Ralph Giles <giles@xiph.org> <giles@mozilla.com>
|
||||
Alpha Lam <hclam@google.com> <hclam@chromium.org>
|
||||
Deb Mukherjee <debargha@google.com>
|
||||
|
|
8
AUTHORS
8
AUTHORS
|
@ -6,10 +6,12 @@ Adrian Grange <agrange@google.com>
|
|||
Alex Converse <alex.converse@gmail.com>
|
||||
Alexis Ballier <aballier@gentoo.org>
|
||||
Alok Ahuja <waveletcoeff@gmail.com>
|
||||
Alpha Lam <hclam@google.com>
|
||||
Andoni Morales Alastruey <ylatuya@gmail.com>
|
||||
Andres Mejia <mcitadel@gmail.com>
|
||||
Aron Rosenberg <arosenberg@logitech.com>
|
||||
Attila Nagy <attilanagy@google.com>
|
||||
Deb Mukherjee <debargha@google.com>
|
||||
Fabio Pedretti <fabio.ped@libero.it>
|
||||
Frank Galligan <fgalligan@google.com>
|
||||
Fredrik Söderquist <fs@opera.com>
|
||||
|
@ -21,6 +23,7 @@ Henrik Lundin <hlundin@google.com>
|
|||
James Berry <jamesberry@google.com>
|
||||
James Zern <jzern@google.com>
|
||||
Jan Kratochvil <jan.kratochvil@redhat.com>
|
||||
Jeff Faust <jfaust@google.com>
|
||||
Jeff Muizelaar <jmuizelaar@mozilla.com>
|
||||
Jim Bankoski <jimbankoski@google.com>
|
||||
Johann Koenig <johannkoenig@google.com>
|
||||
|
@ -28,9 +31,11 @@ John Koleszar <jkoleszar@google.com>
|
|||
Joshua Bleecher Snyder <josh@treelinelabs.com>
|
||||
Justin Clift <justin@salasaga.org>
|
||||
Justin Lebar <justin.lebar@gmail.com>
|
||||
KO Myung-Hun <komh@chollian.net>
|
||||
Lou Quillio <louquillio@google.com>
|
||||
Luca Barbato <lu_zero@gentoo.org>
|
||||
Makoto Kato <makoto.kt@gmail.com>
|
||||
Marco Paniconi <marpan@google.com>
|
||||
Martin Ettl <ettl.martin78@googlemail.com>
|
||||
Michael Kohler <michaelkohler@live.com>
|
||||
Mike Hommey <mhommey@mozilla.com>
|
||||
|
@ -40,12 +45,15 @@ Patrik Westin <patrik.westin@gmail.com>
|
|||
Paul Wilkins <paulwilkins@google.com>
|
||||
Pavol Rusnak <stick@gk2.sk>
|
||||
Philip Jägenstedt <philipj@opera.com>
|
||||
Priit Laes <plaes@plaes.org>
|
||||
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
|
||||
Rafaël Carré <funman@videolan.org>
|
||||
Ralph Giles <giles@xiph.org>
|
||||
Ronald S. Bultje <rbultje@google.com>
|
||||
Scott LaVarnway <slavarnway@google.com>
|
||||
Stefan Holmer <holmer@google.com>
|
||||
Taekhyun Kim <takim@nvidia.com>
|
||||
Takanori MATSUURA <t.matsuu@gmail.com>
|
||||
Tero Rintaluoma <teror@google.com>
|
||||
Thijs Vermeir <thijsvermeir@gmail.com>
|
||||
Timothy B. Terriberry <tterribe@xiph.org>
|
||||
|
|
140
CHANGELOG
140
CHANGELOG
|
@ -1,3 +1,143 @@
|
|||
2012-05-09 v1.1.0 "Eider"
|
||||
This introduces a number of enhancements, mostly focused on real-time
|
||||
encoding. In addition, it fixes a decoder bug (first introduced in
|
||||
Duclair) so all users of that release are encouraged to upgrade.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI and API compatible with Duclair (v1.0.0). Users
|
||||
of older releases should refer to the Upgrading notes in this
|
||||
document for that release.
|
||||
|
||||
This release introduces a new temporal denoiser, controlled by the
|
||||
VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
|
||||
currently take a strength parameter, so the control is effectively
|
||||
a boolean - zero (off) or non-zero (on). For compatibility with
|
||||
existing applications, the values accepted are the same as those
|
||||
for the spatial denoiser (0-6). The temporal denoiser is enabled
|
||||
by default, and the older spatial denoiser may be restored by
|
||||
configuring with --disable-temporal-denoising. The temporal denoiser
|
||||
is more computationally intensive than the spatial one.
|
||||
|
||||
This release removes support for a legacy, decode only API that was
|
||||
supported, but deprecated, at the initial release of libvpx
|
||||
(v0.9.0). This is not expected to have any impact. If you are
|
||||
impacted, you can apply a reversion to commit 2bf8fb58 locally.
|
||||
Please update to the latest libvpx API if you are affected.
|
||||
|
||||
- Enhancements:
|
||||
Adds a motion compensated temporal denoiser to the encoder, which
|
||||
gives higher quality than the older spatial denoiser. (See above
|
||||
for notes on upgrading).
|
||||
|
||||
In addition, support for new compilers and platforms was added,
|
||||
including:
|
||||
improved support for XCode
|
||||
Android x86 NDK build
|
||||
OS/2 support
|
||||
SunCC support
|
||||
|
||||
Changing resolution with vpx_codec_enc_config_set() is now
|
||||
supported. Previously, reinitializing the codec was required to
|
||||
change the input resolution.
|
||||
|
||||
The vpxenc application has initial support for producing multiple
|
||||
encodes from the same input in one call. Resizing is not yet
|
||||
supported, but varying other codec parameters is. Use -- to
|
||||
delineate output streams. Options persist from one stream to the
|
||||
next.
|
||||
|
||||
Also, the vpxenc application will now use a keyframe interval of
|
||||
5 seconds by default. Use the --kf-max-dist option to override.
|
||||
|
||||
- Speed:
|
||||
Decoder performance improved 2.5% versus Duclair. Encoder speed is
|
||||
consistent with Duclair for most material. Two pass encoding of
|
||||
slideshow-like material will see significant improvements.
|
||||
|
||||
Large realtime encoding speed gains at a small quality expense are
|
||||
possible by configuring the on-the-fly bitpacking experiment with
|
||||
--enable-onthefly-bitpacking. Realtime encoder can be up to 13%
|
||||
faster (ARM) depending on the number of threads and bitrate
|
||||
settings. This technique sees constant gain over the 5-16 speed
|
||||
range. For VC style input the loss seen is up to 0.2dB. See commit
|
||||
52cf4dca for further details.
|
||||
|
||||
- Quality:
|
||||
On the whole, quality is consistent with the Duclair release. Some
|
||||
tweaks:
|
||||
|
||||
Reduced blockiness in easy sections by applying a penalty to
|
||||
intra modes.
|
||||
|
||||
Improved quality of static sections (like slideshows) with
|
||||
two pass encoding.
|
||||
|
||||
Improved keyframe sizing with multiple temporal layers
|
||||
|
||||
- Bug Fixes:
|
||||
Corrected alt-ref contribution to frame rate for visible updates
|
||||
to the alt-ref buffer. This affected applications making manual
|
||||
usage of the frame reference flags, or temporal layers.
|
||||
|
||||
Additional constraints were added to disable multi-frame quality
|
||||
enhancement (MFQE) in sections of the frame where there is motion.
|
||||
(#392)
|
||||
|
||||
Fixed corruption issues when vpx_codec_enc_config_set() was called
|
||||
with spatial resampling enabled.
|
||||
|
||||
Fixed a decoder error introduced in Duclair where the segmentation
|
||||
map was not being reinitialized on keyframes (#378)
|
||||
|
||||
|
||||
2012-01-27 v1.0.0 "Duclair"
|
||||
Our fourth named release, focused on performance and features related to
|
||||
real-time encoding. It also fixes a decoder crash bug introduced in
|
||||
v0.9.7, so all users of that release are encouraged to upgrade.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI incompatible with prior releases of libvpx, so the
|
||||
"major" version number has been bumped to 1. You must recompile your
|
||||
applications against the latest version of the libvpx headers. The
|
||||
API remains compatible, and this should not require code changes in most
|
||||
applications.
|
||||
|
||||
- Enhancements:
|
||||
This release introduces several substantial new features to the encoder,
|
||||
of particular interest to real time streaming applications.
|
||||
|
||||
Temporal scalability allows the encoder to produce a stream that can
|
||||
be decimated to different frame rates, with independent rate targeting
|
||||
for each substream.
|
||||
|
||||
Multiframe quality enhancement postprocessing can make visual quality
|
||||
more consistent in the presence of frames that are substantially
|
||||
different quality than the surrounding frames, as in the temporal
|
||||
scalability case and in some forced keyframe scenarios.
|
||||
|
||||
Multiple-resolution encoding support allows the encoding of the
|
||||
same content at different resolutions faster than encoding them
|
||||
separately.
|
||||
|
||||
- Speed:
|
||||
Optimization targets for this release included the decoder and the real-
|
||||
time modes of the encoder. Decoder speed on x86 has improved 10.5% with
|
||||
this release. Encoder improvements followed a curve where speeds 1-3
|
||||
improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
|
||||
1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
|
||||
Cayuga release.
|
||||
|
||||
- Quality:
|
||||
Encoder quality in the single stream case is consistent with the Cayuga
|
||||
release.
|
||||
|
||||
- Bug Fixes:
|
||||
This release fixes an OOB read decoder crash bug present in v0.9.7
|
||||
related to the clamping of motion vectors in SPLITMV blocks. This
|
||||
behavior could be triggered by corrupt input or by starting
|
||||
decoding from a P-frame.
|
||||
|
||||
|
||||
2011-08-15 v0.9.7-p1 "Cayuga" patch 1
|
||||
This is an incremental bugfix release against Cayuga. All users of that
|
||||
release are strongly encouraged to upgrade.
|
||||
|
|
9
LICENSE
9
LICENSE
|
@ -1,4 +1,4 @@
|
|||
Copyright (c) 2010, Google Inc. All rights reserved.
|
||||
Copyright (c) 2010, The WebM Project authors. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
|
@ -12,9 +12,10 @@ met:
|
|||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
* Neither the name of Google nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
* Neither the name of Google, nor the WebM Project, nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
|
|
20
README
20
README
|
@ -1,5 +1,5 @@
|
|||
vpx Multi-Format Codec SDK
|
||||
README - 19 May 2010
|
||||
README - 21 June 2012
|
||||
|
||||
Welcome to the WebM VP8 Codec SDK!
|
||||
|
||||
|
@ -15,11 +15,19 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
|||
* Building the documentation requires PHP[3] and Doxygen[4]. If you do not
|
||||
have these packages, you must pass --disable-install-docs to the
|
||||
configure script.
|
||||
* Downloading the data for the unit tests requires curl[5] and sha1sum.
|
||||
sha1sum is provided via the GNU coreutils, installed by default on
|
||||
many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
|
||||
available, a compatible version of sha1sum can be built from
|
||||
source[6]. These requirements are optional if not running the unit
|
||||
tests.
|
||||
|
||||
[1]: http://www.tortall.net/projects/yasm
|
||||
[2]: http://www.cygwin.com
|
||||
[3]: http://php.net
|
||||
[4]: http://www.doxygen.org
|
||||
[5]: http://curl.haxx.se
|
||||
[6]: http://www.microbrew.org/tools/md5sha1sum/
|
||||
|
||||
2. Out-of-tree builds
|
||||
Out of tree builds are a supported method of building the application. For
|
||||
|
@ -42,17 +50,13 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
|||
--help output of the configure script. As of this writing, the list of
|
||||
available targets is:
|
||||
|
||||
armv5te-android-gcc
|
||||
armv5te-linux-rvct
|
||||
armv5te-linux-gcc
|
||||
armv5te-symbian-gcc
|
||||
armv6-darwin-gcc
|
||||
armv6-linux-rvct
|
||||
armv6-linux-gcc
|
||||
armv6-symbian-gcc
|
||||
iwmmxt-linux-rvct
|
||||
iwmmxt-linux-gcc
|
||||
iwmmxt2-linux-rvct
|
||||
iwmmxt2-linux-gcc
|
||||
armv7-android-gcc
|
||||
armv7-linux-rvct
|
||||
armv7-linux-gcc
|
||||
mips32-linux-gcc
|
||||
|
@ -98,5 +102,5 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
|||
|
||||
SUPPORT
|
||||
This library is an open source project supported by its community. Please
|
||||
please email webm-users@webmproject.org for help.
|
||||
email webm-discuss@webmproject.org for help.
|
||||
|
||||
|
|
|
@ -20,27 +20,36 @@ show_help(){
|
|||
show_help_pre
|
||||
cat << EOF
|
||||
Advanced options:
|
||||
${toggle_libs} don't build libraries
|
||||
${toggle_examples} don't build examples
|
||||
${toggle_unit_tests} build unit tests
|
||||
${toggle_libs} libraries
|
||||
${toggle_examples} examples
|
||||
${toggle_docs} documentation
|
||||
${toggle_unit_tests} unit tests
|
||||
--libc=PATH path to alternate libc
|
||||
--as={yasm|nasm|auto} use specified assembler [auto, yasm preferred]
|
||||
--sdk-path=PATH path to root of sdk (iOS, android builds only)
|
||||
${toggle_fast_unaligned} don't use unaligned accesses, even when
|
||||
supported by hardware [auto]
|
||||
${toggle_codec_srcs} in/exclude codec library source code
|
||||
${toggle_debug_libs} in/exclude debug version of libraries
|
||||
${toggle_md5} support for output of checksum data
|
||||
${toggle_static_msvcrt} use static MSVCRT (VS builds only)
|
||||
${toggle_vp8} VP8 codec support
|
||||
${toggle_vp9} VP9 codec support
|
||||
${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders)
|
||||
${toggle_mem_tracker} track memory usage
|
||||
${toggle_postproc} postprocessing
|
||||
${toggle_multithread} multithreaded encoding and decoding
|
||||
${toggle_spatial_resampling} spatial sampling (scaling) support
|
||||
${toggle_realtime_only} enable this option while building for real-time encoding
|
||||
${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding
|
||||
${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses
|
||||
${toggle_runtime_cpu_detect} runtime cpu detection
|
||||
${toggle_shared} shared library support
|
||||
${toggle_static} static library support
|
||||
${toggle_small} favor smaller size over speed
|
||||
${toggle_postproc_visualizer} macro block / block level visualizers
|
||||
${toggle_multi_res_encoding} enable multiple-resolution encoding
|
||||
${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser
|
||||
|
||||
Codecs:
|
||||
Codecs can be selectively enabled or disabled individually, or by family:
|
||||
|
@ -76,19 +85,15 @@ EOF
|
|||
|
||||
# all_platforms is a list of all supported target platforms. Maintain
|
||||
# alphabetically by architecture, generic-gnu last.
|
||||
all_platforms="${all_platforms} armv5te-android-gcc"
|
||||
all_platforms="${all_platforms} armv5te-linux-rvct"
|
||||
all_platforms="${all_platforms} armv5te-linux-gcc"
|
||||
all_platforms="${all_platforms} armv5te-none-rvct"
|
||||
all_platforms="${all_platforms} armv5te-symbian-gcc"
|
||||
all_platforms="${all_platforms} armv6-darwin-gcc"
|
||||
all_platforms="${all_platforms} armv6-linux-rvct"
|
||||
all_platforms="${all_platforms} armv6-linux-gcc"
|
||||
all_platforms="${all_platforms} armv6-none-rvct"
|
||||
all_platforms="${all_platforms} armv6-symbian-gcc"
|
||||
all_platforms="${all_platforms} iwmmxt-linux-rvct"
|
||||
all_platforms="${all_platforms} iwmmxt-linux-gcc"
|
||||
all_platforms="${all_platforms} iwmmxt2-linux-rvct"
|
||||
all_platforms="${all_platforms} iwmmxt2-linux-gcc"
|
||||
all_platforms="${all_platforms} armv7-android-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8
|
||||
all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8
|
||||
|
@ -105,8 +110,12 @@ all_platforms="${all_platforms} x86-darwin8-gcc"
|
|||
all_platforms="${all_platforms} x86-darwin8-icc"
|
||||
all_platforms="${all_platforms} x86-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin9-icc"
|
||||
all_platforms="${all_platforms} x86-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin11-gcc"
|
||||
all_platforms="${all_platforms} x86-darwin12-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-gcc"
|
||||
all_platforms="${all_platforms} x86-linux-icc"
|
||||
all_platforms="${all_platforms} x86-os2-gcc"
|
||||
all_platforms="${all_platforms} x86-solaris-gcc"
|
||||
all_platforms="${all_platforms} x86-win32-gcc"
|
||||
all_platforms="${all_platforms} x86-win32-vs7"
|
||||
|
@ -115,13 +124,18 @@ all_platforms="${all_platforms} x86-win32-vs9"
|
|||
all_platforms="${all_platforms} x86_64-darwin9-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin10-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin11-gcc"
|
||||
all_platforms="${all_platforms} x86_64-darwin12-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-gcc"
|
||||
all_platforms="${all_platforms} x86_64-linux-icc"
|
||||
all_platforms="${all_platforms} x86_64-solaris-gcc"
|
||||
all_platforms="${all_platforms} x86_64-win64-gcc"
|
||||
all_platforms="${all_platforms} x86_64-win64-vs8"
|
||||
all_platforms="${all_platforms} x86_64-win64-vs9"
|
||||
all_platforms="${all_platforms} universal-darwin8-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin9-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin10-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin11-gcc"
|
||||
all_platforms="${all_platforms} universal-darwin12-gcc"
|
||||
all_platforms="${all_platforms} generic-gnu"
|
||||
|
||||
# all_targets is a list of all targets that can be configured
|
||||
|
@ -158,20 +172,29 @@ enable optimizations
|
|||
enable fast_unaligned #allow unaligned accesses, if supported by hw
|
||||
enable md5
|
||||
enable spatial_resampling
|
||||
enable multithread
|
||||
enable os_support
|
||||
enable temporal_denoising
|
||||
|
||||
[ -d ${source_path}/../include ] && enable alt_tree_layout
|
||||
for d in vp9; do
|
||||
for d in vp8 vp9; do
|
||||
[ -d ${source_path}/${d} ] && disable alt_tree_layout;
|
||||
done
|
||||
|
||||
if ! enabled alt_tree_layout; then
|
||||
# development environment
|
||||
[ -d ${source_path}/vp8 ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
|
||||
[ -d ${source_path}/vp9 ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
|
||||
else
|
||||
# customer environment
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] && CODECS="${CODECS} vp8_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
|
||||
[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
|
||||
[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
|
||||
[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder
|
||||
|
||||
[ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
|
||||
fi
|
||||
|
@ -188,13 +211,12 @@ ARCH_LIST="
|
|||
ppc64
|
||||
"
|
||||
ARCH_EXT_LIST="
|
||||
armv5te
|
||||
armv6
|
||||
armv7
|
||||
iwmmxt
|
||||
iwmmxt2
|
||||
edsp
|
||||
media
|
||||
neon
|
||||
|
||||
mips32
|
||||
dspr2
|
||||
|
||||
mmx
|
||||
sse
|
||||
|
@ -252,6 +274,7 @@ CONFIG_LIST="
|
|||
dc_recon
|
||||
runtime_cpu_detect
|
||||
postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
${CODEC_FAMILIES}
|
||||
|
@ -259,12 +282,17 @@ CONFIG_LIST="
|
|||
decoders
|
||||
static_msvcrt
|
||||
spatial_resampling
|
||||
realtime_only
|
||||
onthefly_bitpacking
|
||||
error_concealment
|
||||
shared
|
||||
static
|
||||
small
|
||||
postproc_visualizer
|
||||
os_support
|
||||
unit_tests
|
||||
multi_res_encoding
|
||||
temporal_denoising
|
||||
experimental
|
||||
${EXPERIMENT_LIST}
|
||||
"
|
||||
|
@ -285,6 +313,7 @@ CMDLINE_SELECT="
|
|||
|
||||
libs
|
||||
examples
|
||||
docs
|
||||
libc
|
||||
as
|
||||
fast_unaligned
|
||||
|
@ -295,17 +324,23 @@ CMDLINE_SELECT="
|
|||
dequant_tokens
|
||||
dc_recon
|
||||
postproc
|
||||
multithread
|
||||
internal_stats
|
||||
${CODECS}
|
||||
${CODEC_FAMILIES}
|
||||
static_msvcrt
|
||||
mem_tracker
|
||||
spatial_resampling
|
||||
realtime_only
|
||||
onthefly_bitpacking
|
||||
error_concealment
|
||||
shared
|
||||
static
|
||||
small
|
||||
postproc_visualizer
|
||||
unit_tests
|
||||
multi_res_encoding
|
||||
temporal_denoising
|
||||
experimental
|
||||
"
|
||||
|
||||
|
@ -394,6 +429,7 @@ process_targets() {
|
|||
enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
|
||||
enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
|
||||
! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
|
||||
! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
|
||||
! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
|
||||
DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
|
||||
case "${tgt_os}" in
|
||||
|
@ -448,6 +484,18 @@ EOF
|
|||
}
|
||||
|
||||
process_detect() {
|
||||
if enabled shared; then
|
||||
# Can only build shared libs on a subset of platforms. Doing this check
|
||||
# here rather than at option parse time because the target auto-detect
|
||||
# magic happens after the command line has been parsed.
|
||||
if ! enabled linux; then
|
||||
if enabled gnu; then
|
||||
echo "--enable-shared is only supported on ELF; assuming this is OK"
|
||||
else
|
||||
die "--enable-shared only supported on ELF for now"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
if [ -z "$CC" ]; then
|
||||
echo "Bypassing toolchain for environment detection."
|
||||
enable external_build
|
||||
|
@ -492,11 +540,20 @@ process_toolchain() {
|
|||
case $toolchain in
|
||||
universal-darwin*)
|
||||
local darwin_ver=${tgt_os##darwin}
|
||||
fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
|
||||
|
||||
# Intel
|
||||
fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
|
||||
if [ $darwin_ver -gt 8 ]; then
|
||||
# Snow Leopard (10.6/darwin10) dropped support for PPC
|
||||
# Include PPC support for all prior versions
|
||||
if [ $darwin_ver -lt 10 ]; then
|
||||
fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
|
||||
fi
|
||||
|
||||
# Tiger (10.4/darwin8) brought support for x86
|
||||
if [ $darwin_ver -ge 8 ]; then
|
||||
fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
|
||||
fi
|
||||
|
||||
# Leopard (10.5/darwin9) brought 64 bit support
|
||||
if [ $darwin_ver -ge 9 ]; then
|
||||
fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
|
||||
fi
|
||||
;;
|
||||
|
@ -512,8 +569,11 @@ process_toolchain() {
|
|||
check_add_cflags -Wpointer-arith
|
||||
check_add_cflags -Wtype-limits
|
||||
check_add_cflags -Wcast-qual
|
||||
check_add_cflags -Wundef
|
||||
check_add_cflags -Wvla
|
||||
check_add_cflags -Wimplicit-function-declaration
|
||||
check_add_cflags -Wuninitialized
|
||||
check_add_cflags -Wunused-variable
|
||||
check_add_cflags -Wunused-but-set-variable
|
||||
enabled extra_warnings || check_add_cflags -Wno-unused-function
|
||||
fi
|
||||
|
||||
|
@ -568,6 +628,21 @@ process_toolchain() {
|
|||
if enabled postproc_visualizer; then
|
||||
enabled postproc || die "postproc_visualizer requires postproc to be enabled"
|
||||
fi
|
||||
|
||||
# Enable unit tests if we have a working C++ compiler
|
||||
case "$toolchain" in
|
||||
*-vs*)
|
||||
soft_enable unit_tests
|
||||
;;
|
||||
*-android-*)
|
||||
# GTestLog must be modified to use Android logging utilities.
|
||||
;;
|
||||
*)
|
||||
check_cxx "$@" <<EOF && soft_enable unit_tests
|
||||
int z;
|
||||
EOF
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
|
||||
|
@ -576,7 +651,8 @@ process_toolchain() {
|
|||
##
|
||||
CONFIGURE_ARGS="$@"
|
||||
process "$@"
|
||||
cat <<EOF > ${BUILD_PFX}vpx_config.c
|
||||
print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
|
||||
cat <<EOF >> ${BUILD_PFX}vpx_config.c
|
||||
static const char* const cfg = "$CONFIGURE_ARGS";
|
||||
const char *vpx_codec_build_config(void) {return cfg;}
|
||||
EOF
|
||||
|
|
3
docs.mk
3
docs.mk
|
@ -21,9 +21,6 @@ CODEC_DOX := mainpage.dox \
|
|||
usage_dx.dox \
|
||||
|
||||
# Other doxy files sourced in Markdown
|
||||
TXT_DOX-$(CONFIG_VP9) += vp8_api1_migration.dox
|
||||
vp8_api1_migration.dox.DESC = VP8 API 1.x Migration
|
||||
|
||||
TXT_DOX = $(call enabled,TXT_DOX)
|
||||
|
||||
%.dox: %.txt
|
||||
|
|
43
examples.mk
43
examples.mk
|
@ -16,7 +16,7 @@ UTILS-$(CONFIG_DECODERS) += vpxdec.c
|
|||
vpxdec.SRCS += md5_utils.c md5_utils.h
|
||||
vpxdec.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxdec.SRCS += vpx/vpx_integer.h
|
||||
vpxdec.SRCS += args.c args.h vpx_ports/config.h
|
||||
vpxdec.SRCS += args.c args.h
|
||||
vpxdec.SRCS += tools_common.c tools_common.h
|
||||
vpxdec.SRCS += nestegg/halloc/halloc.h
|
||||
vpxdec.SRCS += nestegg/halloc/src/align.h
|
||||
|
@ -30,13 +30,17 @@ vpxdec.DESCRIPTION = Full featured decoder
|
|||
UTILS-$(CONFIG_ENCODERS) += vpxenc.c
|
||||
vpxenc.SRCS += args.c args.h y4minput.c y4minput.h
|
||||
vpxenc.SRCS += tools_common.c tools_common.h
|
||||
vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops.h
|
||||
vpxenc.SRCS += vpx_ports/mem_ops_aligned.h
|
||||
vpxenc.SRCS += vpx_ports/vpx_timer.h
|
||||
vpxenc.SRCS += libmkv/EbmlIDs.h
|
||||
vpxenc.SRCS += libmkv/EbmlWriter.c
|
||||
vpxenc.SRCS += libmkv/EbmlWriter.h
|
||||
vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
|
||||
vpxenc.DESCRIPTION = Full featured encoder
|
||||
UTILS-$(CONFIG_ENCODERS) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
|
||||
|
||||
# Clean up old ivfenc, ivfdec binaries.
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
@ -77,29 +81,44 @@ GEN_EXAMPLES-$(CONFIG_ENCODERS) += decode_with_drops.c
|
|||
endif
|
||||
decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
|
||||
decode_with_drops.DESCRIPTION = Drops frames while decoding
|
||||
ifeq ($(CONFIG_DECODERS),yes)
|
||||
GEN_EXAMPLES-$(CONFIG_ERROR_CONCEALMENT) += decode_with_partial_drops.c
|
||||
endif
|
||||
decode_with_partial_drops.GUID = 61C2D026-5754-46AC-916F-1343ECC5537E
|
||||
decode_with_partial_drops.DESCRIPTION = Drops parts of frames while decoding
|
||||
GEN_EXAMPLES-$(CONFIG_ENCODERS) += error_resilient.c
|
||||
error_resilient.GUID = DF5837B9-4145-4F92-A031-44E4F832E00C
|
||||
error_resilient.DESCRIPTION = Error Resiliency Feature
|
||||
|
||||
GEN_EXAMPLES-$(CONFIG_VP9_ENCODER) += vp8_scalable_patterns.c
|
||||
vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
|
||||
vp8_scalable_patterns.DESCRIPTION = VP8 Scalable Bitstream Patterns
|
||||
GEN_EXAMPLES-$(CONFIG_VP9_ENCODER) += vp8_set_maps.c
|
||||
GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_set_maps.c
|
||||
vp8_set_maps.GUID = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
|
||||
vp8_set_maps.DESCRIPTION = VP8 set active and ROI maps
|
||||
GEN_EXAMPLES-$(CONFIG_VP9_ENCODER) += vp8cx_set_ref.c
|
||||
GEN_EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c
|
||||
vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
|
||||
vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame
|
||||
|
||||
# C file is provided, not generated automatically.
|
||||
UTILS-$(CONFIG_MULTI_RES_ENCODING) += vp8_multi_resolution_encoder.c
|
||||
vp8_multi_resolution_encoder.SRCS \
|
||||
+= third_party/libyuv/include/libyuv/basic_types.h \
|
||||
third_party/libyuv/include/libyuv/cpu_id.h \
|
||||
third_party/libyuv/include/libyuv/scale.h \
|
||||
third_party/libyuv/source/row.h \
|
||||
third_party/libyuv/source/scale.c \
|
||||
third_party/libyuv/source/cpu_id.c
|
||||
vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de
|
||||
vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
|
||||
|
||||
# Handle extra library flags depending on codec configuration
|
||||
|
||||
# We should not link to math library (libm) on RVCT
|
||||
# when building for bare-metal targets
|
||||
ifeq ($(CONFIG_OS_SUPPORT), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
|
||||
else
|
||||
ifeq ($(CONFIG_GCC), yes)
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m
|
||||
CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m
|
||||
endif
|
||||
endif
|
||||
|
@ -117,6 +136,8 @@ ifeq ($(HAVE_ALT_TREE_LAYOUT),yes)
|
|||
INC_PATH := $(SRC_PATH_BARE)/../include
|
||||
else
|
||||
LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.)
|
||||
INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8
|
||||
INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8
|
||||
INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9
|
||||
INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9
|
||||
LIB_PATH := $(call enabled,LIB_PATH)
|
||||
|
@ -152,12 +173,12 @@ $(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
|
|||
# Create build/install dependencies for all examples. The common case
|
||||
# is handled here. The MSVS case is handled below.
|
||||
NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
|
||||
DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=))
|
||||
INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=))
|
||||
DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
|
||||
INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
|
||||
DIST-SRCS-yes += $(ALL_SRCS)
|
||||
INSTALL-SRCS-yes += $(UTIL_SRCS)
|
||||
OBJS-$(NOT_MSVS) += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
|
||||
BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=))
|
||||
BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))
|
||||
|
||||
|
||||
# Instantiate linker template for all examples.
|
||||
|
@ -168,7 +189,7 @@ $(foreach bin,$(BINS-yes),\
|
|||
$(if $(BUILD_OBJS),$(eval $(bin):\
|
||||
$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
|
||||
$(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
|
||||
$(call objs,$($(notdir $(bin)).SRCS)) \
|
||||
$(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
|
||||
-l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
|
||||
)))\
|
||||
$(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
|
||||
|
|
|
@ -48,8 +48,8 @@ for(plane=0; plane < 3; plane++) {
|
|||
unsigned char *buf =img->planes[plane];
|
||||
|
||||
for(y=0; y < (plane ? (img->d_h + 1) >> 1 : img->d_h); y++) {
|
||||
if(fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
|
||||
outfile));
|
||||
(void) fwrite(buf, 1, (plane ? (img->d_w + 1) >> 1 : img->d_w),
|
||||
outfile);
|
||||
buf += img->stride[plane];
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile,
|
|||
mem_put_le32(header+24, frame_cnt); /* length */
|
||||
mem_put_le32(header+28, 0); /* unused */
|
||||
|
||||
if(fwrite(header, 1, 32, outfile));
|
||||
(void) fwrite(header, 1, 32, outfile);
|
||||
}
|
||||
|
||||
|
||||
|
@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile,
|
|||
mem_put_le32(header+4, pts&0xFFFFFFFF);
|
||||
mem_put_le32(header+8, pts >> 32);
|
||||
|
||||
if(fwrite(header, 1, 12, outfile));
|
||||
(void) fwrite(header, 1, 12, outfile);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
|
|
@ -61,13 +61,14 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
|
|||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
|
||||
case VPX_CODEC_CX_FRAME_PKT:
|
||||
write_ivf_frame_header(outfile, pkt);
|
||||
if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
|
||||
outfile));
|
||||
(void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
|
||||
outfile);
|
||||
break;
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
||||
vpx_img_free(&raw);
|
||||
if(vpx_codec_destroy(&codec))
|
||||
die_codec(&codec, "Failed to destroy codec");
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
||||
|
|
|
@ -58,7 +58,7 @@ if(frame_cnt%30 == 1) {
|
|||
if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
|
||||
die_codec(&codec, "Failed to turn off postproc");
|
||||
} else if(frame_cnt%30 == 16) {
|
||||
vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK, 4, 0};
|
||||
vp8_postproc_cfg_t pp = {VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0};
|
||||
|
||||
if(vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp))
|
||||
die_codec(&codec, "Failed to turn on postproc");
|
||||
|
|
|
@ -71,5 +71,17 @@ Pass Progress Reporting
|
|||
It's sometimes helpful to see when each pass completes.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END
|
||||
printf("Pass %d complete.\n", pass+1);
|
||||
if(vpx_codec_destroy(&codec))
|
||||
die_codec(&codec, "Failed to destroy codec");
|
||||
}
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_LOOP_END
|
||||
|
||||
|
||||
Clean-up
|
||||
-----------------------------
|
||||
Destruction of the encoder instance must be done on each pass. The
|
||||
raw image should be destroyed at the end as usual.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
||||
vpx_img_free(&raw);
|
||||
free(stats.buf);
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DESTROY
|
||||
|
|
|
@ -1,143 +0,0 @@
|
|||
@TEMPLATE encoder_tmpl.c
|
||||
VP8 Scalable Frame Patterns
|
||||
===========================
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
|
||||
This is an example demonstrating how to control the VP8 encoder's
|
||||
reference frame selection and update mechanism for video applications
|
||||
that benefit from a scalable bitstream.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ INTRODUCTION
|
||||
|
||||
|
||||
Configuration
|
||||
-------------
|
||||
Scalable frame patterns are most useful in an error resilient context,
|
||||
so error resiliency mode is enabled, as in the `error_resilient.c`
|
||||
example. In addition, we want to disable automatic keyframe selection,
|
||||
so we force an interval of 1000 frames.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_SET_CFG2
|
||||
|
||||
/* Enable error resilient mode */
|
||||
cfg.g_error_resilient = 1;
|
||||
cfg.g_lag_in_frames = 0;
|
||||
cfg.kf_mode = VPX_KF_FIXED;
|
||||
|
||||
/* Disable automatic keyframe placement */
|
||||
cfg.kf_min_dist = cfg.kf_max_dist = 1000;
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ENC_SET_CFG2
|
||||
|
||||
This example uses the following frame pattern (L->last_frame,
|
||||
G->golden_frame, A->alt_ref_frame):
|
||||
|
||||
* Frame 0 Intra, use none, update L&G&A
|
||||
* Frame 1 Inter, use LGA, update none
|
||||
* Frame 2 Inter, use LGA, update L
|
||||
* Frame 3 Inter, use LGA, update none
|
||||
* Frame 4 Inter, use GA, update L&G
|
||||
* Frame 5 Inter, use LGA, update none
|
||||
* Frame 6 Inter, use LGA, update L
|
||||
* Frame 7 Inter, use LGA, update none
|
||||
* Frame 8 Inter, use A, update L&G&A
|
||||
* Frame 9 Inter, use LGA, update none
|
||||
* Frame 10 Inter, use LGA, update L
|
||||
* Frame 11 Inter, use LGA, update none
|
||||
* Frame 12 Inter, use GA, update L&G
|
||||
* Frame 13 Inter, use LGA, update none
|
||||
* Frame 14 Inter, use LGA, update L
|
||||
* Frame 15 Inter, use LGA, update none
|
||||
* ...Repeats the pattern from frame 0
|
||||
|
||||
Change this variable to test the 3 decodable streams case.
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_VARS
|
||||
int num_streams = 5;
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TWOPASS_VARS
|
||||
|
||||
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PER_FRAME_CFG
|
||||
flags = 0;
|
||||
if(num_streams == 5)
|
||||
{
|
||||
switch(frame_cnt % 16) {
|
||||
case 0:
|
||||
flags |= VPX_EFLAG_FORCE_KF;
|
||||
flags |= VP8_EFLAG_FORCE_GF;
|
||||
flags |= VP8_EFLAG_FORCE_ARF;
|
||||
break;
|
||||
case 1:
|
||||
case 3:
|
||||
case 5:
|
||||
case 7:
|
||||
case 9:
|
||||
case 11:
|
||||
case 13:
|
||||
case 15:
|
||||
flags |= VP8_EFLAG_NO_UPD_LAST;
|
||||
flags |= VP8_EFLAG_NO_UPD_GF;
|
||||
flags |= VP8_EFLAG_NO_UPD_ARF;
|
||||
break;
|
||||
case 2:
|
||||
case 6:
|
||||
case 10:
|
||||
case 14:
|
||||
break;
|
||||
case 4:
|
||||
flags |= VP8_EFLAG_NO_REF_LAST;
|
||||
flags |= VP8_EFLAG_FORCE_GF;
|
||||
break;
|
||||
case 8:
|
||||
flags |= VP8_EFLAG_NO_REF_LAST;
|
||||
flags |= VP8_EFLAG_NO_REF_GF;
|
||||
flags |= VP8_EFLAG_FORCE_GF;
|
||||
flags |= VP8_EFLAG_FORCE_ARF;
|
||||
break;
|
||||
case 12:
|
||||
flags |= VP8_EFLAG_NO_REF_LAST;
|
||||
flags |= VP8_EFLAG_FORCE_GF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(frame_cnt % 9) {
|
||||
case 0:
|
||||
if(frame_cnt==0)
|
||||
{
|
||||
flags |= VPX_EFLAG_FORCE_KF;
|
||||
}
|
||||
else
|
||||
{
|
||||
cfg.rc_max_quantizer = 26;
|
||||
cfg.rc_min_quantizer = 0;
|
||||
cfg.rc_target_bitrate = 300;
|
||||
flags |= VP8_EFLAG_NO_REF_LAST;
|
||||
flags |= VP8_EFLAG_NO_REF_ARF;
|
||||
}
|
||||
flags |= VP8_EFLAG_FORCE_GF;
|
||||
flags |= VP8_EFLAG_FORCE_ARF;
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 4:
|
||||
case 5:
|
||||
case 7:
|
||||
case 8:
|
||||
cfg.rc_max_quantizer = 45;
|
||||
cfg.rc_min_quantizer = 0;
|
||||
cfg.rc_target_bitrate = 230;
|
||||
break;
|
||||
case 3:
|
||||
case 6:
|
||||
cfg.rc_max_quantizer = 45;
|
||||
cfg.rc_min_quantizer = 0;
|
||||
cfg.rc_target_bitrate = 215;
|
||||
flags |= VP8_EFLAG_NO_REF_LAST;
|
||||
flags |= VP8_EFLAG_FORCE_ARF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PER_FRAME_CFG
|
||||
|
||||
Observing The Effects
|
||||
---------------------
|
||||
Use the `decode_with_drops` example to decode with various dropped frame
|
||||
patterns. Good patterns to start with are 1/2, 3/4, 7/8, and 15/16
|
||||
drops.
|
292
libmkv/EbmlIDs.h
292
libmkv/EbmlIDs.h
|
@ -1,16 +1,16 @@
|
|||
// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the LICENSE file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef MKV_DEFS_HPP
|
||||
#define MKV_DEFS_HPP 1
|
||||
|
||||
// Commenting out values not available in webm, but available in matroska
|
||||
/* Commenting out values not available in webm, but available in matroska */
|
||||
|
||||
enum mkv {
|
||||
EBML = 0x1A45DFA3,
|
||||
|
@ -21,7 +21,7 @@ enum mkv {
|
|||
DocType = 0x4282,
|
||||
DocTypeVersion = 0x4287,
|
||||
DocTypeReadVersion = 0x4285,
|
||||
// CRC_32 = 0xBF,
|
||||
/* CRC_32 = 0xBF, */
|
||||
Void = 0xEC,
|
||||
SignatureSlot = 0x1B538667,
|
||||
SignatureAlgo = 0x7E8A,
|
||||
|
@ -31,61 +31,61 @@ enum mkv {
|
|||
SignatureElements = 0x7E5B,
|
||||
SignatureElementList = 0x7E7B,
|
||||
SignedElement = 0x6532,
|
||||
// segment
|
||||
/* segment */
|
||||
Segment = 0x18538067,
|
||||
// Meta Seek Information
|
||||
/* Meta Seek Information */
|
||||
SeekHead = 0x114D9B74,
|
||||
Seek = 0x4DBB,
|
||||
SeekID = 0x53AB,
|
||||
SeekPosition = 0x53AC,
|
||||
// Segment Information
|
||||
/* Segment Information */
|
||||
Info = 0x1549A966,
|
||||
// SegmentUID = 0x73A4,
|
||||
// SegmentFilename = 0x7384,
|
||||
// PrevUID = 0x3CB923,
|
||||
// PrevFilename = 0x3C83AB,
|
||||
// NextUID = 0x3EB923,
|
||||
// NextFilename = 0x3E83BB,
|
||||
// SegmentFamily = 0x4444,
|
||||
// ChapterTranslate = 0x6924,
|
||||
// ChapterTranslateEditionUID = 0x69FC,
|
||||
// ChapterTranslateCodec = 0x69BF,
|
||||
// ChapterTranslateID = 0x69A5,
|
||||
/* SegmentUID = 0x73A4, */
|
||||
/* SegmentFilename = 0x7384, */
|
||||
/* PrevUID = 0x3CB923, */
|
||||
/* PrevFilename = 0x3C83AB, */
|
||||
/* NextUID = 0x3EB923, */
|
||||
/* NextFilename = 0x3E83BB, */
|
||||
/* SegmentFamily = 0x4444, */
|
||||
/* ChapterTranslate = 0x6924, */
|
||||
/* ChapterTranslateEditionUID = 0x69FC, */
|
||||
/* ChapterTranslateCodec = 0x69BF, */
|
||||
/* ChapterTranslateID = 0x69A5, */
|
||||
TimecodeScale = 0x2AD7B1,
|
||||
Segment_Duration = 0x4489,
|
||||
DateUTC = 0x4461,
|
||||
// Title = 0x7BA9,
|
||||
/* Title = 0x7BA9, */
|
||||
MuxingApp = 0x4D80,
|
||||
WritingApp = 0x5741,
|
||||
// Cluster
|
||||
/* Cluster */
|
||||
Cluster = 0x1F43B675,
|
||||
Timecode = 0xE7,
|
||||
// SilentTracks = 0x5854,
|
||||
// SilentTrackNumber = 0x58D7,
|
||||
// Position = 0xA7,
|
||||
/* SilentTracks = 0x5854, */
|
||||
/* SilentTrackNumber = 0x58D7, */
|
||||
/* Position = 0xA7, */
|
||||
PrevSize = 0xAB,
|
||||
BlockGroup = 0xA0,
|
||||
Block = 0xA1,
|
||||
// BlockVirtual = 0xA2,
|
||||
// BlockAdditions = 0x75A1,
|
||||
// BlockMore = 0xA6,
|
||||
// BlockAddID = 0xEE,
|
||||
// BlockAdditional = 0xA5,
|
||||
/* BlockVirtual = 0xA2, */
|
||||
/* BlockAdditions = 0x75A1, */
|
||||
/* BlockMore = 0xA6, */
|
||||
/* BlockAddID = 0xEE, */
|
||||
/* BlockAdditional = 0xA5, */
|
||||
BlockDuration = 0x9B,
|
||||
// ReferencePriority = 0xFA,
|
||||
/* ReferencePriority = 0xFA, */
|
||||
ReferenceBlock = 0xFB,
|
||||
// ReferenceVirtual = 0xFD,
|
||||
// CodecState = 0xA4,
|
||||
// Slices = 0x8E,
|
||||
// TimeSlice = 0xE8,
|
||||
/* ReferenceVirtual = 0xFD, */
|
||||
/* CodecState = 0xA4, */
|
||||
/* Slices = 0x8E, */
|
||||
/* TimeSlice = 0xE8, */
|
||||
LaceNumber = 0xCC,
|
||||
// FrameNumber = 0xCD,
|
||||
// BlockAdditionID = 0xCB,
|
||||
// MkvDelay = 0xCE,
|
||||
// Cluster_Duration = 0xCF,
|
||||
/* FrameNumber = 0xCD, */
|
||||
/* BlockAdditionID = 0xCB, */
|
||||
/* MkvDelay = 0xCE, */
|
||||
/* Cluster_Duration = 0xCF, */
|
||||
SimpleBlock = 0xA3,
|
||||
// EncryptedBlock = 0xAF,
|
||||
// Track
|
||||
/* EncryptedBlock = 0xAF, */
|
||||
/* Track */
|
||||
Tracks = 0x1654AE6B,
|
||||
TrackEntry = 0xAE,
|
||||
TrackNumber = 0xD7,
|
||||
|
@ -95,28 +95,28 @@ enum mkv {
|
|||
FlagDefault = 0x88,
|
||||
FlagForced = 0x55AA,
|
||||
FlagLacing = 0x9C,
|
||||
// MinCache = 0x6DE7,
|
||||
// MaxCache = 0x6DF8,
|
||||
/* MinCache = 0x6DE7, */
|
||||
/* MaxCache = 0x6DF8, */
|
||||
DefaultDuration = 0x23E383,
|
||||
// TrackTimecodeScale = 0x23314F,
|
||||
// TrackOffset = 0x537F,
|
||||
// MaxBlockAdditionID = 0x55EE,
|
||||
/* TrackTimecodeScale = 0x23314F, */
|
||||
/* TrackOffset = 0x537F, */
|
||||
/* MaxBlockAdditionID = 0x55EE, */
|
||||
Name = 0x536E,
|
||||
Language = 0x22B59C,
|
||||
CodecID = 0x86,
|
||||
CodecPrivate = 0x63A2,
|
||||
CodecName = 0x258688,
|
||||
// AttachmentLink = 0x7446,
|
||||
// CodecSettings = 0x3A9697,
|
||||
// CodecInfoURL = 0x3B4040,
|
||||
// CodecDownloadURL = 0x26B240,
|
||||
// CodecDecodeAll = 0xAA,
|
||||
// TrackOverlay = 0x6FAB,
|
||||
// TrackTranslate = 0x6624,
|
||||
// TrackTranslateEditionUID = 0x66FC,
|
||||
// TrackTranslateCodec = 0x66BF,
|
||||
// TrackTranslateTrackID = 0x66A5,
|
||||
// video
|
||||
/* AttachmentLink = 0x7446, */
|
||||
/* CodecSettings = 0x3A9697, */
|
||||
/* CodecInfoURL = 0x3B4040, */
|
||||
/* CodecDownloadURL = 0x26B240, */
|
||||
/* CodecDecodeAll = 0xAA, */
|
||||
/* TrackOverlay = 0x6FAB, */
|
||||
/* TrackTranslate = 0x6624, */
|
||||
/* TrackTranslateEditionUID = 0x66FC, */
|
||||
/* TrackTranslateCodec = 0x66BF, */
|
||||
/* TrackTranslateTrackID = 0x66A5, */
|
||||
/* video */
|
||||
Video = 0xE0,
|
||||
FlagInterlaced = 0x9A,
|
||||
StereoMode = 0x53B8,
|
||||
|
@ -130,101 +130,101 @@ enum mkv {
|
|||
DisplayHeight = 0x54BA,
|
||||
DisplayUnit = 0x54B2,
|
||||
AspectRatioType = 0x54B3,
|
||||
// ColourSpace = 0x2EB524,
|
||||
// GammaValue = 0x2FB523,
|
||||
/* ColourSpace = 0x2EB524, */
|
||||
/* GammaValue = 0x2FB523, */
|
||||
FrameRate = 0x2383E3,
|
||||
// end video
|
||||
// audio
|
||||
/* end video */
|
||||
/* audio */
|
||||
Audio = 0xE1,
|
||||
SamplingFrequency = 0xB5,
|
||||
OutputSamplingFrequency = 0x78B5,
|
||||
Channels = 0x9F,
|
||||
// ChannelPositions = 0x7D7B,
|
||||
/* ChannelPositions = 0x7D7B, */
|
||||
BitDepth = 0x6264,
|
||||
// end audio
|
||||
// content encoding
|
||||
// ContentEncodings = 0x6d80,
|
||||
// ContentEncoding = 0x6240,
|
||||
// ContentEncodingOrder = 0x5031,
|
||||
// ContentEncodingScope = 0x5032,
|
||||
// ContentEncodingType = 0x5033,
|
||||
// ContentCompression = 0x5034,
|
||||
// ContentCompAlgo = 0x4254,
|
||||
// ContentCompSettings = 0x4255,
|
||||
// ContentEncryption = 0x5035,
|
||||
// ContentEncAlgo = 0x47e1,
|
||||
// ContentEncKeyID = 0x47e2,
|
||||
// ContentSignature = 0x47e3,
|
||||
// ContentSigKeyID = 0x47e4,
|
||||
// ContentSigAlgo = 0x47e5,
|
||||
// ContentSigHashAlgo = 0x47e6,
|
||||
// end content encoding
|
||||
// Cueing Data
|
||||
/* end audio */
|
||||
/* content encoding */
|
||||
/* ContentEncodings = 0x6d80, */
|
||||
/* ContentEncoding = 0x6240, */
|
||||
/* ContentEncodingOrder = 0x5031, */
|
||||
/* ContentEncodingScope = 0x5032, */
|
||||
/* ContentEncodingType = 0x5033, */
|
||||
/* ContentCompression = 0x5034, */
|
||||
/* ContentCompAlgo = 0x4254, */
|
||||
/* ContentCompSettings = 0x4255, */
|
||||
/* ContentEncryption = 0x5035, */
|
||||
/* ContentEncAlgo = 0x47e1, */
|
||||
/* ContentEncKeyID = 0x47e2, */
|
||||
/* ContentSignature = 0x47e3, */
|
||||
/* ContentSigKeyID = 0x47e4, */
|
||||
/* ContentSigAlgo = 0x47e5, */
|
||||
/* ContentSigHashAlgo = 0x47e6, */
|
||||
/* end content encoding */
|
||||
/* Cueing Data */
|
||||
Cues = 0x1C53BB6B,
|
||||
CuePoint = 0xBB,
|
||||
CueTime = 0xB3,
|
||||
CueTrackPositions = 0xB7,
|
||||
CueTrack = 0xF7,
|
||||
CueClusterPosition = 0xF1,
|
||||
CueBlockNumber = 0x5378,
|
||||
// CueCodecState = 0xEA,
|
||||
// CueReference = 0xDB,
|
||||
// CueRefTime = 0x96,
|
||||
// CueRefCluster = 0x97,
|
||||
// CueRefNumber = 0x535F,
|
||||
// CueRefCodecState = 0xEB,
|
||||
// Attachment
|
||||
// Attachments = 0x1941A469,
|
||||
// AttachedFile = 0x61A7,
|
||||
// FileDescription = 0x467E,
|
||||
// FileName = 0x466E,
|
||||
// FileMimeType = 0x4660,
|
||||
// FileData = 0x465C,
|
||||
// FileUID = 0x46AE,
|
||||
// FileReferral = 0x4675,
|
||||
// Chapters
|
||||
// Chapters = 0x1043A770,
|
||||
// EditionEntry = 0x45B9,
|
||||
// EditionUID = 0x45BC,
|
||||
// EditionFlagHidden = 0x45BD,
|
||||
// EditionFlagDefault = 0x45DB,
|
||||
// EditionFlagOrdered = 0x45DD,
|
||||
// ChapterAtom = 0xB6,
|
||||
// ChapterUID = 0x73C4,
|
||||
// ChapterTimeStart = 0x91,
|
||||
// ChapterTimeEnd = 0x92,
|
||||
// ChapterFlagHidden = 0x98,
|
||||
// ChapterFlagEnabled = 0x4598,
|
||||
// ChapterSegmentUID = 0x6E67,
|
||||
// ChapterSegmentEditionUID = 0x6EBC,
|
||||
// ChapterPhysicalEquiv = 0x63C3,
|
||||
// ChapterTrack = 0x8F,
|
||||
// ChapterTrackNumber = 0x89,
|
||||
// ChapterDisplay = 0x80,
|
||||
// ChapString = 0x85,
|
||||
// ChapLanguage = 0x437C,
|
||||
// ChapCountry = 0x437E,
|
||||
// ChapProcess = 0x6944,
|
||||
// ChapProcessCodecID = 0x6955,
|
||||
// ChapProcessPrivate = 0x450D,
|
||||
// ChapProcessCommand = 0x6911,
|
||||
// ChapProcessTime = 0x6922,
|
||||
// ChapProcessData = 0x6933,
|
||||
// Tagging
|
||||
// Tags = 0x1254C367,
|
||||
// Tag = 0x7373,
|
||||
// Targets = 0x63C0,
|
||||
// TargetTypeValue = 0x68CA,
|
||||
// TargetType = 0x63CA,
|
||||
// Tagging_TrackUID = 0x63C5,
|
||||
// Tagging_EditionUID = 0x63C9,
|
||||
// Tagging_ChapterUID = 0x63C4,
|
||||
// AttachmentUID = 0x63C6,
|
||||
// SimpleTag = 0x67C8,
|
||||
// TagName = 0x45A3,
|
||||
// TagLanguage = 0x447A,
|
||||
// TagDefault = 0x4484,
|
||||
// TagString = 0x4487,
|
||||
// TagBinary = 0x4485,
|
||||
CueBlockNumber = 0x5378
|
||||
/* CueCodecState = 0xEA, */
|
||||
/* CueReference = 0xDB, */
|
||||
/* CueRefTime = 0x96, */
|
||||
/* CueRefCluster = 0x97, */
|
||||
/* CueRefNumber = 0x535F, */
|
||||
/* CueRefCodecState = 0xEB, */
|
||||
/* Attachment */
|
||||
/* Attachments = 0x1941A469, */
|
||||
/* AttachedFile = 0x61A7, */
|
||||
/* FileDescription = 0x467E, */
|
||||
/* FileName = 0x466E, */
|
||||
/* FileMimeType = 0x4660, */
|
||||
/* FileData = 0x465C, */
|
||||
/* FileUID = 0x46AE, */
|
||||
/* FileReferral = 0x4675, */
|
||||
/* Chapters */
|
||||
/* Chapters = 0x1043A770, */
|
||||
/* EditionEntry = 0x45B9, */
|
||||
/* EditionUID = 0x45BC, */
|
||||
/* EditionFlagHidden = 0x45BD, */
|
||||
/* EditionFlagDefault = 0x45DB, */
|
||||
/* EditionFlagOrdered = 0x45DD, */
|
||||
/* ChapterAtom = 0xB6, */
|
||||
/* ChapterUID = 0x73C4, */
|
||||
/* ChapterTimeStart = 0x91, */
|
||||
/* ChapterTimeEnd = 0x92, */
|
||||
/* ChapterFlagHidden = 0x98, */
|
||||
/* ChapterFlagEnabled = 0x4598, */
|
||||
/* ChapterSegmentUID = 0x6E67, */
|
||||
/* ChapterSegmentEditionUID = 0x6EBC, */
|
||||
/* ChapterPhysicalEquiv = 0x63C3, */
|
||||
/* ChapterTrack = 0x8F, */
|
||||
/* ChapterTrackNumber = 0x89, */
|
||||
/* ChapterDisplay = 0x80, */
|
||||
/* ChapString = 0x85, */
|
||||
/* ChapLanguage = 0x437C, */
|
||||
/* ChapCountry = 0x437E, */
|
||||
/* ChapProcess = 0x6944, */
|
||||
/* ChapProcessCodecID = 0x6955, */
|
||||
/* ChapProcessPrivate = 0x450D, */
|
||||
/* ChapProcessCommand = 0x6911, */
|
||||
/* ChapProcessTime = 0x6922, */
|
||||
/* ChapProcessData = 0x6933, */
|
||||
/* Tagging */
|
||||
/* Tags = 0x1254C367, */
|
||||
/* Tag = 0x7373, */
|
||||
/* Targets = 0x63C0, */
|
||||
/* TargetTypeValue = 0x68CA, */
|
||||
/* TargetType = 0x63CA, */
|
||||
/* Tagging_TrackUID = 0x63C5, */
|
||||
/* Tagging_EditionUID = 0x63C9, */
|
||||
/* Tagging_ChapterUID = 0x63C4, */
|
||||
/* AttachmentUID = 0x63C6, */
|
||||
/* SimpleTag = 0x67C8, */
|
||||
/* TagName = 0x45A3, */
|
||||
/* TagLanguage = 0x447A, */
|
||||
/* TagDefault = 0x4484, */
|
||||
/* TagString = 0x4487, */
|
||||
/* TagBinary = 0x4485, */
|
||||
};
|
||||
#endif
|
||||
|
|
|
@ -1,12 +1,12 @@
|
|||
// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the LICENSE file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "EbmlWriter.h"
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
|
@ -18,10 +18,12 @@
|
|||
#define LITERALU64(n) n##LLU
|
||||
#endif
|
||||
|
||||
void Ebml_WriteLen(EbmlGlobal *glob, long long val) {
|
||||
// TODO check and make sure we are not > than 0x0100000000000000LLU
|
||||
unsigned char size = 8; // size in bytes to output
|
||||
unsigned long long minVal = LITERALU64(0x00000000000000ff); // mask to compare for byte size
|
||||
void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) {
|
||||
/* TODO check and make sure we are not > than 0x0100000000000000LLU */
|
||||
unsigned char size = 8; /* size in bytes to output */
|
||||
|
||||
/* mask to compare for byte size */
|
||||
int64_t minVal = 0xff;
|
||||
|
||||
for (size = 1; size < 8; size ++) {
|
||||
if (val < minVal)
|
||||
|
@ -30,29 +32,31 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) {
|
|||
minVal = (minVal << 7);
|
||||
}
|
||||
|
||||
val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
|
||||
val |= (((uint64_t)0x80) << ((size - 1) * 7));
|
||||
|
||||
Ebml_Serialize(glob, (void *) &val, sizeof(val), size);
|
||||
}
|
||||
|
||||
void Ebml_WriteString(EbmlGlobal *glob, const char *str) {
|
||||
const size_t size_ = strlen(str);
|
||||
const unsigned long long size = size_;
|
||||
const uint64_t size = size_;
|
||||
Ebml_WriteLen(glob, size);
|
||||
// TODO: it's not clear from the spec whether the nul terminator
|
||||
// should be serialized too. For now we omit the null terminator.
|
||||
Ebml_Write(glob, str, size);
|
||||
/* TODO: it's not clear from the spec whether the nul terminator
|
||||
* should be serialized too. For now we omit the null terminator.
|
||||
*/
|
||||
Ebml_Write(glob, str, (unsigned long)size);
|
||||
}
|
||||
|
||||
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) {
|
||||
const size_t strlen = wcslen(wstr);
|
||||
|
||||
// TODO: it's not clear from the spec whether the nul terminator
|
||||
// should be serialized too. For now we include it.
|
||||
const unsigned long long size = strlen;
|
||||
/* TODO: it's not clear from the spec whether the nul terminator
|
||||
* should be serialized too. For now we include it.
|
||||
*/
|
||||
const uint64_t size = strlen;
|
||||
|
||||
Ebml_WriteLen(glob, size);
|
||||
Ebml_Write(glob, wstr, size);
|
||||
Ebml_Write(glob, wstr, (unsigned long)size);
|
||||
}
|
||||
|
||||
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) {
|
||||
|
@ -78,12 +82,12 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t
|
|||
}
|
||||
|
||||
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) {
|
||||
unsigned char size = 8; // size in bytes to output
|
||||
unsigned char size = 8; /* size in bytes to output */
|
||||
unsigned char sizeSerialized = 0;
|
||||
unsigned long minVal;
|
||||
|
||||
Ebml_WriteID(glob, class_id);
|
||||
minVal = 0x7fLU; // mask to compare for byte size
|
||||
minVal = 0x7fLU; /* mask to compare for byte size */
|
||||
|
||||
for (size = 1; size < 4; size ++) {
|
||||
if (ui < minVal) {
|
||||
|
@ -97,7 +101,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
|
|||
Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1);
|
||||
Ebml_Serialize(glob, &ui, sizeof(ui), size);
|
||||
}
|
||||
// TODO: perhaps this is a poor name for this id serializer helper function
|
||||
/* TODO: perhaps this is a poor name for this id serializer helper function */
|
||||
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
|
||||
int size;
|
||||
for (size = 4; size > 1; size--) {
|
||||
|
@ -150,4 +154,4 @@ void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO Serialize Date
|
||||
/* TODO Serialize Date */
|
||||
|
|
|
@ -1,26 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef EBMLWRITER_HPP
|
||||
#define EBMLWRITER_HPP
|
||||
|
||||
// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the LICENSE file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
// note: you must define write and serialize functions as well as your own EBML_GLOBAL
|
||||
// These functions MUST be implemented
|
||||
#include <stddef.h>
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
/* note: you must define write and serialize functions as well as your own
|
||||
* EBML_GLOBAL
|
||||
*
|
||||
* These functions MUST be implemented
|
||||
*/
|
||||
|
||||
typedef struct EbmlGlobal EbmlGlobal;
|
||||
void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long);
|
||||
void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
|
||||
/////
|
||||
|
||||
/*****/
|
||||
|
||||
void Ebml_WriteLen(EbmlGlobal *glob, long long val);
|
||||
void Ebml_WriteLen(EbmlGlobal *glob, int64_t val);
|
||||
void Ebml_WriteString(EbmlGlobal *glob, const char *str);
|
||||
void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
|
||||
void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
|
||||
|
@ -28,11 +32,11 @@ void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t
|
|||
void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
|
||||
void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
|
||||
void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
|
||||
// TODO make this more generic to signed
|
||||
/* TODO make this more generic to signed */
|
||||
void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
|
||||
void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
|
||||
void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
|
||||
void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
|
||||
void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
|
||||
// TODO need date function
|
||||
/* TODO need date function */
|
||||
#endif
|
||||
|
|
185
libs.mk
185
libs.mk
|
@ -17,6 +17,34 @@ else
|
|||
ASM:=.asm
|
||||
endif
|
||||
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
|
||||
OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
|
||||
define asm_offsets_template
|
||||
$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
|
||||
@echo " [CREATE] $$@"
|
||||
$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
|
||||
$$(BUILD_PFX)$(2).S: $(2)
|
||||
CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
|
||||
endef
|
||||
else
|
||||
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
|
||||
define asm_offsets_template
|
||||
$$(BUILD_PFX)$(1): obj_int_extract
|
||||
$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
|
||||
@echo " [CREATE] $$@"
|
||||
$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
|
||||
OBJS-yes += $$(BUILD_PFX)$(2).o
|
||||
CLEAN-OBJS += $$(BUILD_PFX)$(1)
|
||||
$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
|
||||
endef
|
||||
endif # rvct
|
||||
endif # !gcc
|
||||
|
||||
|
||||
CODEC_SRCS-yes += CHANGELOG
|
||||
CODEC_SRCS-yes += libs.mk
|
||||
|
||||
|
@ -29,15 +57,47 @@ CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS))
|
|||
include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk
|
||||
CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
|
||||
|
||||
ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
|
||||
VP8_PREFIX=vp8/
|
||||
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP8_ENCODER),yes)
|
||||
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk
|
||||
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
|
||||
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS))
|
||||
CODEC_SRCS-yes += $(VP8_PREFIX)vp8cx.mk vpx/vp8.h vpx/vp8cx.h
|
||||
CODEC_SRCS-$(ARCH_ARM) += $(VP8_PREFIX)vp88cx_arm.mk
|
||||
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
|
||||
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
|
||||
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
|
||||
CODEC_DOC_SECTIONS += vp8 vp8_encoder
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP8_DECODER),yes)
|
||||
include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk
|
||||
CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS))
|
||||
CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS))
|
||||
CODEC_SRCS-yes += $(VP8_PREFIX)vp8dx.mk vpx/vp8.h vpx/vp8dx.h
|
||||
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
|
||||
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/%
|
||||
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
|
||||
CODEC_DOC_SECTIONS += vp8 vp8_decoder
|
||||
endif
|
||||
|
||||
ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
|
||||
VP9_PREFIX=vp9/
|
||||
include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP9_ENCODER),yes)
|
||||
VP9_PREFIX=vp9/
|
||||
include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk
|
||||
CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
|
||||
CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
|
||||
CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h vpx/vp8e.h
|
||||
CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
|
||||
CODEC_SRCS-$(ARCH_ARM) += $(VP9_PREFIX)vp98cx_arm.mk
|
||||
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8e.h include/vpx/vp8cx.h
|
||||
INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
|
||||
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
|
||||
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
|
||||
CODEC_DOC_SECTIONS += vp9 vp9_encoder
|
||||
|
@ -117,7 +177,6 @@ INSTALL-LIBS-yes += include/vpx/vpx_integer.h
|
|||
INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_top.h
|
||||
INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_bottom.h
|
||||
INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder.h
|
||||
INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder_compat.h
|
||||
INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_encoder.h
|
||||
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
@ -149,7 +208,7 @@ ifeq ($(CONFIG_MSVS),yes)
|
|||
obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
|
||||
@cp $(SRC_PATH_BARE)/build/x86-msvs/obj_int_extract.bat .
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=obj_int_extract \
|
||||
|
@ -165,14 +224,14 @@ PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
|
|||
|
||||
vpx.def: $(call enabled,CODEC_EXPORTS)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
|
||||
--name=vpx\
|
||||
--out=$@ $^
|
||||
CLEAN-OBJS += vpx.def
|
||||
|
||||
vpx.vcproj: $(CODEC_SRCS) vpx.def
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
$(if $(CONFIG_SHARED),--dll,--lib) \
|
||||
--target=$(TOOLCHAIN) \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
|
@ -264,6 +323,7 @@ vpx.pc: config.mk libs.mk
|
|||
$(qexec)echo 'Requires:' >> $@
|
||||
$(qexec)echo 'Conflicts:' >> $@
|
||||
$(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@
|
||||
$(qexec)echo 'Libs.private: -lm -lpthread' >> $@
|
||||
$(qexec)echo 'Cflags: -I$${includedir}' >> $@
|
||||
INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc
|
||||
INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
|
||||
|
@ -298,57 +358,6 @@ endif
|
|||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
|
||||
OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
|
||||
|
||||
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
|
||||
$(BUILD_PFX)asm_com_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
|
||||
$(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S: $(VP9_PREFIX)common/asm_com_offsets.c
|
||||
CLEAN-OBJS += $(BUILD_PFX)asm_com_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)common/asm_com_offsets.c.S
|
||||
|
||||
$(BUILD_PFX)asm_enc_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
|
||||
$(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S: $(VP9_PREFIX)encoder/asm_enc_offsets.c
|
||||
CLEAN-OBJS += $(BUILD_PFX)asm_enc_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)encoder/asm_enc_offsets.c.S
|
||||
|
||||
$(BUILD_PFX)asm_dec_offsets.asm: $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)LC_ALL=C grep $(OFFSET_PATTERN) $< | tr -d '$$\#' $(ADS2GAS) > $@
|
||||
$(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S: $(VP9_PREFIX)decoder/asm_dec_offsets.c
|
||||
CLEAN-OBJS += $(BUILD_PFX)asm_dec_offsets.asm $(BUILD_PFX)$(VP9_PREFIX)decoder/asm_dec_offsets.c.S
|
||||
else
|
||||
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
|
||||
asm_com_offsets.asm: obj_int_extract
|
||||
asm_com_offsets.asm: $(VP9_PREFIX)common/asm_com_offsets.c.o
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP9_PREFIX)common/asm_com_offsets.c.o
|
||||
CLEAN-OBJS += asm_com_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_com_offsets.asm
|
||||
|
||||
asm_enc_offsets.asm: obj_int_extract
|
||||
asm_enc_offsets.asm: $(VP9_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP9_PREFIX)encoder/asm_enc_offsets.c.o
|
||||
CLEAN-OBJS += asm_enc_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_enc_offsets.asm
|
||||
|
||||
asm_dec_offsets.asm: obj_int_extract
|
||||
asm_dec_offsets.asm: $(VP9_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)./obj_int_extract rvds $< $(ADS2GAS) > $@
|
||||
OBJS-yes += $(VP9_PREFIX)decoder/asm_dec_offsets.c.o
|
||||
CLEAN-OBJS += asm_dec_offsets.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)asm_dec_offsets.asm
|
||||
endif
|
||||
endif
|
||||
|
||||
$(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h)
|
||||
CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
|
||||
|
@ -356,15 +365,15 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_version.h
|
|||
#
|
||||
# Rule to generate runtime cpu detection files
|
||||
#
|
||||
$(OBJS-yes:.o=.d): vpx_rtcd.h
|
||||
vpx_rtcd.h: $(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
|
||||
$(BUILD_PFX)vpx_rtcd.h: $(SRC_PATH_BARE)/$(sort $(filter %rtcd_defs.sh,$(CODEC_SRCS)))
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/rtcd.sh --arch=$(TGT_ISA) \
|
||||
--sym=vpx_rtcd \
|
||||
--config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
|
||||
$(RTCD_OPTIONS) $^ > $@
|
||||
--sym=vpx_rtcd \
|
||||
--config=$(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk \
|
||||
$(RTCD_OPTIONS) $^ > $@
|
||||
CLEAN-OBJS += $(BUILD_PFX)vpx_rtcd.h
|
||||
|
||||
|
||||
CODEC_DOC_SRCS += vpx/vpx_codec.h \
|
||||
vpx/vpx_decoder.h \
|
||||
vpx/vpx_encoder.h \
|
||||
|
@ -373,7 +382,6 @@ CODEC_DOC_SRCS += vpx/vpx_codec.h \
|
|||
##
|
||||
## libvpx test directives
|
||||
##
|
||||
|
||||
ifeq ($(CONFIG_UNIT_TESTS),yes)
|
||||
LIBVPX_TEST_DATA_PATH ?= .
|
||||
|
||||
|
@ -392,8 +400,12 @@ $(LIBVPX_TEST_DATA):
|
|||
testdata:: $(LIBVPX_TEST_DATA)
|
||||
$(qexec)if [ -x "$$(which sha1sum)" ]; then\
|
||||
echo "Checking test data:";\
|
||||
(cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c)\
|
||||
< $(SRC_PATH_BARE)/test/test-data.sha1; \
|
||||
if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
|
||||
for f in $(call enabled,LIBVPX_TEST_DATA); do\
|
||||
grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
|
||||
(cd $(LIBVPX_TEST_DATA_PATH); sha1sum -c);\
|
||||
done; \
|
||||
fi; \
|
||||
else\
|
||||
echo "Skipping test data integrity check, sha1sum not found.";\
|
||||
fi
|
||||
|
@ -403,7 +415,7 @@ ifeq ($(CONFIG_MSVS),yes)
|
|||
|
||||
gtest.vcproj: $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--lib \
|
||||
--target=$(TOOLCHAIN) \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
|
@ -418,7 +430,7 @@ PROJECTS-$(CONFIG_MSVS) += gtest.vcproj
|
|||
|
||||
test_libvpx.vcproj: $(LIBVPX_TEST_SRCS)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=test_libvpx \
|
||||
|
@ -428,28 +440,6 @@ test_libvpx.vcproj: $(LIBVPX_TEST_SRCS)
|
|||
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
|
||||
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
|
||||
-L. -l$(CODEC_LIB) -lwinmm -l$(GTEST_LIB) $^
|
||||
ifeq ($(CONFIG_STATIC_MSVCRT),--static-crt)
|
||||
lib_sfx=mt
|
||||
else
|
||||
lib_sfx=md
|
||||
endif
|
||||
|
||||
define unit_test_vcproj_template
|
||||
$(notdir $(1:.cc=.vcproj)): $(SRC_PATH_BARE)/$(1)
|
||||
@echo " [vcproj] $$@"
|
||||
$$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
|
||||
--exe\
|
||||
--target=$$(TOOLCHAIN)\
|
||||
--name=$(notdir $(1:.cc=))\
|
||||
--ver=$$(CONFIG_VS_VERSION)\
|
||||
$$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \
|
||||
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
|
||||
-L. -lvpxmd -lwinmm -lgtest$(lib_sfx) $$^
|
||||
endef
|
||||
|
||||
$(foreach proj,$(LIBVPX_TEST_BINS),\
|
||||
$(eval $(call unit_test_vcproj_template,$(proj))))
|
||||
|
||||
PROJECTS-$(CONFIG_MSVS) += test_libvpx.vcproj
|
||||
|
||||
|
@ -461,24 +451,28 @@ else
|
|||
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
|
||||
GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
|
||||
GTEST_OBJS=$(call objs,$(GTEST_SRCS))
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
|
||||
LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
|
||||
$(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)
|
||||
|
||||
LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
|
||||
$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
|
||||
OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
|
||||
BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
|
||||
|
||||
# Install test sources only if codec source is included
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
|
||||
$(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
|
||||
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
|
||||
|
||||
CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
|
||||
CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
|
||||
$(foreach bin,$(LIBVPX_TEST_BINS),\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(bin): libvpx.a libgtest.a ))\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(bin): \
|
||||
lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
|
||||
$(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
|
||||
$(LIBVPX_TEST_OBJS) \
|
||||
-L. -lvpx -lgtest -lpthread -lm)\
|
||||
|
@ -503,3 +497,6 @@ libs.doxy: $(CODEC_DOC_SRCS)
|
|||
@echo "PREDEFINED = VPX_CODEC_DISABLE_COMPAT" >> $@
|
||||
@echo "INCLUDE_PATH += ." >> $@;
|
||||
@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@
|
||||
|
||||
## Generate vpx_rtcd.h for all objects
|
||||
$(OBJS-yes:.o=.d): $(BUILD_PFX)vpx_rtcd.h
|
||||
|
|
16
mainpage.dox
16
mainpage.dox
|
@ -12,8 +12,12 @@
|
|||
|
||||
This distribution of the WebM VP8 Codec SDK includes the following support:
|
||||
|
||||
\if vp8_encoder - \ref vp8_encoder \endif
|
||||
\if vp8_decoder - \ref vp8_decoder \endif
|
||||
\if vp8_encoder
|
||||
- \ref vp8_encoder
|
||||
\endif
|
||||
\if vp8_decoder
|
||||
- \ref vp8_decoder
|
||||
\endif
|
||||
|
||||
|
||||
\section main_startpoints Starting Points
|
||||
|
@ -24,8 +28,12 @@
|
|||
- Read the \ref samples "sample code" for examples of how to interact with the
|
||||
codec.
|
||||
- \ref codec reference
|
||||
\if encoder - \ref encoder reference \endif
|
||||
\if decoder - \ref decoder reference \endif
|
||||
\if encoder
|
||||
- \ref encoder reference
|
||||
\endif
|
||||
\if decoder
|
||||
- \ref decoder reference
|
||||
\endif
|
||||
|
||||
\section main_support Support Options & FAQ
|
||||
The WebM project is an open source project supported by its community. For
|
||||
|
|
|
@ -1272,7 +1272,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
|
|||
if (total > block_size)
|
||||
return -1;
|
||||
|
||||
entry = ne_find_track_entry(ctx, track - 1);
|
||||
entry = ne_find_track_entry(ctx, (unsigned int)(track - 1));
|
||||
if (!entry)
|
||||
return -1;
|
||||
|
||||
|
@ -1291,7 +1291,7 @@ ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_pac
|
|||
|
||||
pkt = ne_alloc(sizeof(*pkt));
|
||||
pkt->track = track - 1;
|
||||
pkt->timecode = abs_timecode * tc_scale * track_scale;
|
||||
pkt->timecode = (uint64_t)(abs_timecode * tc_scale * track_scale);
|
||||
|
||||
ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu",
|
||||
block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames);
|
||||
|
@ -1774,35 +1774,35 @@ nestegg_track_video_params(nestegg * ctx, unsigned int track,
|
|||
|
||||
if (ne_get_uint(entry->video.pixel_width, &value) != 0)
|
||||
return -1;
|
||||
params->width = value;
|
||||
params->width = (unsigned int)value;
|
||||
|
||||
if (ne_get_uint(entry->video.pixel_height, &value) != 0)
|
||||
return -1;
|
||||
params->height = value;
|
||||
params->height = (unsigned int)value;
|
||||
|
||||
value = 0;
|
||||
ne_get_uint(entry->video.pixel_crop_bottom, &value);
|
||||
params->crop_bottom = value;
|
||||
params->crop_bottom = (unsigned int)value;
|
||||
|
||||
value = 0;
|
||||
ne_get_uint(entry->video.pixel_crop_top, &value);
|
||||
params->crop_top = value;
|
||||
params->crop_top = (unsigned int)value;
|
||||
|
||||
value = 0;
|
||||
ne_get_uint(entry->video.pixel_crop_left, &value);
|
||||
params->crop_left = value;
|
||||
params->crop_left = (unsigned int)value;
|
||||
|
||||
value = 0;
|
||||
ne_get_uint(entry->video.pixel_crop_right, &value);
|
||||
params->crop_right = value;
|
||||
params->crop_right = (unsigned int)value;
|
||||
|
||||
value = params->width;
|
||||
ne_get_uint(entry->video.display_width, &value);
|
||||
params->display_width = value;
|
||||
params->display_width = (unsigned int)value;
|
||||
|
||||
value = params->height;
|
||||
ne_get_uint(entry->video.display_height, &value);
|
||||
params->display_height = value;
|
||||
params->display_height = (unsigned int)value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1828,11 +1828,11 @@ nestegg_track_audio_params(nestegg * ctx, unsigned int track,
|
|||
|
||||
value = 1;
|
||||
ne_get_uint(entry->audio.channels, &value);
|
||||
params->channels = value;
|
||||
params->channels = (unsigned int)value;
|
||||
|
||||
value = 16;
|
||||
ne_get_uint(entry->audio.bit_depth, &value);
|
||||
params->depth = value;
|
||||
params->depth = (unsigned int)value;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1888,7 +1888,7 @@ nestegg_free_packet(nestegg_packet * pkt)
|
|||
int
|
||||
nestegg_packet_track(nestegg_packet * pkt, unsigned int * track)
|
||||
{
|
||||
*track = pkt->track;
|
||||
*track = (unsigned int)pkt->track;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
17
solution.mk
17
solution.mk
|
@ -8,18 +8,19 @@
|
|||
## be found in the AUTHORS file in the root of the source tree.
|
||||
##
|
||||
|
||||
# libvpx reverse dependencies (targets that depend on libvpx)
|
||||
VPX_NONDEPS=$(addsuffix .vcproj,vpx gtest obj_int_extract)
|
||||
VPX_RDEPS=$(foreach vcp,\
|
||||
$(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.vcproj=):vpx)
|
||||
|
||||
vpx.sln: $(wildcard *.vcproj)
|
||||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
|
||||
$(if $(filter %vpx.vcproj,$^),\
|
||||
$(foreach vcp,$(filter-out %vpx.vcproj %gtest.vcproj %obj_int_extract.vcproj,$^),\
|
||||
--dep=$(vcp:.vcproj=):vpx) \
|
||||
$(foreach vcp,$(filter %_test.vcproj,$^),\
|
||||
--dep=$(vcp:.vcproj=):gtest)) \
|
||||
--dep=vpx:obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $^
|
||||
$(if $(filter vpx.vcproj,$^),$(VPX_RDEPS)) \
|
||||
--dep=vpx:obj_int_extract \
|
||||
--dep=test_libvpx:gtest \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $^
|
||||
vpx.sln.mk: vpx.sln
|
||||
@true
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@ namespace libvpx_test {
|
|||
|
||||
class ACMRandom {
|
||||
public:
|
||||
ACMRandom() {
|
||||
Reset(DeterministicSeed());
|
||||
}
|
||||
|
||||
explicit ACMRandom(int seed) {
|
||||
Reset(seed);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// lookahead range: [kLookAheadMin, kLookAheadMax).
|
||||
const int kLookAheadMin = 5;
|
||||
const int kLookAheadMax = 26;
|
||||
|
||||
class AltRefTest : public libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
AltRefTest() : altref_count_(0) {}
|
||||
virtual ~AltRefTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(libvpx_test::kTwoPassGood);
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int pass) {
|
||||
altref_count_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_CPUUSED, 3);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_;
|
||||
}
|
||||
|
||||
int altref_count() const { return altref_count_; }
|
||||
|
||||
private:
|
||||
int altref_count_;
|
||||
};
|
||||
|
||||
// Encodes 30 frames of the 352x288 collage clip with the lag-in-frames value
// supplied by the test parameter and expects at least one invisible frame
// packet to have been counted.
TEST_P(AltRefTest, MonotonicTimestamps) {
  const vpx_rational tb = { 33333333, 1000000000 };
  cfg_.g_lag_in_frames = GetParam();
  cfg_.rc_target_bitrate = 1000;
  cfg_.g_timebase = tb;

  // The source takes the timebase inverted: denominator first, then numerator.
  libvpx_test::I420VideoSource source("hantro_collage_w352h288.yuv", 352, 288,
                                      tb.den, tb.num, 0, 30);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&source));
  EXPECT_GE(altref_count(), 1);
}

// One instance per lookahead value in [kLookAheadMin, kLookAheadMax).
INSTANTIATE_TEST_CASE_P(
    NonZeroLag, AltRefTest,
    ::testing::Range(kLookAheadMin, kLookAheadMax));
|
||||
} // namespace
|
|
@ -8,26 +8,28 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
extern "C" {
|
||||
#include "vp9/encoder/boolhuff.h"
|
||||
#include "vp9/decoder/dboolhuff.h"
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vp8/decoder/dboolhuff.h"
|
||||
}
|
||||
|
||||
#include "acm_random.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
#include "test/acm_random.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
namespace {
|
||||
const int num_tests = 10;
|
||||
} // namespace
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
TEST(VP8, TestBitIO) {
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int n = 0; n < num_tests; ++n) {
|
||||
|
@ -38,15 +40,15 @@ TEST(VP8, TestBitIO) {
|
|||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
const int parity = i & 1;
|
||||
probas[i] =
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
(method == 2) ? 128 :
|
||||
(method == 3) ? rnd.Rand8() :
|
||||
(method == 4) ? (parity ? 0 : 255) :
|
||||
(method == 0) ? 0 : (method == 1) ? 255 :
|
||||
(method == 2) ? 128 :
|
||||
(method == 3) ? rnd.Rand8() :
|
||||
(method == 4) ? (parity ? 0 : 255) :
|
||||
// alternate between low and high proba:
|
||||
(method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) :
|
||||
(method == 6) ?
|
||||
(parity ? rnd(64) : 255 - rnd(64)) :
|
||||
(parity ? rnd(32) : 255 - rnd(32));
|
||||
(parity ? rnd(64) : 255 - rnd(64)) :
|
||||
(parity ? rnd(32) : 255 - rnd(32));
|
||||
}
|
||||
for (int bit_method = 0; bit_method <= 3; ++bit_method) {
|
||||
const int random_seed = 6432;
|
||||
|
@ -54,7 +56,7 @@ TEST(VP8, TestBitIO) {
|
|||
ACMRandom bit_rnd(random_seed);
|
||||
BOOL_CODER bw;
|
||||
uint8_t bw_buffer[buffer_size];
|
||||
vp8_start_encode(&bw, bw_buffer);
|
||||
vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);
|
||||
|
||||
int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
|
||||
for (int i = 0; i < bits_to_test; ++i) {
|
||||
|
@ -78,7 +80,7 @@ TEST(VP8, TestBitIO) {
|
|||
bit = bit_rnd(2);
|
||||
}
|
||||
GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
|
||||
<< "pos: " << i << " / " << bits_to_test
|
||||
<< "pos: "<< i << " / " << bits_to_test
|
||||
<< " bit_method: " << bit_method
|
||||
<< " method: " << method;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/video_source.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class ConfigTest : public ::libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<enum libvpx_test::TestMode> {
|
||||
public:
|
||||
ConfigTest() : frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {}
|
||||
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GetParam());
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
frame_count_in_ = 0;
|
||||
frame_count_out_ = 0;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource* /*video*/) {
|
||||
++frame_count_in_;
|
||||
abort_ |= (frame_count_in_ >= frame_count_max_);
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t* /*pkt*/) {
|
||||
++frame_count_out_;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
unsigned int frame_count_in_;
|
||||
unsigned int frame_count_out_;
|
||||
unsigned int frame_count_max_;
|
||||
};
|
||||
|
||||
// Requests a 15-frame lag, submits frames until the two-frame limit trips the
// abort flag, and expects as many frame packets out as frames went in.
TEST_P(ConfigTest, LagIsDisabled) {
  cfg_.g_lag_in_frames = 15;
  frame_count_max_ = 2;

  libvpx_test::DummyVideoSource source;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&source));

  EXPECT_EQ(frame_count_in_, frame_count_out_);
}

// Run once for every mode listed in ONE_PASS_TEST_MODES.
INSTANTIATE_TEST_CASE_P(OnePassModes, ConfigTest, ONE_PASS_TEST_MODES);
|
||||
} // namespace
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <cmath>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
|
||||
// CQ level range: [kCQLevelMin, kCQLevelMax).
|
||||
const int kCQLevelMin = 4;
|
||||
const int kCQLevelMax = 63;
|
||||
const int kCQLevelStep = 8;
|
||||
const int kCQTargetBitrate = 2000;
|
||||
|
||||
namespace {
|
||||
|
||||
class CQTest : public libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
CQTest() : cq_level_(GetParam()) { init_flags_ = VPX_CODEC_USE_PSNR; }
|
||||
virtual ~CQTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(libvpx_test::kTwoPassGood);
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
file_size_ = 0;
|
||||
psnr_ = 0.0;
|
||||
n_frames_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
|
||||
libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 1) {
|
||||
if (cfg_.rc_end_usage == VPX_CQ) {
|
||||
encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_);
|
||||
}
|
||||
encoder->Control(VP8E_SET_CPUUSED, 3);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0);
|
||||
n_frames_++;
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
file_size_ += pkt->data.frame.sz;
|
||||
}
|
||||
|
||||
double GetLinearPSNROverBitrate() const {
|
||||
double avg_psnr = log10(psnr_ / n_frames_) * 10.0;
|
||||
return pow(10.0, avg_psnr / 10.0) / file_size_;
|
||||
}
|
||||
|
||||
int file_size() const { return file_size_; }
|
||||
int n_frames() const { return n_frames_; }
|
||||
|
||||
private:
|
||||
int cq_level_;
|
||||
int file_size_;
|
||||
double psnr_;
|
||||
int n_frames_;
|
||||
};
|
||||
|
||||
int prev_actual_bitrate = kCQTargetBitrate;
|
||||
TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
|
||||
const vpx_rational timebase = { 33333333, 1000000000 };
|
||||
cfg_.g_timebase = timebase;
|
||||
cfg_.rc_target_bitrate = kCQTargetBitrate;
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
|
||||
cfg_.rc_end_usage = VPX_CQ;
|
||||
libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
timebase.den, timebase.num, 0, 30);
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
const double cq_psnr_lin = GetLinearPSNROverBitrate();
|
||||
const int cq_actual_bitrate = file_size() * 8 * 30 / (n_frames() * 1000);
|
||||
EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate);
|
||||
EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate);
|
||||
prev_actual_bitrate = cq_actual_bitrate;
|
||||
|
||||
// try targeting the approximate same bitrate with VBR mode
|
||||
cfg_.rc_end_usage = VPX_VBR;
|
||||
cfg_.rc_target_bitrate = cq_actual_bitrate;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
const double vbr_psnr_lin = GetLinearPSNROverBitrate();
|
||||
EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(CQLevelRange, CQTest,
|
||||
::testing::Range(kCQLevelMin, kCQLevelMax,
|
||||
kCQLevelStep));
|
||||
} // namespace
|
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
namespace {
|
||||
|
||||
class DatarateTest : public ::libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<enum libvpx_test::TestMode> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GetParam());
|
||||
ResetModel();
|
||||
}
|
||||
|
||||
virtual void ResetModel() {
|
||||
last_pts_ = 0;
|
||||
bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
|
||||
frame_number_ = 0;
|
||||
first_drop_ = 0;
|
||||
bits_total_ = 0;
|
||||
duration_ = 0.0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
const vpx_rational_t tb = video->timebase();
|
||||
timebase_ = static_cast<double>(tb.num) / tb.den;
|
||||
duration_ = 0;
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
// Time since last timestamp = duration.
|
||||
vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
|
||||
|
||||
// TODO(jimbankoski): Remove these lines when the issue:
|
||||
// http://code.google.com/p/webm/issues/detail?id=496 is fixed.
|
||||
// For now the codec assumes buffer starts at starting buffer rate
|
||||
// plus one frame's time.
|
||||
if (last_pts_ == 0)
|
||||
duration = 1;
|
||||
|
||||
// Add to the buffer the bits we'd expect from a constant bitrate server.
|
||||
bits_in_buffer_model_ += duration * timebase_ * cfg_.rc_target_bitrate
|
||||
* 1000;
|
||||
|
||||
/* Test the buffer model here before subtracting the frame. Do so because
|
||||
* the way the leaky bucket model works in libvpx is to allow the buffer to
|
||||
* empty - and then stop showing frames until we've got enough bits to
|
||||
* show one. */
|
||||
ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
|
||||
<< pkt->data.frame.pts;
|
||||
|
||||
const int frame_size_in_bits = pkt->data.frame.sz * 8;
|
||||
|
||||
// Subtract from the buffer the bits associated with a played back frame.
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
|
||||
// Update the running total of bits for end of test datarate checks.
|
||||
bits_total_ += frame_size_in_bits ;
|
||||
|
||||
// If first drop not set and we have a drop set it to this time.
|
||||
if (!first_drop_ && duration > 1)
|
||||
first_drop_ = last_pts_ + 1;
|
||||
|
||||
// Update the most recent pts.
|
||||
last_pts_ = pkt->data.frame.pts;
|
||||
|
||||
// We update this so that we can calculate the datarate minus the last
|
||||
// frame encoded in the file.
|
||||
bits_in_last_frame_ = frame_size_in_bits;
|
||||
|
||||
++frame_number_;
|
||||
}
|
||||
|
||||
virtual void EndPassHook(void) {
|
||||
if (bits_total_) {
|
||||
const double file_size_in_kb = bits_total_ / 1000; /* bits per kilobit */
|
||||
|
||||
duration_ = (last_pts_ + 1) * timebase_;
|
||||
|
||||
// Effective file datarate includes the time spent prebuffering.
|
||||
effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
|
||||
/ (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
|
||||
|
||||
file_datarate_ = file_size_in_kb / duration_;
|
||||
}
|
||||
}
|
||||
|
||||
vpx_codec_pts_t last_pts_;
|
||||
int bits_in_buffer_model_;
|
||||
double timebase_;
|
||||
int frame_number_;
|
||||
vpx_codec_pts_t first_drop_;
|
||||
int64_t bits_total_;
|
||||
double duration_;
|
||||
double file_datarate_;
|
||||
double effective_datarate_;
|
||||
int bits_in_last_frame_;
|
||||
};
|
||||
|
||||
// Runs a CBR encode of the 140-frame clip at several target bitrates and
// checks that the effective rate does not exceed the target while the file
// rate, with a 30% allowance, reaches it.
TEST_P(DatarateTest, BasicBufferModel) {
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_dropframe_thresh = 1;
  cfg_.rc_buf_initial_sz = 500;

  // 2 pass cbr datarate control has a bug hidden by the small # of
  // frames selected in this encode. The problem is that even if the buffer is
  // negative we produce a keyframe on a cutscene, ignoring datarate
  // constraints.
  // TODO(jimbankoski): ( Fix when issue
  // http://code.google.com/p/webm/issues/detail?id=495 is addressed. )
  ::libvpx_test::I420VideoSource source("hantro_collage_w352h288.yuv",
                                        352, 288, 30, 1, 0, 140);

  // Target bitrates exercised: 70, 270, 470 and 670.
  const int kFirstBitrate = 70;
  const int kBitrateLimit = 700;
  const int kBitrateStep = 200;
  for (int target = kFirstBitrate; target < kBitrateLimit;
       target += kBitrateStep) {
    cfg_.rc_target_bitrate = target;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&source));

    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_)
        << " The datarate for the file exceeds the target!";

    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
        << " The datarate for the file missed the target!";
  }
}
|
||||
|
||||
// Encodes the same 40-frame clip with increasing drop-frame thresholds and
// checks that the first recorded drop never moves later as the threshold
// grows.
TEST_P(DatarateTest, ChangingDropFrameThresh) {
  const int kDropFrameThreshTestStep = 30;
  const int frame_count = 40;

  cfg_.kf_mode = VPX_KF_DISABLED;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.rc_max_quantizer = 36;
  cfg_.rc_buf_initial_sz = 500;

  ::libvpx_test::I420VideoSource source("hantro_collage_w352h288.yuv",
                                        352, 288, 30, 1, 0, frame_count);

  // Here we check that the first dropped frame gets earlier and earlier
  // as the drop frame threshold is increased (thresholds 1, 31 and 61).
  vpx_codec_pts_t last_drop = frame_count;
  for (int thresh = 1; thresh < 91; thresh += kDropFrameThreshTestStep) {
    cfg_.rc_dropframe_thresh = thresh;
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&source));
    ASSERT_LE(first_drop_, last_drop)
        << " The first dropped frame for drop_thresh " << thresh
        << " > first dropped frame for drop_thresh "
        << thresh - kDropFrameThreshTestStep;
    last_drop = first_drop_;
  }
}

// Run once for every mode listed in ALL_TEST_MODES.
INSTANTIATE_TEST_CASE_P(AllModes, DatarateTest, ALL_TEST_MODES);
|
||||
} // namespace
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "test/decode_test_driver.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/video_source.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
#if CONFIG_VP8_DECODER
|
||||
void Decoder::DecodeFrame(const uint8_t *cxdata, int size) {
|
||||
if (!decoder_.priv) {
|
||||
const vpx_codec_err_t res_init = vpx_codec_dec_init(&decoder_,
|
||||
&vpx_codec_vp8_dx_algo,
|
||||
&cfg_, 0);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res_init) << DecodeError();
|
||||
}
|
||||
|
||||
const vpx_codec_err_t res_dec = vpx_codec_decode(&decoder_,
|
||||
cxdata, size, NULL, 0);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res_dec) << DecodeError();
|
||||
}
|
||||
|
||||
// Main decode loop: feeds every compressed frame of |video| to a fresh
// Decoder and passes each decoded image to DecompressedFrameHook().
void DecoderTest::RunLoop(CompressedVideoSource *video) {
  vpx_codec_dec_cfg_t dec_cfg = {0};
  Decoder decoder(dec_cfg, 0);

  // Walk the source until it stops providing compressed data.
  video->Begin();
  while (video->cxdata()) {
    decoder.DecodeFrame(video->cxdata(), video->frame_size());

    // Drain all images produced by this frame.
    DxDataIterator frames = decoder.GetDxData();
    for (const vpx_image_t *img = frames.Next(); img != NULL;
         img = frames.Next()) {
      DecompressedFrameHook(*img, video->frame_number());
    }

    video->Next();
  }
}
|
||||
#endif
|
||||
} // namespace libvpx_test
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_DECODE_TEST_DRIVER_H_
|
||||
#define TEST_DECODE_TEST_DRIVER_H_
|
||||
#include <cstring>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx_config.h"
|
||||
#include "vpx/vpx_decoder.h"
|
||||
#include "vpx/vp8dx.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
class CompressedVideoSource;
|
||||
|
||||
// Provides an object to handle decoding output
|
||||
class DxDataIterator {
|
||||
public:
|
||||
explicit DxDataIterator(vpx_codec_ctx_t *decoder)
|
||||
: decoder_(decoder), iter_(NULL) {}
|
||||
|
||||
const vpx_image_t *Next() {
|
||||
return vpx_codec_get_frame(decoder_, &iter_);
|
||||
}
|
||||
|
||||
private:
|
||||
vpx_codec_ctx_t *decoder_;
|
||||
vpx_codec_iter_t iter_;
|
||||
};
|
||||
|
||||
// Provides a simplified interface to manage one video decoding.
|
||||
//
|
||||
// TODO: similar to Encoder class, the exact services should be
|
||||
// added as more tests are added.
|
||||
class Decoder {
|
||||
public:
|
||||
Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
|
||||
: cfg_(cfg), deadline_(deadline) {
|
||||
memset(&decoder_, 0, sizeof(decoder_));
|
||||
}
|
||||
|
||||
~Decoder() {
|
||||
vpx_codec_destroy(&decoder_);
|
||||
}
|
||||
|
||||
void DecodeFrame(const uint8_t *cxdata, int size);
|
||||
|
||||
DxDataIterator GetDxData() {
|
||||
return DxDataIterator(&decoder_);
|
||||
}
|
||||
|
||||
void set_deadline(unsigned long deadline) {
|
||||
deadline_ = deadline;
|
||||
}
|
||||
|
||||
void Control(int ctrl_id, int arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
|
||||
}
|
||||
|
||||
protected:
|
||||
const char *DecodeError() {
|
||||
const char *detail = vpx_codec_error_detail(&decoder_);
|
||||
return detail ? detail : vpx_codec_error(&decoder_);
|
||||
}
|
||||
|
||||
vpx_codec_ctx_t decoder_;
|
||||
vpx_codec_dec_cfg_t cfg_;
|
||||
unsigned int deadline_;
|
||||
};
|
||||
|
||||
// Common test functionality for all Decoder tests.
|
||||
class DecoderTest {
|
||||
public:
|
||||
// Main loop.
|
||||
virtual void RunLoop(CompressedVideoSource *video);
|
||||
|
||||
// Hook to be called on every decompressed frame.
|
||||
virtual void DecompressedFrameHook(const vpx_image_t& img,
|
||||
const unsigned int frame_number) {}
|
||||
|
||||
protected:
|
||||
DecoderTest() {}
|
||||
|
||||
virtual ~DecoderTest() {}
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_DECODE_TEST_DRIVER_H_
|
|
@ -0,0 +1,204 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "vpx_config.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#if CONFIG_VP8_DECODER
|
||||
#include "test/decode_test_driver.h"
|
||||
#endif
|
||||
#include "test/video_source.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
// Encodes the current frame of |video|, or flushes the encoder when the
// source has no image, then stores any two-pass statistics packets that
// were produced.
void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
  if (video->img() == NULL)
    Flush();
  else
    EncodeFrameInternal(*video, frame_flags);

  // Handle twopass stats: only stats packets are appended to the store,
  // every other packet kind is skipped here.
  CxDataIterator packets = GetCxData();
  for (const vpx_codec_cx_pkt_t *pkt = packets.Next(); pkt;
       pkt = packets.Next()) {
    if (pkt->kind == VPX_CODEC_STATS_PKT)
      stats_->Append(*pkt);
  }
}
|
||||
|
||||
void Encoder::EncodeFrameInternal(const VideoSource &video,
|
||||
const unsigned long frame_flags) {
|
||||
vpx_codec_err_t res;
|
||||
const vpx_image_t *img = video.img();
|
||||
|
||||
// Handle first frame initialization
|
||||
if (!encoder_.priv) {
|
||||
cfg_.g_w = img->d_w;
|
||||
cfg_.g_h = img->d_h;
|
||||
cfg_.g_timebase = video.timebase();
|
||||
cfg_.rc_twopass_stats_in = stats_->buf();
|
||||
res = vpx_codec_enc_init(&encoder_, &vpx_codec_vp8_cx_algo, &cfg_,
|
||||
init_flags_);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
// Handle frame resizing
|
||||
if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
|
||||
cfg_.g_w = img->d_w;
|
||||
cfg_.g_h = img->d_h;
|
||||
res = vpx_codec_enc_config_set(&encoder_, &cfg_);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
// Encode the frame
|
||||
res = vpx_codec_encode(&encoder_,
|
||||
video.img(), video.pts(), video.duration(),
|
||||
frame_flags, deadline_);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
void Encoder::Flush() {
|
||||
const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0,
|
||||
deadline_);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
// Translates a TestMode into the encode deadline and the number of passes.
// An unknown mode raises a fatal failure and leaves passes_ untouched.
void EncoderTest::SetMode(TestMode mode) {
  switch (mode) {
    case kRealTime:
      deadline_ = VPX_DL_REALTIME;
      passes_ = 1;
      break;

    case kOnePassGood:
      deadline_ = VPX_DL_GOOD_QUALITY;
      passes_ = 1;
      break;

    case kTwoPassGood:
      deadline_ = VPX_DL_GOOD_QUALITY;
      passes_ = 2;
      break;

    case kOnePassBest:
      deadline_ = VPX_DL_BEST_QUALITY;
      passes_ = 1;
      break;

    case kTwoPassBest:
      deadline_ = VPX_DL_BEST_QUALITY;
      passes_ = 2;
      break;

    default:
      ASSERT_TRUE(false) << "Unexpected mode " << mode;
  }
}
|
||||
// The function should return "true" most of the time, therefore no early
|
||||
// break-out is implemented within the match checking process.
|
||||
static bool compare_img(const vpx_image_t *img1,
|
||||
const vpx_image_t *img2) {
|
||||
bool match = (img1->fmt == img2->fmt) &&
|
||||
(img1->d_w == img2->d_w) &&
|
||||
(img1->d_h == img2->d_h);
|
||||
|
||||
const unsigned int width_y = img1->d_w;
|
||||
const unsigned int height_y = img1->d_h;
|
||||
unsigned int i;
|
||||
for (i = 0; i < height_y; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
|
||||
img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
|
||||
width_y) == 0) && match;
|
||||
const unsigned int width_uv = (img1->d_w + 1) >> 1;
|
||||
const unsigned int height_uv = (img1->d_h + 1) >> 1;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
|
||||
img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
|
||||
width_uv) == 0) && match;
|
||||
for (i = 0; i < height_uv; ++i)
|
||||
match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
|
||||
img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
|
||||
width_uv) == 0) && match;
|
||||
return match;
|
||||
}
|
||||
|
||||
// Main encode loop.
//
// Runs passes_ encode passes over |video|. Each pass creates a fresh Encoder
// (and, when the VP8 decoder is built, a Decoder), encodes every frame,
// dispatches the resulting packets to the hooks and stops early as soon as
// Continue() returns false. With the decoder available, each compressed
// frame is decoded and the encoder's preview frame is compared against the
// decoder's output.
void EncoderTest::RunLoop(VideoSource *video) {
#if CONFIG_VP8_DECODER
  vpx_codec_dec_cfg_t dec_cfg = {0};
#endif

  stats_.Reset();

  for (unsigned int pass = 0; pass < passes_; pass++) {
    last_pts_ = 0;

    if (passes_ == 1)
      cfg_.g_pass = VPX_RC_ONE_PASS;
    else if (pass == 0)
      cfg_.g_pass = VPX_RC_FIRST_PASS;
    else
      cfg_.g_pass = VPX_RC_LAST_PASS;

    BeginPassHook(pass);
    Encoder encoder(cfg_, deadline_, init_flags_, &stats_);
#if CONFIG_VP8_DECODER
    Decoder decoder(dec_cfg, 0);
#endif
    // The loop keeps running while the source still has images or the
    // encoder is still returning packets (e.g. while it is being flushed).
    bool again;
    for (again = true, video->Begin(); again; video->Next()) {
      again = video->img() != NULL;
#if CONFIG_VP8_DECODER
      // Reset for every frame so that the mismatch check below only runs
      // when this iteration actually produced compressed data. The flag used
      // to be set once per pass and then stayed true forever.
      bool has_cxdata = false;
#endif

      PreEncodeFrameHook(video);
      PreEncodeFrameHook(video, &encoder);
      encoder.EncodeFrame(video, frame_flags_);

      CxDataIterator iter = encoder.GetCxData();

      while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
        again = true;

        switch (pkt->kind) {
          case VPX_CODEC_CX_FRAME_PKT:
#if CONFIG_VP8_DECODER
            has_cxdata = true;
            decoder.DecodeFrame(
                static_cast<const uint8_t *>(pkt->data.frame.buf),
                pkt->data.frame.sz);
#endif
            // Presentation timestamps must never go backwards.
            ASSERT_GE(pkt->data.frame.pts, last_pts_);
            last_pts_ = pkt->data.frame.pts;
            FramePktHook(pkt);
            break;

          case VPX_CODEC_PSNR_PKT:
            PSNRPktHook(pkt);
            break;

          default:
            break;
        }
      }

#if CONFIG_VP8_DECODER
      if (has_cxdata) {
        const vpx_image_t *img_enc = encoder.GetPreviewFrame();
        DxDataIterator dec_iter = decoder.GetDxData();
        const vpx_image_t *img_dec = dec_iter.Next();
        if (img_enc && img_dec) {
          const bool res = compare_img(img_enc, img_dec);
          ASSERT_TRUE(res) << "Encoder/Decoder mismatch found.";
        }
      }
#endif
      if (!Continue())
        break;
    }

    EndPassHook();

    if (!Continue())
      break;
  }
}
|
||||
} // namespace libvpx_test
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TEST_ENCODE_TEST_DRIVER_H_
|
||||
#define TEST_ENCODE_TEST_DRIVER_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_encoder.h"
|
||||
#include "vpx/vp8cx.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
class VideoSource;
|
||||
|
||||
enum TestMode {
|
||||
kRealTime,
|
||||
kOnePassGood,
|
||||
kOnePassBest,
|
||||
kTwoPassGood,
|
||||
kTwoPassBest
|
||||
};
|
||||
#define ALL_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
|
||||
::libvpx_test::kOnePassGood, \
|
||||
::libvpx_test::kOnePassBest, \
|
||||
::libvpx_test::kTwoPassGood, \
|
||||
::libvpx_test::kTwoPassBest)
|
||||
|
||||
#define ONE_PASS_TEST_MODES ::testing::Values(::libvpx_test::kRealTime, \
|
||||
::libvpx_test::kOnePassGood, \
|
||||
::libvpx_test::kOnePassBest)
|
||||
|
||||
|
||||
// Provides an object to handle the libvpx get_cx_data() iteration pattern
|
||||
class CxDataIterator {
|
||||
public:
|
||||
explicit CxDataIterator(vpx_codec_ctx_t *encoder)
|
||||
: encoder_(encoder), iter_(NULL) {}
|
||||
|
||||
const vpx_codec_cx_pkt_t *Next() {
|
||||
return vpx_codec_get_cx_data(encoder_, &iter_);
|
||||
}
|
||||
|
||||
private:
|
||||
vpx_codec_ctx_t *encoder_;
|
||||
vpx_codec_iter_t iter_;
|
||||
};
|
||||
|
||||
// Implements an in-memory store for libvpx twopass statistics
|
||||
class TwopassStatsStore {
|
||||
public:
|
||||
void Append(const vpx_codec_cx_pkt_t &pkt) {
|
||||
buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf),
|
||||
pkt.data.twopass_stats.sz);
|
||||
}
|
||||
|
||||
vpx_fixed_buf_t buf() {
|
||||
const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() };
|
||||
return buf;
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
buffer_.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string buffer_;
|
||||
};
|
||||
|
||||
|
||||
// Provides a simplified interface to manage one video encoding pass, given
|
||||
// a configuration and video source.
|
||||
//
|
||||
// TODO(jkoleszar): The exact services it provides and the appropriate
|
||||
// level of abstraction will be fleshed out as more tests are written.
|
||||
class Encoder {
|
||||
public:
|
||||
Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
|
||||
const unsigned long init_flags, TwopassStatsStore *stats)
|
||||
: cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) {
|
||||
memset(&encoder_, 0, sizeof(encoder_));
|
||||
}
|
||||
|
||||
~Encoder() {
|
||||
vpx_codec_destroy(&encoder_);
|
||||
}
|
||||
|
||||
CxDataIterator GetCxData() {
|
||||
return CxDataIterator(&encoder_);
|
||||
}
|
||||
|
||||
const vpx_image_t *GetPreviewFrame() {
|
||||
return vpx_codec_get_preview_frame(&encoder_);
|
||||
}
|
||||
// This is a thin wrapper around vpx_codec_encode(), so refer to
|
||||
// vpx_encoder.h for its semantics.
|
||||
void EncodeFrame(VideoSource *video, const unsigned long frame_flags);
|
||||
|
||||
// Convenience wrapper for EncodeFrame()
|
||||
void EncodeFrame(VideoSource *video) {
|
||||
EncodeFrame(video, 0);
|
||||
}
|
||||
|
||||
void Control(int ctrl_id, int arg) {
|
||||
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
|
||||
}
|
||||
|
||||
void set_deadline(unsigned long deadline) {
|
||||
deadline_ = deadline;
|
||||
}
|
||||
|
||||
protected:
|
||||
const char *EncoderError() {
|
||||
const char *detail = vpx_codec_error_detail(&encoder_);
|
||||
return detail ? detail : vpx_codec_error(&encoder_);
|
||||
}
|
||||
|
||||
// Encode an image
|
||||
void EncodeFrameInternal(const VideoSource &video,
|
||||
const unsigned long frame_flags);
|
||||
|
||||
// Flush the encoder on EOS
|
||||
void Flush();
|
||||
|
||||
vpx_codec_ctx_t encoder_;
|
||||
vpx_codec_enc_cfg_t cfg_;
|
||||
unsigned long deadline_;
|
||||
unsigned long init_flags_;
|
||||
TwopassStatsStore *stats_;
|
||||
};
|
||||
|
||||
// Common test functionality for all Encoder tests.
|
||||
//
|
||||
// This class is a mixin which provides the main loop common to all
|
||||
// encoder tests. It provides hooks which can be overridden by subclasses
|
||||
// to implement each test's specific behavior, while centralizing the bulk
|
||||
// of the boilerplate. Note that it doesn't inherit the gtest testing
|
||||
// classes directly, so that tests can be parameterized differently.
|
||||
class EncoderTest {
|
||||
protected:
|
||||
EncoderTest() : abort_(false), init_flags_(0), frame_flags_(0),
|
||||
last_pts_(0) {}
|
||||
|
||||
virtual ~EncoderTest() {}
|
||||
|
||||
// Initialize the cfg_ member with the default configuration.
|
||||
void InitializeConfig() {
|
||||
const vpx_codec_err_t res = vpx_codec_enc_config_default(
|
||||
&vpx_codec_vp8_cx_algo, &cfg_, 0);
|
||||
ASSERT_EQ(VPX_CODEC_OK, res);
|
||||
}
|
||||
|
||||
// Map the TestMode enum to the deadline_ and passes_ variables.
|
||||
void SetMode(TestMode mode);
|
||||
|
||||
// Main loop.
|
||||
virtual void RunLoop(VideoSource *video);
|
||||
|
||||
// Hook to be called at the beginning of a pass.
|
||||
virtual void BeginPassHook(unsigned int pass) {}
|
||||
|
||||
// Hook to be called at the end of a pass.
|
||||
virtual void EndPassHook() {}
|
||||
|
||||
// Hook to be called before encoding a frame.
|
||||
virtual void PreEncodeFrameHook(VideoSource *video) {}
|
||||
virtual void PreEncodeFrameHook(VideoSource *video, Encoder *encoder) {}
|
||||
|
||||
// Hook to be called on every compressed data packet.
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {}
|
||||
|
||||
// Hook to be called on every PSNR packet.
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}
|
||||
|
||||
// Hook to determine whether the encode loop should continue.
|
||||
virtual bool Continue() const { return !abort_; }
|
||||
|
||||
bool abort_;
|
||||
vpx_codec_enc_cfg_t cfg_;
|
||||
unsigned int passes_;
|
||||
unsigned long deadline_;
|
||||
TwopassStatsStore stats_;
|
||||
unsigned long init_flags_;
|
||||
unsigned long frame_flags_;
|
||||
vpx_codec_pts_t last_pts_;
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_ENCODE_TEST_DRIVER_H_
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
|
||||
Use of this source code is governed by a BSD-style license
|
||||
that can be found in the LICENSE file in the root of the source
|
||||
tree. An additional intellectual property rights grant can be found
|
||||
in the file PATENTS. All contributing project authors may
|
||||
be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class ErrorResilienceTest : public libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<int> {
|
||||
protected:
|
||||
ErrorResilienceTest() {
|
||||
psnr_ = 0.0;
|
||||
nframes_ = 0;
|
||||
encoding_mode_ = static_cast<libvpx_test::TestMode>(GetParam());
|
||||
}
|
||||
virtual ~ErrorResilienceTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(encoding_mode_);
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
psnr_ = 0.0;
|
||||
nframes_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
psnr_ += pkt->data.psnr.psnr[0];
|
||||
nframes_++;
|
||||
}
|
||||
|
||||
double GetAveragePsnr() const {
|
||||
if (nframes_)
|
||||
return psnr_ / nframes_;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
private:
|
||||
double psnr_;
|
||||
unsigned int nframes_;
|
||||
libvpx_test::TestMode encoding_mode_;
|
||||
};
|
||||
|
||||
// Encodes the same clip with error resilience disabled and enabled and
// checks that the average PSNR stays above 25 in both cases and that the
// two results are within 10% of each other.
TEST_P(ErrorResilienceTest, OnVersusOff) {
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.g_lag_in_frames = 25;
  cfg_.rc_target_bitrate = 2000;

  // Ask the encoder to emit PSNR packets.
  init_flags_ = VPX_CODEC_USE_PSNR;

  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                     timebase.den, timebase.num, 0, 30);

  // Error resilient mode OFF.
  cfg_.g_error_resilient = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double psnr_resilience_off = GetAveragePsnr();
  EXPECT_GT(psnr_resilience_off, 25.0);

  // Error resilient mode ON.
  cfg_.g_error_resilient = 1;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double psnr_resilience_on = GetAveragePsnr();
  EXPECT_GT(psnr_resilience_on, 25.0);

  // Test that turning on error resilient mode changes the PSNR by 10% at
  // most. The ratio is only formed when the baseline is positive.
  if (!(psnr_resilience_off > 0.0))
    return;

  const double psnr_ratio = psnr_resilience_on / psnr_resilience_off;
  EXPECT_GE(psnr_ratio, 0.9);
  EXPECT_LE(psnr_ratio, 1.1);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OnOffTest, ErrorResilienceTest,
|
||||
ONE_PASS_TEST_MODES);
|
||||
} // namespace
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TEST_I420_VIDEO_SOURCE_H_
|
||||
#define TEST_I420_VIDEO_SOURCE_H_
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
||||
#include "test/video_source.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
// This class extends VideoSource to allow parsing of raw yv12
|
||||
// so that we can do actual file encodes.
|
||||
class I420VideoSource : public VideoSource {
|
||||
public:
|
||||
I420VideoSource(const std::string &file_name,
|
||||
unsigned int width, unsigned int height,
|
||||
int rate_numerator, int rate_denominator,
|
||||
unsigned int start, int limit)
|
||||
: file_name_(file_name),
|
||||
input_file_(NULL),
|
||||
img_(NULL),
|
||||
start_(start),
|
||||
limit_(limit),
|
||||
frame_(0),
|
||||
width_(0),
|
||||
height_(0),
|
||||
framerate_numerator_(rate_numerator),
|
||||
framerate_denominator_(rate_denominator) {
|
||||
|
||||
// This initializes raw_sz_, width_, height_ and allocates an img.
|
||||
SetSize(width, height);
|
||||
}
|
||||
|
||||
virtual ~I420VideoSource() {
|
||||
vpx_img_free(img_);
|
||||
if (input_file_)
|
||||
fclose(input_file_);
|
||||
}
|
||||
|
||||
virtual void Begin() {
|
||||
if (input_file_)
|
||||
fclose(input_file_);
|
||||
input_file_ = OpenTestDataFile(file_name_);
|
||||
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
|
||||
<< file_name_;
|
||||
if (start_) {
|
||||
fseek(input_file_, raw_sz_ * start_, SEEK_SET);
|
||||
}
|
||||
|
||||
frame_ = start_;
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; }
|
||||
|
||||
// Models a stream where Timebase = 1/FPS, so pts == frame.
|
||||
virtual vpx_codec_pts_t pts() const { return frame_; }
|
||||
|
||||
virtual unsigned long duration() const { return 1; }
|
||||
|
||||
virtual vpx_rational_t timebase() const {
|
||||
const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
|
||||
return t;
|
||||
}
|
||||
|
||||
virtual unsigned int frame() const { return frame_; }
|
||||
|
||||
virtual unsigned int limit() const { return limit_; }
|
||||
|
||||
void SetSize(unsigned int width, unsigned int height) {
|
||||
if (width != width_ || height != height_) {
|
||||
vpx_img_free(img_);
|
||||
img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 1);
|
||||
ASSERT_TRUE(img_ != NULL);
|
||||
width_ = width;
|
||||
height_ = height;
|
||||
raw_sz_ = width * height * 3 / 2;
|
||||
}
|
||||
}
|
||||
|
||||
virtual void FillFrame() {
|
||||
// Read a frame from input_file.
|
||||
if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
|
||||
limit_ = frame_;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
std::string file_name_;
|
||||
FILE *input_file_;
|
||||
vpx_image_t *img_;
|
||||
size_t raw_sz_;
|
||||
unsigned int start_;
|
||||
unsigned int limit_;
|
||||
unsigned int frame_;
|
||||
unsigned int width_;
|
||||
unsigned int height_;
|
||||
unsigned int framerate_numerator_;
|
||||
unsigned int framerate_denominator_;
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_I420_VIDEO_SOURCE_H_
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
}
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
|
||||
int pred_stride, unsigned char *dst_ptr,
|
||||
int dst_stride);
|
||||
namespace {
|
||||
// Fixture for the 4x4 inverse DCT functions. The function under test is the
// test parameter. output[] is laid out with a stride of 16: the 4x4 block in
// the top-left corner starts out as 0 and every other byte is a guard value
// of 255.
class IDCTTest : public ::testing::TestWithParam<idct_fn_t>
{
  protected:
    virtual void SetUp()
    {
        UUT = GetParam();
        memset(input, 0, sizeof(input));

        /* Set up guard blocks */
        for (int pos = 0; pos < 256; pos++)
        {
            const bool in_block = (pos & 0xF) < 4 && pos < 64;
            output[pos] = in_block ? 0 : 255;
        }
    }

    idct_fn_t UUT;
    short input[16];
    unsigned char output[256];
    unsigned char predict[256];
};
|
||||
|
||||
/* Checks the buffer layout produced by SetUp(): zeros inside the 4x4 block,
 * 255 everywhere else. */
TEST_P(IDCTTest, TestGuardBlocks)
{
    for (int i = 0; i < 256; i++)
    {
        const bool in_block = (i & 0xF) < 4 && i < 64;

        if (in_block)
        {
            EXPECT_EQ(0, output[i]) << i;
        }
        else
        {
            EXPECT_EQ(255, output[i]);
        }
    }
}
|
||||
|
||||
/* All-zero coefficients applied in place: the 4x4 block must stay 0 and the
 * guard bytes must stay 255. */
TEST_P(IDCTTest, TestAllZeros)
{
    UUT(input, output, 16, output, 16);

    for (int i = 0; i < 256; i++)
    {
        const bool in_block = (i & 0xF) < 4 && i < 64;

        if (in_block)
        {
            EXPECT_EQ(0, output[i]) << "i==" << i;
        }
        else
        {
            EXPECT_EQ(255, output[i]) << "i==" << i;
        }
    }
}
|
||||
|
||||
/* A first coefficient of 4 applied in place on a zero block: every pixel of
 * the 4x4 block is expected to become 1 and the guard bytes stay 255. */
TEST_P(IDCTTest, TestAllOnes)
{
    input[0] = 4;
    UUT(input, output, 16, output, 16);

    for (int i = 0; i < 256; i++)
    {
        const bool in_block = (i & 0xF) < 4 && i < 64;

        if (in_block)
        {
            EXPECT_EQ(1, output[i]) << "i==" << i;
        }
        else
        {
            EXPECT_EQ(255, output[i]) << "i==" << i;
        }
    }
}
|
||||
|
||||
/* A first coefficient of 4 on top of a ramp predictor (predict[i] == i):
 * every pixel of the 4x4 block is expected to be its predictor plus one and
 * the guard bytes stay 255. */
TEST_P(IDCTTest, TestAddOne)
{
    for (int k = 0; k < 256; k++)
        predict[k] = k;

    input[0] = 4;
    UUT(input, predict, 16, output, 16);

    for (int i = 0; i < 256; i++)
    {
        const bool in_block = (i & 0xF) < 4 && i < 64;

        if (in_block)
        {
            EXPECT_EQ(i+1, output[i]) << "i==" << i;
        }
        else
        {
            EXPECT_EQ(255, output[i]) << "i==" << i;
        }
    }
}
|
||||
|
||||
/* Coefficients 0..15 applied in place on a zero block, checked against a
 * fixed set of expected output values; the guard bytes must stay 255. */
TEST_P(IDCTTest, TestWithData)
{
    for (int k = 0; k < 16; k++)
        input[k] = k;

    UUT(input, output, 16, output, 16);

    for (int i = 0; i < 256; i++)
    {
        const bool is_guard = (i & 0xF) > 3 || i > 63;

        if (is_guard)
        {
            EXPECT_EQ(255, output[i]) << "i==" << i;
        }
        else if (i == 0)
        {
            EXPECT_EQ(11, output[i]) << "i==" << i;
        }
        else if (i == 34)
        {
            EXPECT_EQ(1, output[i]) << "i==" << i;
        }
        else if (i == 2 || i == 17 || i == 32)
        {
            EXPECT_EQ(3, output[i]) << "i==" << i;
        }
        else
        {
            EXPECT_EQ(0, output[i]) << "i==" << i;
        }
    }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_c));
|
||||
#if HAVE_MMX
|
||||
INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
|
||||
::testing::Values(vp8_short_idct4x4llm_mmx));
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,354 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include "test/acm_random.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
class IntraPredBase {
|
||||
protected:
|
||||
void SetupMacroblock(uint8_t *data, int block_size, int stride,
|
||||
int num_planes) {
|
||||
memset(&mb_, 0, sizeof(mb_));
|
||||
memset(&mi_, 0, sizeof(mi_));
|
||||
mb_.up_available = 1;
|
||||
mb_.left_available = 1;
|
||||
mb_.mode_info_context = &mi_;
|
||||
stride_ = stride;
|
||||
block_size_ = block_size;
|
||||
num_planes_ = num_planes;
|
||||
for (int p = 0; p < num_planes; p++)
|
||||
data_ptr_[p] = data + stride * (block_size + 1) * p +
|
||||
stride + block_size;
|
||||
}
|
||||
|
||||
void FillRandom() {
|
||||
// Fill edges with random data
|
||||
ACMRandom rnd(ACMRandom::DeterministicSeed());
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
for (int x = -1 ; x <= block_size_; x++)
|
||||
data_ptr_[p][x - stride_] = rnd.Rand8();
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Predict(MB_PREDICTION_MODE mode) = 0;
|
||||
|
||||
void SetLeftUnavailable() {
|
||||
mb_.left_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int i = -1; i < block_size_; ++i)
|
||||
data_ptr_[p][stride_ * i - 1] = 129;
|
||||
}
|
||||
|
||||
void SetTopUnavailable() {
|
||||
mb_.up_available = 0;
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
|
||||
}
|
||||
|
||||
void SetTopLeftUnavailable() {
|
||||
SetLeftUnavailable();
|
||||
SetTopUnavailable();
|
||||
}
|
||||
|
||||
int BlockSizeLog2Min1() const {
|
||||
switch (block_size_) {
|
||||
case 16:
|
||||
return 3;
|
||||
case 8:
|
||||
return 2;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// check DC prediction output against a reference
|
||||
void CheckDCPrediction() const {
|
||||
for (int p = 0; p < num_planes_; p++) {
|
||||
// calculate expected DC
|
||||
int expected;
|
||||
if (mb_.up_available || mb_.left_available) {
|
||||
int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
|
||||
mb_.left_available;
|
||||
if (mb_.up_available)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
sum += data_ptr_[p][x - stride_];
|
||||
if (mb_.left_available)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
sum += data_ptr_[p][y * stride_ - 1];
|
||||
expected = (sum + (1 << (shift - 1))) >> shift;
|
||||
} else
|
||||
expected = 0x80;
|
||||
|
||||
// check that all subsequent lines are equal to the first
|
||||
for (int y = 1; y < block_size_; ++y)
|
||||
ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
|
||||
block_size_));
|
||||
// within the first line, ensure that each pixel has the same value
|
||||
for (int x = 1; x < block_size_; ++x)
|
||||
ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
|
||||
// now ensure that that pixel has the expected (DC) value
|
||||
ASSERT_EQ(expected, data_ptr_[p][0]);
|
||||
}
|
||||
}
|
||||
|
||||
// check V prediction output against a reference
|
||||
void CheckVPrediction() const {
|
||||
// check that all lines equal the top border
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
|
||||
&data_ptr_[p][y * stride_], block_size_));
|
||||
}
|
||||
|
||||
// check H prediction output against a reference
|
||||
void CheckHPrediction() const {
|
||||
// for each line, ensure that each pixel is equal to the left border
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
for (int x = 0; x < block_size_; x++)
|
||||
ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
|
||||
data_ptr_[p][x + y * stride_]);
|
||||
}
|
||||
|
||||
// Clamps |value| to the range of an unsigned byte, [0, 255].
static int ClipByte(int value) {
  if (value < 0)
    return 0;
  return value > 255 ? 255 : value;
}
|
||||
|
||||
// check TM prediction output against a reference
|
||||
void CheckTMPrediction() const {
|
||||
for (int p = 0; p < num_planes_; p++)
|
||||
for (int y = 0; y < block_size_; y++)
|
||||
for (int x = 0; x < block_size_; x++) {
|
||||
const int expected = ClipByte(data_ptr_[p][x - stride_]
|
||||
+ data_ptr_[p][stride_ * y - 1]
|
||||
- data_ptr_[p][-1 - stride_]);
|
||||
ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
|
||||
}
|
||||
}
|
||||
|
||||
// Actual test
|
||||
void RunTest() {
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED");
|
||||
FillRandom();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED LEFT");
|
||||
FillRandom();
|
||||
SetLeftUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED TOP");
|
||||
FillRandom();
|
||||
SetTopUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("DC_PRED TOP_LEFT");
|
||||
FillRandom();
|
||||
SetTopLeftUnavailable();
|
||||
Predict(DC_PRED);
|
||||
CheckDCPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("H_PRED");
|
||||
FillRandom();
|
||||
Predict(H_PRED);
|
||||
CheckHPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("V_PRED");
|
||||
FillRandom();
|
||||
Predict(V_PRED);
|
||||
CheckVPrediction();
|
||||
}
|
||||
{
|
||||
SCOPED_TRACE("TM_PRED");
|
||||
FillRandom();
|
||||
Predict(TM_PRED);
|
||||
CheckTMPrediction();
|
||||
}
|
||||
}
|
||||
|
||||
MACROBLOCKD mb_;
|
||||
MODE_INFO mi_;
|
||||
uint8_t *data_ptr_[2]; // in the case of Y, only [0] is used
|
||||
int stride_;
|
||||
int block_size_;
|
||||
int num_planes_;
|
||||
};
|
||||
|
||||
typedef void (*intra_pred_y_fn_t)(MACROBLOCKD *x,
|
||||
uint8_t *yabove_row,
|
||||
uint8_t *yleft,
|
||||
int left_stride,
|
||||
uint8_t *ypred_ptr,
|
||||
int y_stride);
|
||||
|
||||
// Value-parameterized fixture that runs the shared intra prediction checks
// against a luma (16x16) predictor function.
class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
                       protected IntraPredBase {
 public:
  // Allocates the pixel buffer shared by all tests of this test case.
  static void SetUpTestCase() {
    void *const buffer = vpx_memalign(kDataAlignment, kDataBufferSize);
    data_array_ = reinterpret_cast<uint8_t*>(buffer);
  }

  // Releases the shared pixel buffer.
  static void TearDownTestCase() {
    vpx_free(data_array_);
    data_array_ = NULL;
  }

 protected:
  static const int kBlockSize = 16;
  static const int kDataAlignment = 16;
  // We use a stride of 48 so that the data pointer of the first pixel in each
  // row of each macroblock is 16-byte aligned, and this gives us access to
  // the top-left and top-right corner pixels belonging to the top-left/right
  // macroblocks.
  static const int kStride = kBlockSize * 3;
  // We use 17 lines so we have one line above us for top-prediction.
  static const int kDataBufferSize = kStride * (kBlockSize + 1);

  virtual void SetUp() {
    pred_fn_ = GetParam();
    SetupMacroblock(data_array_, kBlockSize, kStride, 1);
  }

  // Runs the predictor under test for |mode| on the single Y plane.
  virtual void Predict(MB_PREDICTION_MODE mode) {
    mb_.mode_info_context->mbmi.mode = mode;
    uint8_t *const block = data_ptr_[0];
    pred_fn_(&mb_, block - kStride, block - 1, kStride, block, kStride);
  }

  intra_pred_y_fn_t pred_fn_;
  static uint8_t* data_array_;
};
|
||||
|
||||
uint8_t* IntraPredYTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredYTest, IntraPredTests) {
|
||||
RunTest();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mby_s_c));
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mby_s_sse2));
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mby_s_ssse3));
|
||||
#endif
|
||||
|
||||
typedef void (*intra_pred_uv_fn_t)(MACROBLOCKD *x,
|
||||
uint8_t *uabove_row,
|
||||
uint8_t *vabove_row,
|
||||
uint8_t *uleft,
|
||||
uint8_t *vleft,
|
||||
int left_stride,
|
||||
uint8_t *upred_ptr,
|
||||
uint8_t *vpred_ptr,
|
||||
int pred_stride);
|
||||
|
||||
// Value-parameterized fixture that runs the shared intra prediction checks
// against a chroma (two 8x8 planes) predictor function.
class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
                        protected IntraPredBase {
 public:
  // Allocates the pixel buffer shared by all tests of this test case.
  static void SetUpTestCase() {
    void *const buffer = vpx_memalign(kDataAlignment, kDataBufferSize);
    data_array_ = reinterpret_cast<uint8_t*>(buffer);
  }

  // Releases the shared pixel buffer.
  static void TearDownTestCase() {
    vpx_free(data_array_);
    data_array_ = NULL;
  }

 protected:
  static const int kBlockSize = 8;
  static const int kDataAlignment = 8;
  // We use a stride of 24 so that the data pointer of the first pixel in each
  // row of each macroblock is 8-byte aligned, and this gives us access to the
  // top-left and top-right corner pixels belonging to the top-left/right
  // macroblocks.
  static const int kStride = kBlockSize * 3;
  // We use 9 lines so we have one line above us for top-prediction.
  // The buffer holds two planes: [0] = U, [1] = V.
  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);

  virtual void SetUp() {
    pred_fn_ = GetParam();
    SetupMacroblock(data_array_, kBlockSize, kStride, 2);
  }

  // Runs the predictor under test for |mode| on both chroma planes.
  virtual void Predict(MB_PREDICTION_MODE mode) {
    mb_.mode_info_context->mbmi.uv_mode = mode;
    uint8_t *const u_block = data_ptr_[0];
    uint8_t *const v_block = data_ptr_[1];
    pred_fn_(&mb_, u_block - kStride, v_block - kStride,
             u_block - 1, v_block - 1, kStride,
             u_block, v_block, kStride);
  }

  intra_pred_uv_fn_t pred_fn_;
  static uint8_t* data_array_;
};
|
||||
|
||||
uint8_t* IntraPredUVTest::data_array_ = NULL;
|
||||
|
||||
TEST_P(IntraPredUVTest, IntraPredTests) {
|
||||
RunTest();
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_c));
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_sse2));
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
|
||||
::testing::Values(
|
||||
vp8_build_intra_predictors_mbuv_s_ssse3));
|
||||
#endif
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TEST_IVF_VIDEO_SOURCE_H_
|
||||
#define TEST_IVF_VIDEO_SOURCE_H_
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <new>
|
||||
#include <string>
|
||||
#include "test/video_source.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
const unsigned int kCodeBufferSize = 256 * 1024;
|
||||
const unsigned int kIvfFileHdrSize = 32;
|
||||
const unsigned int kIvfFrameHdrSize = 12;
|
||||
|
||||
// Reads a 32-bit little-endian unsigned value from the four bytes at |mem|.
static unsigned int MemGetLe32(const uint8_t *mem) {
  // Widen every byte to unsigned before shifting. A uint8_t operand is
  // promoted to (signed) int, so shifting a byte >= 0x80 left by 24 would
  // overflow the sign bit.
  return (static_cast<unsigned int>(mem[3]) << 24) |
         (static_cast<unsigned int>(mem[2]) << 16) |
         (static_cast<unsigned int>(mem[1]) << 8) |
         static_cast<unsigned int>(mem[0]);
}
|
||||
|
||||
// This class extends VideoSource to allow parsing of ivf files,
|
||||
// so that we can do actual file decodes.
|
||||
class IVFVideoSource : public CompressedVideoSource {
|
||||
public:
|
||||
IVFVideoSource(const std::string &file_name)
|
||||
: file_name_(file_name),
|
||||
input_file_(NULL),
|
||||
compressed_frame_buf_(NULL),
|
||||
frame_sz_(0),
|
||||
frame_(0),
|
||||
end_of_file_(false) {
|
||||
}
|
||||
|
||||
virtual ~IVFVideoSource() {
|
||||
delete[] compressed_frame_buf_;
|
||||
|
||||
if (input_file_)
|
||||
fclose(input_file_);
|
||||
}
|
||||
|
||||
virtual void Init() {
|
||||
// Allocate a buffer for read in the compressed video frame.
|
||||
compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
|
||||
ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
|
||||
}
|
||||
|
||||
virtual void Begin() {
|
||||
input_file_ = OpenTestDataFile(file_name_);
|
||||
ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
|
||||
<< file_name_;
|
||||
|
||||
// Read file header
|
||||
uint8_t file_hdr[kIvfFileHdrSize];
|
||||
ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_))
|
||||
<< "File header read failed.";
|
||||
// Check file header
|
||||
ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I'
|
||||
&& file_hdr[3] == 'F') << "Input is not an IVF file.";
|
||||
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
void FillFrame() {
|
||||
uint8_t frame_hdr[kIvfFrameHdrSize];
|
||||
// Check frame header and read a frame from input_file.
|
||||
if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
|
||||
!= kIvfFrameHdrSize) {
|
||||
end_of_file_ = true;
|
||||
} else {
|
||||
end_of_file_ = false;
|
||||
|
||||
frame_sz_ = MemGetLe32(frame_hdr);
|
||||
ASSERT_LE(frame_sz_, kCodeBufferSize)
|
||||
<< "Frame is too big for allocated code buffer";
|
||||
ASSERT_EQ(frame_sz_,
|
||||
fread(compressed_frame_buf_, 1, frame_sz_, input_file_))
|
||||
<< "Failed to read complete frame";
|
||||
}
|
||||
}
|
||||
|
||||
virtual const uint8_t *cxdata() const {
|
||||
return end_of_file_ ? NULL : compressed_frame_buf_;
|
||||
}
|
||||
virtual const unsigned int frame_size() const { return frame_sz_; }
|
||||
virtual const unsigned int frame_number() const { return frame_; }
|
||||
|
||||
protected:
|
||||
std::string file_name_;
|
||||
FILE *input_file_;
|
||||
uint8_t *compressed_frame_buf_;
|
||||
unsigned int frame_sz_;
|
||||
unsigned int frame_;
|
||||
bool end_of_file_;
|
||||
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_IVF_VIDEO_SOURCE_H_
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
class KeyframeTest : public ::libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<enum libvpx_test::TestMode> {
|
||||
protected:
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GetParam());
|
||||
kf_count_ = 0;
|
||||
kf_count_max_ = INT_MAX;
|
||||
kf_do_force_kf_ = false;
|
||||
set_cpu_used_ = 0;
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (kf_do_force_kf_)
|
||||
frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF;
|
||||
if (set_cpu_used_ && video->frame() == 1)
|
||||
encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||
kf_pts_list_.push_back(pkt->data.frame.pts);
|
||||
kf_count_++;
|
||||
abort_ |= kf_count_ > kf_count_max_;
|
||||
}
|
||||
}
|
||||
|
||||
bool kf_do_force_kf_;
|
||||
int kf_count_;
|
||||
int kf_count_max_;
|
||||
std::vector<vpx_codec_pts_t> kf_pts_list_;
|
||||
int set_cpu_used_;
|
||||
};
|
||||
|
||||
TEST_P(KeyframeTest, TestRandomVideoSource) {
  // Validate that encoding the RandomVideoSource produces multiple keyframes.
  // This validates the results of the TestDisableKeyframes test.
  kf_count_max_ = 2;  // early exit successful tests.

  ::libvpx_test::RandomVideoSource video;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  // In realtime mode auto placed keyframes are exceedingly rare, so only
  // perform this check when GetParam() > 0.
  if (GetParam() > 0) {
    EXPECT_GT(kf_count_, 1);
  }
}
|
||||
|
||||
// With key frames disabled, exactly one key frame is expected from the
// random source.
TEST_P(KeyframeTest, TestDisableKeyframes) {
  kf_count_max_ = 1;  // early exit failed tests.
  cfg_.kf_mode = VPX_KF_DISABLED;

  ::libvpx_test::RandomVideoSource video;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  EXPECT_EQ(1, kf_count_);
}
|
||||
|
||||
// With automatic placement disabled and forcing enabled, key frames may only
// appear on frames whose pts is a multiple of 3.
TEST_P(KeyframeTest, TestForceKeyframe) {
  kf_do_force_kf_ = true;
  cfg_.kf_mode = VPX_KF_DISABLED;

  ::libvpx_test::DummyVideoSource video;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  // verify that every third frame is a keyframe.
  typedef std::vector<vpx_codec_pts_t>::const_iterator PtsIter;
  const PtsIter last = kf_pts_list_.end();
  for (PtsIter iter = kf_pts_list_.begin(); iter != last; ++iter) {
    ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter;
  }
}
|
||||
|
||||
// With kf_max_dist set to 25, key frames may only appear on frames whose pts
// is a multiple of 25.
TEST_P(KeyframeTest, TestKeyframeMaxDistance) {
  cfg_.kf_max_dist = 25;

  ::libvpx_test::DummyVideoSource video;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  // verify that keyframe interval matches kf_max_dist
  typedef std::vector<vpx_codec_pts_t>::const_iterator PtsIter;
  const PtsIter last = kf_pts_list_.end();
  for (PtsIter iter = kf_pts_list_.begin(); iter != last; ++iter) {
    ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter;
  }
}
|
||||
|
||||
TEST_P(KeyframeTest, TestAutoKeyframe) {
  cfg_.kf_mode = VPX_KF_AUTO;
  kf_do_force_kf_ = false;

  // Force a deterministic speed step in Real Time mode, as the faster modes
  // may not produce a keyframe like we expect. This is necessary when running
  // on very slow environments (like Valgrind). The step -11 was determined
  // experimentally as the fastest mode that still throws the keyframe.
  const bool realtime = (deadline_ == VPX_DL_REALTIME);
  if (realtime)
    set_cpu_used_ = -11;

  // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120.
  // Only the first 40 frames are checked, to make sure there is a keyframe
  // at frame 0 and at frame 30.
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 40);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  // In realtime mode auto placed keyframes are exceedingly rare, so the
  // keyframe count is only checked when GetParam() > 0.
  if (GetParam() > 0) {
    EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes ";
  }

  // Verify that the encoder's keyframes line up with the cuts in the file.
  typedef std::vector<vpx_codec_pts_t>::const_iterator PtsIter;
  for (PtsIter iter = kf_pts_list_.begin(); iter != kf_pts_list_.end();
       ++iter) {
    if (realtime && *iter > 0) {
      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
                                     << *iter;
    } else {
      EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
    }
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(AllModes, KeyframeTest, ALL_TEST_MODES);
|
||||
} // namespace
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
typedef void (*post_proc_func_t)(unsigned char *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int src_pixels_per_line,
|
||||
int dst_pixels_per_line,
|
||||
int cols,
|
||||
unsigned char *flimit,
|
||||
int size);
|
||||
|
||||
namespace {
|
||||
|
||||
class Vp8PostProcessingFilterTest
|
||||
: public ::testing::TestWithParam<post_proc_func_t> {};
|
||||
|
||||
// Test routine for the VP8 post-processing function
|
||||
// vp8_post_proc_down_and_across_mb_row_c.
|
||||
|
||||
// Filters a 16x16 block of 1s surrounded by rows of 10s with the function
// under test and compares every output row against a fixed expected value.
TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
  // Size of the underlying data block that will be filtered.
  const int block_width = 16;
  const int block_height = 16;

  // 5-tap filter needs 2 padding rows above and below the block in the input.
  const int input_width = block_width;
  const int input_height = block_height + 4;
  const int input_stride = input_width;
  const int input_size = input_width * input_height;

  // Filter extends output block by 8 samples at left and right edges.
  const int output_width = block_width + 16;
  const int output_height = block_height;
  const int output_stride = output_width;
  const int output_size = output_width * output_height;

  // Stop right away if an allocation fails instead of dereferencing NULL.
  uint8_t *const src_image =
      reinterpret_cast<uint8_t*>(vpx_calloc(input_size, 1));
  ASSERT_TRUE(src_image != NULL) << "Failed to allocate the input image";
  uint8_t *const dst_image =
      reinterpret_cast<uint8_t*>(vpx_calloc(output_size, 1));
  ASSERT_TRUE(dst_image != NULL) << "Failed to allocate the output image";

  // Pointers to top-left pixel of block in the input and output images.
  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  ASSERT_TRUE(flimits != NULL) << "Failed to allocate the filter limits";
  (void)vpx_memset(flimits, 255, block_width);

  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
  (void)vpx_memset(src_image, 10, input_size);
  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
      pixel_ptr[j] = 1;
    }
    pixel_ptr += input_stride;
  }

  // Initialize pixels in the output to 99.
  (void)vpx_memset(dst_image, 99, output_size);

  GetParam()(src_image_ptr, dst_image_ptr, input_stride,
             output_stride, block_width, flimits, 16);

  // Expected value of every pixel in each output row, indexed by row.
  static const uint8_t expected_data[block_height] = {
    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
  };

  pixel_ptr = dst_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
      EXPECT_EQ(expected_data[i], pixel_ptr[j])
          << "Vp8PostProcessingFilterTest failed with invalid filter output";
    }
    pixel_ptr += output_stride;
  }

  vpx_free(src_image);
  vpx_free(dst_image);
  vpx_free(flimits);
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, Vp8PostProcessingFilterTest,
|
||||
::testing::Values(vp8_post_proc_down_and_across_mb_row_c));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, Vp8PostProcessingFilterTest,
|
||||
::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
|
||||
#endif
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/video_source.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
const unsigned int kInitialWidth = 320;
|
||||
const unsigned int kInitialHeight = 240;
|
||||
|
||||
// Returns |val| scaled by the factor assigned to |frame|: the factor changes
// every 10 frames for the first 50 frames and is 1 from frame 50 onwards.
unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
  switch (frame / 10) {
    case 0:
      return val;
    case 1:
      return val / 2;
    case 2:
      return val * 2 / 3;
    case 3:
      return val / 4;
    case 4:
      return val * 7 / 8;
    default:
      return val;
  }
}
|
||||
|
||||
class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
|
||||
public:
|
||||
ResizingVideoSource() {
|
||||
SetSize(kInitialWidth, kInitialHeight);
|
||||
limit_ = 60;
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
SetSize(ScaleForFrameNumber(frame_, kInitialWidth),
|
||||
ScaleForFrameNumber(frame_, kInitialHeight));
|
||||
FillFrame();
|
||||
}
|
||||
};
|
||||
|
||||
class ResizeTest : public ::libvpx_test::EncoderTest,
|
||||
public ::testing::TestWithParam<enum libvpx_test::TestMode> {
|
||||
protected:
|
||||
struct FrameInfo {
|
||||
FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
|
||||
: pts(_pts), w(_w), h(_h) {}
|
||||
|
||||
vpx_codec_pts_t pts;
|
||||
unsigned int w;
|
||||
unsigned int h;
|
||||
};
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(GetParam());
|
||||
}
|
||||
|
||||
virtual bool Continue() const {
|
||||
return !HasFatalFailure() && !abort_;
|
||||
}
|
||||
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
|
||||
if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
|
||||
const unsigned char *buf =
|
||||
reinterpret_cast<const unsigned char *>(pkt->data.frame.buf);
|
||||
const unsigned int w = (buf[6] | (buf[7] << 8)) & 0x3fff;
|
||||
const unsigned int h = (buf[8] | (buf[9] << 8)) & 0x3fff;
|
||||
|
||||
frame_info_list_.push_back(FrameInfo(pkt->data.frame.pts, w, h));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector< FrameInfo > frame_info_list_;
|
||||
};
|
||||
|
||||
// Encodes a source whose frame size changes over time and checks that every
// recorded key frame carries the dimensions expected for its frame number.
TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  // The list is only read here, so iterate with a const_iterator.
  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const vpx_codec_pts_t pts = info->pts;
    // ScaleForFrameNumber() takes an unsigned frame number; make the
    // narrowing from the 64-bit pts explicit.
    const unsigned int frame = static_cast<unsigned int>(pts);
    const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);

    // Leading space in the message keeps the frame number readable.
    EXPECT_EQ(expected_w, info->w)
        << "Frame " << pts << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
        << "Frame " << pts << " had unexpected height";
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OnePass, ResizeTest, ONE_PASS_TEST_MODES);
|
||||
} // namespace
|
|
@ -0,0 +1,250 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "test/util.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
|
||||
typedef unsigned int (*sad_m_by_n_fn_t)(const unsigned char *source_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *reference_ptr,
|
||||
int reference_stride,
|
||||
unsigned int max_sad);
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
namespace {
|
||||
// Fixture parameterized on (width, height, SAD function). It owns a source
// and a reference pixel buffer and compares the function under test against
// a straightforward reference implementation.
class SADTest : public PARAMS(int, int, sad_m_by_n_fn_t) {
 public:
  // Allocates the two pixel buffers shared by all tests of this test case.
  static void SetUpTestCase() {
    void *const source = vpx_memalign(kDataAlignment, kDataBufferSize);
    source_data_ = reinterpret_cast<uint8_t*>(source);
    void *const reference = vpx_memalign(kDataAlignment, kDataBufferSize);
    reference_data_ = reinterpret_cast<uint8_t*>(reference);
  }

  // Releases the shared pixel buffers.
  static void TearDownTestCase() {
    vpx_free(source_data_);
    source_data_ = NULL;
    vpx_free(reference_data_);
    reference_data_ = NULL;
  }

 protected:
  static const int kDataAlignment = 16;
  // Handle blocks up to 16x16 with stride up to 32
  static const int kDataBufferSize = 16 * 32;

  virtual void SetUp() {
    width_ = GET_PARAM(0);
    height_ = GET_PARAM(1);
    sad_fn_ = GET_PARAM(2);
    source_stride_ = width_ * 2;
    reference_stride_ = width_ * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }

  sad_m_by_n_fn_t sad_fn_;

  // Runs the SAD function under test on the current buffers and strides.
  virtual unsigned int SAD(unsigned int max_sad) {
    return sad_fn_(source_data_, source_stride_,
                   reference_data_, reference_stride_,
                   max_sad);
  }

  // Sum of Absolute Differences. Given two blocks, calculate the absolute
  // difference between two pixels in the same relative location; accumulate.
  // Accumulation stops after the first row that pushes the sum past max_sad.
  unsigned int ReferenceSAD(unsigned int max_sad) {
    unsigned int sad = 0;

    for (int row = 0; row < height_; ++row) {
      const uint8_t *const src = source_data_ + row * source_stride_;
      const uint8_t *const ref = reference_data_ + row * reference_stride_;
      for (int col = 0; col < width_; ++col)
        sad += abs(src[col] - ref[col]);
      if (sad > max_sad)
        break;
    }
    return sad;
  }

  // Sets every pixel of the width_ x height_ block in |data| to
  // |fill_constant|.
  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
    for (int row = 0; row < height_; ++row) {
      uint8_t *const line = data + row * stride;
      for (int col = 0; col < width_; ++col)
        line[col] = fill_constant;
    }
  }

  // Sets every pixel of the width_ x height_ block in |data| to a
  // pseudo-random value.
  void FillRandom(uint8_t *data, int stride) {
    for (int row = 0; row < height_; ++row) {
      uint8_t *const line = data + row * stride;
      for (int col = 0; col < width_; ++col)
        line[col] = rnd_.Rand8();
    }
  }

  // Compares the function under test with the reference implementation.
  void CheckSad(unsigned int max_sad) {
    const unsigned int reference_sad = ReferenceSAD(max_sad);
    const unsigned int exp_sad = SAD(max_sad);

    if (reference_sad > max_sad) {
      // Alternative implementations are not required to check max_sad
      ASSERT_GE(exp_sad, reference_sad);
    } else {
      ASSERT_EQ(exp_sad, reference_sad);
    }
  }

  int height_, width_;
  static uint8_t* source_data_;
  int source_stride_;
  static uint8_t* reference_data_;
  int reference_stride_;

  ACMRandom rnd_;
};
|
||||
|
||||
uint8_t* SADTest::source_data_ = NULL;
|
||||
uint8_t* SADTest::reference_data_ = NULL;
|
||||
|
||||
// All-zero source against an all-255 reference, with no max_sad cap.
TEST_P(SADTest, MaxRef) {
  const uint8_t kSourcePixel = 0;
  const uint8_t kReferencePixel = 255;
  FillConstant(source_data_, source_stride_, kSourcePixel);
  FillConstant(reference_data_, reference_stride_, kReferencePixel);
  CheckSad(UINT_MAX);
}
|
||||
|
||||
// All-255 source against an all-zero reference, with no max_sad cap.
TEST_P(SADTest, MaxSrc) {
  const uint8_t kSourcePixel = 255;
  const uint8_t kReferencePixel = 0;
  FillConstant(source_data_, source_stride_, kSourcePixel);
  FillConstant(reference_data_, reference_stride_, kReferencePixel);
  CheckSad(UINT_MAX);
}
|
||||
|
||||
// Random data with the reference stride halved.
TEST_P(SADTest, ShortRef) {
  const int saved_stride = reference_stride_;
  reference_stride_ = saved_stride >> 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSad(UINT_MAX);
  reference_stride_ = saved_stride;
}
|
||||
|
||||
TEST_P(SADTest, UnalignedRef) {
  // The reference frame, but not the source frame, may be unaligned for
  // certain types of searches.
  const int saved_stride = reference_stride_;
  --reference_stride_;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSad(UINT_MAX);
  reference_stride_ = saved_stride;
}
|
||||
|
||||
// Random data with the source stride halved.
TEST_P(SADTest, ShortSrc) {
  const int saved_stride = source_stride_;
  source_stride_ = saved_stride >> 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSad(UINT_MAX);
  source_stride_ = saved_stride;
}
|
||||
|
||||
TEST_P(SADTest, MaxSAD) {
  // Verify that, when max_sad is set, the implementation does not return a
  // value lower than the reference.
  const unsigned int kMaxSad = 128;
  FillConstant(source_data_, source_stride_, 255);
  FillConstant(reference_data_, reference_stride_, 0);
  CheckSad(kMaxSad);
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
const sad_m_by_n_fn_t sad_16x16_c = vp8_sad16x16_c;
|
||||
const sad_m_by_n_fn_t sad_8x16_c = vp8_sad8x16_c;
|
||||
const sad_m_by_n_fn_t sad_16x8_c = vp8_sad16x8_c;
|
||||
const sad_m_by_n_fn_t sad_8x8_c = vp8_sad8x8_c;
|
||||
const sad_m_by_n_fn_t sad_4x4_c = vp8_sad4x4_c;
|
||||
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_c),
|
||||
make_tuple(8, 16, sad_8x16_c),
|
||||
make_tuple(16, 8, sad_16x8_c),
|
||||
make_tuple(8, 8, sad_8x8_c),
|
||||
make_tuple(4, 4, sad_4x4_c)));
|
||||
|
||||
// ARM tests
|
||||
#if HAVE_MEDIA
|
||||
const sad_m_by_n_fn_t sad_16x16_armv6 = vp8_sad16x16_armv6;
|
||||
INSTANTIATE_TEST_CASE_P(MEDIA, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_armv6)));
|
||||
|
||||
#endif
|
||||
#if HAVE_NEON
|
||||
const sad_m_by_n_fn_t sad_16x16_neon = vp8_sad16x16_neon;
|
||||
const sad_m_by_n_fn_t sad_8x16_neon = vp8_sad8x16_neon;
|
||||
const sad_m_by_n_fn_t sad_16x8_neon = vp8_sad16x8_neon;
|
||||
const sad_m_by_n_fn_t sad_8x8_neon = vp8_sad8x8_neon;
|
||||
const sad_m_by_n_fn_t sad_4x4_neon = vp8_sad4x4_neon;
|
||||
INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_neon),
|
||||
make_tuple(8, 16, sad_8x16_neon),
|
||||
make_tuple(16, 8, sad_16x8_neon),
|
||||
make_tuple(8, 8, sad_8x8_neon),
|
||||
make_tuple(4, 4, sad_4x4_neon)));
|
||||
#endif
|
||||
|
||||
// X86 tests
|
||||
#if HAVE_MMX
|
||||
const sad_m_by_n_fn_t sad_16x16_mmx = vp8_sad16x16_mmx;
|
||||
const sad_m_by_n_fn_t sad_8x16_mmx = vp8_sad8x16_mmx;
|
||||
const sad_m_by_n_fn_t sad_16x8_mmx = vp8_sad16x8_mmx;
|
||||
const sad_m_by_n_fn_t sad_8x8_mmx = vp8_sad8x8_mmx;
|
||||
const sad_m_by_n_fn_t sad_4x4_mmx = vp8_sad4x4_mmx;
|
||||
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_mmx),
|
||||
make_tuple(8, 16, sad_8x16_mmx),
|
||||
make_tuple(16, 8, sad_16x8_mmx),
|
||||
make_tuple(8, 8, sad_8x8_mmx),
|
||||
make_tuple(4, 4, sad_4x4_mmx)));
|
||||
#endif
|
||||
#if HAVE_SSE2
|
||||
const sad_m_by_n_fn_t sad_16x16_wmt = vp8_sad16x16_wmt;
|
||||
const sad_m_by_n_fn_t sad_8x16_wmt = vp8_sad8x16_wmt;
|
||||
const sad_m_by_n_fn_t sad_16x8_wmt = vp8_sad16x8_wmt;
|
||||
const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
|
||||
const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::Values(
|
||||
make_tuple(16, 16, sad_16x16_wmt),
|
||||
make_tuple(8, 16, sad_8x16_wmt),
|
||||
make_tuple(16, 8, sad_16x8_wmt),
|
||||
make_tuple(8, 8, sad_8x8_wmt),
|
||||
make_tuple(4, 4, sad_4x4_wmt)));
|
||||
#endif
|
||||
// The kernel below is the SSE3 variant (vp8_sad16x16_sse3) and the
// instantiation is labelled SSE3, so it is guarded by HAVE_SSE3 rather than
// HAVE_SSSE3. With the SSSE3 guard the test was skipped on SSE3-only builds
// and referenced the SSE3 symbol whenever SSSE3 alone was enabled.
// NOTE(review): assumes the build exposes vp8_sad16x16_sse3 under HAVE_SSE3;
// confirm against the rtcd definitions.
#if HAVE_SSE3
const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
                        make_tuple(16, 16, sad_16x16_sse3)));
#endif
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
extern "C" {
|
||||
#include "vp8/encoder/onyx_int.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Checks vp8_set_roimap(): a call with valid arguments must succeed and copy
// the map, the translated q deltas, the loop filter deltas and the breakout
// thresholds into the encoder instance; out-of-range deltas, an enabled cyclic
// refresh mode and mismatched map dimensions must all be rejected with -1.
TEST(Vp8RoiMapTest, ParameterCheck) {
  int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
  int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
  unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };

  // Expected internal (0-127) q value for each external delta-q magnitude;
  // indexed by abs(delta_q[i]).
  const int internalq_trans[] = {
    0,   1,   2,   3,   4,   5,   7,   8,
    9,   10,  12,  13,  15,  17,  18,  19,
    20,  21,  23,  24,  25,  26,  27,  28,
    29,  30,  31,  33,  35,  37,  39,  41,
    43,  45,  47,  49,  51,  53,  55,  57,
    59,  61,  64,  67,  70,  73,  76,  79,
    82,  85,  88,  91,  94,  97,  100, 103,
    106, 109, 112, 115, 118, 121, 124, 127,
  };

  // Initialize elements of cpi with valid defaults.
  VP8_COMP cpi;
  cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA;
  cpi.cyclic_refresh_mode_enabled = 0;
  cpi.mb.e_mbd.segmentation_enabled = 0;
  cpi.mb.e_mbd.update_mb_segmentation_map = 0;
  cpi.mb.e_mbd.update_mb_segmentation_data = 0;
  cpi.common.mb_rows = 240 >> 4;
  cpi.common.mb_cols = 320 >> 4;
  const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
  vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));

  // Segment map
  cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));

  // Allocate memory for the source memory map.
  unsigned char *roi_map =
      reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));

  // Both buffers are written through below, so stop here (releasing whichever
  // allocation did succeed) instead of dereferencing a null pointer.
  if (cpi.segmentation_map == NULL || roi_map == NULL) {
    if (cpi.segmentation_map)
      vpx_free(cpi.segmentation_map);
    if (roi_map)
      vpx_free(roi_map);
    FAIL() << "failed to allocate the segmentation / roi maps";
  }

  // The first quarter of the map stays in segment 0 (zeroed by calloc); the
  // remaining three quarters are assigned to segments 1, 2 and 3.
  vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
  vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
  vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));

  // Do a test call with valid parameters.
  int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
                                  cpi.common.mb_cols, delta_q, delta_lf,
                                  threshold);
  EXPECT_EQ(0, roi_retval)
      << "vp8_set_roimap roi failed with default test parameters";

  // Check that the values in the cpi structure get set as expected.
  if (roi_retval == 0) {
    // Check that the segment map got set.
    const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs);
    EXPECT_EQ(0, mapcompare) << "segment map error";

    // Check the q deltas (note the need to translate into
    // the internal range of 0-127).
    for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
      const int transq = internalq_trans[abs(delta_q[i])];
      // Only the magnitude is compared; EXPECT_EQ is used to report the
      // mismatching values, and only the first mismatch is shown.
      if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) {
        EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i])
            << "segment delta_q error";
        break;
      }
    }

    // Check the loop filter deltas
    for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
      if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) {
        EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i])
            << "segment delta_lf error";
        break;
      }
    }

    // Check the breakout thresholds
    for (int i = 0; i < MAX_MB_SEGMENTS; ++i) {
      unsigned int breakout =
          static_cast<unsigned int>(cpi.segment_encode_breakout[i]);

      if (threshold[i] != breakout) {
        EXPECT_EQ(threshold[i], breakout)
            << "breakout threshold error";
        break;
      }
    }

    // Segmentation, and segmentation update flags should be set.
    EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled)
        << "segmentation_enabled error";
    EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map)
        << "update_mb_segmentation_map error";
    EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data)
        << "update_mb_segmentation_data error";

    // Try a range of delta q and lf parameters (some legal, some not)
    for (int i = 0; i < 1000; ++i) {
      int rand_deltas[4];
      int deltas_valid;
      rand_deltas[0] = (rand() % 160) - 80;
      rand_deltas[1] = (rand() % 160) - 80;
      rand_deltas[2] = (rand() % 160) - 80;
      rand_deltas[3] = (rand() % 160) - 80;

      // Expected return value: 0 when every magnitude is at most 63,
      // otherwise -1.
      deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
                      (abs(rand_deltas[1]) <= 63) &&
                      (abs(rand_deltas[2]) <= 63) &&
                      (abs(rand_deltas[3]) <= 63)) ? 0 : -1;

      // Test with random delta q values.
      roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
                                  cpi.common.mb_cols, rand_deltas,
                                  delta_lf, threshold);
      EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error";

      // One delta_q error shown at a time
      if (deltas_valid != roi_retval)
        break;

      // Test with random loop filter values.
      roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
                                  cpi.common.mb_cols, delta_q,
                                  rand_deltas, threshold);
      EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error";

      // One delta loop filter error shown at a time
      if (deltas_valid != roi_retval)
        break;
    }

    // Test that we report an error if cyclic refresh is enabled.
    cpi.cyclic_refresh_mode_enabled = 1;
    roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
                                cpi.common.mb_cols, delta_q,
                                delta_lf, threshold);
    EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error";
    cpi.cyclic_refresh_mode_enabled = 0;

    // Test invalid number of rows or columns.
    roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1,
                                cpi.common.mb_cols, delta_q,
                                delta_lf, threshold);
    EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error";

    roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
                                cpi.common.mb_cols - 1, delta_q,
                                delta_lf, threshold);
    EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error";
  }

  // Free allocated memory
  if (cpi.segmentation_map)
    vpx_free(cpi.segmentation_map);
  if (roi_map)
    vpx_free(roi_map);
}
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,222 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "test/acm_random.h"
|
||||
#include "test/util.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
extern "C" {
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Pointer type for the six-tap prediction functions exercised by this file.
// The tests call it with a source pointer and source stride, the x and y
// offsets, and a destination pointer and destination stride.
typedef void (*sixtap_predict_fn_t)(uint8_t *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
uint8_t *dst_ptr,
|
||||
int dst_pitch);
|
||||
|
||||
// Fixture for the six-tap prediction tests. The parameters are
// (block width, block height, function under test). The source and
// destination buffers are allocated once per test case and shared by all
// tests; SetUp() clears them before each test.
class SixtapPredictTest : public PARAMS(int, int, sixtap_predict_fn_t) {
 public:
  // Allocates the shared, kDataAlignment-aligned buffers.
  static void SetUpTestCase() {
    src_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kSrcSize));
    dst_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kDstSize));
    dst_c_ = reinterpret_cast<uint8_t*>(vpx_memalign(kDataAlignment, kDstSize));
  }

  // Releases the shared buffers and resets the pointers.
  static void TearDownTestCase() {
    vpx_free(src_);
    src_ = NULL;
    vpx_free(dst_);
    dst_ = NULL;
    vpx_free(dst_c_);
    dst_c_ = NULL;
  }

 protected:
  // Make test arrays big enough for 16x16 functions. Six-tap filters
  // need 5 extra pixels outside of the macroblock.
  static const int kSrcStride = 21;
  static const int kDstStride = 16;
  static const int kDataAlignment = 16;
  static const int kSrcSize = kSrcStride * kSrcStride + 1;
  static const int kDstSize = kDstStride * kDstStride;

  // Reads the test parameters and zeroes all three buffers.
  virtual void SetUp() {
    width_ = GET_PARAM(0);
    height_ = GET_PARAM(1);
    sixtap_predict_ = GET_PARAM(2);
    // src_, dst_ and dst_c_ are pointers, so sizeof() on them yields the
    // pointer size, not the allocation size; clear the full buffers using the
    // sizes they were allocated with.
    memset(src_, 0, kSrcSize);
    memset(dst_, 0, kDstSize);
    memset(dst_c_, 0, kDstSize);
  }

  int width_;
  int height_;
  sixtap_predict_fn_t sixtap_predict_;
  // The src stores the macroblock we will filter on, and makes it 1 byte larger
  // in order to test unaligned access. The result is stored in dst and dst_c(c
  // reference code result).
  static uint8_t* src_;
  static uint8_t* dst_;
  static uint8_t* dst_c_;
};
|
||||
|
||||
// Out-of-class definitions of the fixture's shared buffers. They start out
// NULL, are allocated in SetUpTestCase() and are reset to NULL again in
// TearDownTestCase().
uint8_t* SixtapPredictTest::src_ = NULL;
|
||||
uint8_t* SixtapPredictTest::dst_ = NULL;
|
||||
uint8_t* SixtapPredictTest::dst_c_ = NULL;
|
||||
|
||||
TEST_P(SixtapPredictTest, TestWithPresetData) {
  // Fixed source pixels fed to the predictor.
  static const uint8_t test_data[kSrcSize] = {
    216, 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177,
    79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120,
    48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32,
    182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124,
    148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10,
    166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153,
    167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184,
    114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148,
    182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128,
    224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76,
    148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69,
    176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 238, 249, 176, 58, 142,
    64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181,
    235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42,
    148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184,
    35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77,
    209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221,
    223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170,
    24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13,
    93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69,
    49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215,
    135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36,
    119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35,
    93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52,
    53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83,
    198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52,
    134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202,
    75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16
  };

  // Output expected for the data above with x and y offsets of 2.
  static const uint8_t expected_dst[kDstSize] = {
    117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38,
    105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79,
    208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102,
    159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231,
    208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184,
    100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133,
    113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240,
    255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185,
    200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189,
    76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191,
    185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193,
    161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95,
    15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196,
    84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 12, 131, 166,
    189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180,
    224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75,
    136, 85, 189, 46, 71, 200, 90
  };

  // The function pointer takes a non-const source, hence the cast. The block
  // starts two rows and two columns into the data, plus one extra byte.
  uint8_t *const block_start =
      const_cast<uint8_t*>(test_data) + (kSrcStride * 2 + 2 + 1);

  sixtap_predict_(block_start, kSrcStride, 2, 2, dst_, kDstStride);

  // Only the width_ x height_ region written for this block size is compared.
  for (int row = 0; row < height_; ++row) {
    for (int col = 0; col < width_; ++col) {
      const int pos = row * kDstStride + col;
      ASSERT_EQ(expected_dst[pos], dst_[pos])
          << "i==" << (row * width_ + col);
    }
  }
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
TEST_P(SixtapPredictTest, TestWithRandomData) {
  // Fill the whole source buffer with deterministic pseudo-random bytes.
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  for (int n = 0; n < kSrcSize; ++n) {
    src_[n] = rnd.Rand8();
  }

  // Move start point to next pixel to test if the function reads
  // unaligned data correctly.
  uint8_t *const block_start = &src_[kSrcStride * 2 + 2 + 1];

  // Run tests for all possible offsets.
  for (int xoffset = 0; xoffset < 8; ++xoffset) {
    for (int yoffset = 0; yoffset < 8; ++yoffset) {
      // Reference result from the C 16x16 implementation.
      vp8_sixtap_predict16x16_c(block_start, kSrcStride,
                                xoffset, yoffset, dst_c_, kDstStride);

      // Result from the function under test.
      sixtap_predict_(block_start, kSrcStride,
                      xoffset, yoffset, dst_, kDstStride);

      // Compare only the width_ x height_ region of the two outputs.
      for (int row = 0; row < height_; ++row) {
        for (int col = 0; col < width_; ++col) {
          const int pos = row * kDstStride + col;
          ASSERT_EQ(dst_c_[pos], dst_[pos])
              << "i==" << (row * width_ + col);
        }
      }
    }
  }
}
|
||||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
const sixtap_predict_fn_t sixtap_16x16_c = vp8_sixtap_predict16x16_c;
|
||||
const sixtap_predict_fn_t sixtap_8x8_c = vp8_sixtap_predict8x8_c;
|
||||
const sixtap_predict_fn_t sixtap_8x4_c = vp8_sixtap_predict8x4_c;
|
||||
const sixtap_predict_fn_t sixtap_4x4_c = vp8_sixtap_predict4x4_c;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, SixtapPredictTest, ::testing::Values(
|
||||
make_tuple(16, 16, sixtap_16x16_c),
|
||||
make_tuple(8, 8, sixtap_8x8_c),
|
||||
make_tuple(8, 4, sixtap_8x4_c),
|
||||
make_tuple(4, 4, sixtap_4x4_c)));
|
||||
#if HAVE_MMX
|
||||
const sixtap_predict_fn_t sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
|
||||
const sixtap_predict_fn_t sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
|
||||
const sixtap_predict_fn_t sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx;
|
||||
const sixtap_predict_fn_t sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, SixtapPredictTest, ::testing::Values(
|
||||
make_tuple(16, 16, sixtap_16x16_mmx),
|
||||
make_tuple(8, 8, sixtap_8x8_mmx),
|
||||
make_tuple(8, 4, sixtap_8x4_mmx),
|
||||
make_tuple(4, 4, sixtap_4x4_mmx)));
|
||||
#endif
|
||||
#if HAVE_SSE2
|
||||
const sixtap_predict_fn_t sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2;
|
||||
const sixtap_predict_fn_t sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2;
|
||||
const sixtap_predict_fn_t sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, SixtapPredictTest, ::testing::Values(
|
||||
make_tuple(16, 16, sixtap_16x16_sse2),
|
||||
make_tuple(8, 8, sixtap_8x8_sse2),
|
||||
make_tuple(8, 4, sixtap_8x4_sse2)));
|
||||
#endif
|
||||
#if HAVE_SSSE3
|
||||
const sixtap_predict_fn_t sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3;
|
||||
const sixtap_predict_fn_t sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3;
|
||||
const sixtap_predict_fn_t sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3;
|
||||
const sixtap_predict_fn_t sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSSE3, SixtapPredictTest, ::testing::Values(
|
||||
make_tuple(16, 16, sixtap_16x16_ssse3),
|
||||
make_tuple(8, 8, sixtap_8x8_ssse3),
|
||||
make_tuple(8, 4, sixtap_8x4_ssse3),
|
||||
make_tuple(4, 4, sixtap_4x4_ssse3)));
|
||||
#endif
|
||||
} // namespace
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
extern "C" {
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
// Pointer type for the block-subtraction functions exercised by this file.
typedef void (*subtract_b_fn_t)(BLOCK *be, BLOCKD *bd, int pitch);
|
||||
|
||||
namespace {
|
||||
|
||||
// Value-parameterized fixture; the parameter is the subtract function to test.
class SubtractBlockTest : public ::testing::TestWithParam<subtract_b_fn_t> {};
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
TEST_P(SubtractBlockTest, SimpleSubtract) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  BLOCK be;
  BLOCKD bd;
  // in libvpx, this stride is always 16
  const int kDiffPredStride = 16;
  // Source strides to try; the trailing 0 terminates the list.
  const int kSrcStride[] = {32, 16, 8, 4, 0};
  const int kBlockWidth = 4;
  const int kBlockHeight = 4;

  // Allocate... align to 16 for mmx/sse tests
  uint8_t *source = reinterpret_cast<uint8_t*>(
      vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
  be.src_diff = reinterpret_cast<int16_t*>(
      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
  bd.predictor = reinterpret_cast<unsigned char*>(
      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));

  for (const int *stride = kSrcStride; *stride > 0; ++stride) {
    // start at block0
    be.src = 0;
    be.base_src = &source;
    be.src_stride = *stride;

    // Seed the difference buffer with a recognisable pattern.
    int16_t *const diff_fill = be.src_diff;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        diff_fill[r * kDiffPredStride + c] = 0xa5a5;
      }
    }

    // Random source block, laid out with the stride under test.
    uint8_t *const src_fill = *be.base_src;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        src_fill[r * be.src_stride + c] = rnd.Rand8();
      }
    }

    // Random predictor block.
    uint8_t *const pred_fill = bd.predictor;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        pred_fill[r * kDiffPredStride + c] = rnd.Rand8();
      }
    }

    GetParam()(&be, &bd, kDiffPredStride);

    // Re-read the pointers after the call; every source pixel must equal
    // difference + predictor.
    uint8_t *const src_out = *be.base_src;
    int16_t *const diff_out = be.src_diff;
    uint8_t *const pred_out = bd.predictor;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        const int d = r * kDiffPredStride + c;
        EXPECT_EQ(src_out[r * be.src_stride + c],
                  (diff_out[d] + pred_out[d])) << "r = " << r
                                               << ", c = " << c;
      }
    }
  }
  vpx_free(be.src_diff);
  vpx_free(source);
  vpx_free(bd.predictor);
}
|
||||
|
||||
// Portable C implementation.
INSTANTIATE_TEST_CASE_P(
    C, SubtractBlockTest,
    ::testing::Values(vp8_subtract_b_c));

// x86 implementations, each instantiated only when the matching extension
// is enabled.
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(
    MMX, SubtractBlockTest,
    ::testing::Values(vp8_subtract_b_mmx));
#endif

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, SubtractBlockTest,
    ::testing::Values(vp8_subtract_b_sse2));
#endif
|
||||
|
||||
} // namespace
|
|
@ -1 +1,123 @@
|
|||
d5dfb0151c9051f8c85999255645d7a23916d3c0 hantro_collage_w352h288.yuv
|
||||
5184c46ddca8b1fadd16742e8500115bc8f749da vp80-00-comprehensive-001.ivf
|
||||
65bf1bbbced81b97bd030f376d1b7f61a224793f vp80-00-comprehensive-002.ivf
|
||||
906b4c1e99eb734504c504b3f1ad8052137ce672 vp80-00-comprehensive-003.ivf
|
||||
ec144b1af53af895db78355785650b96dd3f0ade vp80-00-comprehensive-004.ivf
|
||||
afc7091785c62f1c121c4554a2830c30704587d9 vp80-00-comprehensive-005.ivf
|
||||
42ea9d55c818145d06a9b633b8e85c6a6164fd3e vp80-00-comprehensive-006.ivf
|
||||
e5b3a73ab79fe024c14309d653d6bed92902ee3b vp80-00-comprehensive-007.ivf
|
||||
f3c50a58875930adfb84525c0ef59d7e4c08540c vp80-00-comprehensive-008.ivf
|
||||
4b2841fdb83db51ae322096ae468bbb9dc2c8362 vp80-00-comprehensive-009.ivf
|
||||
efbff736e3a91ab6a98c5bc2dce65d645944c7b1 vp80-00-comprehensive-010.ivf
|
||||
6b315102cae008d22a3d2c231be92cb704a222f8 vp80-00-comprehensive-011.ivf
|
||||
f3214a4fea14c2d5ec689936c1613f274c859ee8 vp80-00-comprehensive-012.ivf
|
||||
e4094e96d308c8a35b74c480a43d853c5294cd34 vp80-00-comprehensive-013.ivf
|
||||
5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a vp80-00-comprehensive-014.ivf
|
||||
e8467688ddf26b5000664f904faf0d70506aa653 vp80-00-comprehensive-015.ivf
|
||||
aab55582337dfd2a39ff54fb2576a91910d49337 vp80-00-comprehensive-016.ivf
|
||||
1ba24724f80203c9bae4f1d0f99d534721980016 vp80-00-comprehensive-017.ivf
|
||||
143a15512b46f436280ddb4d0e6411eb4af434f2 vp80-00-comprehensive-018.ivf
|
||||
c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b vp80-01-intra-1400.ivf
|
||||
f383955229afe3408453e316d11553d923ca60d5 vp80-01-intra-1411.ivf
|
||||
84e1f4343f174c9f3c83f834bac3196fb325bf2c vp80-01-intra-1416.ivf
|
||||
fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 vp80-01-intra-1417.ivf
|
||||
71ea772d3e9d315b8cbecf41207b8a237c34853b vp80-02-inter-1402.ivf
|
||||
d85dbc4271525dcd128c503f936fe69091d1f8d0 vp80-02-inter-1412.ivf
|
||||
d4e5d3ad56511867d025f93724d090f92ba6ec3d vp80-02-inter-1418.ivf
|
||||
91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa vp80-02-inter-1424.ivf
|
||||
17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 vp80-03-segmentation-01.ivf
|
||||
3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb vp80-03-segmentation-02.ivf
|
||||
c156778d5340967d4b369c490848076e92f1f875 vp80-03-segmentation-03.ivf
|
||||
d25dcff6c60e87a1af70945b8911b6b4998533b0 vp80-03-segmentation-04.ivf
|
||||
362baba2ce454c9db21218f35e81c27a5ed0b730 vp80-03-segmentation-1401.ivf
|
||||
d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 vp80-03-segmentation-1403.ivf
|
||||
033adf7f3a13836a3f1cffcb87c1972900f2b5c6 vp80-03-segmentation-1407.ivf
|
||||
4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f vp80-03-segmentation-1408.ivf
|
||||
f37a62b197c2600d75e0ccfbb31b60efdedac251 vp80-03-segmentation-1409.ivf
|
||||
eb25bd7bfba5b2f6935018a930f42d123b1e7fcd vp80-03-segmentation-1410.ivf
|
||||
b9d5c436663a30c27cfff84b53a002e501258843 vp80-03-segmentation-1413.ivf
|
||||
6da92b9d1a180cc3a8afe348ab12258f5a37be1a vp80-03-segmentation-1414.ivf
|
||||
a4f5842602886bd669f115f93d8a35c035cb0948 vp80-03-segmentation-1415.ivf
|
||||
f295dceb8ef278b77251b3f9df8aee22e161d547 vp80-03-segmentation-1425.ivf
|
||||
198dbf9f36f733200e432664cc8c5752d59779de vp80-03-segmentation-1426.ivf
|
||||
7704804e32f5de976803929934a7fafe101ac7b0 vp80-03-segmentation-1427.ivf
|
||||
831ccd862ea95ca025d2f3bd8b88678752f5416d vp80-03-segmentation-1432.ivf
|
||||
b3c11978529289f9109f2766fcaba3ebc40e11ef vp80-03-segmentation-1435.ivf
|
||||
a835a731f5520ebfc1002c40121264d0020559ac vp80-03-segmentation-1436.ivf
|
||||
1d1732942f773bb2a5775fcb9689b1579ce28eab vp80-03-segmentation-1437.ivf
|
||||
db04799adfe089dfdf74dbd43cc05ede7161f99e vp80-03-segmentation-1441.ivf
|
||||
7caf39b3f20cfd52b998210878062e52a5edf1e6 vp80-03-segmentation-1442.ivf
|
||||
3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 vp80-04-partitions-1404.ivf
|
||||
93cc323b6b6867f1b12dd48773424549c6960a6b vp80-04-partitions-1405.ivf
|
||||
047eedb14b865bdac8a3538e63801054e0295e9c vp80-04-partitions-1406.ivf
|
||||
0f1233bd2bc33f56ce5e495dbd455d122339f384 vp80-05-sharpness-1428.ivf
|
||||
51767fc136488a9535c2a4c38067c542ee2048df vp80-05-sharpness-1429.ivf
|
||||
9805aa107672de25d6fb8c35e20d06deca5efe18 vp80-05-sharpness-1430.ivf
|
||||
61db6b965f9c27aebe71b85bf2d5877e58e4bbdf vp80-05-sharpness-1431.ivf
|
||||
10420d266290d2923555f84af38eeb96edbd3ae8 vp80-05-sharpness-1433.ivf
|
||||
3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 vp80-05-sharpness-1434.ivf
|
||||
c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 vp80-05-sharpness-1438.ivf
|
||||
aff51d865c2621b60510459244ea83e958e4baed vp80-05-sharpness-1439.ivf
|
||||
da386e72b19b5485a6af199c5eb60ef25e510dd1 vp80-05-sharpness-1440.ivf
|
||||
6759a095203d96ccd267ce09b1b050b8cc4c2f1f vp80-05-sharpness-1443.ivf
|
||||
db55ec7fd02c864ba996ff060b25b1e08611330b vp80-00-comprehensive-001.ivf.md5
|
||||
29db0ad011cba1e45f856d5623cd38dac3e3bf19 vp80-00-comprehensive-002.ivf.md5
|
||||
e84f258f69e173e7d68f8f8c037a0a3766902182 vp80-00-comprehensive-003.ivf.md5
|
||||
eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 vp80-00-comprehensive-004.ivf.md5
|
||||
4206f71c94894bd5b5b376f6c09b3817dbc65206 vp80-00-comprehensive-005.ivf.md5
|
||||
4f89b356f6f2fecb928f330a10f804f00f5325f5 vp80-00-comprehensive-006.ivf.md5
|
||||
2813236a32964dd8007e17648bcf035a20fcda6c vp80-00-comprehensive-007.ivf.md5
|
||||
10746c72098f872803c900e17c5680e451f5f498 vp80-00-comprehensive-008.ivf.md5
|
||||
39a23d0692ce64421a7bb7cdf6ccec5928d37fff vp80-00-comprehensive-009.ivf.md5
|
||||
f6e3de8931a0cc659bda8fbc14050346955e72d4 vp80-00-comprehensive-010.ivf.md5
|
||||
101683ec195b6e944f7cd1e468fc8921439363e6 vp80-00-comprehensive-011.ivf.md5
|
||||
1f592751ce46d8688998fa0fa4fbdcda0fd4058c vp80-00-comprehensive-012.ivf.md5
|
||||
6066176f90ca790251e795fca1a5797d59999841 vp80-00-comprehensive-013.ivf.md5
|
||||
2656da94ba93691f23edc4d60b3a09e2be46c217 vp80-00-comprehensive-014.ivf.md5
|
||||
c6e0d5f5d61460c8ac8edfa4e701f10312c03133 vp80-00-comprehensive-015.ivf.md5
|
||||
ee60fee501d8493e34e8d6a1fe315b51ed09b24a vp80-00-comprehensive-016.ivf.md5
|
||||
9f1914ceffcad4546c0a29de3ef591d8bea304dc vp80-00-comprehensive-017.ivf.md5
|
||||
e0305178fe288a9fd8082b39e2d03181edb19054 vp80-00-comprehensive-018.ivf.md5
|
||||
612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 vp80-01-intra-1400.ivf.md5
|
||||
48ea06097ac8269c5e8c2131d3d0639f431fcf0e vp80-01-intra-1411.ivf.md5
|
||||
6e2ab4e7677ad0ba868083ca6bc387ee922b400c vp80-01-intra-1416.ivf.md5
|
||||
eca0a90348959ce3854142f8d8641b13050e8349 vp80-01-intra-1417.ivf.md5
|
||||
920feea203145d5c2258a91c4e6991934a79a99e vp80-02-inter-1402.ivf.md5
|
||||
f71d97909fe2b3dd65be7e1f56c72237f0cef200 vp80-02-inter-1412.ivf.md5
|
||||
e911254569a30bbb2a237ff8b79f69ed9da0672d vp80-02-inter-1418.ivf.md5
|
||||
58c789c50c9bb9cc90580bed291164a0939d28ba vp80-02-inter-1424.ivf.md5
|
||||
ff3e2f441327b9c20a0b37c524e0f5a48a36de7b vp80-03-segmentation-01.ivf.md5
|
||||
0791f417f076a542ae66fbc3426ab4d94cbd6c75 vp80-03-segmentation-02.ivf.md5
|
||||
722e50f1a6a91c34302d68681faffc1c26d1cc57 vp80-03-segmentation-03.ivf.md5
|
||||
c701f1885bcfb27fb8e70cc65606b289172ef889 vp80-03-segmentation-04.ivf.md5
|
||||
f79bc9ec189a2b4807632a3d0c5bf04a178b5300 vp80-03-segmentation-1401.ivf.md5
|
||||
b9aa4c74c0219b639811c44760d0b24cd8bb436a vp80-03-segmentation-1403.ivf.md5
|
||||
70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 vp80-03-segmentation-1407.ivf.md5
|
||||
265f962ee781531f9a93b9309461316fd32b2a1d vp80-03-segmentation-1408.ivf.md5
|
||||
0c4ecbbd6dc042d30e626d951b65f460dd6cd563 vp80-03-segmentation-1409.ivf.md5
|
||||
cf779af36a937f06570a0fca9db64ba133451dee vp80-03-segmentation-1410.ivf.md5
|
||||
0e6c5036d51ab078842f133934926c598a9cff02 vp80-03-segmentation-1413.ivf.md5
|
||||
eb3930aaf229116c80d507516c34759c3f6cdf69 vp80-03-segmentation-1414.ivf.md5
|
||||
123d6c0f72ee87911c4ae7538e87b7d163b22d6c vp80-03-segmentation-1415.ivf.md5
|
||||
e70551d1a38920e097a5d8782390b79ecaeb7505 vp80-03-segmentation-1425.ivf.md5
|
||||
44e8f4117e46dbb302b2cfd81171cc1a1846e431 vp80-03-segmentation-1426.ivf.md5
|
||||
52636e54aee5f95bbace37021bd67de5db767e9a vp80-03-segmentation-1427.ivf.md5
|
||||
b1ad3eff20215c28e295b15ef3636ed926d59cba vp80-03-segmentation-1432.ivf.md5
|
||||
24c22a552fa28a90e5978f67f57181cc2d7546d7 vp80-03-segmentation-1435.ivf.md5
|
||||
96c49c390abfced18a7a8c9b9ea10af778e10edb vp80-03-segmentation-1436.ivf.md5
|
||||
f95eb6214571434f1f73ab7833b9ccdf47588020 vp80-03-segmentation-1437.ivf.md5
|
||||
1c0700ca27c9b0090a7747a4b0b4dc21d1843181 vp80-03-segmentation-1441.ivf.md5
|
||||
81d4f23ca32667ee958bae579c8f5e97ba72eb97 vp80-03-segmentation-1442.ivf.md5
|
||||
272efcef07a3a30fbca51bfd566063d8258ec0be vp80-04-partitions-1404.ivf.md5
|
||||
66ed219ab812ac801b256d35cf495d193d4cf478 vp80-04-partitions-1405.ivf.md5
|
||||
36083f37f56f502bd60ec5e07502ee9e6b8699b0 vp80-04-partitions-1406.ivf.md5
|
||||
6ca909bf168a64c09415626294665dc1be3d1973 vp80-05-sharpness-1428.ivf.md5
|
||||
1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a vp80-05-sharpness-1429.ivf.md5
|
||||
71bcbe5357d36a19df5b07fbe3e27bffa8893f0a vp80-05-sharpness-1430.ivf.md5
|
||||
89a09b1dffce2d55770a89e58d9925c70ef79bf8 vp80-05-sharpness-1431.ivf.md5
|
||||
08444a18b4e6ba3450c0796dd728d48c399a2dc9 vp80-05-sharpness-1433.ivf.md5
|
||||
6d6223719a90c13e848aa2a8a6642098cdb5977a vp80-05-sharpness-1434.ivf.md5
|
||||
41d70bb5fa45bc88da1604a0af466930b8dd77b5 vp80-05-sharpness-1438.ivf.md5
|
||||
086c56378df81b6cee264d7540a7b8f2b405c7a4 vp80-05-sharpness-1439.ivf.md5
|
||||
d32dc2c4165eb266ea4c23c14a45459b363def32 vp80-05-sharpness-1440.ivf.md5
|
||||
8c69dc3d8e563f56ffab5ad1e400d9e689dd23df vp80-05-sharpness-1443.ivf.md5
|
190
test/test.mk
190
test/test.mk
|
@ -1,10 +1,186 @@
|
|||
LIBVPX_TEST_SRCS-yes += test.mk
|
||||
LIBVPX_TEST_SRCS-yes += acm_random.h
|
||||
LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += dct16x16_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += fdct4x4_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += fdct8x8_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += test.mk
|
||||
LIBVPX_TEST_SRCS-yes += test_libvpx.cc
|
||||
LIBVPX_TEST_SRCS-yes += util.h
|
||||
LIBVPX_TEST_SRCS-yes += video_source.h
|
||||
|
||||
LIBVPX_TEST_DATA-yes += hantro_collage_w352h288.yuv
|
||||
##
|
||||
## BLACK BOX TESTS
|
||||
##
|
||||
## Black box tests only use the public API.
|
||||
##
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += datarate_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += encode_test_driver.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += error_resilience_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += i420_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ../md5_utils.h ../md5_utils.c
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += decode_test_driver.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += ivf_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += test_vector_test.cc
|
||||
##
|
||||
## WHITE BOX TESTS
|
||||
##
|
||||
## Whitebox tests invoke functions not exposed via the public API. Certain
|
||||
## shared library builds don't make these functions accessible.
|
||||
##
|
||||
ifeq ($(CONFIG_SHARED),)
|
||||
|
||||
# These tests require both the encoder and decoder to be built.
|
||||
ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
|
||||
LIBVPX_TEST_SRCS-yes += boolcoder_test.cc
|
||||
endif
|
||||
|
||||
LIBVPX_TEST_SRCS-yes += idctllm_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += intrapred_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
|
||||
LIBVPX_TEST_SRCS-yes += sad_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
|
||||
LIBVPX_TEST_SRCS-yes += sixtap_predict_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
|
||||
|
||||
# VP9 tests
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
|
||||
ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
|
||||
LIBVPX_TEST_SRCS-yes += idct8x8_test.cc
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
##
|
||||
## TEST DATA
|
||||
##
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_ENCODER) += hantro_collage_w352h288.yuv
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5
|
||||
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5
|
||||
|
|
|
@ -26,7 +26,7 @@ int main(int argc, char **argv) {
|
|||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
#if ARCH_X86 || ARCH_X86_64
|
||||
int simd_caps = x86_simd_caps();
|
||||
const int simd_caps = x86_simd_caps();
|
||||
if (!(simd_caps & HAS_MMX))
|
||||
append_gtest_filter(":-MMX/*");
|
||||
if (!(simd_caps & HAS_SSE))
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
|
||||
Use of this source code is governed by a BSD-style license
|
||||
that can be found in the LICENSE file in the root of the source
|
||||
tree. An additional intellectual property rights grant can be found
|
||||
in the file PATENTS. All contributing project authors may
|
||||
be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/decode_test_driver.h"
|
||||
#include "test/ivf_video_source.h"
|
||||
extern "C" {
|
||||
#include "./md5_utils.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define snprintf sprintf_s
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
// There are 61 test vectors in total.
|
||||
const char *kTestVectors[] = {
|
||||
"vp80-00-comprehensive-001.ivf",
|
||||
"vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf",
|
||||
"vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf",
|
||||
"vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf",
|
||||
"vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf",
|
||||
"vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf",
|
||||
"vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf",
|
||||
"vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf",
|
||||
"vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf",
|
||||
"vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf",
|
||||
"vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf",
|
||||
"vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf",
|
||||
"vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf",
|
||||
"vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf",
|
||||
"vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf",
|
||||
"vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf",
|
||||
"vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf",
|
||||
"vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf",
|
||||
"vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf",
|
||||
"vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf",
|
||||
"vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf",
|
||||
"vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf",
|
||||
"vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf",
|
||||
"vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf",
|
||||
"vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf",
|
||||
"vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf",
|
||||
"vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf",
|
||||
"vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf",
|
||||
"vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf",
|
||||
"vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf",
|
||||
"vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf"
|
||||
};
|
||||
|
||||
class TestVectorTest : public libvpx_test::DecoderTest,
|
||||
public ::testing::TestWithParam<const char*> {
|
||||
protected:
|
||||
TestVectorTest() : md5_file_(NULL) {}
|
||||
|
||||
virtual ~TestVectorTest() {
|
||||
if (md5_file_)
|
||||
fclose(md5_file_);
|
||||
}
|
||||
|
||||
void OpenMD5File(const std::string& md5_file_name_) {
|
||||
md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
|
||||
ASSERT_TRUE(md5_file_) << "Md5 file open failed. Filename: "
|
||||
<< md5_file_name_;
|
||||
}
|
||||
|
||||
virtual void DecompressedFrameHook(const vpx_image_t& img,
|
||||
const unsigned int frame_number) {
|
||||
char expected_md5[33];
|
||||
char junk[128];
|
||||
|
||||
// Read correct md5 checksums.
|
||||
const int res = fscanf(md5_file_, "%s %s", expected_md5, junk);
|
||||
ASSERT_NE(res, EOF) << "Read md5 data failed";
|
||||
expected_md5[32] = '\0';
|
||||
|
||||
MD5Context md5;
|
||||
MD5Init(&md5);
|
||||
|
||||
// Compute and update md5 for each raw in decompressed data.
|
||||
for (int plane = 0; plane < 3; ++plane) {
|
||||
uint8_t *buf = img.planes[plane];
|
||||
|
||||
for (unsigned int y = 0; y < (plane ? (img.d_h + 1) >> 1 : img.d_h);
|
||||
++y) {
|
||||
MD5Update(&md5, buf, (plane ? (img.d_w + 1) >> 1 : img.d_w));
|
||||
buf += img.stride[plane];
|
||||
}
|
||||
}
|
||||
|
||||
uint8_t md5_sum[16];
|
||||
MD5Final(md5_sum, &md5);
|
||||
|
||||
char actual_md5[33];
|
||||
// Convert to get the actual md5.
|
||||
for (int i = 0; i < 16; i++) {
|
||||
snprintf(&actual_md5[i * 2], sizeof(actual_md5) - i * 2, "%02x",
|
||||
md5_sum[i]);
|
||||
}
|
||||
actual_md5[32] = '\0';
|
||||
|
||||
// Check md5 match.
|
||||
ASSERT_STREQ(expected_md5, actual_md5)
|
||||
<< "Md5 checksums don't match: frame number = " << frame_number;
|
||||
}
|
||||
|
||||
private:
|
||||
FILE *md5_file_;
|
||||
};
|
||||
|
||||
// This test runs through the whole set of test vectors, and decodes them.
|
||||
// The md5 checksums are computed for each frame in the video file. If md5
|
||||
// checksums match the correct md5 data, then the test is passed. Otherwise,
|
||||
// the test failed.
|
||||
TEST_P(TestVectorTest, MD5Match) {
  const std::string filename = GetParam();

  // Open compressed video file.
  libvpx_test::IVFVideoSource video(filename);
  video.Init();

  // The checksum file is named after the vector: "<vector>.md5".
  const std::string md5_filename = filename + ".md5";

  // OpenMD5File() signals failure with a fatal assertion, which only returns
  // from the helper. Wrap the call so this test also stops here instead of
  // decoding with no checksum file available.
  ASSERT_NO_FATAL_FAILURE(OpenMD5File(md5_filename));

  // Decode frame, and check the md5 matching.
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(TestVectorSequence, TestVectorTest,
|
||||
::testing::ValuesIn(kTestVectors));
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef TEST_UTIL_H_
|
||||
#define TEST_UTIL_H_
|
||||
|
||||
// Macros
|
||||
#define PARAMS(...) ::testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > >
|
||||
#define GET_PARAM(k) std::tr1::get< k >(GetParam())
|
||||
|
||||
#endif // TEST_UTIL_H_
|
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#ifndef TEST_VIDEO_SOURCE_H_
|
||||
#define TEST_VIDEO_SOURCE_H_
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <string>
|
||||
#include "test/acm_random.h"
|
||||
#include "vpx/vpx_encoder.h"
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
// Opens |file_name| for binary reading and returns the fopen() result.
// When the LIBVPX_TEST_DATA_PATH environment variable is set, the file is
// looked up as "<LIBVPX_TEST_DATA_PATH>/<file_name>"; otherwise |file_name|
// is used unchanged.
static FILE *OpenTestDataFile(const std::string& file_name) {
  const char *const data_dir = getenv("LIBVPX_TEST_DATA_PATH");
  const std::string full_path =
      data_dir ? std::string(data_dir) + "/" + file_name : file_name;
  return fopen(full_path.c_str(), "rb");
}
|
||||
|
||||
// Abstract base class for test video sources, which provide a stream of
|
||||
// vpx_image_t images with associated timestamps and duration.
|
||||
class VideoSource {
|
||||
public:
|
||||
virtual ~VideoSource() {}
|
||||
|
||||
// Prepare the stream for reading, rewind/open as necessary.
|
||||
virtual void Begin() = 0;
|
||||
|
||||
// Advance the cursor to the next frame
|
||||
virtual void Next() = 0;
|
||||
|
||||
// Get the current video frame, or NULL on End-Of-Stream.
|
||||
virtual vpx_image_t *img() const = 0;
|
||||
|
||||
// Get the presentation timestamp of the current frame.
|
||||
virtual vpx_codec_pts_t pts() const = 0;
|
||||
|
||||
// Get the current frame's duration
|
||||
virtual unsigned long duration() const = 0;
|
||||
|
||||
// Get the timebase for the stream
|
||||
virtual vpx_rational_t timebase() const = 0;
|
||||
|
||||
// Get the current frame counter, starting at 0.
|
||||
virtual unsigned int frame() const = 0;
|
||||
|
||||
// Get the current file limit.
|
||||
virtual unsigned int limit() const = 0;
|
||||
};
|
||||
|
||||
|
||||
class DummyVideoSource : public VideoSource {
|
||||
public:
|
||||
DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) {
|
||||
SetSize(80, 64);
|
||||
}
|
||||
|
||||
virtual ~DummyVideoSource() { vpx_img_free(img_); }
|
||||
|
||||
virtual void Begin() {
|
||||
frame_ = 0;
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
virtual void Next() {
|
||||
++frame_;
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
virtual vpx_image_t *img() const {
|
||||
return (frame_ < limit_) ? img_ : NULL;
|
||||
}
|
||||
|
||||
// Models a stream where Timebase = 1/FPS, so pts == frame.
|
||||
virtual vpx_codec_pts_t pts() const { return frame_; }
|
||||
|
||||
virtual unsigned long duration() const { return 1; }
|
||||
|
||||
virtual vpx_rational_t timebase() const {
|
||||
const vpx_rational_t t = {1, 30};
|
||||
return t;
|
||||
}
|
||||
|
||||
virtual unsigned int frame() const { return frame_; }
|
||||
|
||||
virtual unsigned int limit() const { return limit_; }
|
||||
|
||||
void SetSize(unsigned int width, unsigned int height) {
|
||||
if (width != width_ || height != height_) {
|
||||
vpx_img_free(img_);
|
||||
raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
|
||||
img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_VPXI420, width, height, 32);
|
||||
width_ = width;
|
||||
height_ = height;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void FillFrame() { memset(img_->img_data, 0, raw_sz_); }
|
||||
|
||||
vpx_image_t *img_;
|
||||
size_t raw_sz_;
|
||||
unsigned int limit_;
|
||||
unsigned int frame_;
|
||||
unsigned int width_;
|
||||
unsigned int height_;
|
||||
};
|
||||
|
||||
|
||||
class RandomVideoSource : public DummyVideoSource {
|
||||
public:
|
||||
RandomVideoSource(int seed = ACMRandom::DeterministicSeed())
|
||||
: rnd_(seed),
|
||||
seed_(seed) { }
|
||||
|
||||
protected:
|
||||
// Reset the RNG to get a matching stream for the second pass
|
||||
virtual void Begin() {
|
||||
frame_ = 0;
|
||||
rnd_.Reset(seed_);
|
||||
FillFrame();
|
||||
}
|
||||
|
||||
// 15 frames of noise, followed by 15 static frames. Reset to 0 rather
|
||||
// than holding previous frames to encourage keyframes to be thrown.
|
||||
virtual void FillFrame() {
|
||||
if (frame_ % 30 < 15)
|
||||
for (size_t i = 0; i < raw_sz_; ++i)
|
||||
img_->img_data[i] = rnd_.Rand8();
|
||||
else
|
||||
memset(img_->img_data, 0, raw_sz_);
|
||||
}
|
||||
|
||||
ACMRandom rnd_;
|
||||
int seed_;
|
||||
};
|
||||
|
||||
// Abstract base class for test video sources, which provide a stream of
|
||||
// compressed frames to the decoder.
|
||||
class CompressedVideoSource {
 public:
  virtual ~CompressedVideoSource() {}

  // One-time setup hook, separate from Begin().
  virtual void Init() = 0;

  // Readies the stream for reading (rewinding or opening it as needed).
  virtual void Begin() = 0;

  // Moves the cursor forward to the following frame.
  virtual void Next() = 0;

  // Returns a pointer to the compressed data of the current frame.
  virtual const uint8_t *cxdata() const = 0;

  // Returns the size of the current frame.
  virtual const unsigned int frame_size() const = 0;

  // Returns the number of the current frame.
  virtual const unsigned int frame_number() const = 0;
};
|
||||
|
||||
} // namespace libvpx_test
|
||||
|
||||
#endif // TEST_VIDEO_SOURCE_H_
|
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
|
||||
extern "C" {
|
||||
#include "vpx_rtcd.h"
|
||||
}
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// 16.16 fixed-point multipliers for the 4x4 inverse transform:
//   20091 ~= (sqrt(2) * cos(pi / 8) - 1) * 65536
//   35468 ~=  sqrt(2) * sin(pi / 8)      * 65536
const int cospi8sqrt2minus1 = 20091;
const int sinpi8sqrt2 = 35468;

// Reference 4x4 inverse DCT on 16 coefficients stored row-major.
// The first pass transforms each column of |input| into |output|; the second
// pass transforms each row of |output| in place, adding 4 and shifting right
// by 3 to round the final values.
void reference_idct4x4(const int16_t *input, int16_t *output) {
  // Pass 1: columns. Elements of one column are 4 entries apart.
  for (int col = 0; col < 4; ++col) {
    const int x0 = input[col];
    const int x1 = input[col + 4];
    const int x2 = input[col + 8];
    const int x3 = input[col + 12];

    const int even_sum = x0 + x2;
    const int even_diff = x0 - x2;
    const int odd_diff = ((x1 * sinpi8sqrt2) >> 16) -
                         (x3 + ((x3 * cospi8sqrt2minus1) >> 16));
    const int odd_sum = (x1 + ((x1 * cospi8sqrt2minus1) >> 16)) +
                        ((x3 * sinpi8sqrt2) >> 16);

    output[col] = even_sum + odd_sum;
    output[col + 12] = even_sum - odd_sum;
    output[col + 4] = even_diff + odd_diff;
    output[col + 8] = even_diff - odd_diff;
  }

  // Pass 2: rows, in place. All four inputs of a row are read before any of
  // its outputs is written.
  for (int row = 0; row < 4; ++row) {
    int16_t *const line = output + 4 * row;
    const int x0 = line[0];
    const int x1 = line[1];
    const int x2 = line[2];
    const int x3 = line[3];

    const int even_sum = x0 + x2;
    const int even_diff = x0 - x2;
    const int odd_diff = ((x1 * sinpi8sqrt2) >> 16) -
                         (x3 + ((x3 * cospi8sqrt2minus1) >> 16));
    const int odd_sum = (x1 + ((x1 * cospi8sqrt2minus1) >> 16)) +
                        ((x3 * sinpi8sqrt2) >> 16);

    line[0] = (even_sum + odd_sum + 4) >> 3;
    line[3] = (even_sum - odd_sum + 4) >> 3;
    line[1] = (even_diff + odd_diff + 4) >> 3;
    line[2] = (even_diff - odd_diff + 4) >> 3;
  }
}
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
// Feeds one million random 4x4 blocks through the forward DCT for each of two
// input ranges and checks that, for every output coefficient, the counts of
// negative and positive results do not differ by more than the allowed bias.
TEST(Vp8FdctTest, SignBiasCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int16_t test_input_block[16];
  int16_t test_output_block[16];
  // NOTE(review): presumably the input row pitch in bytes (4 int16_t values
  // per row) -- confirm against vp8_short_fdct4x4_c.
  const int pitch = 8;
  // Per coefficient: [0] counts negative outputs, [1] counts positive ones.
  int count_sign_block[16][2];
  const int count_test_block = 1000000;

  memset(count_sign_block, 0, sizeof(count_sign_block));

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j)
      test_input_block[j] = rnd.Rand8() - rnd.Rand8();

    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0)
        ++count_sign_block[j][0];
      else if (test_output_block[j] > 0)
        ++count_sign_block[j][1];
    }
  }

  // 10000 out of 1000000 blocks corresponds to the 1% quoted in the message.
  bool bias_acceptable = true;
  for (int j = 0; j < 16; ++j)
    bias_acceptable = bias_acceptable &&
    (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000);

  EXPECT_TRUE(bias_acceptable)
    << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]";

  memset(count_sign_block, 0, sizeof(count_sign_block));

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-15, 15].
    for (int j = 0; j < 16; ++j)
      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);

    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0)
        ++count_sign_block[j][0];
      else if (test_output_block[j] > 0)
        ++count_sign_block[j][1];
    }
  }

  // 100000 out of 1000000 blocks corresponds to the 10% quoted in the message.
  bias_acceptable = true;
  for (int j = 0; j < 16; ++j)
    bias_acceptable = bias_acceptable &&
    (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000);

  EXPECT_TRUE(bias_acceptable)
    << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
}
|
||||
|
||||
// Runs one million random blocks through the forward DCT followed by the
// reference inverse DCT and checks the squared reconstruction error, both per
// sample (maximum) and accumulated over all blocks.
TEST(Vp8FdctTest, RoundTripErrorCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int max_error = 0;       // Largest squared per-sample error seen.
  double total_error = 0;  // Sum of squared errors over all samples.
  const int count_test_block = 1000000;
  for (int i = 0; i < count_test_block; ++i) {
    int16_t test_input_block[16];
    int16_t test_temp_block[16];
    int16_t test_output_block[16];

    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j)
      test_input_block[j] = rnd.Rand8() - rnd.Rand8();

    const int pitch = 8;
    vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
    reference_idct4x4(test_temp_block, test_output_block);

    for (int j = 0; j < 16; ++j) {
      const int diff = test_input_block[j] - test_output_block[j];
      const int error = diff * diff;
      if (max_error < error)
        max_error = error;
      total_error += error;
    }
  }

  EXPECT_GE(1, max_error)
    << "Error: FDCT/IDCT has an individual roundtrip error > 1";

  // The summed squared error may not exceed the number of blocks, i.e. an
  // average of at most 1 per block.
  EXPECT_GE(count_test_block, total_error)
    << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
}
|
||||
|
||||
} // namespace
|
|
@ -60,7 +60,7 @@ void SetUseReferenceImpl(int use) {
|
|||
|
||||
#if defined(__ARM_NEON__) && !defined(YUV_DISABLE_ASM)
|
||||
#define HAS_SCALEROWDOWN2_NEON
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, int /* src_stride */,
|
||||
void ScaleRowDown2_NEON(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
|
@ -102,7 +102,7 @@ void ScaleRowDown2Int_NEON(const uint8* src_ptr, int src_stride,
|
|||
}
|
||||
|
||||
#define HAS_SCALEROWDOWN4_NEON
|
||||
static void ScaleRowDown4_NEON(const uint8* src_ptr, int /* src_stride */,
|
||||
static void ScaleRowDown4_NEON(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
|
@ -160,7 +160,7 @@ static void ScaleRowDown4Int_NEON(const uint8* src_ptr, int src_stride,
|
|||
// Down scale from 4 to 3 pixels. Use the neon multilane read/write
|
||||
// to load up the every 4th pixel into a 4 different registers.
|
||||
// Point samples 32 pixels to 24 pixels.
|
||||
static void ScaleRowDown34_NEON(const uint8* src_ptr, int /* src_stride */,
|
||||
static void ScaleRowDown34_NEON(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"1: \n"
|
||||
|
@ -284,7 +284,7 @@ const unsigned short mult38_div9[8] __attribute__ ((aligned(16))) =
|
|||
65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18 };
|
||||
|
||||
// 32 -> 12
|
||||
static void ScaleRowDown38_NEON(const uint8* src_ptr, int,
|
||||
static void ScaleRowDown38_NEON(const uint8* src_ptr, int src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"vld1.u8 {q3}, [%3] \n"
|
||||
|
|
|
@ -5,7 +5,7 @@ import subprocess
|
|||
import sys
|
||||
|
||||
LONG_OPTIONS = ["shard=", "shards="]
|
||||
BASE_COMMAND = "./configure --enable-internal-stats --enable-experimental"
|
||||
BASE_COMMAND = "./configure --disable-vp8 --disable-unit-tests --enable-internal-stats --enable-experimental"
|
||||
|
||||
def RunCommand(command):
|
||||
run = subprocess.Popen(command, shell=True)
|
||||
|
|
11
usage.dox
11
usage.dox
|
@ -1,6 +1,6 @@
|
|||
/*!\page usage Usage
|
||||
|
||||
The vpx Multi-Format codec SDK provides a unified interface amongst its
|
||||
The vpx multi-format codec SDK provides a unified interface amongst its
|
||||
supported codecs. This abstraction allows applications using this SDK to
|
||||
easily support multiple video formats with minimal code duplication or
|
||||
"special casing." This section describes the interface common to all codecs.
|
||||
|
@ -14,8 +14,12 @@
|
|||
|
||||
For more information on decoder and encoder specific usage, see the
|
||||
following pages:
|
||||
\if decoder - \subpage usage_decode \endif
|
||||
\if decoder - \subpage usage_encode \endif
|
||||
\if decoder
|
||||
- \subpage usage_decode
|
||||
\endif
|
||||
\if encoder
|
||||
- \subpage usage_encode
|
||||
\endif
|
||||
|
||||
\section usage_types Important Data Types
|
||||
There are two important data structures to consider in this interface.
|
||||
|
@ -82,6 +86,7 @@
|
|||
|
||||
The available initialization methods are:
|
||||
\if encoder - #vpx_codec_enc_init (calls vpx_codec_enc_init_ver()) \endif
|
||||
\if multi-encoder - #vpx_codec_enc_init_multi (calls vpx_codec_enc_init_multi_ver()) \endif
|
||||
\if decoder - #vpx_codec_dec_init (calls vpx_codec_dec_init_ver()) \endif
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*! \page usage_encode Encode
|
||||
|
||||
The vpx_codec_encode() function is at the core of the decode loop. It
|
||||
The vpx_codec_encode() function is at the core of the encode loop. It
|
||||
processes raw images passed by the application, producing packets of
|
||||
compressed data. The <code>deadline</code> parameter controls the amount
|
||||
of time in microseconds the encoder should spend working on the frame. For
|
||||
|
@ -10,5 +10,4 @@
|
|||
|
||||
\ref samples
|
||||
|
||||
|
||||
*/
|
||||
|
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "blockd.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "onyxc_int.h"
|
||||
#include "findnearmv.h"
|
||||
#include "entropymode.h"
|
||||
#include "systemdependent.h"
|
||||
|
||||
/* Release every buffer that vp8_alloc_frame_buffers() attaches to the common
 * state, and clear the raw pointers that were handed to vpx_free(). */
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
{
    int buf;

    /* Frame buffer pool. */
    for (buf = 0; buf < NUM_YV12_BUFFERS; ++buf)
    {
        vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[buf]);
    }

    vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);

#if CONFIG_POSTPROC
    vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);

    /* The intermediate post-proc buffer is only released when flagged as
     * in use. */
    if (oci->post_proc_buffer_int_used)
    {
        vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int);
    }

    vpx_free(oci->pp_limits_buffer);
    oci->pp_limits_buffer = NULL;
#endif

    vpx_free(oci->above_context);
    oci->above_context = NULL;

    vpx_free(oci->mip);
    oci->mip = NULL;

#if CONFIG_ERROR_CONCEALMENT
    vpx_free(oci->prev_mip);
    oci->prev_mip = NULL;
#endif
}
|
||||
|
||||
/* (Re)allocate all frame-sized storage of the common state for the given
 * dimensions.
 *
 * Returns 0 on success.  If any allocation fails, everything is released
 * again through vp8_de_alloc_frame_buffers() and 1 is returned. */
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
    int idx;

    /* Drop whatever a previous call may have left attached. */
    vp8_de_alloc_frame_buffers(oci);

    /* Internal buffers are always multiples of 16: round both dimensions
     * up to the next multiple. */
    if (width & 0xf)
        width += 16 - (width & 0xf);

    if (height & 0xf)
        height += 16 - (height & 0xf);

    for (idx = 0; idx < NUM_YV12_BUFFERS; ++idx)
    {
        oci->fb_idx_ref_cnt[idx] = 0;
        oci->yv12_fb[idx].flags = 0;

        if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[idx], width, height,
                                        VP8BORDERINPIXELS) < 0)
            goto allocation_fail;
    }

    /* Buffers 0..3 are bound to the new/lst/gld/alt indices and each starts
     * with a reference count of one. */
    oci->new_fb_idx = 0;
    oci->lst_fb_idx = 1;
    oci->gld_fb_idx = 2;
    oci->alt_fb_idx = 3;

    for (idx = 0; idx < 4; ++idx)
        oci->fb_idx_ref_cnt[idx] = 1;

    /* The scaling scratch frame is full width but only 16 rows high. */
    if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
                                    VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    /* Macroblock geometry (16x16 units). */
    oci->mb_rows = height >> 4;
    oci->mb_cols = width >> 4;
    oci->MBs = oci->mb_rows * oci->mb_cols;
    oci->mode_info_stride = oci->mb_cols + 1;

    /* Mode info array carries one extra row and one extra column. */
    oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1),
                          sizeof(MODE_INFO));

    if (!oci->mip)
        goto allocation_fail;

    /* mi starts one row and one entry past the beginning of mip. */
    oci->mi = oci->mip + oci->mode_info_stride + 1;

    /* Allocation of previous mode info will be done in vp8_decode_frame()
     * as it is a decoder only data */

    oci->above_context =
        vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

    if (!oci->above_context)
        goto allocation_fail;

#if CONFIG_POSTPROC
    if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
                                    VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->post_proc_buffer_int_used = 0;
    vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));

    /* Fill the whole post-proc allocation with the value 128. */
    vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
               oci->post_proc_buffer.frame_size);

    /* Allocate buffer to store post-processing filter coefficients.
     *
     * Note: Round up mb_cols to support SIMD reads
     */
    oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));

    if (!oci->pp_limits_buffer)
        goto allocation_fail;
#endif

    return 0;

allocation_fail:
    /* Undo any partial allocation before reporting failure. */
    vp8_de_alloc_frame_buffers(oci);
    return 1;
}
|
||||
|
||||
/* Derive the loop-filter / motion-compensation settings from cm->version.
 *
 *   version | no_lpf | filter_type       | bilinear | full_pixel
 *   --------+--------+-------------------+----------+-----------
 *      0    |   0    | NORMAL_LOOPFILTER |    0     |     0
 *      1    |   0    | SIMPLE_LOOPFILTER |    1     |     0
 *      2    |   1    | NORMAL_LOOPFILTER |    1     |     0
 *      3    |   1    | SIMPLE_LOOPFILTER |    1     |     1
 *    other  |          same as version 0 (4,5,6,7 are reserved)
 */
void vp8_setup_version(VP8_COMMON *cm)
{
    /* Start from the version 0 settings, which are also what every
     * reserved/unknown version falls back to. */
    cm->no_lpf = 0;
    cm->filter_type = NORMAL_LOOPFILTER;
    cm->use_bilinear_mc_filter = 0;
    cm->full_pixel = 0;

    /* Apply only the fields that differ for versions 1..3. */
    switch (cm->version)
    {
    case 1:
        cm->filter_type = SIMPLE_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        break;

    case 2:
        cm->no_lpf = 1;
        cm->use_bilinear_mc_filter = 1;
        break;

    case 3:
        cm->no_lpf = 1;
        cm->filter_type = SIMPLE_LOOPFILTER;
        cm->use_bilinear_mc_filter = 1;
        cm->full_pixel = 1;
        break;

    default:
        /* 0, and 4,5,6,7 which are reserved for future use */
        break;
    }
}
|
||||
/* Put a VP8_COMMON into its default state: platform hooks, default mode
 * probabilities, and the default stream-level flags. */
void vp8_create_common(VP8_COMMON *oci)
{
    vp8_machine_specific_config(oci);

    /* Default entropy tables for macroblock and block modes. */
    vp8_init_mbmode_probs(oci);
    vp8_default_bmode_probs(oci->fc.bmode_prob);

    /* Same filter / prediction settings that vp8_setup_version() selects
     * for version 0. */
    oci->no_lpf = 0;
    oci->filter_type = NORMAL_LOOPFILTER;
    oci->use_bilinear_mc_filter = 0;
    oci->full_pixel = 0;

    oci->mb_no_coeff_skip = 1;
    oci->multi_token_partition = ONE_PARTITION;
    oci->clr_type = REG_YUV;
    oci->clamp_type = RECON_CLAMP_REQUIRED;

    /* Reference frame sign bias defaults to all zero. */
    vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));

    /* Buffer-to-buffer copying is disabled by default. */
    oci->copy_buffer_to_gf = 0;
    oci->copy_buffer_to_arf = 0;
}
|
||||
|
||||
/* Tear down a VP8_COMMON: the only resources released here are the frame
 * buffers handled by vp8_de_alloc_frame_buffers(). */
void vp8_remove_common(VP8_COMMON *oci) {
    vp8_de_alloc_frame_buffers(oci);
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/* Allocation and default-initialization entry points for VP8_COMMON. */
#ifndef __INC_ALLOCCOMMON_H
|
||||
#define __INC_ALLOCCOMMON_H
|
||||
|
||||
#include "onyxc_int.h"
|
||||
|
||||
/* Initialize *oci with default probabilities and stream flags. */
void vp8_create_common(VP8_COMMON *oci);
|
||||
/* Release everything owned by *oci (frees the frame buffers). */
void vp8_remove_common(VP8_COMMON *oci);
|
||||
/* Free all frame-sized buffers attached to *oci. */
void vp8_de_alloc_frame_buffers(VP8_COMMON *oci);
|
||||
/* Allocate frame-sized buffers for width x height (rounded up to multiples
 * of 16).  Returns 0 on success, 1 on allocation failure. */
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height);
|
||||
/* Set loop-filter / MC-filter / full-pixel flags from the version field. */
void vp8_setup_version(VP8_COMMON *oci);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,237 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 unsigned short *dst_ptr,
|
||||
; r2 unsigned int src_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;-------------------------------------
|
||||
; The output is stored transposed in the output array to make second-pass filtering easy.
|
||||
|vp8_filter_block2d_bil_first_pass_armv6| PROC
    ; First (horizontal) bilinear pass.  Reads bytes from src_ptr and writes
    ; 16-bit results into dst_ptr, one output column per input row.
    ; Nine registers (36 bytes) are pushed, so the stacked arguments are
    ; found at [sp, #36] (width) and [sp, #40] (vp8_filter).
    ; The width loops run width/4 times, handling 4 pixels per iteration.
    stmdb   sp!, {r4 - r11, lr}

    ldr     r11, [sp, #40]              ; r11 = vp8_filter
    ldr     r4, [sp, #36]               ; r4  = width

    mov     r12, r3                     ; r12 = rows left (height)

    add     r7, r2, r4                  ; prefetch at src + src_pitch + width
    pld     [r0, r7]

    sub     r2, r2, r4                  ; r2 = src_pitch - width (row step after a row)

    ldr     r5, [r11]                   ; r5 = both filter taps packed in one word

    mov     r3, r3, lsl #1              ; r3 = height * 2 bytes ...
    add     r3, r3, #2                  ; ... + 2, so the output stride is 4-byte aligned
                                        ; (the height passed in is really height + 1)

    mov     r11, r1                     ; r11 = start of the current output column

    cmp     r5, #128                    ; packed taps == 128: no filtering needed,
    beq     bil_null_1st_filter         ; take the copy-only path

|bil_height_loop_1st_v6|
    ldrb    r6, [r0]                    ; prime src[0..2] for this row
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    mov     lr, r4, lsr #2              ; 4-in-parallel loop counter

|bil_width_loop_1st_v6|
    ldrb    r9, [r0, #3]
    ldrb    r10, [r0, #4]

    pkhbt   r6, r6, r7, lsl #16         ; src[1] | src[0]
    pkhbt   r7, r7, r8, lsl #16         ; src[2] | src[1]

    smuad   r6, r6, r5                  ; dual 16x16 multiply-add with the taps
    pkhbt   r8, r8, r9, lsl #16         ; src[3] | src[2]
    smuad   r7, r7, r5
    pkhbt   r9, r9, r10, lsl #16        ; src[4] | src[3]

    smuad   r8, r8, r5
    smuad   r9, r9, r5

    add     r0, r0, #4
    subs    lr, lr, #1                  ; flags consumed by the ldrne*/bne below

    add     r6, r6, #0x40               ; + 64, then >> 7 with unsigned saturation
    add     r7, r7, #0x40
    usat    r6, #16, r6, asr #7
    usat    r7, #16, r7, asr #7

    strh    r6, [r1], r3                ; store, stepping by the output stride

    add     r8, r8, #0x40               ; same rounding for outputs 2 and 3
    strh    r7, [r1], r3
    add     r9, r9, #0x40
    usat    r8, #16, r8, asr #7
    usat    r9, #16, r9, asr #7

    strh    r8, [r1], r3

    ldrneb  r6, [r0]                    ; more pixels in this row: reload src[0..2]
    strh    r9, [r1], r3

    ldrneb  r7, [r0, #1]
    ldrneb  r8, [r0, #2]

    bne     bil_width_loop_1st_v6

    add     r0, r0, r2                  ; advance to the next input row
    subs    r12, r12, #1

    add     r9, r2, r4, lsl #1          ; (src_pitch - width) + 2*width
    pld     [r0, r9]                    ; prefetch for the following row

    add     r11, r11, #2                ; next output column (one halfword over)
    mov     r1, r11

    bne     bil_height_loop_1st_v6

    ldmia   sp!, {r4 - r11, pc}

|bil_null_1st_filter|
|bil_height_loop_null_1st|
    mov     lr, r4, lsr #2              ; width / 4 iterations

|bil_width_loop_null_1st|
    ldrb    r6, [r0]                    ; four source bytes
    ldrb    r7, [r0, #1]
    ldrb    r8, [r0, #2]
    ldrb    r9, [r0, #3]

    strh    r6, [r1], r3                ; widen to 16 bits into the intermediate buffer
    add     r0, r0, #4
    strh    r7, [r1], r3
    subs    lr, lr, #1
    strh    r8, [r1], r3
    strh    r9, [r1], r3

    bne     bil_width_loop_null_1st

    subs    r12, r12, #1
    add     r0, r0, r2                  ; next input row
    add     r11, r11, #2                ; next output column
    mov     r1, r11

    bne     bil_height_loop_null_1st

    ldmia   sp!, {r4 - r11, pc}

    ENDP  ; |vp8_filter_block2d_bil_first_pass_armv6|
|
||||
|
||||
|
||||
;---------------------------------
|
||||
; r0 unsigned short *src_ptr,
|
||||
; r1 unsigned char *dst_ptr,
|
||||
; r2 int dst_pitch,
|
||||
; r3 unsigned int height,
|
||||
; stack unsigned int width,
|
||||
; stack const short *vp8_filter
|
||||
;---------------------------------
|
||||
|vp8_filter_block2d_bil_second_pass_armv6| PROC
    ; Second bilinear pass.  Reads the 16-bit intermediate values at src_ptr
    ; and writes 8-bit results to dst_ptr, stepping the destination by
    ; dst_pitch per value.
    ; Stacked arguments sit above the 36 bytes pushed here.
    stmdb   sp!, {r4 - r11, lr}

    ldr     r11, [sp, #40]              ; r11 = vp8_filter
    ldr     r4, [sp, #36]               ; r4  = width

    ldr     r5, [r11]                   ; r5 = both filter taps packed in one word
    mov     r12, r4                     ; outer counter = width (data is transposed)
    mov     r11, r1                     ; r11 = top of the current output column

    cmp     r5, #128                    ; packed taps == 128: copy-only path
    beq     bil_null_2nd_filter

|bil_height_loop_2nd|
    ldr     r6, [r0]                    ; halfwords 0,1
    ldr     r8, [r0, #4]                ; halfwords 2,3
    ldrh    r10, [r0, #8]               ; halfword 4
    mov     lr, r3, lsr #2              ; height / 4 iterations

|bil_width_loop_2nd|
    pkhtb   r7, r6, r8                  ; src[1] | src[2]
    pkhtb   r9, r8, r10                 ; src[3] | src[4]

    smuad   r6, r6, r5                  ; outputs 0 and 2: straight dual multiply
    smuad   r8, r8, r5

    subs    lr, lr, #1                  ; flags consumed by the ldrne*/bne below

    smuadx  r7, r7, r5                  ; outputs 1 and 3: halves are swapped,
    smuadx  r9, r9, r5                  ; hence the exchanging multiply

    add     r0, r0, #8

    add     r6, r6, #0x40               ; + 64, then >> 7 saturated to 8 bits
    add     r7, r7, #0x40
    usat    r6, #8, r6, asr #7
    usat    r7, #8, r7, asr #7
    strb    r6, [r1], r2                ; transposed back while storing

    add     r8, r8, #0x40
    strb    r7, [r1], r2
    add     r9, r9, #0x40
    usat    r8, #8, r8, asr #7
    usat    r9, #8, r9, asr #7
    strb    r8, [r1], r2

    ldrne   r6, [r0]                    ; more to do: reload the next five halfwords
    strb    r9, [r1], r2
    ldrne   r8, [r0, #4]
    ldrneh  r10, [r0, #8]

    bne     bil_width_loop_2nd

    subs    r12, r12, #1
    add     r0, r0, #4                  ; skip the remainder of the source row
    add     r11, r11, #1                ; next output column (one byte over)
    mov     r1, r11

    bne     bil_height_loop_2nd
    ldmia   sp!, {r4 - r11, pc}

|bil_null_2nd_filter|
|bil_height_loop_null_2nd|
    mov     lr, r3, lsr #2              ; height / 4 iterations

|bil_width_loop_null_2nd|
    ldr     r6, [r0], #4                ; two halfwords
    subs    lr, lr, #1
    ldr     r8, [r0], #4                ; two more

    strb    r6, [r1], r2                ; low byte of each halfword is the pixel
    mov     r7, r6, lsr #16
    strb    r7, [r1], r2
    mov     r9, r8, lsr #16
    strb    r8, [r1], r2
    strb    r9, [r1], r2

    bne     bil_width_loop_null_2nd

    subs    r12, r12, #1
    add     r0, r0, #4
    add     r11, r11, #1
    mov     r1, r11

    bne     bil_height_loop_null_2nd

    ldmia   sp!, {r4 - r11, pc}
    ENDP  ; |vp8_filter_block2d_bil_second_pass_armv6|
|
||||
|
||||
END
|
|
@ -0,0 +1,186 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem16x16_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem16x16_v6| PROC
    ; r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
    ; Copies 16 rows of 16 bytes.  The copy width (16/8/4/1 bytes per access)
    ; is picked from the low bits of the source pointer only.
    stmdb   sp!, {r4 - r7}

    pld     [r0, #31]                   ; preload for next 16x16 block

    ands    r4, r0, #15                 ; src 16-byte aligned?
    beq     copy_mem16x16_fast

    ands    r4, r0, #7                  ; src 8-byte aligned?
    beq     copy_mem16x16_8

    ands    r4, r0, #3                  ; src 4-byte aligned?
    beq     copy_mem16x16_4

    ; ---- byte-at-a-time path ----
    ldrb    r4, [r0]                    ; prime bytes 0..3 of the first row
    ldrb    r5, [r0, #1]
    ldrb    r6, [r0, #2]
    ldrb    r7, [r0, #3]

    mov     r12, #16                    ; row counter

copy_mem16x16_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]
    strb    r6, [r2, #2]
    strb    r7, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]
    ldrb    r6, [r0, #6]
    ldrb    r7, [r0, #7]

    subs    r12, r12, #1                ; flags consumed by ldrneb/bne below

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]
    strb    r6, [r2, #6]
    strb    r7, [r2, #7]

    ldrb    r4, [r0, #8]
    ldrb    r5, [r0, #9]
    ldrb    r6, [r0, #10]
    ldrb    r7, [r0, #11]

    strb    r4, [r2, #8]
    strb    r5, [r2, #9]
    strb    r6, [r2, #10]
    strb    r7, [r2, #11]

    ldrb    r4, [r0, #12]
    ldrb    r5, [r0, #13]
    ldrb    r6, [r0, #14]
    ldrb    r7, [r0, #15]

    add     r0, r0, r1                  ; next source row

    strb    r4, [r2, #12]
    strb    r5, [r2, #13]
    strb    r6, [r2, #14]
    strb    r7, [r2, #15]

    add     r2, r2, r3                  ; next destination row

    ldrneb  r4, [r0]                    ; prime bytes 0..3 of the next row
    ldrneb  r5, [r0, #1]
    ldrneb  r6, [r0, #2]
    ldrneb  r7, [r0, #3]

    pld     [r0, #31]                   ; preload for next 16x16 block

    bne     copy_mem16x16_1_loop

    ldmia   sp!, {r4 - r7}
    mov     pc, lr

    ; ---- word-at-a-time path ----
copy_mem16x16_4
    ldr     r4, [r0]                    ; prime the first row
    ldr     r5, [r0, #4]
    ldr     r6, [r0, #8]
    ldr     r7, [r0, #12]

    mov     r12, #16                    ; row counter

copy_mem16x16_4_loop
    subs    r12, r12, #1
    add     r0, r0, r1                  ; next source row

    str     r4, [r2]
    str     r5, [r2, #4]
    str     r6, [r2, #8]
    str     r7, [r2, #12]

    add     r2, r2, r3                  ; next destination row

    ldrne   r4, [r0]
    ldrne   r5, [r0, #4]
    ldrne   r6, [r0, #8]
    ldrne   r7, [r0, #12]

    pld     [r0, #31]                   ; preload for next 16x16 block

    bne     copy_mem16x16_4_loop

    ldmia   sp!, {r4 - r7}
    mov     pc, lr

    ; ---- two 8-byte bursts per row ----
copy_mem16x16_8
    sub     r1, r1, #16                 ; the write-back transfers already advance
    sub     r3, r3, #16                 ; both pointers by 16 per row

    mov     r12, #16                    ; row counter

copy_mem16x16_8_loop
    ldmia   r0!, {r4-r5}
    ldmia   r0!, {r6-r7}

    add     r0, r0, r1                  ; finish the step to the next source row

    stmia   r2!, {r4-r5}
    subs    r12, r12, #1
    stmia   r2!, {r6-r7}

    add     r2, r2, r3                  ; finish the step to the next destination row

    pld     [r0, #31]                   ; preload for next 16x16 block
    bne     copy_mem16x16_8_loop

    ldmia   sp!, {r4 - r7}
    mov     pc, lr

    ; ---- one 16-byte burst per row ----
copy_mem16x16_fast
    mov     r12, #16                    ; row counter

copy_mem16x16_fast_loop
    ldmia   r0, {r4-r7}                 ; no write-back: strides are used unmodified
    add     r0, r0, r1

    subs    r12, r12, #1
    stmia   r2, {r4-r7}
    add     r2, r2, r3

    pld     [r0, #31]                   ; preload for next 16x16 block
    bne     copy_mem16x16_fast_loop

    ldmia   sp!, {r4 - r7}
    mov     pc, lr

    ENDP  ; |vp8_copy_mem16x16_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,128 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x4_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x4_v6| PROC
    ; r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
    ; Copies 4 rows of 8 bytes.  The access width (8/4/1 bytes) is chosen
    ; from the low bits of the source pointer only.
    stmdb   sp!, {r4-r5}

    pld     [r0]                        ; preload the first three source rows
    pld     [r0, r1]
    pld     [r0, r1, lsl #1]

    ands    r4, r0, #7                  ; src 8-byte aligned?
    beq     copy_mem8x4_fast

    ands    r4, r0, #3                  ; src 4-byte aligned?
    beq     copy_mem8x4_4

    ; ---- byte-at-a-time path ----
    ldrb    r4, [r0]                    ; prime bytes 0..1 of the first row
    ldrb    r5, [r0, #1]

    mov     r12, #4                     ; row counter

copy_mem8x4_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]

    ldrb    r4, [r0, #2]
    ldrb    r5, [r0, #3]

    subs    r12, r12, #1                ; flags consumed by ldrneb/bne below

    strb    r4, [r2, #2]
    strb    r5, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]

    ldrb    r4, [r0, #6]
    ldrb    r5, [r0, #7]

    add     r0, r0, r1                  ; next source row

    strb    r4, [r2, #6]
    strb    r5, [r2, #7]

    add     r2, r2, r3                  ; next destination row

    ldrneb  r4, [r0]                    ; prime bytes 0..1 of the next row
    ldrneb  r5, [r0, #1]

    bne     copy_mem8x4_1_loop

    ldmia   sp!, {r4 - r5}
    mov     pc, lr

    ; ---- word-at-a-time path ----
copy_mem8x4_4
    ldr     r4, [r0]                    ; prime the first row
    ldr     r5, [r0, #4]

    mov     r12, #4                     ; row counter

copy_mem8x4_4_loop
    subs    r12, r12, #1
    add     r0, r0, r1                  ; next source row

    str     r4, [r2]
    str     r5, [r2, #4]

    add     r2, r2, r3                  ; next destination row

    ldrne   r4, [r0]
    ldrne   r5, [r0, #4]

    bne     copy_mem8x4_4_loop

    ldmia   sp!, {r4-r5}
    mov     pc, lr

    ; ---- one 8-byte burst per row ----
copy_mem8x4_fast
    mov     r12, #4                     ; row counter

copy_mem8x4_fast_loop
    ldmia   r0, {r4-r5}
    add     r0, r0, r1

    subs    r12, r12, #1
    stmia   r2, {r4-r5}
    add     r2, r2, r3

    bne     copy_mem8x4_fast_loop

    ldmia   sp!, {r4-r5}
    mov     pc, lr

    ENDP  ; |vp8_copy_mem8x4_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,128 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_v6|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void vp8_copy_mem8x8_v6( unsigned char *src, int src_stride, unsigned char *dst, int dst_stride)
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
|
||||
|vp8_copy_mem8x8_v6| PROC
    ; r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
    ; Copies 8 rows of 8 bytes.  The access width (8/4/1 bytes) is chosen
    ; from the low bits of the source pointer only.
    stmdb   sp!, {r4-r5}

    pld     [r0]                        ; preload the first three source rows
    pld     [r0, r1]
    pld     [r0, r1, lsl #1]

    ands    r4, r0, #7                  ; src 8-byte aligned?
    beq     copy_mem8x8_fast

    ands    r4, r0, #3                  ; src 4-byte aligned?
    beq     copy_mem8x8_4

    ; ---- byte-at-a-time path ----
    ldrb    r4, [r0]                    ; prime bytes 0..1 of the first row
    ldrb    r5, [r0, #1]

    mov     r12, #8                     ; row counter

copy_mem8x8_1_loop
    strb    r4, [r2]
    strb    r5, [r2, #1]

    ldrb    r4, [r0, #2]
    ldrb    r5, [r0, #3]

    subs    r12, r12, #1                ; flags consumed by ldrneb/bne below

    strb    r4, [r2, #2]
    strb    r5, [r2, #3]

    ldrb    r4, [r0, #4]
    ldrb    r5, [r0, #5]

    strb    r4, [r2, #4]
    strb    r5, [r2, #5]

    ldrb    r4, [r0, #6]
    ldrb    r5, [r0, #7]

    add     r0, r0, r1                  ; next source row

    strb    r4, [r2, #6]
    strb    r5, [r2, #7]

    add     r2, r2, r3                  ; next destination row

    ldrneb  r4, [r0]                    ; prime bytes 0..1 of the next row
    ldrneb  r5, [r0, #1]

    bne     copy_mem8x8_1_loop

    ldmia   sp!, {r4 - r5}
    mov     pc, lr

    ; ---- word-at-a-time path ----
copy_mem8x8_4
    ldr     r4, [r0]                    ; prime the first row
    ldr     r5, [r0, #4]

    mov     r12, #8                     ; row counter

copy_mem8x8_4_loop
    subs    r12, r12, #1
    add     r0, r0, r1                  ; next source row

    str     r4, [r2]
    str     r5, [r2, #4]

    add     r2, r2, r3                  ; next destination row

    ldrne   r4, [r0]
    ldrne   r5, [r0, #4]

    bne     copy_mem8x8_4_loop

    ldmia   sp!, {r4 - r5}
    mov     pc, lr

    ; ---- one 8-byte burst per row ----
copy_mem8x8_fast
    mov     r12, #8                     ; row counter

copy_mem8x8_fast_loop
    ldmia   r0, {r4-r5}
    add     r0, r0, r1

    subs    r12, r12, #1
    stmia   r2, {r4-r5}
    add     r2, r2, r3

    bne     copy_mem8x8_fast_loop

    ldmia   sp!, {r4-r5}
    mov     pc, lr

    ENDP  ; |vp8_copy_mem8x8_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,70 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
;void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
|
||||
; int pred_stride, unsigned char *dst_ptr,
|
||||
; int dst_stride)
|
||||
; r0 input_dc
|
||||
; r1 pred_ptr
|
||||
; r2 pred_stride
|
||||
; r3 dst_ptr
|
||||
; sp dst_stride
|
||||
|
||||
|vp8_dc_only_idct_add_v6| PROC
    ; Adds the rounded DC term ((input_dc + 4) >> 3) to a 4x4 block of
    ; prediction bytes and stores the result, saturated to 0..255, to dst.
    ; Four registers (16 bytes) are pushed, so dst_stride is at [sp, #16].
    stmdb   sp!, {r4 - r7}

    add     r0, r0, #4                  ; input_dc += 4 (rounding)
    ldr     r12, c0x0000FFFF
    ldr     r4, [r1], r2                ; prediction row 0
    and     r0, r12, r0, asr #3         ; a1 = (input_dc >> 3) kept to 16 bits
    ldr     r6, [r1], r2                ; prediction row 1
    orr     r0, r0, r0, lsl #16         ; a1 | a1 in both halfwords

    ldr     r12, [sp, #16]              ; r12 = dst_stride

    ; Rows 0 and 1: widen bytes to halfwords while adding a1.
    uxtab16 r5, r0, r4                  ; a1+2 | a1+0
    uxtab16 r4, r0, r4, ror #8          ; a1+3 | a1+1
    uxtab16 r7, r0, r6
    uxtab16 r6, r0, r6, ror #8
    usat16  r5, #8, r5                  ; clamp every lane to 8 bits
    usat16  r4, #8, r4
    usat16  r7, #8, r7
    usat16  r6, #8, r6
    orr     r5, r5, r4, lsl #8          ; re-interleave into four bytes
    orr     r7, r7, r6, lsl #8
    ldr     r4, [r1], r2                ; prediction row 2
    str     r5, [r3], r12
    ldr     r6, [r1]                    ; prediction row 3
    str     r7, [r3], r12

    ; Rows 2 and 3: same sequence.
    uxtab16 r5, r0, r4
    uxtab16 r4, r0, r4, ror #8
    uxtab16 r7, r0, r6
    uxtab16 r6, r0, r6, ror #8
    usat16  r5, #8, r5
    usat16  r4, #8, r4
    usat16  r7, #8, r7
    usat16  r6, #8, r6
    orr     r5, r5, r4, lsl #8
    orr     r7, r7, r6, lsl #8
    str     r5, [r3], r12
    str     r7, [r3]

    ldmia   sp!, {r4 - r7}
    bx      lr

    ENDP  ; |vp8_dc_only_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
c0x0000FFFF DCD 0x0000FFFF
|
||||
END
|
|
@ -0,0 +1,190 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_dequant_idct_add_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
;void vp8_dequant_idct_add_v6(short *input, short *dq,
|
||||
; unsigned char *dest, int stride)
|
||||
; r0 = q
|
||||
; r1 = dq
|
||||
; r2 = dst
|
||||
; r3 = stride
|
||||
|
||||
|vp8_dequant_idct_add_v6| PROC
    ; Four stages, all on the 16-coefficient block at r0:
    ;   1. multiply each coefficient by its dq entry, in place
    ;   2. first transform pass (two iterations), in place
    ;   3. second pass: round (+4, >>3), add to the bytes at dst,
    ;      saturate to 8 bits and store two rows per iteration
    ;   4. clear the 32-byte coefficient block
    ; stride (r3) is parked in a 4-byte stack slot because r3 is reused.
    stmdb   sp!, {r4-r11, lr}

    ldr     r4, [r0]                    ; first coefficient pair
    ldr     r5, [r1], #4                ; first dq pair

    sub     sp, sp, #4
    str     r3, [sp]                    ; save stride

    mov     r12, #4                     ; 4 iterations x 4 coefficients

    ; ---- stage 1: dequantize in place ----
vp8_dequant_add_loop
    smulbb  r6, r4, r5                  ; low halves
    smultt  r7, r4, r5                  ; high halves

    ldr     r4, [r0, #4]                ; next pair, loaded before r0 moves
    ldr     r5, [r1], #4

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    smulbb  r6, r4, r5
    smultt  r7, r4, r5

    subs    r12, r12, #1                ; flags consumed by ldrne/bne below

    ldrne   r4, [r0, #4]
    ldrne   r5, [r1], #4

    strh    r6, [r0], #2
    strh    r7, [r0], #2

    bne     vp8_dequant_add_loop

    sub     r0, r0, #32                 ; rewind to the start of the block
    mov     r1, r0                      ; r1 = write pointer for stage 2

    ; ---- stage 2: short_idct4x4llm_v6_dual, first pass ----
    ldr     r3, cospi8sqrt2minus1
    ldr     r4, sinpi8sqrt2
    ldr     r6, [r0, #8]
    mov     r5, #2                      ; two iterations
vp8_dequant_idct_loop1_v6
    ldr     r12, [r0, #24]
    ldr     r14, [r0, #16]
    smulwt  r9, r3, r6
    smulwb  r7, r3, r6
    smulwt  r10, r4, r6
    smulwb  r8, r4, r6
    pkhbt   r7, r7, r9, lsl #16
    smulwt  r11, r3, r12
    pkhbt   r8, r8, r10, lsl #16
    uadd16  r6, r6, r7
    smulwt  r7, r4, r12
    smulwb  r9, r3, r12
    smulwb  r10, r4, r12
    subs    r5, r5, #1                  ; flags consumed by ldrne/bne below
    pkhbt   r9, r9, r11, lsl #16
    ldr     r11, [r0], #4
    pkhbt   r10, r10, r7, lsl #16
    uadd16  r7, r12, r9
    usub16  r7, r8, r7
    uadd16  r6, r6, r10
    uadd16  r10, r11, r14
    usub16  r8, r11, r14
    uadd16  r9, r10, r6
    usub16  r10, r10, r6
    uadd16  r6, r8, r7
    usub16  r7, r8, r7
    str     r6, [r1, #8]
    ldrne   r6, [r0, #8]
    str     r7, [r1, #16]
    str     r10, [r1, #24]
    str     r9, [r1], #4
    bne     vp8_dequant_idct_loop1_v6

    ; ---- stage 3: second pass, add to dst ----
    mov     r5, #2                      ; two iterations, two output rows each
    sub     r0, r1, #8                  ; back to the start of the block
vp8_dequant_idct_loop2_v6
    ldr     r6, [r0], #4
    ldr     r7, [r0], #4
    ldr     r8, [r0], #4
    ldr     r9, [r0], #4
    smulwt  r1, r3, r6
    smulwt  r12, r4, r6
    smulwt  lr, r3, r8
    smulwt  r10, r4, r8
    pkhbt   r11, r8, r6, lsl #16
    pkhbt   r1, lr, r1, lsl #16
    pkhbt   r12, r10, r12, lsl #16
    pkhtb   r6, r6, r8, asr #16
    uadd16  r6, r1, r6
    pkhbt   lr, r9, r7, lsl #16
    uadd16  r10, r11, lr
    usub16  lr, r11, lr
    pkhtb   r8, r7, r9, asr #16
    subs    r5, r5, #1                  ; flags consumed by the bne at loop end
    smulwt  r1, r3, r8
    smulwb  r7, r3, r8
    smulwt  r11, r4, r8
    smulwb  r9, r4, r8
    pkhbt   r1, r7, r1, lsl #16
    uadd16  r8, r1, r8
    pkhbt   r11, r9, r11, lsl #16
    usub16  r1, r12, r8
    uadd16  r8, r11, r6
    ldr     r9, c0x00040004             ; +4 rounding term for both halfwords
    ldr     r12, [sp]                   ; get stride from stack
    uadd16  r6, r10, r8
    usub16  r7, r10, r8
    uadd16  r7, r7, r9
    uadd16  r6, r6, r9
    uadd16  r10, r14, r1
    usub16  r1, r14, r1
    uadd16  r10, r10, r9
    uadd16  r1, r1, r9
    ldr     r11, [r2]                   ; four existing bytes of the first row
    mov     r8, r7, asr #3
    pkhtb   r9, r8, r10, asr #19
    mov     r8, r1, asr #3
    pkhtb   r8, r8, r6, asr #19
    uxtb16  lr, r11, ror #8
    qadd16  r9, r9, lr
    uxtb16  lr, r11
    qadd16  r8, r8, lr
    usat16  r9, #8, r9                  ; clamp to 8 bits
    usat16  r8, #8, r8
    orr     r9, r8, r9, lsl #8          ; re-interleave into four bytes
    ldr     r11, [r2, r12]              ; four existing bytes of the second row
    mov     r7, r7, lsl #16
    mov     r1, r1, lsl #16
    mov     r10, r10, lsl #16
    mov     r6, r6, lsl #16
    mov     r7, r7, asr #3
    pkhtb   r7, r7, r10, asr #19
    mov     r1, r1, asr #3
    pkhtb   r1, r1, r6, asr #19
    uxtb16  r8, r11, ror #8
    qadd16  r7, r7, r8
    uxtb16  r8, r11
    qadd16  r1, r1, r8
    usat16  r7, #8, r7
    usat16  r1, #8, r1
    orr     r1, r1, r7, lsl #8
    str     r9, [r2], r12               ; store output to dst
    str     r1, [r2], r12               ; store output to dst
    bne     vp8_dequant_idct_loop2_v6

    ; ---- stage 4: vpx_memset(block, 0, 32) ----
    sub     r0, r0, #32
    add     sp, sp, #4                  ; drop the saved-stride slot

    mov     r12, #0
    str     r12, [r0]
    str     r12, [r0, #4]
    str     r12, [r0, #8]
    str     r12, [r0, #12]
    str     r12, [r0, #16]
    str     r12, [r0, #20]
    str     r12, [r0, #24]
    str     r12, [r0, #28]

    ldmia   sp!, {r4 - r11, pc}
    ENDP  ; |vp8_dequant_idct_add_v6|
|
||||
|
||||
; Constant Pool
|
||||
cospi8sqrt2minus1 DCD 0x00004E7B
|
||||
sinpi8sqrt2 DCD 0x00008A8C
|
||||
c0x00040004 DCD 0x00040004
|
||||
|
||||
END
|
|
@ -0,0 +1,69 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dequantize_b_loop_v6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-----------------------------------------------------------------------
; void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
;
;   r0  short *Q      first factor array  (16 halfwords read)
;   r1  short *DQC    second factor array (16 halfwords read)
;   r2  short *DQ     product array       (16 halfwords written)
;
; Stores the low 16 bits of Q[i] * DQC[i] into DQ[i] for i = 0..15.
; The loop body is executed twice and produces eight products per
; pass; loads for the next group are interleaved with the stores of
; the current one.  The lane order used below (bottom halfword is the
; lower-addressed element) corresponds to little-endian data.
;
; Scratch: r3-r9, r12, lr (r4-r9 and lr are saved/restored here).
;-----------------------------------------------------------------------
|vp8_dequantize_b_loop_v6| PROC
    stmfd   sp!, {r4-r9, lr}

    ; Prime the loop with elements 0..3 of both inputs.
    ldr     r3, [r0]                    ; Q[1]   | Q[0]
    ldr     r4, [r1]                    ; DQC[1] | DQC[0]
    ldr     r5, [r0, #4]                ; Q[3]   | Q[2]
    ldr     r6, [r1, #4]                ; DQC[3] | DQC[2]

    mov     r12, #2                     ; passes remaining (8 products each)

dequant_b_pass
    ; Products for elements 0..3 of this group.
    smulbb  r7, r3, r4
    smultt  r8, r3, r4
    smulbb  r9, r5, r6
    smultt  lr, r5, r6

    ; Fetch elements 4..7 of this group.
    ldr     r3, [r0, #8]
    ldr     r4, [r1, #8]
    ldr     r5, [r0, #12]
    ldr     r6, [r1, #12]

    ; Write products 0..3 while computing products 4..7.
    strh    r7, [r2], #2
    smulbb  r7, r3, r4
    strh    r8, [r2], #2
    smultt  r8, r3, r4
    strh    r9, [r2], #2
    smulbb  r9, r5, r6
    strh    lr, [r2], #2
    smultt  lr, r5, r6

    subs    r12, r12, #1                ; flags stay live until the bne below

    add     r0, r0, #16                 ; step both inputs to the next group
    add     r1, r1, #16

    ; Write products 4..7; if another pass follows, reload its first
    ; four elements in between the stores.
    ldrne   r3, [r0]
    strh    r7, [r2], #2
    ldrne   r4, [r1]
    strh    r8, [r2], #2
    ldrne   r5, [r0, #4]
    strh    r9, [r2], #2
    ldrne   r6, [r1, #4]
    strh    lr, [r2], #2

    bne     dequant_b_pass

    ldmfd   sp!, {r4-r9, pc}
    ENDP                                ; |vp8_dequantize_b_loop_v6|
|
||||
|
||||
END
|
|
@ -0,0 +1,624 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_filter_block2d_first_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_16x16_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_8x8_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter4_block2d_second_pass_armv6|
|
||||
EXPORT |vp8_filter_block2d_first_pass_only_armv6|
|
||||
EXPORT |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_first_pass_armv6
;
;   r0        unsigned char *src_ptr
;   r1        short         *output_ptr
;   r2        unsigned int   src_pixels_per_line
;   r3        unsigned int   output_width
;   [sp, #0]  unsigned int   output_height   (on entry)
;   [sp, #4]  const short   *vp8_filter      (on entry)
;
; Applies a 6-tap FIR filter along each source row.  The input is one
; byte per pixel; each result is rounded (+0x40), shifted right by 7,
; clamped to 0..255 and written as a 2-byte value.  Two neighbouring
; pixels are produced per inner-loop pass.
;
; Output layout: horizontally adjacent results are stored
; (2 * output_width + 16) bytes apart and each new source row starts
; 2 bytes further on, i.e. the output is transposed for the second
; pass.
;
; The six taps are read as three packed words:
;   r4 = tap1 | tap0,  r5 = tap3 | tap2,  r6 = tap5 | tap4
;-----------------------------------------------------------------------
|vp8_filter_block2d_first_pass_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #40]              ; vp8_filter (9 registers pushed)
    ldr     r7, [sp, #36]               ; output_height

    sub     r2, r2, r3                  ; r2 = stride - width: the inner loop
                                        ; already walks r0 across the row

    mov     r3, r3, lsl #1              ; width in bytes of short output
    add     r12, r3, #16                ; r12 = output step between columns
    sub     sp, sp, #4                  ; one word of scratch for the dst pointer

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    str     r1, [sp]                    ; remember start of current output line
    mov     r7, r7, lsl #16             ; rows remaining live in r7[31:16]

|height_loop_1st_6|
    ldrb    r8, [r0, #-2]               ; first three samples of the row
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2          ; r7[7:0] = width / 2 inner passes

|width_loop_1st_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16        ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                  ; taps 0,1 for the first pixel
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10
    smuad   r8, r8, r4                  ; taps 0,1 for the second pixel
    pkhbt   r11, r11, r9, lsl #16       ; r9 | r11

    smlad   lr, r10, r5, lr             ; taps 2,3
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                  ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10

    smlad   lr, r9, r6, lr              ; taps 4,5
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff              ; Z set when the row is finished

    add     lr, lr, #0x40               ; round, shift by 7, clamp to 8 bits
    ldrneb  r8, [r0, #-2]               ; start loading the next pixel pair
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12               ; store transposed: step by r12 so the
                                        ; second pass reads contiguous data
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_6

    ldr     r1, [sp]                    ; back to the start of this output line
    subs    r7, r7, #0x10000            ; one row done
    add     r0, r0, r2                  ; advance src to the next row

    add     r1, r1, #2                  ; next output line starts one short on
    str     r1, [sp]

    bne     height_loop_1st_6

    add     sp, sp, #4
    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_first_pass_16x16_armv6
;
; Same register/stack interface and same arithmetic as
; vp8_filter_block2d_first_pass_armv6:
;   r0 src_ptr, r1 output_ptr (shorts), r2 src_pixels_per_line,
;   r3 output_width, stack: output_height, vp8_filter.
;
; The only difference is that this variant issues a PLD for the
; following source row, using offsets chosen for a 16-pixel-wide block.
;-----------------------------------------------------------------------
|vp8_filter_block2d_first_pass_16x16_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #40]              ; vp8_filter (9 registers pushed)
    ldr     r7, [sp, #36]               ; output_height

    add     r4, r2, #18                 ; prefetch at src + stride + 18
    pld     [r0, r4]

    sub     r2, r2, r3                  ; r2 = stride - width: the inner loop
                                        ; already walks r0 across the row

    mov     r3, r3, lsl #1              ; width in bytes of short output
    add     r12, r3, #16                ; r12 = output step between columns
    sub     sp, sp, #4                  ; one word of scratch for the dst pointer

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    str     r1, [sp]                    ; remember start of current output line
    mov     r7, r7, lsl #16             ; rows remaining live in r7[31:16]

|height_loop_1st_16_6|
    ldrb    r8, [r0, #-2]               ; first three samples of the row
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2          ; r7[7:0] = width / 2 inner passes

|width_loop_1st_16_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16        ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                  ; taps 0,1 for the first pixel
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10
    smuad   r8, r8, r4                  ; taps 0,1 for the second pixel
    pkhbt   r11, r11, r9, lsl #16       ; r9 | r11

    smlad   lr, r10, r5, lr             ; taps 2,3
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                  ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10

    smlad   lr, r9, r6, lr              ; taps 4,5
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff              ; Z set when the row is finished

    add     lr, lr, #0x40               ; round, shift by 7, clamp to 8 bits
    ldrneb  r8, [r0, #-2]               ; start loading the next pixel pair
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12               ; store transposed: step by r12 so the
                                        ; second pass reads contiguous data
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_16_6

    ldr     r1, [sp]                    ; back to the start of this output line
    subs    r7, r7, #0x10000            ; one row done
    add     r0, r0, r2                  ; advance src to the next row

    add     r11, r2, #34                ; (stride - width) + 34, which equals
                                        ; stride + 18 when width is 16
    pld     [r0, r11]                   ; prefetch the row after the next one

    add     r1, r1, #2                  ; next output line starts one short on
    str     r1, [sp]

    bne     height_loop_1st_16_6

    add     sp, sp, #4
    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_first_pass_8x8_armv6
;
; Same register/stack interface and same arithmetic as
; vp8_filter_block2d_first_pass_armv6:
;   r0 src_ptr, r1 output_ptr (shorts), r2 src_pixels_per_line,
;   r3 output_width, stack: output_height, vp8_filter.
;
; The only difference is that this variant issues a PLD for the
; following source row, using offsets chosen for an 8-pixel-wide block.
;-----------------------------------------------------------------------
|vp8_filter_block2d_first_pass_8x8_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #40]              ; vp8_filter (9 registers pushed)
    ldr     r7, [sp, #36]               ; output_height

    add     r4, r2, #10                 ; prefetch at src + stride + 10
    pld     [r0, r4]

    sub     r2, r2, r3                  ; r2 = stride - width: the inner loop
                                        ; already walks r0 across the row

    mov     r3, r3, lsl #1              ; width in bytes of short output
    add     r12, r3, #16                ; r12 = output step between columns
    sub     sp, sp, #4                  ; one word of scratch for the dst pointer

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    str     r1, [sp]                    ; remember start of current output line
    mov     r7, r7, lsl #16             ; rows remaining live in r7[31:16]

|height_loop_1st_8_6|
    ldrb    r8, [r0, #-2]               ; first three samples of the row
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2
    orr     r7, r7, r3, lsr #2          ; r7[7:0] = width / 2 inner passes

|width_loop_1st_8_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16        ; r10 | r9

    ldrb    r9, [r0]

    smuad   lr, lr, r4                  ; taps 0,1 for the first pixel
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10
    smuad   r8, r8, r4                  ; taps 0,1 for the second pixel
    pkhbt   r11, r11, r9, lsl #16       ; r9 | r11

    smlad   lr, r10, r5, lr             ; taps 2,3
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    sub     r7, r7, #1                  ; one inner pass done

    pkhbt   r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10

    smlad   lr, r9, r6, lr              ; taps 4,5
    smlad   r11, r10, r6, r8

    ands    r10, r7, #0xff              ; Z set when the row is finished

    add     lr, lr, #0x40               ; round, shift by 7, clamp to 8 bits
    ldrneb  r8, [r0, #-2]               ; start loading the next pixel pair
    usat    lr, #8, lr, asr #7
    add     r11, r11, #0x40
    ldrneb  r9, [r0, #-1]
    usat    r11, #8, r11, asr #7

    strh    lr, [r1], r12               ; store transposed: step by r12 so the
                                        ; second pass reads contiguous data
    ldrneb  r10, [r0], #2
    strh    r11, [r1], r12

    bne     width_loop_1st_8_6

    ldr     r1, [sp]                    ; back to the start of this output line
    subs    r7, r7, #0x10000            ; one row done
    add     r0, r0, r2                  ; advance src to the next row

    add     r11, r2, #18                ; (stride - width) + 18, which equals
                                        ; stride + 10 when width is 8
    pld     [r0, r11]                   ; prefetch the row after the next one

    add     r1, r1, #2                  ; next output line starts one short on
    str     r1, [sp]

    bne     height_loop_1st_8_6

    add     sp, sp, #4
    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_second_pass_armv6
;
;   r0        short         *src_ptr
;   r1        unsigned char *output_ptr
;   r2        unsigned int   output_pitch
;   r3        unsigned int   cnt
;   [sp, #0]  const short   *vp8_filter      (on entry)
;
; Applies a 6-tap filter along each line of 16-bit input, producing
; two outputs per inner-loop pass.  Each result is rounded (+0x40),
; shifted right by 7, clamped to 0..255 and stored as a byte.
; Consecutive results of one input line are written output_pitch bytes
; apart and each new input line starts one byte further on, so the
; data is transposed on the way out.  Input lines are (2 * cnt + 16)
; bytes apart.
;
; Tap registers:
;   r4  = tap1 | tap0     r5  = tap3 | tap2     r6 = tap5 | tap4
;   r12 = tap1 | tap2     r11 = tap3 | tap4     (repacked for the
;                                                odd-phase output)
;-----------------------------------------------------------------------
|vp8_filter_block2d_second_pass_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #36]              ; vp8_filter (9 registers pushed)
    sub     sp, sp, #4                  ; one word of scratch for the dst pointer
    mov     r7, r3, lsl #16             ; lines remaining live in r7[31:16]
    str     r1, [sp]                    ; remember start of current output column

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    pkhbt   r12, r5, r4                 ; top of r4, bottom of r5
    pkhbt   r11, r6, r5                 ; top of r5, bottom of r6

    sub     r0, r0, #4                  ; start two samples before src_ptr

|height_loop_2nd|
    ldr     r8, [r0]                    ; first two sample pairs of the line
    ldr     r9, [r0, #4]
    orr     r7, r7, r3, lsr #1          ; r7[7:0] = cnt / 2 inner passes

|width_loop_2nd|
    smuad   lr, r4, r8                  ; even output: taps 0,1
    sub     r7, r7, #1
    smulbt  r8, r4, r8                  ; odd output: tap 0

    ldr     r10, [r0, #8]

    smlad   lr, r5, r9, lr              ; even: taps 2,3
    smladx  r8, r12, r9, r8             ; odd:  taps 1,2

    ldrh    r9, [r0, #12]

    smlad   lr, r6, r10, lr             ; even: taps 4,5
    smladx  r8, r11, r10, r8            ; odd:  taps 3,4

    add     r0, r0, #4                  ; advance two samples
    smlatb  r10, r6, r9, r8             ; odd:  tap 5

    add     lr, lr, #0x40               ; round, shift by 7, clamp to 8 bits
    ands    r8, r7, #0xff               ; Z set when the line is finished
    usat    lr, #8, lr, asr #7
    add     r10, r10, #0x40
    strb    lr, [r1], r2                ; stepping by the pitch transposes back
    usat    r10, #8, r10, asr #7

    ldrne   r8, [r0]                    ; preload for the next pass
    ldrne   r9, [r0, #4]
    strb    r10, [r1], r2

    bne     width_loop_2nd

    ldr     r1, [sp]                    ; back to the top of this output column
    subs    r7, r7, #0x10000            ; one line done
    add     r0, r0, #16                 ; skip padding to the next input line
    add     r1, r1, #1                  ; next output column
    str     r1, [sp]

    bne     height_loop_2nd

    add     sp, sp, #4
    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter4_block2d_second_pass_armv6
;
;   r0        short         *src_ptr
;   r1        unsigned char *output_ptr
;   r2        unsigned int   output_pitch
;   r3        unsigned int   cnt
;   [sp, #0]  const short   *vp8_filter      (on entry)
;
; Second-pass filter that only uses the four middle taps (tap1..tap4)
; of the six-entry filter table.  Two outputs are produced per
; inner-loop pass; the +0x40 rounding term is supplied as the
; accumulator of the first multiply-accumulate, then each sum is
; shifted right by 7 and clamped to 0..255.  Results are stored
; output_pitch bytes apart, one output column per input line.
;
;   r12 = tap1 | tap2     r11 = tap3 | tap4     r4 = 0x40 (rounding)
;   lr  = output_ptr + cnt, used to derive the current output column
;-----------------------------------------------------------------------
|vp8_filter4_block2d_second_pass_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #36]              ; vp8_filter (9 registers pushed)
    mov     r7, r3, lsl #16             ; lines remaining live in r7[31:16]

    ldr     r4, [r11]                   ; tap1 | tap0
    add     lr, r1, r3                  ; one past the last output column
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    pkhbt   r12, r5, r4                 ; top of r4, bottom of r5
    pkhbt   r11, r6, r5                 ; top of r5, bottom of r6
    mov     r4, #0x40                   ; r4 is reused as the rounding constant

|height_loop_2nd_4|
    ldrd    r8, [r0, #-4]               ; r8/r9 = four samples starting two
                                        ; before src
    orr     r7, r7, r3, lsr #1          ; r7[7:0] = cnt / 2 inner passes

|width_loop_2nd_4|
    ldr     r10, [r0, #4]!              ; next sample pair, r0 advances
    smladx  r6, r9, r12, r4             ; odd output:  taps 1,2 + rounding
    pkhbt   r8, r9, r8
    smlad   r5, r8, r12, r4             ; even output: taps 1,2 + rounding
    pkhbt   r8, r10, r9
    smladx  r6, r10, r11, r6            ; odd output:  taps 3,4
    sub     r7, r7, #1
    smlad   r5, r8, r11, r5             ; even output: taps 3,4

    mov     r8, r9                      ; slide the sample window along
    mov     r9, r10

    usat    r6, #8, r6, asr #7          ; shift by 7 and clamp to 8 bits
    usat    r5, #8, r5, asr #7

    strb    r5, [r1], r2                ; stepping by the pitch transposes back
    tst     r7, #0xff                   ; Z set when the line is finished
    strb    r6, [r1], r2

    bne     width_loop_2nd_4

    subs    r7, r7, #0x10000            ; one line done
    add     r0, r0, #16                 ; skip padding to the next input line
    sub     r1, lr, r7, lsr #16         ; dst = end - lines remaining

    bne     height_loop_2nd_4

    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_first_pass_only_armv6
;
;   r0        unsigned char *src_ptr
;   r1        unsigned char *output_ptr
;   r2        unsigned int   src_pixels_per_line
;   r3        unsigned int   cnt             (used as width and height)
;   [sp, #0]  unsigned int   output_pitch    (on entry)
;   [sp, #4]  const short   *vp8_filter      (on entry)
;
; Applies the 6-tap filter along each source row and writes 8-bit
; results directly, in row order (no transpose).  The +0x40 rounding
; term is folded into the first multiply-accumulate via r2, then each
; sum is shifted right by 7 and clamped to 0..255.
;
; Stack scratch after the prologue:
;   [sp, #0]  output_pitch - cnt         (dst row adjustment)
;   [sp, #4]  src_pixels_per_line - cnt  (src row adjustment)
;-----------------------------------------------------------------------
|vp8_filter_block2d_first_pass_only_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    add     r7, r2, r3                  ; prefetch at src + stride + cnt + 2
    add     r7, r7, #2
    pld     [r0, r7]

    ldr     r4, [sp, #36]               ; output_pitch (9 registers pushed)
    ldr     r11, [sp, #40]              ; vp8_filter
    sub     sp, sp, #8                  ; two words of scratch

    mov     r7, r3                      ; rows remaining
    sub     r2, r2, r3                  ; r2 = stride - cnt: the inner loop
                                        ; already walks r0 across the row

    sub     r4, r4, r3                  ; likewise for the destination
    str     r4, [sp]
    str     r2, [sp, #4]

    mov     r2, #0x40                   ; r2 is reused as the rounding constant

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

|height_loop_1st_only_6|
    ldrb    r8, [r0, #-2]               ; first three samples of the row
    ldrb    r9, [r0, #-1]
    ldrb    r10, [r0], #2

    mov     r12, r3, lsr #1             ; cnt / 2 inner passes

|width_loop_1st_only_6|
    ldrb    r11, [r0, #-1]

    pkhbt   lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16        ; r10 | r9

    ldrb    r9, [r0]

    smlad   lr, lr, r4, r2              ; taps 0,1 + rounding, first pixel
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10
    smlad   r8, r8, r4, r2              ; taps 0,1 + rounding, second pixel
    pkhbt   r11, r11, r9, lsl #16       ; r9 | r11

    smlad   lr, r10, r5, lr             ; taps 2,3
    ldrb    r10, [r0, #1]
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0, #2]

    subs    r12, r12, #1                ; flags stay live until the bne below

    pkhbt   r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10

    smlad   lr, r9, r6, lr              ; taps 4,5
    smlad   r10, r10, r6, r8

    ldrneb  r8, [r0, #-2]               ; start loading the next pixel pair
    usat    lr, #8, lr, asr #7          ; shift by 7 and clamp to 8 bits
    strb    lr, [r1], #1
    usat    r10, #8, r10, asr #7

    ldrneb  r9, [r0, #-1]
    strb    r10, [r1], #1
    ldrneb  r10, [r0], #2

    bne     width_loop_1st_only_6

    ldr     lr, [sp]                    ; dst row adjustment (pitch - cnt)
    ldr     r12, [sp, #4]               ; src row adjustment (stride - cnt)
    subs    r7, r7, #1                  ; one row done
    add     r0, r0, r12                 ; advance src to the next row

    add     r11, r12, r3                ; prefetch the row after the next one
    add     r11, r11, #2
    pld     [r0, r11]

    add     r1, r1, lr                  ; advance dst to the next row

    bne     height_loop_1st_only_6

    add     sp, sp, #8
    ldmfd   sp!, {r4-r11, pc}
    ENDP                                ; |vp8_filter_block2d_first_pass_only_armv6|
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_filter_block2d_second_pass_only_armv6
;
;   r0        unsigned char *src_ptr
;   r1        unsigned char *output_ptr
;   r2        unsigned int   src_pixels_per_line
;   r3        unsigned int   cnt             (used as width and height)
;   [sp, #0]  unsigned int   output_pitch    (on entry)
;   [sp, #4]  const short   *vp8_filter      (on entry)
;
; Applies the 6-tap filter down each column of 8-bit source pixels.
; The outer loop selects a column; the inner loop walks down it,
; producing two vertically adjacent outputs per pass.  Each sum is
; rounded (+0x40), shifted right by 7, clamped to 0..255 and stored
; output_pitch bytes below the previous one.
;
; Stack scratch after the prologue:
;   [sp, #0]  top of the current source column (two rows above src_ptr)
;   [sp, #4]  top of the current output column
;-----------------------------------------------------------------------
|vp8_filter_block2d_second_pass_only_armv6| PROC
    stmfd   sp!, {r4-r11, lr}

    ldr     r11, [sp, #40]              ; vp8_filter (9 registers pushed)
    ldr     r12, [sp, #36]              ; output_pitch

    mov     r7, r3, lsl #16             ; columns remaining live in r7[31:16]
    sub     r0, r0, r2, lsl #1          ; the filter window starts two rows up

    sub     sp, sp, #8                  ; two words of scratch

    ldr     r4, [r11]                   ; tap1 | tap0
    ldr     r5, [r11, #4]               ; tap3 | tap2
    ldr     r6, [r11, #8]               ; tap5 | tap4

    str     r0, [sp]                    ; remember the column tops
    str     r1, [sp, #4]

|width_loop_2nd_only_6|
    ldrb    r8, [r0], r2                ; first three samples of the column
    orr     r7, r7, r3                  ; r7[7:0] = cnt rows (2 per pass)
    ldrb    r9, [r0], r2
    ldrb    r10, [r0], r2

|height_loop_2nd_only_6|
    ; Filter down the current column; the outer loop then moves on to
    ; the next column.
    ldrb    r11, [r0], r2

    pkhbt   lr, r8, r9, lsl #16         ; r9 | r8
    pkhbt   r8, r9, r10, lsl #16        ; r10 | r9

    ldrb    r9, [r0], r2

    smuad   lr, lr, r4                  ; taps 0,1 for the upper output
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10
    smuad   r8, r8, r4                  ; taps 0,1 for the lower output
    pkhbt   r11, r11, r9, lsl #16       ; r9 | r11

    smlad   lr, r10, r5, lr             ; taps 2,3
    ldrb    r10, [r0], r2
    smlad   r8, r11, r5, r8
    ldrb    r11, [r0]

    sub     r7, r7, #2                  ; two rows done
    sub     r0, r0, r2, lsl #2          ; rewind four rows for the next pass

    pkhbt   r9, r9, r10, lsl #16        ; r10 | r9
    pkhbt   r10, r10, r11, lsl #16      ; r11 | r10

    smlad   lr, r9, r6, lr              ; taps 4,5
    smlad   r10, r10, r6, r8

    ands    r9, r7, #0xff               ; Z set when the column is finished

    add     lr, lr, #0x40               ; round, shift by 7, clamp to 8 bits
    ldrneb  r8, [r0], r2                ; start loading the next sample window
    usat    lr, #8, lr, asr #7
    add     r10, r10, #0x40
    strb    lr, [r1], r12               ; results go down the output column
    usat    r10, #8, r10, asr #7

    ldrneb  r9, [r0], r2
    strb    r10, [r1], r12
    ldrneb  r10, [r0], r2

    bne     height_loop_2nd_only_6

    ldr     r0, [sp]                    ; back to the column tops
    ldr     r1, [sp, #4]
    subs    r7, r7, #0x10000            ; one column done
    add     r0, r0, #1                  ; step src and dst one column right
    str     r0, [sp]
    add     r1, r1, #1
    str     r1, [sp, #4]

    bne     width_loop_2nd_only_6

    add     sp, sp, #8

    ldmfd   sp!, {r4-r11, pc}
    ENDP                                ; |vp8_filter_block2d_second_pass_only_armv6|
|
||||
|
||||
END
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
|
||||
|
||||
/*
 * Walks a 4x4 grid of 4x4 blocks (four rows of four blocks) and adds each
 * block's inverse transform into dst.
 *
 *   q      coefficients, 16 shorts per block, blocks stored consecutively
 *   dq     dequantization factors, shared by every block
 *   dst    top-left destination pixel of the grid
 *   stride distance in bytes between destination rows
 *   eobs   one entry per block, in the same order as q
 *
 * Per block: eobs > 1 runs the full dequantize + IDCT + add routine,
 * eobs == 1 runs the DC-only routine on q[0] * dq[0] and then clears the
 * first int-sized word of the block's coefficients, and any other value
 * leaves the block untouched.
 */
void vp8_dequant_idct_add_y_block_v6(short *q, short *dq,
                                     unsigned char *dst,
                                     int stride, char *eobs)
{
    int row;

    for (row = 0; row < 4; row++)
    {
        int col;

        for (col = 0; col < 4; col++)
        {
            short *blk_q = q + 16 * col;
            unsigned char *blk_dst = dst + 4 * col;

            if (eobs[col] > 1)
            {
                vp8_dequant_idct_add_v6 (blk_q, dq, blk_dst, stride);
            }
            else if (eobs[col] == 1)
            {
                vp8_dc_only_idct_add_v6 (blk_q[0]*dq[0], blk_dst, stride,
                                         blk_dst, stride);
                ((int *)blk_q)[0] = 0;
            }
        }

        /* Next row of blocks: 4 blocks of coefficients, 4 pixel rows. */
        q    += 64;
        dst  += 4*stride;
        eobs += 4;
    }
}
|
||||
|
||||
/*
 * Adds the inverse transform of a 2x2 grid of 4x4 blocks into dstu, then
 * does the same for dstv.  q and eobs are consumed continuously: the four
 * blocks for dstu come first, followed by the four blocks for dstv.
 *
 *   q      coefficients, 16 shorts per block
 *   dq     dequantization factors, shared by every block
 *   dstu   top-left destination pixel of the first plane
 *   dstv   top-left destination pixel of the second plane
 *   stride distance in bytes between destination rows (both planes)
 *   eobs   one entry per block, in the same order as q
 *
 * Per block: eobs > 1 runs the full dequantize + IDCT + add routine,
 * eobs == 1 runs the DC-only routine on q[0] * dq[0] and then clears the
 * first int-sized word of the block's coefficients, and any other value
 * leaves the block untouched.
 */
void vp8_dequant_idct_add_uv_block_v6(short *q, short *dq,
                                      unsigned char *dstu,
                                      unsigned char *dstv,
                                      int stride, char *eobs)
{
    unsigned char *plane_dst[2];
    int plane;

    plane_dst[0] = dstu;
    plane_dst[1] = dstv;

    for (plane = 0; plane < 2; plane++)
    {
        unsigned char *dst = plane_dst[plane];
        int row;

        for (row = 0; row < 2; row++)
        {
            int col;

            for (col = 0; col < 2; col++)
            {
                short *blk_q = q + 16 * col;
                unsigned char *blk_dst = dst + 4 * col;

                if (eobs[col] > 1)
                {
                    vp8_dequant_idct_add_v6 (blk_q, dq, blk_dst, stride);
                }
                else if (eobs[col] == 1)
                {
                    vp8_dc_only_idct_add_v6 (blk_q[0]*dq[0], blk_dst, stride,
                                             blk_dst, stride);
                    ((int *)blk_q)[0] = 0;
                }
            }

            /* Next row of blocks: 2 blocks of coefficients, 4 pixel rows. */
            q    += 32;
            dst  += 4*stride;
            eobs += 2;
        }
    }
}
|
|
@ -0,0 +1,202 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_short_idct4x4llm_v6_dual|
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------
; void vp8_short_idct4x4llm_v6_dual(short *input, unsigned char *pred,
;                                   int pitch, unsigned char *dst,
;                                   int stride)
;
;   r0        short         *input   16 coefficients; overwritten with
;                                    the first-pass results
;   r1        unsigned char *pred    prediction pixels
;   r2        int            pitch   bytes between prediction rows
;   r3        unsigned char *dst     output pixels
;   [sp, #0]  int            stride  bytes between output rows (on entry)
;
; 4x4 inverse transform working on two 16-bit lanes per register.
; Pass 1 runs twice and handles two columns each time, writing its
; results back into the input buffer.  Pass 2 runs twice and handles
; two rows each time: it adds 4, shifts right by 3, adds the
; prediction, clamps to 0..255 and stores one 32-bit word per row.
;
;   r4 = sinpi8sqrt2 (0x8A8C)
;   r5 = cospi8sqrt2minus1 (0x4E7B) in the low half, with bit 31 used
;        as a one-bit loop counter: the first "subs r5, r5, #1<<31"
;        clears it and leaves C set (loop again), the second sets it
;        again and leaves C clear (fall through).
;
; Lane comments are written "top halfword | bottom halfword".
;-----------------------------------------------------------------------
|vp8_short_idct4x4llm_v6_dual| PROC
    stmfd   sp!, {r4-r11, lr}

    sub     sp, sp, #4                  ; one word to keep the input pointer

    mov     r4, #0x00008A00
    orr     r4, r4, #0x0000008C         ; r4 = sinpi8sqrt2

    mov     r5, #0x00004E00
    orr     r5, r5, #0x0000007B         ; r5 = cospi8sqrt2minus1
    orr     r5, r5, #1<<31              ; arm the one-bit loop counter

idct_dual_pass1
    ldr     r6, [r0, #(4*2)]            ; i5 | i4
    ldr     r12, [r0, #(12*2)]          ; i13 | i12
    ldr     lr, [r0, #(8*2)]            ; i9 | i8

    ; smulb* give full products; pkhtb ..., asr #16 takes the >> 16.
    smulbt  r9, r5, r6                  ; ip[5] * cospi8sqrt2minus1
    smulbb  r7, r5, r6                  ; ip[4] * cospi8sqrt2minus1
    smulwt  r10, r4, r6                 ; (ip[5] * sinpi8sqrt2) >> 16
    smulwb  r8, r4, r6                  ; (ip[4] * sinpi8sqrt2) >> 16

    smulbt  r11, r5, r12                ; ip[13] * cospi8sqrt2minus1
    pkhtb   r7, r9, r7, asr #16         ; 5c | 4c
    pkhbt   r8, r8, r10, lsl #16        ; 5s | 4s
    uadd16  r6, r6, r7                  ; 5c+5 | 4c+4

    smulwt  r7, r4, r12                 ; (ip[13] * sinpi8sqrt2) >> 16
    smulbb  r9, r5, r12                 ; ip[12] * cospi8sqrt2minus1
    smulwb  r10, r4, r12                ; (ip[12] * sinpi8sqrt2) >> 16

    subs    r5, r5, #1<<31              ; count down; C is tested by bcs below

    pkhtb   r9, r11, r9, asr #16        ; 13c | 12c
    ldr     r11, [r0]                   ; i1 | i0
    pkhbt   r10, r10, r7, lsl #16       ; 13s | 12s
    uadd16  r7, r12, r9                 ; 13c+13 | 12c+12

    usub16  r7, r8, r7                  ; c
    uadd16  r6, r6, r10                 ; d
    uadd16  r10, r11, lr                ; a
    usub16  r8, r11, lr                 ; b

    uadd16  r9, r10, r6                 ; a+d
    usub16  r10, r10, r6                ; a-d
    uadd16  r6, r8, r7                  ; b+c
    usub16  r7, r8, r7                  ; b-c

    ; The input buffer doubles as storage for the intermediate results.
    str     r6, [r0, #(4*2)]            ; o5 | o4
    str     r7, [r0, #(8*2)]            ; o9 | o8
    str     r10, [r0, #(12*2)]          ; o13 | o12
    str     r9, [r0], #4                ; o1 | o0, then step two columns

    bcs     idct_dual_pass1

    sub     r0, r0, #8                  ; undo the two post-increments
    str     r0, [sp]                    ; keep the buffer start for pass 2

idct_dual_pass2

    ldr     r6, [r0, #(4*2)]            ; i5 | i4
    ldr     r12, [r0, #(2*2)]           ; i3 | i2
    ldr     lr, [r0, #(6*2)]            ; i7 | i6
    ldr     r0, [r0, #(0*2)]            ; i1 | i0  (r0 reloaded from [sp] later)

    smulbt  r9, r5, r6                  ; ip[5] * cospi8sqrt2minus1
    smulbt  r7, r5, r0                  ; ip[1] * cospi8sqrt2minus1
    smulwt  r10, r4, r6                 ; (ip[5] * sinpi8sqrt2) >> 16
    smulwt  r8, r4, r0                  ; (ip[1] * sinpi8sqrt2) >> 16

    pkhbt   r11, r6, r0, lsl #16        ; i0 | i4
    pkhtb   r7, r7, r9, asr #16         ; 1c | 5c
    pkhtb   r0, r0, r6, asr #16         ; i1 | i5
    pkhbt   r8, r10, r8, lsl #16        ; 1s | 5s = temp1

    uadd16  r0, r7, r0                  ; 1c+1 | 5c+5 = temp2
    pkhbt   r9, lr, r12, lsl #16        ; i2 | i6
    uadd16  r10, r11, r9                ; a
    usub16  r9, r11, r9                 ; b
    pkhtb   r6, r12, lr, asr #16        ; i3 | i7

    subs    r5, r5, #1<<31              ; count down; C is tested by bcs below

    smulbt  r7, r5, r6                  ; ip[3] * cospi8sqrt2minus1
    smulwt  r11, r4, r6                 ; (ip[3] * sinpi8sqrt2) >> 16
    smulbb  r12, r5, r6                 ; ip[7] * cospi8sqrt2minus1
    smulwb  lr, r4, r6                  ; (ip[7] * sinpi8sqrt2) >> 16

    pkhtb   r7, r7, r12, asr #16        ; 3c | 7c
    pkhbt   r11, lr, r11, lsl #16       ; 3s | 7s = temp1

    uadd16  r6, r7, r6                  ; 3c+3 | 7c+7 = temp2
    usub16  r12, r8, r6                 ; c (o1 | o5)
    uadd16  r6, r11, r0                 ; d (o3 | o7)
    uadd16  r7, r10, r6                 ; a+d

    mov     r8, #4
    orr     r8, r8, #0x40000            ; r8 = 4 | 4, rounding for both lanes

    usub16  r6, r10, r6                 ; a-d
    uadd16  r6, r6, r8                  ; a-d+4  -> o3 | o7
    uadd16  r7, r7, r8                  ; a+d+4  -> o0 | o4
    uadd16  r10, r9, r12                ; b+c
    usub16  r0, r9, r12                 ; b-c
    uadd16  r10, r10, r8                ; b+c+4  -> o1 | o5
    uadd16  r8, r0, r8                  ; b-c+4  -> o2 | o6

    ldr     lr, [sp, #40]               ; stride: 4 scratch + 36 pushed bytes

    ; First row of the pair: top halfwords, so asr #19 = (>> 16) then (>> 3).
    ldrb    r0, [r1]                    ; pred p0
    ldrb    r11, [r1, #1]               ; pred p1
    ldrb    r12, [r1, #2]               ; pred p2

    add     r0, r0, r7, asr #19         ; p0 + o0
    add     r11, r11, r10, asr #19      ; p1 + o1
    add     r12, r12, r8, asr #19       ; p2 + o2

    usat    r0, #8, r0                  ; d0 = clip8(p0 + o0)
    usat    r11, #8, r11                ; d1 = clip8(p1 + o1)
    usat    r12, #8, r12                ; d2 = clip8(p2 + o2)

    add     r0, r0, r11, lsl #8         ; |--|--|d1|d0|

    ldrb    r11, [r1, #3]               ; pred p3

    add     r0, r0, r12, lsl #16        ; |--|d2|d1|d0|

    add     r11, r11, r6, asr #19       ; p3 + o3

    ; Sign-extend the bottom halfwords for the second row of the pair.
    sxth    r7, r7
    sxth    r10, r10

    usat    r11, #8, r11                ; d3 = clip8(p3 + o3)

    sxth    r8, r8
    sxth    r6, r6

    add     r0, r0, r11, lsl #24        ; |d3|d2|d1|d0|

    ; Second row of the pair.
    ldrb    r12, [r1, r2]!              ; pred p4, r1 moves down one row
    str     r0, [r3], lr                ; write the first row, dst moves down
    ldrb    r11, [r1, #1]               ; pred p5

    add     r12, r12, r7, asr #3        ; p4 + o4
    add     r11, r11, r10, asr #3       ; p5 + o5

    usat    r12, #8, r12                ; d4 = clip8(p4 + o4)
    usat    r11, #8, r11                ; d5 = clip8(p5 + o5)

    ldrb    r7, [r1, #2]                ; pred p6
    ldrb    r10, [r1, #3]               ; pred p7

    add     r12, r12, r11, lsl #8       ; |--|--|d5|d4|

    add     r7, r7, r8, asr #3          ; p6 + o6
    add     r10, r10, r6, asr #3        ; p7 + o7

    ldr     r0, [sp]                    ; start of the intermediate buffer

    usat    r7, #8, r7                  ; d6 = clip8(p6 + o6)
    usat    r10, #8, r10                ; d7 = clip8(p7 + o7)

    add     r12, r12, r7, lsl #16       ; |--|d6|d5|d4|
    add     r12, r12, r10, lsl #24      ; |d7|d6|d5|d4|

    str     r12, [r3], lr               ; write the second row, dst moves down
    add     r0, r0, #16                 ; rows 2 and 3 of the buffer
    add     r1, r1, r2                  ; pred moves down one more row

    bcs     idct_dual_pass2

    add     sp, sp, #4                  ; drop the saved input pointer
    ldmfd   sp!, {r4-r11, pc}

    ENDP
|
||||
|
||||
END
|
|
@ -0,0 +1,611 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_intra4x4_predict_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
|
||||
;void vp8_intra4x4_predict_armv6(unsigned char *Above, unsigned char *yleft,
|
||||
; int left_stride, B_PREDICTION_MODE b_mode,
|
||||
; unsigned char *dst, int dst_stride,
|
||||
; unsigned char top_left)
|
||||
|
||||
; r0: *Above
|
||||
; r1: *yleft
|
||||
; r2: left_stride
|
||||
; r3: b_mode
|
||||
; sp + #40: dst
|
||||
; sp + #44: dst_stride
|
||||
; sp + #48: top_left
|
||||
|vp8_intra4x4_predict_armv6| PROC
; Entry point: saves registers, then dispatches on b_mode (r3) through the
; branch table below. Every table target writes one 4x4 block to dst and
; returns by popping the registers saved here.
|
||||
push {r4-r12, lr} ; 10 registers = 40 bytes, so stack args start at sp + #40
|
||||
|
||||
cmp r3, #10 ; the table below has 10 entries (b_mode 0..9)
|
||||
addlt pc, pc, r3, lsl #2 ; position independent switch
; In ARM state pc reads as this instruction's address + 8, which is the first
; table entry; each entry is a single 4-byte branch, hence r3 << 2.
; NOTE(review): lt is a signed condition, so a negative b_mode would also take
; the jump -- assumes callers only pass 0..9; confirm.
|
||||
pop {r4-r12, pc} ; default
|
||||
b b_dc_pred ; b_mode 0
|
||||
b b_tm_pred ; b_mode 1
|
||||
b b_ve_pred ; b_mode 2
|
||||
b b_he_pred ; b_mode 3
|
||||
b b_ld_pred ; b_mode 4
|
||||
b b_rd_pred ; b_mode 5
|
||||
b b_vr_pred ; b_mode 6
|
||||
b b_vl_pred ; b_mode 7
|
||||
b b_hd_pred ; b_mode 8
|
||||
b b_hu_pred ; b_mode 9
|
||||
|
||||
b_dc_pred
; DC prediction: every pixel of the 4x4 block is set to
; (Above[0..3] + Left[0..3] + 4) >> 3.
; In: r0 = Above, r1 = yleft, r2 = left_stride; dst / dst_stride on the stack.
|
||||
; load values
|
||||
ldr r8, [r0] ; Above
|
||||
ldrb r4, [r1], r2 ; Left[0]
|
||||
mov r9, #0 ; zero operand so usad8 yields a plain byte sum
|
||||
ldrb r5, [r1], r2 ; Left[1]
|
||||
ldrb r6, [r1], r2 ; Left[2]
|
||||
usad8 r12, r8, r9 ; r12 = sum of the four Above bytes (|a - 0| summed)
|
||||
ldrb r7, [r1] ; Left[3]
|
||||
|
||||
; calculate dc
|
||||
add r4, r4, r5
|
||||
add r4, r4, r6
|
||||
add r4, r4, r7 ; r4 = sum of Left[0..3]
|
||||
add r4, r4, r12 ; + sum of Above[0..3]
|
||||
add r4, r4, #4 ; rounding term for the divide by 8
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
mov r12, r4, asr #3 ; (expected_dc + 4) >> 3
|
||||
|
||||
add r12, r12, r12, lsl #8 ; replicate dc into the low two bytes
|
||||
ldr r3, [sp, #40] ; dst
|
||||
add r12, r12, r12, lsl #16 ; replicate dc into all four bytes
|
||||
|
||||
; store values
|
||||
str r12, [r3], r0 ; row 0
|
||||
str r12, [r3], r0 ; row 1
|
||||
str r12, [r3], r0 ; row 2
|
||||
str r12, [r3] ; row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_tm_pred
; TM prediction: pixel(row, col) = clip8(Left[row] + Above[col] - top_left).
; Two columns are processed per register as packed 16-bit lanes: r10 holds
; (Above - tl) for columns 0 and 2, r11 for columns 1 and 3.
|
||||
ldr r8, [r0] ; Above
|
||||
ldrb r9, [sp, #48] ; top_left
|
||||
ldrb r4, [r1], r2 ; Left[0]
|
||||
ldrb r5, [r1], r2 ; Left[1]
|
||||
ldrb r6, [r1], r2 ; Left[2]
|
||||
ldrb r7, [r1] ; Left[3]
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
add r9, r9, r9, lsl #16 ; [tl|tl]
|
||||
uxtb16 r10, r8 ; a[2|0]
|
||||
uxtb16 r11, r8, ror #8 ; a[3|1]
|
||||
ssub16 r10, r10, r9 ; a[2|0] - [tl|tl]
|
||||
ssub16 r11, r11, r9 ; a[3|1] - [tl|tl]
|
||||
|
||||
add r4, r4, r4, lsl #16 ; l[0|0]
|
||||
add r5, r5, r5, lsl #16 ; l[1|1]
|
||||
add r6, r6, r6, lsl #16 ; l[2|2]
|
||||
add r7, r7, r7, lsl #16 ; l[3|3]
|
||||
|
||||
; row 0 (r1/r2 are free to reuse: all Left loads are done)
sadd16 r1, r4, r10 ; l[0|0] + a[2|0] - [tl|tl]
|
||||
sadd16 r2, r4, r11 ; l[0|0] + a[3|1] - [tl|tl]
|
||||
usat16 r1, #8, r1 ; clip both lanes to 0..255
|
||||
usat16 r2, #8, r2
|
||||
|
||||
; row 1 sums, interleaved with the row 0 store
sadd16 r4, r5, r10 ; l[1|1] + a[2|0] - [tl|tl]
|
||||
sadd16 r5, r5, r11 ; l[1|1] + a[3|1] - [tl|tl]
|
||||
|
||||
add r12, r1, r2, lsl #8 ; [3|2|1|0]
|
||||
str r12, [r3], r0 ; store row 0
|
||||
|
||||
usat16 r4, #8, r4
|
||||
usat16 r5, #8, r5
|
||||
|
||||
; row 2 sums
sadd16 r1, r6, r10 ; l[2|2] + a[2|0] - [tl|tl]
|
||||
sadd16 r2, r6, r11 ; l[2|2] + a[3|1] - [tl|tl]
|
||||
|
||||
add r12, r4, r5, lsl #8 ; [3|2|1|0]
|
||||
str r12, [r3], r0 ; store row 1
|
||||
|
||||
usat16 r1, #8, r1
|
||||
usat16 r2, #8, r2
|
||||
|
||||
; row 3 sums
sadd16 r4, r7, r10 ; l[3|3] + a[2|0] - [tl|tl]
|
||||
sadd16 r5, r7, r11 ; l[3|3] + a[3|1] - [tl|tl]
|
||||
|
||||
add r12, r1, r2, lsl #8 ; [3|2|1|0]
|
||||
|
||||
usat16 r4, #8, r4
|
||||
usat16 r5, #8, r5
|
||||
|
||||
str r12, [r3], r0 ; store row 2
|
||||
|
||||
add r12, r4, r5, lsl #8 ; [3|2|1|0]
|
||||
str r12, [r3] ; store row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_ve_pred
; Vertical prediction with a 3-tap smoothing of the Above row:
;   out[col] = (a[col-1] + 2*a[col] + a[col+1] + 2) >> 2, with a[-1] = top_left.
; Reads Above[0..4]; the same four output bytes are written to all four rows.
|
||||
ldr r8, [r0] ; a[3|2|1|0]
|
||||
ldr r11, c00FF00FF ; mask keeping the low byte of each 16-bit lane
|
||||
ldrb r9, [sp, #48] ; top_left
|
||||
ldrb r10, [r0, #4] ; a[4]
|
||||
|
||||
ldr r0, c00020002 ; rounding constant 2 in each lane (r0 no longer needed as Above)
|
||||
|
||||
uxtb16 r4, r8 ; a[2|0]
|
||||
uxtb16 r5, r8, ror #8 ; a[3|1]
|
||||
ldr r2, [sp, #44] ; dst_stride
|
||||
pkhbt r9, r9, r5, lsl #16 ; a[1|-1]
|
||||
|
||||
; columns 0 and 2 accumulate in r9
add r9, r9, r4, lsl #1 ;[a[1]+2*a[2] | tl+2*a[0] ]
|
||||
uxtab16 r9, r9, r5 ;[a[1]+2*a[2]+a[3] | tl+2*a[0]+a[1] ]
|
||||
ldr r3, [sp, #40] ; dst
|
||||
uxtab16 r9, r9, r0 ;[a[1]+2*a[2]+a[3]+2| tl+2*a[0]+a[1]+2]
|
||||
|
||||
; columns 1 and 3 accumulate in r0, then r4
add r0, r0, r10, lsl #16 ;[a[4]+2 | 2]
|
||||
add r0, r0, r4, asr #16 ;[a[4]+2 | a[2]+2]
|
||||
add r0, r0, r5, lsl #1 ;[a[4]+2*a[3]+2 | a[2]+2*a[1]+2]
|
||||
uadd16 r4, r4, r0 ;[a[4]+2*a[3]+a[2]+2|a[2]+2*a[1]+a[0]+2]
|
||||
|
||||
and r9, r11, r9, asr #2 ; >> 2, masked back to one byte per lane (cols 2|0)
|
||||
and r4, r11, r4, asr #2 ; >> 2, masked back to one byte per lane (cols 3|1)
|
||||
add r9, r9, r4, lsl #8 ; interleave into bytes [3|2|1|0]
|
||||
|
||||
; store values
|
||||
str r9, [r3], r2
|
||||
str r9, [r3], r2
|
||||
str r9, [r3], r2
|
||||
str r9, [r3]
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
|
||||
b_he_pred
; Horizontal prediction with a 3-tap smoothing of the Left column:
;   row[i] = (l[i-1] + 2*l[i] + l[i+1] + 2) >> 2, with l[-1] = top_left and
;   l[3] reused in place of l[4] for the last row.
; Each row is filled with its single smoothed value.
|
||||
ldrb r4, [r1], r2 ; Left[0]
|
||||
ldrb r8, [sp, #48] ; top_left
|
||||
ldrb r5, [r1], r2 ; Left[1]
|
||||
ldrb r6, [r1], r2 ; Left[2]
|
||||
ldrb r7, [r1] ; Left[3]
|
||||
|
||||
add r8, r8, r4 ; tl + l[0]
|
||||
add r9, r4, r5 ; l[0] + l[1]
|
||||
add r10, r5, r6 ; l[1] + l[2]
|
||||
add r11, r6, r7 ; l[2] + l[3]
|
||||
|
||||
mov r0, #2<<14 ; rounding term 2, pre-shifted to match the lsl #14 below
|
||||
|
||||
add r8, r8, r9 ; tl + 2*l[0] + l[1]
|
||||
add r4, r9, r10 ; l[0] + 2*l[1] + l[2]
|
||||
add r5, r10, r11 ; l[1] + 2*l[2] + l[3]
|
||||
add r6, r11, r7, lsl #1 ; l[2] + 2*l[3] + l[3]
|
||||
|
||||
|
||||
; (sum + 2) << 14 places (sum + 2) >> 2 in bits 16 and up
add r8, r0, r8, lsl #14 ; (tl + 2*l[0] + l[1])>>2 in top half
|
||||
add r9, r0, r4, lsl #14 ; (l[0] + 2*l[1] + l[2])>>2 in top half
|
||||
add r10,r0, r5, lsl #14 ; (l[1] + 2*l[2] + l[3])>>2 in top half
|
||||
add r11,r0, r6, lsl #14 ; (l[2] + 2*l[3] + l[3])>>2 in top half
|
||||
|
||||
; copy the top halfword into the bottom halfword as well
pkhtb r8, r8, r8, asr #16 ; l[-|0|-|0]
|
||||
pkhtb r9, r9, r9, asr #16 ; l[-|1|-|1]
|
||||
pkhtb r10, r10, r10, asr #16 ; l[-|2|-|2]
|
||||
pkhtb r11, r11, r11, asr #16 ; l[-|3|-|3]
|
||||
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
add r8, r8, r8, lsl #8 ; l[0|0|0|0]
|
||||
add r9, r9, r9, lsl #8 ; l[1|1|1|1]
|
||||
add r10, r10, r10, lsl #8 ; l[2|2|2|2]
|
||||
add r11, r11, r11, lsl #8 ; l[3|3|3|3]
|
||||
|
||||
; store values
|
||||
str r8, [r3], r0
|
||||
str r9, [r3], r0
|
||||
str r10, [r3], r0
|
||||
str r11, [r3]
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_ld_pred
; Left-down diagonal prediction from Above[0..7].
; With f[k] = (a[k] + 2*a[k+1] + a[k+2] + 2) >> 2 for k = 0..5 and
; f[6] = (a[6] + 2*a[7] + a[7] + 2) >> 2, pixel(row, col) = f[row + col].
; Each row is the previous row shifted down by one byte with a new top byte.
|
||||
ldr r4, [r0] ; Above[0-3]
|
||||
ldr r12, c00020002 ; rounding constant 2 per 16-bit lane
|
||||
ldr r5, [r0, #4] ; Above[4-7]
|
||||
ldr lr, c00FF00FF ; mask keeping the low byte of each lane
|
||||
|
||||
uxtb16 r6, r4 ; a[2|0]
|
||||
uxtb16 r7, r4, ror #8 ; a[3|1]
|
||||
uxtb16 r8, r5 ; a[6|4]
|
||||
uxtb16 r9, r5, ror #8 ; a[7|5]
|
||||
pkhtb r10, r6, r8 ; a[2|4]
|
||||
pkhtb r11, r7, r9 ; a[3|5]
|
||||
|
||||
; r4 = f[2] | f[0] (before the shift)
add r4, r6, r7, lsl #1 ; [a2+2*a3 | a0+2*a1]
|
||||
add r4, r4, r10, ror #16 ; [a2+2*a3+a4 | a0+2*a1+a2]
|
||||
uxtab16 r4, r4, r12 ; [a2+2*a3+a4+2 | a0+2*a1+a2+2]
|
||||
|
||||
; r5 = f[3] | f[1]; ror #15 is a halfword swap combined with a doubling
add r5, r7, r10, ror #15 ; [a3+2*a4 | a1+2*a2]
|
||||
add r5, r5, r11, ror #16 ; [a3+2*a4+a5 | a1+2*a2+a3]
|
||||
uxtab16 r5, r5, r12 ; [a3+2*a4+a5+2 | a1+2*a2+a3+2]
|
||||
|
||||
; r6 = f[6] | f[4]
pkhtb r7, r9, r8, asr #16 ; a[7|6]
|
||||
add r6, r8, r9, lsl #1 ; [a6+2*a7 | a4+2*a5]
|
||||
uadd16 r6, r6, r7 ; [a6+2*a7+a7 | a4+2*a5+a6]
|
||||
uxtab16 r6, r6, r12 ; [a6+2*a7+a7+2 | a4+2*a5+a6+2]
|
||||
|
||||
; r7 = f[5] (single value in the low halfword)
uxth r7, r9 ; [ a5]
|
||||
add r7, r7, r8, asr #15 ; [ a5+2*a6]
|
||||
add r7, r7, r9, asr #16 ; [ a5+2*a6+a7]
|
||||
uxtah r7, r7, r12 ; [ a5+2*a6+a7+2]
|
||||
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
; scale down
|
||||
and r4, lr, r4, asr #2
|
||||
and r5, lr, r5, asr #2
|
||||
and r6, lr, r6, asr #2
|
||||
mov r7, r7, asr #2
|
||||
|
||||
add r8, r4, r5, lsl #8 ; [3|2|1|0]
|
||||
str r8, [r3], r0
|
||||
|
||||
mov r9, r8, lsr #8 ; drop f[0]
|
||||
add r9, r9, r6, lsl #24 ; [4|3|2|1]
|
||||
str r9, [r3], r0
|
||||
|
||||
mov r10, r9, lsr #8 ; drop f[1]
|
||||
add r10, r10, r7, lsl #24 ; [5|4|3|2]
|
||||
str r10, [r3], r0
|
||||
|
||||
mov r6, r6, lsr #16 ; bring f[6] down to the low byte
|
||||
mov r11, r10, lsr #8 ; drop f[2]
|
||||
add r11, r11, r6, lsl #24 ; [6|5|4|3]
|
||||
str r11, [r3]
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_rd_pred
; Right-down diagonal prediction. The edge pixels are viewed as one array
;   pp[0..8] = l[3], l[2], l[1], l[0], top_left, Above[0], ..., Above[3]
; and f[k] = (pp[k] + 2*pp[k+1] + pp[k+2] + 2) >> 2 for k = 0..6.
; Row 0 is [f6|f5|f4|f3]; each following row shifts up one byte and brings
; in the next lower f[] in the low byte.
|
||||
ldrb r7, [r1], r2 ; l[0] = pp[3]
|
||||
ldr lr, [r0] ; Above = pp[8|7|6|5]
|
||||
ldrb r8, [sp, #48] ; tl = pp[4]
|
||||
ldrb r6, [r1], r2 ; l[1] = pp[2]
|
||||
ldrb r5, [r1], r2 ; l[2] = pp[1]
|
||||
ldrb r4, [r1], r2 ; l[3] = pp[0] (r1 is not used again, so the post-increment is unused)
|
||||
|
||||
|
||||
uxtb16 r9, lr ; p[7|5]
|
||||
uxtb16 r10, lr, ror #8 ; p[8|6]
|
||||
add r4, r4, r6, lsl #16 ; p[2|0]
|
||||
add r5, r5, r7, lsl #16 ; p[3|1]
|
||||
add r6, r6, r8, lsl #16 ; p[4|2]
|
||||
pkhbt r7, r7, r9, lsl #16 ; p[5|3]
|
||||
pkhbt r8, r8, r10, lsl #16 ; p[6|4]
|
||||
|
||||
ldr r12, c00020002 ; rounding constant 2 per lane
|
||||
ldr lr, c00FF00FF ; low-byte-per-lane mask (Above word no longer needed)
|
||||
|
||||
; r4 = f[2] | f[0]
add r4, r4, r5, lsl #1 ; [p2+2*p3 | p0+2*p1]
|
||||
add r4, r4, r6 ; [p2+2*p3+p4 | p0+2*p1+p2]
|
||||
uxtab16 r4, r4, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
|
||||
|
||||
; r5 = f[3] | f[1]
add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
|
||||
add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
|
||||
uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
|
||||
|
||||
; r6 = f[5] | f[3]
add r6, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
|
||||
add r6, r6, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
|
||||
uxtab16 r6, r6, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
|
||||
|
||||
; r7 = f[6] | f[4]
add r7, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
|
||||
add r7, r7, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
|
||||
uxtab16 r7, r7, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
|
||||
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
; scale down
|
||||
and r7, lr, r7, asr #2
|
||||
and r6, lr, r6, asr #2
|
||||
and r5, lr, r5, asr #2
|
||||
and r4, lr, r4, asr #2
|
||||
|
||||
add r8, r6, r7, lsl #8 ; [6|5|4|3]
|
||||
str r8, [r3], r0
|
||||
|
||||
mov r9, r8, lsl #8 ; [5|4|3|-]
|
||||
uxtab r9, r9, r4, ror #16 ; [5|4|3|2]
|
||||
str r9, [r3], r0
|
||||
|
||||
mov r10, r9, lsl #8 ; [4|3|2|-]
|
||||
uxtab r10, r10, r5 ; [4|3|2|1]
|
||||
str r10, [r3], r0
|
||||
|
||||
mov r11, r10, lsl #8 ; [3|2|1|-]
|
||||
uxtab r11, r11, r4 ; [3|2|1|0]
|
||||
str r11, [r3]
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_vr_pred
; Vertical-right prediction. Edge pixels are viewed as
;   pp[0..8] = l[3], l[2], l[1], l[0], top_left, Above[0], ..., Above[3].
; Outputs mix 2-tap averages (p[k] + p[k+1] + 1) >> 1 and 3-tap values
; (p[k] + 2*p[k+1] + p[k+2] + 2) >> 2; the letters A..L in the comments name
; those intermediate results and show how they are packed into each row.
|
||||
ldrb r7, [r1], r2 ; l[0] = pp[3]
|
||||
ldr lr, [r0] ; Above = pp[8|7|6|5]
|
||||
ldrb r8, [sp, #48] ; tl = pp[4]
|
||||
ldrb r6, [r1], r2 ; l[1] = pp[2]
|
||||
ldrb r5, [r1], r2 ; l[2] = pp[1]
|
||||
ldrb r4, [r1] ; l[3] = pp[0] (never read: r4 is reloaded with c00010001 below)
|
||||
|
||||
add r5, r5, r7, lsl #16 ; p[3|1]
|
||||
add r6, r6, r8, lsl #16 ; p[4|2]
|
||||
uxtb16 r9, lr ; p[7|5]
|
||||
uxtb16 r10, lr, ror #8 ; p[8|6]
|
||||
pkhbt r7, r7, r9, lsl #16 ; p[5|3]
|
||||
pkhbt r8, r8, r10, lsl #16 ; p[6|4]
|
||||
|
||||
ldr r4, c00010001 ; rounding constant 1 per lane for the 2-tap averages
|
||||
ldr r12, c00020002 ; rounding constant 2 per lane for the 3-tap values
|
||||
ldr lr, c00FF00FF ; low-byte-per-lane mask
|
||||
|
||||
add r5, r5, r6, lsl #1 ; [p3+2*p4 | p1+2*p2]
|
||||
add r5, r5, r7 ; [p3+2*p4+p5 | p1+2*p2+p3]
|
||||
uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
|
||||
|
||||
add r6, r6, r7, lsl #1 ; [p4+2*p5 | p2+2*p3]
|
||||
add r6, r6, r8 ; [p4+2*p5+p6 | p2+2*p3+p4]
|
||||
uxtab16 r6, r6, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
|
||||
|
||||
uadd16 r11, r8, r9 ; [p6+p7 | p4+p5]
|
||||
uhadd16 r11, r11, r4 ; [(p6+p7+1)>>1 | (p4+p5+1)>>1]
|
||||
; [F|E]
|
||||
|
||||
add r7, r7, r8, lsl #1 ; [p5+2*p6 | p3+2*p4]
|
||||
add r7, r7, r9 ; [p5+2*p6+p7 | p3+2*p4+p5]
|
||||
uxtab16 r7, r7, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
|
||||
|
||||
uadd16 r2, r9, r10 ; [p7+p8 | p5+p6]
|
||||
uhadd16 r2, r2, r4 ; [(p7+p8+1)>>1 | (p5+p6+1)>>1]
|
||||
; [J|I]
|
||||
|
||||
add r8, r8, r9, lsl #1 ; [p6+2*p7 | p4+2*p5]
|
||||
add r8, r8, r10 ; [p6+2*p7+p8 | p4+2*p5+p6]
|
||||
uxtab16 r8, r8, r12 ; [p6+2*p7+p8+2 | p4+2*p5+p6+2]
|
||||
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
; scale down
|
||||
and r5, lr, r5, asr #2 ; [B|A]
|
||||
and r6, lr, r6, asr #2 ; [D|C]
|
||||
and r7, lr, r7, asr #2 ; [H|G]
|
||||
and r8, lr, r8, asr #2 ; [L|K]
|
||||
|
||||
add r12, r11, r2, lsl #8 ; [J|F|I|E]
|
||||
str r12, [r3], r0 ; row 0
|
||||
|
||||
add r12, r7, r8, lsl #8 ; [L|H|K|G]
|
||||
str r12, [r3], r0 ; row 1
|
||||
|
||||
pkhbt r2, r6, r2, lsl #16 ; [-|I|-|C]
|
||||
add r2, r2, r11, lsl #8 ; [F|I|E|C]
|
||||
|
||||
pkhtb r12, r6, r5 ; [-|D|-|A]
|
||||
pkhtb r10, r7, r5, asr #16 ; [-|H|-|B]
|
||||
str r2, [r3], r0 ; row 2
|
||||
add r12, r12, r10, lsl #8 ; [H|D|B|A]
|
||||
str r12, [r3] ; row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_vl_pred
; Vertical-left prediction from Above[0..7] (called p0..p7 in the comments).
; Outputs mix 2-tap averages (p[k] + p[k+1] + 1) >> 1 and 3-tap values
; (p[k] + 2*p[k+1] + p[k+2] + 2) >> 2; the letters A..J name those
; intermediate results and show how they are packed into each row.
|
||||
ldr r4, [r0] ; [3|2|1|0] = Above[0-3]
|
||||
ldr r12, c00020002 ; rounding constant 2 per lane
|
||||
ldr r5, [r0, #4] ; [7|6|5|4] = Above[4-7]
|
||||
ldr lr, c00FF00FF ; low-byte-per-lane mask
|
||||
ldr r2, c00010001 ; rounding constant 1 per lane
|
||||
|
||||
mov r0, r4, lsr #16 ; [-|-|3|2]
|
||||
add r0, r0, r5, lsl #16 ; [5|4|3|2]
|
||||
uxtb16 r6, r4 ; [2|0]
|
||||
uxtb16 r7, r4, ror #8 ; [3|1]
|
||||
uxtb16 r8, r0 ; [4|2]
|
||||
uxtb16 r9, r0, ror #8 ; [5|3]
|
||||
uxtb16 r10, r5 ; [6|4]
|
||||
uxtb16 r11, r5, ror #8 ; [7|5]
|
||||
|
||||
uadd16 r4, r6, r7 ; [p2+p3 | p0+p1]
|
||||
uhadd16 r4, r4, r2 ; [(p2+p3+1)>>1 | (p0+p1+1)>>1]
|
||||
; [B|A]
|
||||
|
||||
add r5, r6, r7, lsl #1 ; [p2+2*p3 | p0+2*p1]
|
||||
add r5, r5, r8 ; [p2+2*p3+p4 | p0+2*p1+p2]
|
||||
uxtab16 r5, r5, r12 ; [p2+2*p3+p4+2 | p0+2*p1+p2+2]
|
||||
|
||||
uadd16 r6, r7, r8 ; [p3+p4 | p1+p2]
|
||||
uhadd16 r6, r6, r2 ; [(p3+p4+1)>>1 | (p1+p2+1)>>1]
|
||||
; [F|E]
|
||||
|
||||
add r7, r7, r8, lsl #1 ; [p3+2*p4 | p1+2*p2]
|
||||
add r7, r7, r9 ; [p3+2*p4+p5 | p1+2*p2+p3]
|
||||
uxtab16 r7, r7, r12 ; [p3+2*p4+p5+2 | p1+2*p2+p3+2]
|
||||
|
||||
add r8, r8, r9, lsl #1 ; [p4+2*p5 | p2+2*p3]
|
||||
add r8, r8, r10 ; [p4+2*p5+p6 | p2+2*p3+p4]
|
||||
uxtab16 r8, r8, r12 ; [p4+2*p5+p6+2 | p2+2*p3+p4+2]
|
||||
|
||||
add r9, r9, r10, lsl #1 ; [p5+2*p6 | p3+2*p4]
|
||||
add r9, r9, r11 ; [p5+2*p6+p7 | p3+2*p4+p5]
|
||||
uxtab16 r9, r9, r12 ; [p5+2*p6+p7+2 | p3+2*p4+p5+2]
|
||||
|
||||
ldr r0, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
; scale down
|
||||
and r5, lr, r5, asr #2 ; [D|C]
|
||||
and r7, lr, r7, asr #2 ; [H|G]
|
||||
and r8, lr, r8, asr #2 ; [I|D]
|
||||
and r9, lr, r9, asr #2 ; [J|H]
|
||||
|
||||
add r10, r4, r6, lsl #8 ; [F|B|E|A]
|
||||
str r10, [r3], r0 ; row 0
|
||||
|
||||
add r5, r5, r7, lsl #8 ; [H|D|G|C]
|
||||
str r5, [r3], r0 ; row 1
|
||||
|
||||
pkhtb r12, r8, r4, asr #16 ; [-|I|-|B]
|
||||
pkhtb r10, r9, r8 ; [-|J|-|D]
|
||||
|
||||
add r12, r6, r12, lsl #8 ; [I|F|B|E]
|
||||
str r12, [r3], r0 ; row 2
|
||||
|
||||
add r10, r7, r10, lsl #8 ; [J|H|D|G]
|
||||
str r10, [r3] ; row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
b_hd_pred
; Horizontal-down prediction. Edge pixels are viewed as
;   pp[0..8] = l[3], l[2], l[1], l[0], top_left, Above[0], ..., Above[3].
; Outputs mix 2-tap averages (p[k] + p[k+1] + 1) >> 1 and 3-tap values
; (p[k] + 2*p[k+1] + p[k+2] + 2) >> 2; the letters A..J name those
; intermediate results and show how they are packed into each row.
; Rows are stored from the top of the block (row 0) downwards.
|
||||
ldrb r7, [r1], r2 ; l[0] = pp[3]
|
||||
ldr lr, [r0] ; Above = pp[8|7|6|5]
|
||||
ldrb r8, [sp, #48] ; tl = pp[4]
|
||||
ldrb r6, [r1], r2 ; l[1] = pp[2]
|
||||
ldrb r5, [r1], r2 ; l[2] = pp[1]
|
||||
ldrb r4, [r1] ; l[3] = pp[0]
|
||||
|
||||
uxtb16 r9, lr ; p[7|5]
|
||||
uxtb16 r10, lr, ror #8 ; p[8|6]
|
||||
|
||||
add r4, r4, r5, lsl #16 ; p[1|0]
|
||||
add r5, r5, r6, lsl #16 ; p[2|1]
|
||||
add r6, r6, r7, lsl #16 ; p[3|2]
|
||||
add r7, r7, r8, lsl #16 ; p[4|3]
|
||||
|
||||
ldr r12, c00020002 ; rounding constant 2 per lane
|
||||
ldr lr, c00FF00FF ; low-byte-per-lane mask
|
||||
ldr r2, c00010001 ; rounding constant 1 per lane (left_stride no longer needed)
|
||||
|
||||
pkhtb r8, r7, r9 ; p[4|5]
|
||||
pkhtb r1, r9, r10 ; p[7|6]
|
||||
pkhbt r10, r8, r10, lsl #16 ; p[6|5]
|
||||
|
||||
uadd16 r11, r4, r5 ; [p1+p2 | p0+p1]
|
||||
uhadd16 r11, r11, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
|
||||
; [B|A]
|
||||
|
||||
add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
|
||||
add r4, r4, r6 ; [p1+2*p2+p3 | p0+2*p1+p2]
|
||||
uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
|
||||
|
||||
uadd16 r0, r6, r7 ; [p3+p4 | p2+p3]
|
||||
uhadd16 r0, r0, r2 ; [(p3+p4+1)>>1 | (p2+p3+1)>>1]
|
||||
; [F|E]
|
||||
|
||||
add r5, r6, r7, lsl #1 ; [p3+2*p4 | p2+2*p3]
|
||||
add r5, r5, r8, ror #16 ; [p3+2*p4+p5 | p2+2*p3+p4]
|
||||
uxtab16 r5, r5, r12 ; [p3+2*p4+p5+2 | p2+2*p3+p4+2]
|
||||
|
||||
add r6, r12, r8, ror #16 ; [p5+2 | p4+2]
|
||||
add r6, r6, r10, lsl #1 ; [p5+2+2*p6 | p4+2+2*p5]
|
||||
uxtab16 r6, r6, r1 ; [p5+2+2*p6+p7 | p4+2+2*p5+p6]
|
||||
|
||||
; scale down
|
||||
and r4, lr, r4, asr #2 ; [D|C]
|
||||
and r5, lr, r5, asr #2 ; [H|G]
|
||||
and r6, lr, r6, asr #2 ; [J|I]
|
||||
|
||||
ldr lr, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
|
||||
pkhtb r2, r0, r6 ; [-|F|-|I]
|
||||
pkhtb r12, r6, r5, asr #16 ; [-|J|-|H]
|
||||
add r12, r12, r2, lsl #8 ; [F|J|I|H]
|
||||
add r2, r0, r5, lsl #8 ; [H|F|G|E]
|
||||
mov r12, r12, ror #24 ; [J|I|H|F]
|
||||
str r12, [r3], lr ; row 0
|
||||
|
||||
mov r7, r11, asr #16 ; [-|-|-|B]
|
||||
str r2, [r3], lr ; row 1
|
||||
add r7, r7, r0, lsl #16 ; [-|E|-|B]
|
||||
add r7, r7, r4, asr #8 ; [-|E|D|B]
|
||||
add r7, r7, r5, lsl #24 ; [G|E|D|B]
|
||||
str r7, [r3], lr ; row 2
|
||||
|
||||
add r5, r11, r4, lsl #8 ; [D|B|C|A]
|
||||
str r5, [r3] ; row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
|
||||
|
||||
b_hu_pred
; Horizontal-up prediction from Left[0..3] only (called p0..p3 in the comments).
; A/B/E are 2-tap averages, C/D/F are 3-tap values (F = (p2 + 3*p3 + 2) >> 2),
; and G is Left[3] itself, which fills the bottom-right part of the block.
|
||||
ldrb r4, [r1], r2 ; Left[0]
|
||||
ldr r12, c00020002 ; rounding constant 2 per lane
|
||||
ldrb r5, [r1], r2 ; Left[1]
|
||||
ldr lr, c00FF00FF ; low-byte-per-lane mask
|
||||
ldrb r6, [r1], r2 ; Left[2]
|
||||
ldr r2, c00010001 ; rounding constant 1 per lane (last strided load is done)
|
||||
ldrb r7, [r1] ; Left[3]
|
||||
|
||||
add r4, r4, r5, lsl #16 ; [1|0]
|
||||
add r5, r5, r6, lsl #16 ; [2|1]
|
||||
add r9, r6, r7, lsl #16 ; [3|2]
|
||||
|
||||
uadd16 r8, r4, r5 ; [p1+p2 | p0+p1]
|
||||
uhadd16 r8, r8, r2 ; [(p1+p2+1)>>1 | (p0+p1+1)>>1]
|
||||
; [B|A]
|
||||
|
||||
add r4, r4, r5, lsl #1 ; [p1+2*p2 | p0+2*p1]
|
||||
add r4, r4, r9 ; [p1+2*p2+p3 | p0+2*p1+p2]
|
||||
uxtab16 r4, r4, r12 ; [p1+2*p2+p3+2 | p0+2*p1+p2+2]
|
||||
ldr r2, [sp, #44] ; dst_stride
|
||||
ldr r3, [sp, #40] ; dst
|
||||
and r4, lr, r4, asr #2 ; [D|C]
|
||||
|
||||
add r10, r6, r7 ; [p2+p3]
|
||||
add r11, r10, r7, lsl #1 ; [p2+3*p3]
|
||||
add r10, r10, #1
|
||||
add r11, r11, #2
|
||||
mov r10, r10, asr #1 ; [E]
|
||||
mov r11, r11, asr #2 ; [F]
|
||||
|
||||
add r9, r7, r9, asr #8 ; [-|-|G|G]
|
||||
add r0, r8, r4, lsl #8 ; [D|B|C|A]
|
||||
add r7, r9, r9, lsl #16 ; [G|G|G|G]
|
||||
|
||||
str r0, [r3], r2 ; row 0
|
||||
|
||||
mov r1, r8, asr #16 ; [-|-|-|B]
|
||||
add r1, r1, r4, asr #8 ; [-|-|D|B]
|
||||
add r1, r1, r10, lsl #16 ; [-|E|D|B]
|
||||
add r1, r1, r11, lsl #24 ; [F|E|D|B]
|
||||
str r1, [r3], r2 ; row 1
|
||||
|
||||
add r10, r11, lsl #8 ; [-|-|F|E]
|
||||
add r10, r10, r9, lsl #16 ; [G|G|F|E]
|
||||
str r10, [r3], r2 ; row 2
|
||||
|
||||
str r7, [r3] ; row 3
|
||||
|
||||
pop {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; constants
|
||||
c00010001
|
||||
DCD 0x00010001
|
||||
c00020002
|
||||
DCD 0x00020002
|
||||
c00FF00FF
|
||||
DCD 0x00FF00FF
|
||||
|
||||
END
|
|
@ -0,0 +1,136 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;void vp8_short_inv_walsh4x4_v6(short *input, short *mb_dqcoeff)
|
||||
|vp8_short_inv_walsh4x4_v6| PROC
; Inverse 4x4 Walsh-Hadamard transform on 16 shorts.
;   r0 = input  : 16 consecutive shorts, read as eight packed words
;   r1 = output : result k is stored as a halfword at byte offset 32*k
;                 (post-increment of #32 after every store)
; Both passes use saturating packed 16-bit arithmetic; the final step is
; (value + 3) >> 3. r0 is never written, so no value is returned.
; NOTE(review): the 32-byte output stride presumably targets the first
; coefficient of 16 consecutive 16-short blocks -- confirm against the caller.
|
||||
|
||||
stmdb sp!, {r4 - r12, lr}
|
||||
|
||||
ldr r2, [r0, #0] ; [1 | 0]
|
||||
ldr r3, [r0, #4] ; [3 | 2]
|
||||
ldr r4, [r0, #8] ; [5 | 4]
|
||||
ldr r5, [r0, #12] ; [7 | 6]
|
||||
ldr r6, [r0, #16] ; [9 | 8]
|
||||
ldr r7, [r0, #20] ; [11 | 10]
|
||||
ldr r8, [r0, #24] ; [13 | 12]
|
||||
ldr r9, [r0, #28] ; [15 | 14]
|
||||
|
||||
; first pass (down the columns), columns 0 and 1
qadd16 r10, r2, r8 ; a1 [1+13 | 0+12]
|
||||
qadd16 r11, r4, r6 ; b1 [5+9 | 4+8]
|
||||
qsub16 r12, r4, r6 ; c1 [5-9 | 4-8]
|
||||
qsub16 lr, r2, r8 ; d1 [1-13 | 0-12]
|
||||
|
||||
qadd16 r2, r10, r11 ; a1 + b1 [1 | 0]
|
||||
qadd16 r4, r12, lr ; c1 + d1 [5 | 4]
|
||||
qsub16 r6, r10, r11 ; a1 - b1 [9 | 8]
|
||||
qsub16 r8, lr, r12 ; d1 - c1 [13 | 12]
|
||||
|
||||
; first pass, columns 2 and 3
qadd16 r10, r3, r9 ; a1 [3+15 | 2+14]
|
||||
qadd16 r11, r5, r7 ; b1 [7+11 | 6+10]
|
||||
qsub16 r12, r5, r7 ; c1 [7-11 | 6-10]
|
||||
qsub16 lr, r3, r9 ; d1 [3-15 | 2-14]
|
||||
|
||||
qadd16 r3, r10, r11 ; a1 + b1 [3 | 2]
|
||||
qadd16 r5, r12, lr ; c1 + d1 [7 | 6]
|
||||
qsub16 r7, r10, r11 ; a1 - b1 [11 | 10]
|
||||
qsub16 r9, lr, r12 ; d1 - c1 [15 | 14]
|
||||
|
||||
; first transform complete
|
||||
|
||||
; second pass (along the rows), rows 0 and 1
qsubaddx r10, r2, r3 ; [c1|a1] [1-2 | 0+3]
|
||||
qaddsubx r11, r2, r3 ; [b1|d1] [1+2 | 0-3]
|
||||
qsubaddx r12, r4, r5 ; [c1|a1] [5-6 | 4+7]
|
||||
qaddsubx lr, r4, r5 ; [b1|d1] [5+6 | 4-7]
|
||||
|
||||
qaddsubx r2, r10, r11 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r3, r11, r10 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
ldr r10, c0x00030003 ; rounding constant 3 per lane; kept in r10 for rows 2 and 3 too
|
||||
qaddsubx r4, r12, lr ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r5, lr, r12 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r2, r2, r10 ; [b2+3|c2+3]
|
||||
qadd16 r3, r3, r10 ; [a2+3|d2+3]
|
||||
qadd16 r4, r4, r10 ; [b2+3|c2+3]
|
||||
qadd16 r5, r5, r10 ; [a2+3|d2+3]
|
||||
|
||||
; High halfword: asr #19 = 16 (move to low half) + 3 (the >> 3).
; Low halfword: sign-extend with sxth first, then asr #3.
asr r12, r3, #19 ; [0]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r2, #19 ; [1]
|
||||
strh lr, [r1], #32
|
||||
sxth r2, r2
|
||||
sxth r3, r3
|
||||
asr r2, r2, #3 ; [2]
|
||||
strh r2, [r1], #32
|
||||
asr r3, r3, #3 ; [3]
|
||||
strh r3, [r1], #32
|
||||
|
||||
asr r12, r5, #19 ; [4]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r4, #19 ; [5]
|
||||
strh lr, [r1], #32
|
||||
sxth r4, r4
|
||||
sxth r5, r5
|
||||
asr r4, r4, #3 ; [6]
|
||||
strh r4, [r1], #32
|
||||
asr r5, r5, #3 ; [7]
|
||||
strh r5, [r1], #32
|
||||
|
||||
; second pass, rows 2 and 3
qsubaddx r2, r6, r7 ; [c1|a1] [9-10 | 8+11]
|
||||
qaddsubx r3, r6, r7 ; [b1|d1] [9+10 | 8-11]
|
||||
qsubaddx r4, r8, r9 ; [c1|a1] [13-14 | 12+15]
|
||||
qaddsubx r5, r8, r9 ; [b1|d1] [13+14 | 12-15]
|
||||
|
||||
qaddsubx r6, r2, r3 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r7, r3, r2 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
qaddsubx r8, r4, r5 ; [b2|c2] [c1+d1 | a1-b1]
|
||||
qaddsubx r9, r5, r4 ; [a2|d2] [b1+a1 | d1-c1]
|
||||
|
||||
qadd16 r6, r6, r10 ; [b2+3|c2+3]
|
||||
qadd16 r7, r7, r10 ; [a2+3|d2+3]
|
||||
qadd16 r8, r8, r10 ; [b2+3|c2+3]
|
||||
qadd16 r9, r9, r10 ; [a2+3|d2+3]
|
||||
|
||||
asr r12, r7, #19 ; [8]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r6, #19 ; [9]
|
||||
strh lr, [r1], #32
|
||||
sxth r6, r6
|
||||
sxth r7, r7
|
||||
asr r6, r6, #3 ; [10]
|
||||
strh r6, [r1], #32
|
||||
asr r7, r7, #3 ; [11]
|
||||
strh r7, [r1], #32
|
||||
|
||||
asr r12, r9, #19 ; [12]
|
||||
strh r12, [r1], #32
|
||||
asr lr, r8, #19 ; [13]
|
||||
strh lr, [r1], #32
|
||||
sxth r8, r8
|
||||
sxth r9, r9
|
||||
asr r8, r8, #3 ; [14]
|
||||
strh r8, [r1], #32
|
||||
asr r9, r9, #3 ; [15]
|
||||
strh r9, [r1], #32
|
||||
|
||||
ldmia sp!, {r4 - r12, pc}
|
||||
ENDP ; |vp8_short_inv_walsh4x4_v6|
|
||||
|
||||
|
||||
; Constant Pool
|
||||
c0x00030003 DCD 0x00030003
|
||||
END
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,286 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
EXPORT |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
MACRO
|
||||
TRANSPOSE_MATRIX $a0, $a1, $a2, $a3, $b0, $b1, $b2, $b3
|
||||
; Transposes a 4x4 byte matrix held one row per register.
; input: $a0, $a1, $a2, $a3; output: $b0, $b1, $b2, $b3
; Clobbers: $a0-$a3 are overwritten as scratch, so the inputs do not survive.
; Output $bN holds byte N of every input row (row 0 in the low byte).
|
||||
; a0: 03 02 01 00
|
||||
; a1: 13 12 11 10
|
||||
; a2: 23 22 21 20
|
||||
; a3: 33 32 31 30
|
||||
; b3 b2 b1 b0
|
||||
|
||||
uxtb16 $b1, $a1 ; xx 12 xx 10
|
||||
uxtb16 $b0, $a0 ; xx 02 xx 00
|
||||
uxtb16 $b3, $a3 ; xx 32 xx 30
|
||||
uxtb16 $b2, $a2 ; xx 22 xx 20
|
||||
orr $b1, $b0, $b1, lsl #8 ; 12 02 10 00
|
||||
orr $b3, $b2, $b3, lsl #8 ; 32 22 30 20
|
||||
|
||||
uxtb16 $a1, $a1, ror #8 ; xx 13 xx 11
|
||||
uxtb16 $a3, $a3, ror #8 ; xx 33 xx 31
|
||||
uxtb16 $a0, $a0, ror #8 ; xx 03 xx 01
|
||||
uxtb16 $a2, $a2, ror #8 ; xx 23 xx 21
|
||||
orr $a0, $a0, $a1, lsl #8 ; 13 03 11 01
|
||||
orr $a2, $a2, $a3, lsl #8 ; 33 23 31 21
|
||||
|
||||
; NOTE: in this file's only caller the outputs $b0..$b3 are used as
; p1, p0, q0, q1 respectively, so the outputs are described by byte index here.
pkhtb $b2, $b3, $b1, asr #16 ; 32 22 12 02 -- byte 2 of each row
|
||||
pkhbt $b0, $b1, $b3, lsl #16 ; 30 20 10 00 -- byte 0 of each row
|
||||
|
||||
pkhtb $b3, $a2, $a0, asr #16 ; 33 23 13 03 -- byte 3 of each row
|
||||
pkhbt $b1, $a0, $a2, lsl #16 ; 31 21 11 01 -- byte 1 of each row
|
||||
MEND
|
||||
|
||||
|
||||
|
||||
src RN r0
|
||||
pstep RN r1
|
||||
|
||||
;r0 unsigned char *src_ptr,
|
||||
;r1 int src_pixel_step,
|
||||
;r2 const char *blimit
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_horizontal_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Simple loop filter across a horizontal edge located at src.
;   r0 (src)   = first pixel of the row just below the edge (q0 row)
;   r1 (pstep) = byte distance between rows
;   r2         = pointer to the blimit byte
; Processes 16 pixels along the edge, 4 per iteration (one word per row).
; Reads p1/p0 from the two rows above src and q0/q1 from src and the row
; below; only p0 and q0 are written back.
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; blimit
|
||||
ldr r3, [src, -pstep, lsl #1] ; p1
|
||||
ldr r4, [src, -pstep] ; p0
|
||||
ldr r5, [src] ; q0
|
||||
ldr r6, [src, pstep] ; q1
|
||||
orr r12, r12, r12, lsl #8 ; blimit
|
||||
ldr r2, c0x80808080 ; sign-bias constant (r2 no longer needed as a pointer)
|
||||
orr r12, r12, r12, lsl #16 ; blimit
|
||||
mov r9, #4 ; double the count. we're doing 4 at a time
|
||||
mov lr, #0 ; need 0 in a couple places
|
||||
|
||||
|simple_hnext8|
|
||||
; vp8_simple_filter_mask()
|
||||
|
||||
; unsigned saturating subtraction in both directions, OR-ed, gives abs()
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r10, r4, r5 ; p0 - q0
|
||||
uqsub8 r11, r5, r4 ; q0 - p0
|
||||
orr r8, r8, r7 ; abs(p1 - q1)
|
||||
orr r10, r10, r11 ; abs(p0 - q0)
|
||||
uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2
|
||||
uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1
|
||||
uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r8, #0 ; all ones, the "filter" value for sel
|
||||
usub8 r10, r12, r10 ; compare to blimit. usub8 sets GE flags
|
||||
sel r10, r8, lr ; filter mask: 0xFF or 0x00 per byte
|
||||
cmp r10, #0
|
||||
beq simple_hskip_filter ; skip filtering if all masks are 0x00
|
||||
|
||||
;vp8_simple_filter()
|
||||
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
qadd8 r3, r3, r6 ; += q0 - p0
|
||||
ldr r8, c0x03030303
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, r10 ; vp8_filter &= mask
|
||||
|
||||
qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3
|
||||
|
||||
; a signed halving add with zero is a per-byte arithmetic shift right by 1;
; three of them give >> 3
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr
|
||||
shadd8 r8 , r8 , lr
|
||||
shadd8 r7 , r7 , lr ; Filter1 >>= 3
|
||||
shadd8 r8 , r8 , lr ; Filter2 >>= 3
|
||||
|
||||
qsub8 r5 ,r5, r7 ; u = q0 - Filter1
|
||||
qadd8 r4, r4, r8 ; u = p0 + Filter2
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
str r5, [src] ; store oq0 result
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
str r4, [src, -pstep] ; store op0 result
|
||||
|
||||
|simple_hskip_filter|
|
||||
subs r9, r9, #1 ; one group of 4 pixels done; flags drive the ne-conditional code below
|
||||
addne src, src, #4 ; next 4 pixels along the edge
|
||||
|
||||
ldrne r3, [src, -pstep, lsl #1] ; p1
|
||||
ldrne r4, [src, -pstep] ; p0
|
||||
ldrne r5, [src] ; q0
|
||||
ldrne r6, [src, pstep] ; q1
|
||||
|
||||
bne simple_hnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_horizontal_edge_armv6|
|
||||
|
||||
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
|
||||
|vp8_loop_filter_simple_vertical_edge_armv6| PROC
|
||||
;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
; Simple loop filter across a vertical edge located at src.
;   r0 (src)   = first pixel to the right of the edge (q0) in the first row
;   r1 (pstep) = byte distance between rows
;   r2         = pointer to the blimit byte
; Each iteration handles 4 rows: the 4 bytes straddling the edge
; (src-2 .. src+1) are gathered with two halfword loads per row, transposed so
; that each register holds one column (p1, p0, q0, q1), filtered, and the
; p0 / q0 bytes (src-1 and src) are written back. 4 iterations = 16 rows.
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
|
||||
ldrb r12, [r2] ; r12: blimit
|
||||
ldr r2, c0x80808080 ; sign-bias constant (r2 no longer needed as a pointer)
|
||||
orr r12, r12, r12, lsl #8
|
||||
|
||||
; load source data to r7, r8, r9, r10
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrh r4, [src], pstep
|
||||
orr r12, r12, r12, lsl #16 ; blimit replicated into all four bytes
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16 ; row 0: 4 bytes src-2 .. src+1
|
||||
|
||||
ldrh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16 ; row 1
|
||||
|
||||
ldrh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrh r6, [src], pstep
|
||||
mov r11, #4 ; double the count. we're doing 4 at a time
|
||||
|
||||
|simple_vnext8|
|
||||
; vp8_simple_filter_mask() function
|
||||
pkhbt r9, r3, r4, lsl #16 ; row 2
|
||||
pkhbt r10, r5, r6, lsl #16 ; row 3
|
||||
|
||||
;transpose r7, r8, r9, r10 to r3, r4, r5, r6
|
||||
TRANSPOSE_MATRIX r7, r8, r9, r10, r3, r4, r5, r6
|
||||
|
||||
; after the transpose: r3 = p1, r4 = p0, r5 = q0, r6 = q1 (one byte per row)
uqsub8 r7, r3, r6 ; p1 - q1
|
||||
uqsub8 r8, r6, r3 ; q1 - p1
|
||||
uqsub8 r9, r4, r5 ; p0 - q0
|
||||
uqsub8 r10, r5, r4 ; q0 - p0
|
||||
orr r7, r7, r8 ; abs(p1 - q1)
|
||||
orr r9, r9, r10 ; abs(p0 - q0)
|
||||
mov r8, #0 ; zero, used by uhadd8/shadd8/sel below
|
||||
uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2
|
||||
uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2
|
||||
uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2
|
||||
mvn r10, #0 ; r10 == -1
|
||||
|
||||
usub8 r7, r12, r7 ; compare to blimit (sets the GE flags used by sel)
|
||||
sel lr, r10, r8 ; filter mask
|
||||
|
||||
cmp lr, #0
|
||||
beq simple_vskip_filter ; skip filtering
|
||||
|
||||
;vp8_simple_filter() function
|
||||
eor r3, r3, r2 ; p1 offset to convert to a signed value
|
||||
eor r6, r6, r2 ; q1 offset to convert to a signed value
|
||||
eor r4, r4, r2 ; p0 offset to convert to a signed value
|
||||
eor r5, r5, r2 ; q0 offset to convert to a signed value
|
||||
|
||||
qsub8 r3, r3, r6 ; vp8_filter = p1 - q1
|
||||
qsub8 r6, r5, r4 ; q0 - p0
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r9, c0x03030303 ; r9 = 3
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter += q0 - p0
|
||||
ldr r7, c0x04040404
|
||||
|
||||
qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0))
|
||||
;STALL
|
||||
and r3, r3, lr ; vp8_filter &= mask
|
||||
|
||||
qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3
|
||||
qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4
|
||||
|
||||
; a signed halving add with zero is a per-byte arithmetic shift right by 1
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8
|
||||
shadd8 r3 , r3 , r8
|
||||
shadd8 r9 , r9 , r8 ; Filter2 >>= 3
|
||||
shadd8 r3 , r3 , r8 ; Filter1 >>= 3
|
||||
|
||||
;calculate output
|
||||
sub src, src, pstep, lsl #2 ; rewind the 4 rows advanced by the loads
|
||||
|
||||
qadd8 r4, r4, r9 ; u = p0 + Filter2
|
||||
qsub8 r5, r5, r3 ; u = q0 - Filter1
|
||||
eor r4, r4, r2 ; *op0 = u^0x80
|
||||
eor r5, r5, r2 ; *oq0 = u^0x80
|
||||
|
||||
; one byte of r4 (p0) and r5 (q0) per row, lowest byte first
strb r4, [src, #-1] ; store the result
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
mov r4, r4, lsr #8
|
||||
strb r5, [src], pstep
|
||||
mov r5, r5, lsr #8
|
||||
|
||||
strb r4, [src, #-1]
|
||||
strb r5, [src], pstep
|
||||
|
||||
|simple_vskip_filter|
|
||||
subs r11, r11, #1 ; flags drive the ne-conditional loads below
|
||||
|
||||
; load source data to r7, r8, r9, r10
; (only the loads are conditional; pld and pkhbt also run on the final pass,
; where their results are not used before the return)
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23] ; preload for next block
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
pkhbt r7, r3, r4, lsl #16
|
||||
|
||||
ldrneh r3, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r4, [src], pstep
|
||||
|
||||
pkhbt r8, r5, r6, lsl #16
|
||||
|
||||
ldrneh r5, [src, #-2]
|
||||
pld [src, #23]
|
||||
ldrneh r6, [src], pstep
|
||||
|
||||
bne simple_vnext8
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6|
|
||||
|
||||
; Constant Pool
|
||||
c0x80808080 DCD 0x80808080
|
||||
c0x03030303 DCD 0x03030303
|
||||
c0x04040404 DCD 0x04040404
|
||||
|
||||
END
|
|
@ -0,0 +1,273 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sixtap_predict8x4_armv6|
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
;-------------------------------------
|
||||
; r0 unsigned char *src_ptr,
|
||||
; r1 int src_pixels_per_line,
|
||||
; r2 int xoffset,
|
||||
; r3 int yoffset,
|
||||
; stack unsigned char *dst_ptr,
|
||||
; stack int dst_pitch
|
||||
;-------------------------------------
|
||||
;note: In first pass, store the result in transpose(8linesx9columns) on stack. Temporary stack size is 184.
|
||||
;Line width is 20 that is 9 short data plus 2 to make it 4bytes aligned. In second pass, load data from stack,
|
||||
;and the result is stored in transpose.
|
||||
|vp8_sixtap_predict8x4_armv6| PROC
; Two-pass six-tap prediction producing an 8-wide by 4-high block of bytes.
;   Pass 1 filters 9 source rows (starting two rows above src_ptr)
;   horizontally and stores the 16-bit results transposed into a stack
;   buffer of 8 lines, each 20 bytes apart.
;   Pass 2 filters each buffer line (one output column) and writes the
;   4 resulting bytes down that column of dst_ptr.
; xoffset == 0 replaces pass 1 with a plain copy (skip_firstpass_filter);
; yoffset == 0 replaces pass 2 with a plain copy (skip_secondpass_filter).
; Note: sp itself is used as the read pointer into the temporary buffer
; during pass 2, so every exit path must add back exactly 184 bytes in total.
|
||||
stmdb sp!, {r4 - r11, lr}
|
||||
str r3, [sp, #-184]! ;reserve space on stack for temporary storage, store yoffset
|
||||
|
||||
cmp r2, #0 ;skip first_pass filter if xoffset=0
|
||||
add lr, sp, #4 ;point to temporary buffer
|
||||
beq skip_firstpass_filter
|
||||
|
||||
;first-pass filter
|
||||
adr r12, filter8_coeff
|
||||
sub r0, r0, r1, lsl #1 ; start two rows above src_ptr
|
||||
|
||||
add r3, r1, #10 ; preload next row
|
||||
pld [r0, r3]
|
||||
|
||||
add r2, r12, r2, lsl #4 ;calculate filter location (16 bytes per table row)
|
||||
add r0, r0, #3 ;adjust src only for loading convenience
|
||||
|
||||
ldr r3, [r2] ; load up packed filter coefficients
|
||||
ldr r4, [r2, #4]
|
||||
ldr r5, [r2, #8]
|
||||
|
||||
mov r2, #0x90000 ; height=9 is top part of counter
|
||||
|
||||
sub r1, r1, #8 ; r1 = stride - 8, since r0 advances 8 bytes per row below
|
||||
|
||||
|first_pass_hloop_v6|
|
||||
ldrb r6, [r0, #-5] ; load source data
|
||||
ldrb r7, [r0, #-4]
|
||||
ldrb r8, [r0, #-3]
|
||||
ldrb r9, [r0, #-2]
|
||||
ldrb r10, [r0, #-1]
|
||||
|
||||
orr r2, r2, #0x4 ; construct loop counter. width=8=4x2
|
||||
|
||||
pkhbt r6, r6, r7, lsl #16 ; r7 | r6
|
||||
pkhbt r7, r7, r8, lsl #16 ; r8 | r7
|
||||
|
||||
pkhbt r8, r8, r9, lsl #16 ; r9 | r8
|
||||
pkhbt r9, r9, r10, lsl #16 ; r10 | r9
|
||||
|
||||
|first_pass_wloop_v6|
; each iteration produces two adjacent output pixels: r11 and r12
|
||||
smuad r11, r6, r3 ; vp8_filter[0], vp8_filter[1]
|
||||
smuad r12, r7, r3
|
||||
|
||||
ldrb r6, [r0], #1
|
||||
|
||||
smlad r11, r8, r4, r11 ; vp8_filter[2], vp8_filter[3]
|
||||
ldrb r7, [r0], #1
|
||||
smlad r12, r9, r4, r12
|
||||
|
||||
pkhbt r10, r10, r6, lsl #16 ; r6 (just loaded) | r10
|
||||
pkhbt r6, r6, r7, lsl #16 ; r7 (just loaded) | r6 (just loaded)
|
||||
smlad r11, r10, r5, r11 ; vp8_filter[4], vp8_filter[5]
|
||||
smlad r12, r6, r5, r12
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add r11, r11, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff ; test loop counter (flags consumed by movne/bne below)
|
||||
usat r11, #8, r11, asr #7
|
||||
add r12, r12, #0x40
|
||||
strh r11, [lr], #20 ; result is stored transposed: advance one 20-byte buffer line per pixel
|
||||
usat r12, #8, r12, asr #7
|
||||
|
||||
strh r12, [lr], #20
|
||||
|
||||
; slide the sample window along by two pixels for the next iteration
movne r11, r6
|
||||
movne r12, r7
|
||||
|
||||
movne r6, r8
|
||||
movne r7, r9
|
||||
movne r8, r10
|
||||
movne r9, r11
|
||||
movne r10, r12
|
||||
|
||||
bne first_pass_wloop_v6
|
||||
|
||||
;;add r9, ppl, #30 ; attempt to load 2 adjacent cache lines
|
||||
;;IF ARCHITECTURE=6
|
||||
;pld [src, ppl]
|
||||
;;pld [src, r9]
|
||||
;;ENDIF
|
||||
|
||||
subs r2, r2, #0x10000 ; one source row done (row count is in the upper halfword)
|
||||
|
||||
sub lr, lr, #158 ; 8 stores * 20 = 160 back, + 2 forward: next column of the buffer
|
||||
|
||||
add r0, r0, r1 ; move to next input line
|
||||
|
||||
add r11, r1, #18 ; preload next row. adding back block width(=8), which is subtracted earlier
|
||||
pld [r0, r11]
|
||||
|
||||
bne first_pass_hloop_v6
|
||||
|
||||
;second pass filter
|
||||
secondpass_filter
|
||||
ldr r3, [sp], #4 ; load back yoffset (sp now points at the temporary buffer)
|
||||
ldr r0, [sp, #216] ; load dst address from stack 180+36
|
||||
ldr r1, [sp, #220] ; load dst stride from stack 180+40
|
||||
|
||||
cmp r3, #0
|
||||
beq skip_secondpass_filter
|
||||
|
||||
adr r12, filter8_coeff
|
||||
add lr, r12, r3, lsl #4 ;calculate filter location
|
||||
|
||||
mov r2, #0x00080000 ; 8 buffer lines (output columns) in the upper halfword
|
||||
|
||||
ldr r3, [lr] ; load up packed filter coefficients
|
||||
ldr r4, [lr, #4]
|
||||
ldr r5, [lr, #8]
|
||||
|
||||
pkhbt r12, r4, r3 ; pack the filter differently
|
||||
pkhbt r11, r5, r4 ; (r12/r11 pair the taps for the one-sample-shifted output in r10)
|
||||
|
||||
second_pass_hloop_v6
|
||||
ldr r6, [sp] ; load the data
|
||||
ldr r7, [sp, #4]
|
||||
|
||||
orr r2, r2, #2 ; loop counter
|
||||
|
||||
second_pass_wloop_v6
; each iteration produces two outputs: lr from the current sample pair
; position and r10 from the position one sample further on
|
||||
smuad lr, r3, r6 ; apply filter
|
||||
smulbt r10, r3, r6
|
||||
|
||||
ldr r8, [sp, #8]
|
||||
|
||||
smlad lr, r4, r7, lr
|
||||
smladx r10, r12, r7, r10
|
||||
|
||||
ldrh r9, [sp, #12]
|
||||
|
||||
smlad lr, r5, r8, lr
|
||||
smladx r10, r11, r8, r10
|
||||
|
||||
add sp, sp, #4 ; advance the buffer read pointer by two samples
|
||||
smlatb r10, r5, r9, r10
|
||||
|
||||
sub r2, r2, #1
|
||||
|
||||
add lr, lr, #0x40 ; round_shift_and_clamp
|
||||
tst r2, #0xff
|
||||
usat lr, #8, lr, asr #7
|
||||
add r10, r10, #0x40
|
||||
strb lr, [r0], r1 ; the result is transposed back and stored
|
||||
usat r10, #8, r10, asr #7
|
||||
|
||||
strb r10, [r0],r1
|
||||
|
||||
movne r6, r7
|
||||
movne r7, r8
|
||||
|
||||
bne second_pass_wloop_v6
|
||||
|
||||
subs r2, r2, #0x10000
|
||||
add sp, sp, #12 ; update src for next loop (20-8)
|
||||
sub r0, r0, r1, lsl #2 ; back up the 4 rows just written
|
||||
add r0, r0, #1 ; and step to the next dst column
|
||||
|
||||
bne second_pass_hloop_v6
|
||||
|
||||
add sp, sp, #20 ; 4 + 8*20 + 20 = 184: temporary storage fully released
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
;--------------------
|
||||
skip_firstpass_filter
; xoffset == 0: copy 9 rows of 8 source bytes into the buffer, transposed,
; widening each byte to a halfword
|
||||
sub r0, r0, r1, lsl #1
|
||||
sub r1, r1, #8
|
||||
mov r2, #9
|
||||
|
||||
skip_firstpass_hloop
|
||||
ldrb r4, [r0], #1 ; load data
|
||||
subs r2, r2, #1 ; flags stay live until the bne below
|
||||
ldrb r5, [r0], #1
|
||||
strh r4, [lr], #20 ; store it to intermediate buffer
|
||||
ldrb r6, [r0], #1 ; load data
|
||||
strh r5, [lr], #20
|
||||
ldrb r7, [r0], #1
|
||||
strh r6, [lr], #20
|
||||
ldrb r8, [r0], #1
|
||||
strh r7, [lr], #20
|
||||
ldrb r9, [r0], #1
|
||||
strh r8, [lr], #20
|
||||
ldrb r10, [r0], #1
|
||||
strh r9, [lr], #20
|
||||
ldrb r11, [r0], #1
|
||||
strh r10, [lr], #20
|
||||
add r0, r0, r1 ; move to next input line
|
||||
strh r11, [lr], #20
|
||||
|
||||
sub lr, lr, #158 ; move over to next column
|
||||
bne skip_firstpass_hloop
|
||||
|
||||
b secondpass_filter
|
||||
|
||||
;--------------------
|
||||
skip_secondpass_filter
; yoffset == 0: copy 4 samples per buffer line straight to dst, one column per line
|
||||
mov r2, #8
|
||||
add sp, sp, #4 ;start from src[0] instead of src[-2]
|
||||
|
||||
skip_secondpass_hloop
|
||||
ldr r6, [sp], #4
|
||||
subs r2, r2, #1
|
||||
ldr r8, [sp], #4
|
||||
|
||||
mov r7, r6, lsr #16 ; unpack
|
||||
strb r6, [r0], r1
|
||||
mov r9, r8, lsr #16
|
||||
strb r7, [r0], r1
|
||||
add sp, sp, #12 ; 20-8
|
||||
strb r8, [r0], r1
|
||||
strb r9, [r0], r1
|
||||
|
||||
sub r0, r0, r1, lsl #2
|
||||
add r0, r0, #1
|
||||
|
||||
bne skip_secondpass_hloop
|
||||
|
||||
add sp, sp, #16 ; 180 - (160 +4)
|
||||
|
||||
ldmia sp!, {r4 - r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
;-----------------
|
||||
;One word each is reserved. Label filter8_coeff can be used to access the data.
|
||||
;Data address: filter8_coeff, filter8_coeff+4, filter8_coeff+8 ...
|
||||
filter8_coeff
|
||||
DCD 0x00000000, 0x00000080, 0x00000000, 0x00000000
|
||||
DCD 0xfffa0000, 0x000c007b, 0x0000ffff, 0x00000000
|
||||
DCD 0xfff50002, 0x0024006c, 0x0001fff8, 0x00000000
|
||||
DCD 0xfff70000, 0x0032005d, 0x0000fffa, 0x00000000
|
||||
DCD 0xfff00003, 0x004d004d, 0x0003fff0, 0x00000000
|
||||
DCD 0xfffa0000, 0x005d0032, 0x0000fff7, 0x00000000
|
||||
DCD 0xfff80001, 0x006c0024, 0x0002fff5, 0x00000000
|
||||
DCD 0xffff0000, 0x007b000c, 0x0000fffa, 0x00000000
|
||||
|
||||
;DCD 0, 0, 128, 0, 0, 0
|
||||
;DCD 0, -6, 123, 12, -1, 0
|
||||
;DCD 2, -11, 108, 36, -8, 1
|
||||
;DCD 0, -9, 93, 50, -6, 0
|
||||
;DCD 3, -16, 77, 77, -16, 3
|
||||
;DCD 0, -6, 50, 93, -9, 0
|
||||
;DCD 1, -8, 36, 108, -11, 2
|
||||
;DCD 0, -1, 12, 123, -6, 0
|
||||
|
||||
END
|
|
@ -0,0 +1,96 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sad16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 const unsigned char *src_ptr
|
||||
; r1 int src_stride
|
||||
; r2 const unsigned char *ref_ptr
|
||||
; r3 int ref_stride
|
||||
; stack max_sad (not used)
|
||||
|vp8_sad16x16_armv6| PROC
; Returns in r0 the sum of absolute differences between a 16x16 block at
; src_ptr and a 16x16 block at ref_ptr. The loop runs 8 times and handles
; two rows per iteration. Two accumulators are used per row: r4 (running
; total) and r8 (partial sum, folded into r4 once per row).
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
pld [r0, r1, lsl #1]
|
||||
pld [r2, r3, lsl #1]
|
||||
|
||||
mov r4, #0 ; sad = 0;
|
||||
mov r5, #8 ; loop count
|
||||
|
||||
loop
|
||||
; 1st row
|
||||
ldr r6, [r0, #0x0] ; load 4 src pixels (1A)
|
||||
ldr r8, [r2, #0x0] ; load 4 ref pixels (1A)
|
||||
ldr r7, [r0, #0x4] ; load 4 src pixels (1A)
|
||||
ldr r9, [r2, #0x4] ; load 4 ref pixels (1A)
|
||||
ldr r10, [r0, #0x8] ; load 4 src pixels (1B)
|
||||
ldr r11, [r0, #0xC] ; load 4 src pixels (1B)
|
||||
|
||||
usada8 r4, r8, r6, r4 ; calculate sad for 4 pixels
|
||||
usad8 r8, r7, r9 ; calculate sad for 4 pixels (r8 becomes the partial sum)
|
||||
|
||||
ldr r12, [r2, #0x8] ; load 4 ref pixels (1B)
|
||||
ldr lr, [r2, #0xC] ; load 4 ref pixels (1B)
|
||||
|
||||
add r0, r0, r1 ; set src pointer to next row
|
||||
add r2, r2, r3 ; set ref pointer to next row
|
||||
|
||||
pld [r0, r1, lsl #1]
|
||||
pld [r2, r3, lsl #1]
|
||||
|
||||
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
|
||||
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
|
||||
|
||||
ldr r6, [r0, #0x0] ; load 4 src pixels (2A)
|
||||
ldr r7, [r0, #0x4] ; load 4 src pixels (2A)
|
||||
add r4, r4, r8 ; add partial sad values
|
||||
|
||||
; 2nd row
|
||||
ldr r8, [r2, #0x0] ; load 4 ref pixels (2A)
|
||||
ldr r9, [r2, #0x4] ; load 4 ref pixels (2A)
|
||||
ldr r10, [r0, #0x8] ; load 4 src pixels (2B)
|
||||
ldr r11, [r0, #0xC] ; load 4 src pixels (2B)
|
||||
|
||||
usada8 r4, r6, r8, r4 ; calculate sad for 4 pixels
|
||||
usad8 r8, r7, r9 ; calculate sad for 4 pixels
|
||||
|
||||
ldr r12, [r2, #0x8] ; load 4 ref pixels (2B)
|
||||
ldr lr, [r2, #0xC] ; load 4 ref pixels (2B)
|
||||
|
||||
add r0, r0, r1 ; set src pointer to next row
|
||||
add r2, r2, r3 ; set ref pointer to next row
|
||||
|
||||
usada8 r4, r10, r12, r4 ; calculate sad for 4 pixels
|
||||
usada8 r8, r11, lr, r8 ; calculate sad for 4 pixels
|
||||
|
||||
pld [r0, r1, lsl #1]
|
||||
pld [r2, r3, lsl #1]
|
||||
|
||||
subs r5, r5, #1 ; decrement loop counter
|
||||
add r4, r4, r8 ; add partial sad values (no S suffix: flags from subs survive)
|
||||
|
||||
bne loop
|
||||
|
||||
mov r0, r4 ; return sad
|
||||
ldmfd sp!, {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance16x16_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance16x16_armv6| PROC
; Computes the variance of a 16x16 block against a reference block.
; Accumulates sum (r8, signed sum of src - ref) and sse (r11, sum of
; squared differences) over 16 rows of 16 pixels, stores sse through the
; pointer passed on the stack, and returns sse - ((sum * sum) >> 8) in r0.
; Per group of 4 pixels: usub8 in both directions plus sel (against the
; zero in lr) splits the byte differences into a positive part and a
; negative part; usad8 against zero sums each part, and the OR of both
; parts gives the absolute differences that are squared for sse.
|
||||
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r8, #0 ; initialize sum = 0
|
||||
mov r11, #0 ; initialize sse = 0
|
||||
mov r12, #16 ; set loop counter to 16 (=block height)
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r4, [r0, #0] ; load 4 src pixels
|
||||
ldr r5, [r2, #0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
; calculate total sum
; NOTE(review): the S suffix on the next two instructions looks unnecessary;
; the flags are rewritten by "subs r12" before the "bne" reads them.
|
||||
adds r8, r8, r4 ; add positive differences to sum
|
||||
subs r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r4, [r0, #4] ; load 4 src pixels
|
||||
ldr r5, [r2, #4] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 3rd 4 pixels
|
||||
ldr r4, [r0, #8] ; load 4 src pixels
|
||||
ldr r5, [r2, #8] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 4th 4 pixels
|
||||
ldr r4, [r0, #12] ; load 4 src pixels
|
||||
ldr r5, [r2, #12] ; load 4 ref pixels
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r9, r5, r4 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set ref_ptr to next row
|
||||
sel r6, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
smlad r11, r10, r10, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r6, [sp, #40] ; get address of sse (above the 10 registers saved on entry)
|
||||
mul r0, r8, r8 ; sum * sum
|
||||
str r11, [r6] ; store sse
|
||||
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||||
|
||||
ldmfd sp!, {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance8x8_armv6|
|
||||
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance8x8_armv6| PROC
; Computes the variance of an 8x8 block against a reference block.
; Accumulates sum (r4, signed sum of src - ref) and sse (r5, sum of
; squared differences) over 8 rows of 8 pixels, stores sse through the
; pointer passed on the stack, and returns sse - ((sum * sum) >> 6) in r0.
; Per group of 4 pixels: usub8 in both directions plus sel (against the
; zero in lr) splits the byte differences into a positive part and a
; negative part; usad8 against zero sums each part, and the OR of both
; parts gives the absolute differences that are squared for sse.
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r12, #8 ; set loop counter to 8 (=block height)
|
||||
mov r4, #0 ; initialize sum = 0
|
||||
mov r5, #0 ; initialize sse = 0
|
||||
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r6, [r0, #0x0] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x0] ; load 4 ref pixels
|
||||
|
||||
mov lr, #0 ; constant zero
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r6, [r0, #0x4] ; load 4 src pixels
|
||||
ldr r7, [r2, #0x4] ; load 4 ref pixels
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r8, r6, r7 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r10, r8, lr ; select bytes with positive difference
|
||||
usub8 r9, r7, r6 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set ref_ptr to next row
|
||||
sel r8, r9, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r6, r10, lr ; calculate sum of positive differences
|
||||
usad8 r7, r8, lr ; calculate sum of negative differences
|
||||
orr r8, r8, r10 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r4, r4, r6 ; add positive differences to sum
|
||||
sub r4, r4, r7 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r7, r8 ; byte (two pixels) to halfwords
|
||||
uxtb16 r10, r8, ror #8 ; another two pixels to halfwords
|
||||
smlad r5, r7, r7, r5 ; dual signed multiply, add and accumulate (1)
|
||||
subs r12, r12, #1 ; next row
|
||||
smlad r5, r10, r10, r5 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r8, [sp, #32] ; get address of sse (above the 8 registers saved on entry)
|
||||
mul r1, r4, r4 ; sum * sum
|
||||
str r5, [r8] ; store sse
|
||||
sub r0, r5, r1, ASR #6 ; return (sse - ((sum * sum) >> 6))
|
||||
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
|
@ -0,0 +1,182 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance_halfpixvar16x16_h_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance_halfpixvar16x16_h_armv6| PROC
; Variance of a 16x16 block where each source pixel is first averaged with
; its right-hand neighbour (loads at offset +0 and +1), then compared with
; the reference block. Accumulates sum in r8 and sse in r11 over 16 rows,
; stores sse through the stack-passed pointer and returns
; sse - ((sum * sum) >> 8) in r0.
; The average is formed per byte by mvn / uhsub8 / eor with 0x80808080 (r10),
; i.e. the rounded average (a + b + 1) >> 1.
; Each row reads source bytes 0..16, one byte beyond the 16-pixel width.
|
||||
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r8, #0 ; initialize sum = 0
|
||||
ldr r10, c80808080
|
||||
mov r11, #0 ; initialize sse = 0
|
||||
mov r12, #16 ; set loop counter to 16 (=block height)
|
||||
mov lr, #0 ; constant zero
|
||||
loop
|
||||
; 1st 4 pixels
|
||||
ldr r4, [r0, #0] ; load 4 src pixels
|
||||
ldr r6, [r0, #1] ; load 4 src pixels with 1 byte offset
|
||||
ldr r5, [r2, #0] ; load 4 ref pixels
|
||||
|
||||
; bilinear interpolation
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
adds r8, r8, r4 ; add positive differences to sum
|
||||
subs r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r4, [r0, #4] ; load 4 src pixels
|
||||
ldr r6, [r0, #5] ; load 4 src pixels with 1 byte offset
|
||||
ldr r5, [r2, #4] ; load 4 ref pixels
|
||||
|
||||
; bilinear interpolation
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 3rd 4 pixels
|
||||
ldr r4, [r0, #8] ; load 4 src pixels
|
||||
ldr r6, [r0, #9] ; load 4 src pixels with 1 byte offset
|
||||
ldr r5, [r2, #8] ; load 4 ref pixels
|
||||
|
||||
; bilinear interpolation
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 4th 4 pixels
|
||||
ldr r4, [r0, #12] ; load 4 src pixels
|
||||
ldr r6, [r0, #13] ; load 4 src pixels with 1 byte offset
|
||||
ldr r5, [r2, #12] ; load 4 ref pixels
|
||||
|
||||
; bilinear interpolation
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set ref_ptr to next row
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
subs r12, r12, #1
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r6, [sp, #40] ; get address of sse (above the 10 registers saved on entry)
|
||||
mul r0, r8, r8 ; sum * sum
|
||||
str r11, [r6] ; store sse
|
||||
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||||
|
||||
ldmfd sp!, {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
c80808080
|
||||
DCD 0x80808080
|
||||
|
||||
END
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance_halfpixvar16x16_hv_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
|vp8_variance_halfpixvar16x16_hv_armv6| PROC
; Variance of a 16x16 block where each source pixel is first interpolated
; from its 2x2 neighbourhood (rows N and N+1, byte offsets +0 and +1) and
; then compared with the reference block. Accumulates sum in r8 and sse in
; r11 over 16 rows, stores sse through the stack-passed pointer and returns
; sse - ((sum * sum) >> 8) in r0.
; Each rounded average is formed per byte by mvn / uhsub8 / eor with
; 0x80808080 (r10). Every iteration reads source bytes 0..16 of both row N
; and row N+1 (r9 = r0 + r1).
|
||||
|
||||
stmfd sp!, {r4-r12, lr}
|
||||
|
||||
pld [r0, r1, lsl #0]
|
||||
pld [r2, r3, lsl #0]
|
||||
|
||||
mov r8, #0 ; initialize sum = 0
|
||||
ldr r10, c80808080
|
||||
mov r11, #0 ; initialize sse = 0
|
||||
mov r12, #16 ; set loop counter to 16 (=block height)
|
||||
mov lr, #0 ; constant zero
|
||||
loop
|
||||
add r9, r0, r1 ; pointer to pixels on the next row
|
||||
; 1st 4 pixels
|
||||
ldr r4, [r0, #0] ; load source pixels a, row N
|
||||
ldr r6, [r0, #1] ; load source pixels b, row N
|
||||
ldr r5, [r9, #0] ; load source pixels c, row N+1
|
||||
ldr r7, [r9, #1] ; load source pixels d, row N+1
|
||||
|
||||
; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
|
||||
mvn r7, r7
|
||||
uhsub8 r5, r5, r7
|
||||
eor r5, r5, r10
|
||||
; z = (x + y + 1) >> 1, interpolate half pixel values vertically
|
||||
mvn r5, r5
|
||||
uhsub8 r4, r4, r5
|
||||
ldr r5, [r2, #0] ; load 4 ref pixels
|
||||
eor r4, r4, r10
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
pld [r0, r1, lsl #1]
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
pld [r2, r3, lsl #1]
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
; calculate total sum
|
||||
adds r8, r8, r4 ; add positive differences to sum
|
||||
subs r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 2nd 4 pixels
|
||||
ldr r4, [r0, #4] ; load source pixels a, row N
|
||||
ldr r6, [r0, #5] ; load source pixels b, row N
|
||||
ldr r5, [r9, #4] ; load source pixels c, row N+1
|
||||
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
ldr r7, [r9, #5] ; load source pixels d, row N+1
|
||||
|
||||
; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
|
||||
mvn r7, r7
|
||||
uhsub8 r5, r5, r7
|
||||
eor r5, r5, r10
|
||||
; z = (x + y + 1) >> 1, interpolate half pixel values vertically
|
||||
mvn r5, r5
|
||||
uhsub8 r4, r4, r5
|
||||
ldr r5, [r2, #4] ; load 4 ref pixels
|
||||
eor r4, r4, r10
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 3rd 4 pixels
|
||||
ldr r4, [r0, #8] ; load source pixels a, row N
|
||||
ldr r6, [r0, #9] ; load source pixels b, row N
|
||||
ldr r5, [r9, #8] ; load source pixels c, row N+1
|
||||
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
ldr r7, [r9, #9] ; load source pixels d, row N+1
|
||||
|
||||
; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
|
||||
mvn r7, r7
|
||||
uhsub8 r5, r5, r7
|
||||
eor r5, r5, r10
|
||||
; z = (x + y + 1) >> 1, interpolate half pixel values vertically
|
||||
mvn r5, r5
|
||||
uhsub8 r4, r4, r5
|
||||
ldr r5, [r2, #8] ; load 4 ref pixels
|
||||
eor r4, r4, r10
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
|
||||
; 4th 4 pixels
|
||||
ldr r4, [r0, #12] ; load source pixels a, row N
|
||||
ldr r6, [r0, #13] ; load source pixels b, row N
|
||||
ldr r5, [r9, #12] ; load source pixels c, row N+1
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
ldr r7, [r9, #13] ; load source pixels d, row N+1
|
||||
|
||||
; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N
|
||||
mvn r6, r6
|
||||
uhsub8 r4, r4, r6
|
||||
eor r4, r4, r10
|
||||
; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1
|
||||
mvn r7, r7
|
||||
uhsub8 r5, r5, r7
|
||||
eor r5, r5, r10
|
||||
; z = (x + y + 1) >> 1, interpolate half pixel values vertically
|
||||
mvn r5, r5
|
||||
uhsub8 r4, r4, r5
|
||||
ldr r5, [r2, #12] ; load 4 ref pixels
|
||||
eor r4, r4, r10
|
||||
|
||||
usub8 r6, r4, r5 ; calculate difference
|
||||
add r0, r0, r1 ; set src_ptr to next row
|
||||
sel r7, r6, lr ; select bytes with positive difference
|
||||
usub8 r6, r5, r4 ; calculate difference with reversed operands
|
||||
add r2, r2, r3 ; set ref_ptr to next row
|
||||
sel r6, r6, lr ; select bytes with negative difference
|
||||
|
||||
; calculate partial sums
|
||||
usad8 r4, r7, lr ; calculate sum of positive differences
|
||||
usad8 r5, r6, lr ; calculate sum of negative differences
|
||||
orr r6, r6, r7 ; differences of all 4 pixels
|
||||
|
||||
; calculate total sum
|
||||
add r8, r8, r4 ; add positive differences to sum
|
||||
sub r8, r8, r5 ; subtract negative differences from sum
|
||||
|
||||
; calculate sse
|
||||
uxtb16 r5, r6 ; byte (two pixels) to halfwords
|
||||
uxtb16 r7, r6, ror #8 ; another two pixels to halfwords
|
||||
smlad r11, r5, r5, r11 ; dual signed multiply, add and accumulate (1)
|
||||
subs r12, r12, #1
|
||||
smlad r11, r7, r7, r11 ; dual signed multiply, add and accumulate (2)
|
||||
|
||||
bne loop
|
||||
|
||||
; return stuff
|
||||
ldr r6, [sp, #40] ; get address of sse (above the 10 registers saved on entry)
|
||||
mul r0, r8, r8 ; sum * sum
|
||||
str r11, [r6] ; store sse
|
||||
sub r0, r11, r0, lsr #8 ; return (sse - ((sum * sum) >> 8))
|
||||
|
||||
ldmfd sp!, {r4-r12, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
c80808080
|
||||
DCD 0x80808080
|
||||
|
||||
END
|
|
@ -0,0 +1,184 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_variance_halfpixvar16x16_v_armv6|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 unsigned char *src_ptr
|
||||
; r1 int source_stride
|
||||
; r2 unsigned char *ref_ptr
|
||||
; r3 int recon_stride
|
||||
; stack unsigned int *sse
|
||||
; Variance of a 16x16 block against a reference block, where the source
; block is first interpolated half a pixel vertically: each source pixel
; is replaced by the rounded average of itself and the pixel directly
; below it, so source rows N and N+1 are read on every iteration.
;
; In:   r0 = src_ptr, r1 = source_stride, r2 = ref_ptr, r3 = recon_stride,
;       [sp] = pointer that receives the sum of squared differences (sse)
; Out:  r0 = sse - ((sum * sum) >> 8); sse is also stored through the pointer
;
; Register roles inside the loop:
;   r8  = running signed sum of (interpolated - ref)
;   r9  = address of source row N+1
;   r10 = 0x80808080 (per-byte bias used by the averaging trick)
;   r11 = running sse
;   r12 = rows remaining
;   lr  = constant zero
|vp8_variance_halfpixvar16x16_v_armv6| PROC

    stmfd   sp!, {r4-r12, lr}           ; 10 registers = 40 bytes pushed

    pld     [r0, r1, lsl #0]
    pld     [r2, r3, lsl #0]

    mov     r8, #0                      ; initialize sum = 0
    ldr     r10, c80808080
    mov     r11, #0                     ; initialize sse = 0
    mov     r12, #16                    ; set loop counter to 16 (=block height)
    mov     lr, #0                      ; constant zero
loop
    add     r9, r0, r1                  ; r9 = source row N+1

    ; ---- 1st 4 pixels ----
    ldr     r4, [r0, #0]                ; load 4 src pixels
    ldr     r6, [r9, #0]                ; load 4 src pixels from next row
    ldr     r5, [r2, #0]                ; load 4 ref pixels

    ; per-byte rounded average (a + b + 1) >> 1:
    ; (a - ~b) halved, then biased by 0x80 in each byte lane
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    usub8   r6, r4, r5                  ; calculate difference (sets GE per byte)
    pld     [r0, r1, lsl #1]
    sel     r7, r6, lr                  ; select bytes with positive difference
    usub8   r6, r5, r4                  ; calculate difference with reversed operands
    pld     [r2, r3, lsl #1]
    sel     r6, r6, lr                  ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr                  ; calculate sum of positive differences
    usad8   r5, r6, lr                  ; calculate sum of negative differences
    orr     r6, r6, r7                  ; absolute differences of all 4 pixels

    ; calculate total sum (flags are not needed here)
    add     r8, r8, r4                  ; add positive differences to sum
    sub     r8, r8, r5                  ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                      ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8              ; another two pixels to halfwords
    smlad   r11, r5, r5, r11            ; dual signed multiply, add and accumulate (1)

    ; ---- 2nd 4 pixels ----
    ldr     r4, [r0, #4]                ; load 4 src pixels
    ldr     r6, [r9, #4]                ; load 4 src pixels from next row
    ldr     r5, [r2, #4]                ; load 4 ref pixels

    ; per-byte rounded average of rows N and N+1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    ; second half of the previous group's sse, issued after the loads above
    smlad   r11, r7, r7, r11            ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5                  ; calculate difference
    sel     r7, r6, lr                  ; select bytes with positive difference
    usub8   r6, r5, r4                  ; calculate difference with reversed operands
    sel     r6, r6, lr                  ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr                  ; calculate sum of positive differences
    usad8   r5, r6, lr                  ; calculate sum of negative differences
    orr     r6, r6, r7                  ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4                  ; add positive differences to sum
    sub     r8, r8, r5                  ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                      ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8              ; another two pixels to halfwords
    smlad   r11, r5, r5, r11            ; dual signed multiply, add and accumulate (1)

    ; ---- 3rd 4 pixels ----
    ldr     r4, [r0, #8]                ; load 4 src pixels
    ldr     r6, [r9, #8]                ; load 4 src pixels from next row
    ldr     r5, [r2, #8]                ; load 4 ref pixels

    ; per-byte rounded average of rows N and N+1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11            ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5                  ; calculate difference
    sel     r7, r6, lr                  ; select bytes with positive difference
    usub8   r6, r5, r4                  ; calculate difference with reversed operands
    sel     r6, r6, lr                  ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr                  ; calculate sum of positive differences
    usad8   r5, r6, lr                  ; calculate sum of negative differences
    orr     r6, r6, r7                  ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4                  ; add positive differences to sum
    sub     r8, r8, r5                  ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                      ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8              ; another two pixels to halfwords
    smlad   r11, r5, r5, r11            ; dual signed multiply, add and accumulate (1)

    ; ---- 4th 4 pixels ----
    ldr     r4, [r0, #12]               ; load 4 src pixels
    ldr     r6, [r9, #12]               ; load 4 src pixels from next row
    ldr     r5, [r2, #12]               ; load 4 ref pixels

    ; per-byte rounded average of rows N and N+1
    mvn     r6, r6
    uhsub8  r4, r4, r6
    eor     r4, r4, r10

    smlad   r11, r7, r7, r11            ; dual signed multiply, add and accumulate (2)

    usub8   r6, r4, r5                  ; calculate difference
    add     r0, r0, r1                  ; set src_ptr to next row
    sel     r7, r6, lr                  ; select bytes with positive difference
    usub8   r6, r5, r4                  ; calculate difference with reversed operands
    add     r2, r2, r3                  ; set dst_ptr to next row
    sel     r6, r6, lr                  ; select bytes with negative difference

    ; calculate partial sums
    usad8   r4, r7, lr                  ; calculate sum of positive differences
    usad8   r5, r6, lr                  ; calculate sum of negative differences
    orr     r6, r6, r7                  ; absolute differences of all 4 pixels

    ; calculate total sum
    add     r8, r8, r4                  ; add positive differences to sum
    sub     r8, r8, r5                  ; subtract negative differences from sum

    ; calculate sse
    uxtb16  r5, r6                      ; byte (two pixels) to halfwords
    uxtb16  r7, r6, ror #8              ; another two pixels to halfwords
    smlad   r11, r5, r5, r11            ; dual signed multiply, add and accumulate (1)
    smlad   r11, r7, r7, r11            ; dual signed multiply, add and accumulate (2)


    subs    r12, r12, #1                ; one row done

    bne     loop

    ; return stuff
    ldr     r6, [sp, #40]               ; get address of sse (first stack argument)
    mul     r0, r8, r8                  ; sum * sum
    str     r11, [r6]                   ; store sse
    sub     r0, r11, r0, lsr #8         ; return (sse - ((sum * sum) >> 8))

    ldmfd   sp!, {r4-r12, pc}

    ENDP
|
||||
|
||||
c80808080
|
||||
DCD 0x80808080
|
||||
|
||||
END
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "bilinearfilter_arm.h"
|
||||
|
||||
/* Separable bilinear filter: a horizontal pass writes 16-bit intermediate
 * rows into a local scratch buffer, and a vertical pass turns those rows
 * into the final pixels at dst_ptr.
 */
void vp8_filter_block2d_bil_armv6(unsigned char *src_ptr,
                                  unsigned char *dst_ptr,
                                  unsigned int src_pitch,
                                  unsigned int dst_pitch,
                                  const short *HFilter,
                                  const short *VFilter,
                                  int Width,
                                  int Height)
{
    /* Scratch rows produced by the horizontal pass. */
    unsigned short first_pass_out[36*16];

    /* Horizontal pass. One row more than Height is requested; presumably the
     * vertical pass reads the row below each output row -- confirm against
     * the second-pass implementation. */
    vp8_filter_block2d_bil_first_pass_armv6(src_ptr, first_pass_out, src_pitch,
                                            Height + 1, Width, HFilter);

    /* Vertical pass over the intermediate rows. */
    vp8_filter_block2d_bil_second_pass_armv6(first_pass_out, dst_ptr, dst_pitch,
                                             Height, Width, VFilter);
}
|
||||
|
||||
|
||||
void vp8_bilinear_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4);
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
|
||||
HFilter = vp8_bilinear_filters[xoffset];
|
||||
VFilter = vp8_bilinear_filters[yoffset];
|
||||
|
||||
vp8_filter_block2d_bil_armv6(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16);
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BILINEARFILTER_ARM_H
|
||||
#define BILINEARFILTER_ARM_H
|
||||
|
||||
extern void vp8_filter_block2d_bil_first_pass_armv6
|
||||
(
|
||||
const unsigned char *src_ptr,
|
||||
unsigned short *dst_ptr,
|
||||
unsigned int src_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_bil_second_pass_armv6
|
||||
(
|
||||
const unsigned short *src_ptr,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch,
|
||||
unsigned int height,
|
||||
unsigned int width,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#endif /* BILINEARFILTER_ARM_H */
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
|
||||
#if HAVE_NEON
|
||||
extern void vp8_dequantize_b_loop_neon(short *Q, short *DQC, short *DQ);
|
||||
#endif
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern void vp8_dequantize_b_loop_v6(short *Q, short *DQC, short *DQ);
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
|
||||
/* NEON wrapper: forwards the block's quantized coefficients (d->qcoeff),
 * the caller's DQC table and the block's output buffer (d->dqcoeff) to the
 * assembly loop. DQC presumably holds the dequantization factors -- confirm
 * against the callers.
 */
void vp8_dequantize_b_neon(BLOCKD *d, short *DQC)
{
    vp8_dequantize_b_loop_neon(d->qcoeff, DQC, d->dqcoeff);
}
|
||||
#endif
|
||||
|
||||
#if HAVE_MEDIA
|
||||
/* ARMv6 wrapper: forwards the block's quantized coefficients (d->qcoeff),
 * the caller's DQC table and the block's output buffer (d->dqcoeff) to the
 * assembly loop. DQC presumably holds the dequantization factors -- confirm
 * against the callers.
 */
void vp8_dequantize_b_v6(BLOCKD *d, short *DQC)
{
    vp8_dequantize_b_loop_v6(d->qcoeff, DQC, d->dqcoeff);
}
|
||||
#endif
|
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include <math.h>
|
||||
#include "vp8/common/filter.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 8x8
|
||||
extern void vp8_filter_block2d_first_pass_8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
// 16x16
|
||||
extern void vp8_filter_block2d_first_pass_16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
short *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int output_width,
|
||||
unsigned int output_height,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter4_block2d_second_pass_armv6
|
||||
(
|
||||
short *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int output_pitch,
|
||||
unsigned int cnt,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
extern void vp8_filter_block2d_first_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
|
||||
extern void vp8_filter_block2d_second_pass_only_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
unsigned char *output_ptr,
|
||||
unsigned int src_pixels_per_line,
|
||||
unsigned int cnt,
|
||||
unsigned int output_pitch,
|
||||
const short *vp8_filter
|
||||
);
|
||||
|
||||
#if HAVE_MEDIA
|
||||
void vp8_sixtap_predict4x4_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */
|
||||
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
/* Vfilter is null. First pass only */
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
/*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
|
||||
vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
|
||||
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Vfilter is a 4 tap filter */
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
/* Vfilter is 6 tap filter */
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x8_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_8x8_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void vp8_sixtap_predict16x16_armv6
|
||||
(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch
|
||||
)
|
||||
{
|
||||
const short *HFilter;
|
||||
const short *VFilter;
|
||||
DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16); /* Temp data buffer used in filtering */
|
||||
|
||||
HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
|
||||
VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */
|
||||
|
||||
if (xoffset && !yoffset)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
|
||||
}
|
||||
/* Hfilter is null. Second pass only */
|
||||
else if (!xoffset && yoffset)
|
||||
{
|
||||
vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (yoffset & 0x1)
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
|
||||
vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
else
|
||||
{
|
||||
vp8_filter_block2d_first_pass_16x16_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
|
||||
vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
#include "vp8/common/loopfilter.h"
|
||||
#include "vp8/common/onyxc_int.h"
|
||||
|
||||
#define prototype_loopfilter(sym) \
|
||||
void sym(unsigned char *src, int pitch, const unsigned char *blimit,\
|
||||
const unsigned char *limit, const unsigned char *thresh, int count)
|
||||
|
||||
#if HAVE_MEDIA
|
||||
extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
|
||||
extern prototype_loopfilter(vp8_mbloop_filter_vertical_edge_armv6);
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
typedef void loopfilter_y_neon(unsigned char *src, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh);
|
||||
typedef void loopfilter_uv_neon(unsigned char *u, int pitch,
|
||||
unsigned char blimit, unsigned char limit, unsigned char thresh,
|
||||
unsigned char *v);
|
||||
|
||||
extern loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon;
|
||||
extern loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon;
|
||||
|
||||
extern loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon;
|
||||
extern loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon;
|
||||
#endif
|
||||
|
||||
#if HAVE_MEDIA
|
||||
/* ARMV6/MEDIA loopfilter functions*/
|
||||
/* Horizontal MB filtering */
|
||||
/* Macroblock horizontal-edge filter (ARMv6): filters the edge at y_ptr with
 * count 2, then the edges at u_ptr and v_ptr with count 1 each. Either
 * chroma pointer may be NULL, in which case that plane is skipped.
 */
void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
    /* luma */
    vp8_mbloop_filter_horizontal_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

    /* chroma U */
    if (u_ptr)
    {
        vp8_mbloop_filter_horizontal_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
    }

    /* chroma V */
    if (v_ptr)
    {
        vp8_mbloop_filter_horizontal_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
    }
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
/* Macroblock vertical-edge filter (ARMv6): filters the edge at y_ptr with
 * count 2, then the edges at u_ptr and v_ptr with count 1 each. Either
 * chroma pointer may be NULL, in which case that plane is skipped.
 */
void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi)
{
    /* luma */
    vp8_mbloop_filter_vertical_edge_armv6(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2);

    /* chroma U */
    if (u_ptr)
    {
        vp8_mbloop_filter_vertical_edge_armv6(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
    }

    /* chroma V */
    if (v_ptr)
    {
        vp8_mbloop_filter_vertical_edge_armv6(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
    }
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
/* Inner-block horizontal-edge filter (ARMv6): filters the luma edges at
 * rows 4, 8 and 12 (count 2), then the chroma edge at row 4 of each
 * non-NULL chroma plane (count 1).
 */
void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    int row;

    /* luma: rows 4, 8, 12 */
    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_horizontal_edge_armv6(y_ptr + row * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    }

    /* chroma U: row 4 */
    if (u_ptr)
    {
        vp8_loop_filter_horizontal_edge_armv6(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
    }

    /* chroma V: row 4 */
    if (v_ptr)
    {
        vp8_loop_filter_horizontal_edge_armv6(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
    }
}
|
||||
|
||||
/* Simple inner-block horizontal-edge filter (ARMv6): applies the simple
 * horizontal edge filter to the luma edges at rows 4, 8 and 12.
 */
void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
    int row;

    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + row * y_stride, y_stride, blimit);
    }
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
/* Inner-block vertical-edge filter (ARMv6): filters the luma edges at
 * columns 4, 8 and 12 (count 2), then the chroma edge at column 4 of each
 * non-NULL chroma plane (count 1).
 */
void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    int col;

    /* luma: columns 4, 8, 12 */
    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_vertical_edge_armv6(y_ptr + col, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2);
    }

    /* chroma U: column 4 */
    if (u_ptr)
    {
        vp8_loop_filter_vertical_edge_armv6(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
    }

    /* chroma V: column 4 */
    if (v_ptr)
    {
        vp8_loop_filter_vertical_edge_armv6(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1);
    }
}
|
||||
|
||||
/* Simple inner-block vertical-edge filter (ARMv6): applies the simple
 * vertical edge filter to the luma edges at columns 4, 8 and 12.
 */
void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, int y_stride,
                               const unsigned char *blimit)
{
    int col;

    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_simple_vertical_edge_armv6(y_ptr + col, y_stride, blimit);
    }
}
|
||||
#endif
|
||||
|
||||
#if HAVE_NEON
|
||||
/* NEON loopfilter functions */
|
||||
/* Horizontal MB filtering */
|
||||
/* Macroblock horizontal-edge filter (NEON). The limits are read once from
 * lfi and passed by value. Luma is filtered first; when u_ptr is non-NULL
 * both chroma planes are filtered by a single call that takes u_ptr and
 * v_ptr together (v_ptr is not checked separately).
 */
void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    const unsigned char mblim   = *lfi->mblim;
    const unsigned char lim     = *lfi->lim;
    const unsigned char hev_thr = *lfi->hev_thr;

    /* luma */
    vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

    /* chroma U and V together */
    if (u_ptr)
    {
        vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
    }
}
|
||||
|
||||
/* Vertical MB Filtering */
|
||||
/* Macroblock vertical-edge filter (NEON). The limits are read once from
 * lfi and passed by value. Luma is filtered first; when u_ptr is non-NULL
 * both chroma planes are filtered by a single call that takes u_ptr and
 * v_ptr together (v_ptr is not checked separately).
 */
void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi)
{
    const unsigned char mblim   = *lfi->mblim;
    const unsigned char lim     = *lfi->lim;
    const unsigned char hev_thr = *lfi->hev_thr;

    /* luma */
    vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr);

    /* chroma U and V together */
    if (u_ptr)
    {
        vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr);
    }
}
|
||||
|
||||
/* Horizontal B Filtering */
|
||||
/* Inner-block horizontal-edge filter (NEON). The limits are read once from
 * lfi and passed by value. Luma edges at rows 4, 8 and 12 are filtered;
 * when u_ptr is non-NULL the row-4 edge of both chroma planes is filtered
 * by a single call (v_ptr is not checked separately).
 */
void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
    const unsigned char blim    = *lfi->blim;
    const unsigned char lim     = *lfi->lim;
    const unsigned char hev_thr = *lfi->hev_thr;
    int row;

    /* luma: rows 4, 8, 12 */
    for (row = 4; row <= 12; row += 4)
    {
        vp8_loop_filter_horizontal_edge_y_neon(y_ptr + row * y_stride, y_stride, blim, lim, hev_thr);
    }

    /* chroma U and V together: row 4 */
    if (u_ptr)
    {
        vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride);
    }
}
|
||||
|
||||
/* Vertical B Filtering */
|
||||
/* Inner-block vertical-edge filter (NEON). The limits are read once from
 * lfi and passed by value. Luma edges at columns 4, 8 and 12 are filtered;
 * when u_ptr is non-NULL the column-4 edge of both chroma planes is
 * filtered by a single call (v_ptr is not checked separately).
 */
void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi)
{
    const unsigned char blim    = *lfi->blim;
    const unsigned char lim     = *lfi->lim;
    const unsigned char hev_thr = *lfi->hev_thr;
    int col;

    /* luma: columns 4, 8, 12 */
    for (col = 4; col <= 12; col += 4)
    {
        vp8_loop_filter_vertical_edge_y_neon(y_ptr + col, y_stride, blim, lim, hev_thr);
    }

    /* chroma U and V together: column 4 */
    if (u_ptr)
    {
        vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4);
    }
}
|
||||
#endif
|
|
@ -0,0 +1,357 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict16x16_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_bilinear_predict16x16_neon
;
; Two-pass (horizontal, then vertical) 2-tap bilinear filter producing a
; 16x16 block. Either pass is skipped when its offset is zero.
;
; In:
;   r0          unsigned char *src_ptr
;   r1          int  src_pixels_per_line
;   r2          int  xoffset   (selects the first-pass tap pair, 8 bytes each)
;   r3          int  yoffset   (selects the second-pass tap pair)
;   [sp, #0]    unsigned char *dst_ptr   (held in r4)
;   [sp, #4]    int  dst_pitch           (held in r5)
;
; Stack: 12 bytes of core registers + 64 bytes for d8-d15, plus a
;        272-byte (17 rows x 16 bytes) scratch buffer when both passes run.
; Preserves: r4, r5 and d8-d15 (callee-saved under the AAPCS).
; Clobbers:  r0-r3, r12, lr, q0-q3, q8-q15, flags.
;-----------------------------------------------------------------------
|vp8_bilinear_predict16x16_neon| PROC
    push            {r4-r5, lr}
    vpush           {d8-d15}                ;q4-q7 are used below and are callee-saved

    adr             r12, bifilter16_coeff
    ldr             r4, [sp, #76]           ;dst_ptr   (12 + 64 bytes pushed above)
    ldr             r5, [sp, #80]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             secondpass_bfilter16x16_only

    add             r2, r12, r2, lsl #3     ;calculate filter location

    cmp             r3, #0                  ;skip second_pass filter if yoffset=0

    vld1.s32        {d31}, [r2]             ;load first_pass filter (does not touch flags)

    beq             firstpass_bfilter16x16_only

    sub             sp, sp, #272            ;reserve space on stack for temporary storage
    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    mov             lr, sp
    vld1.u8         {d5, d6, d7}, [r0], r1

    mov             r2, #3                  ;loop counter
    vld1.u8         {d8, d9, d10}, [r0], r1

    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {d11, d12, d13}, [r0], r1

    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (17x16)
;Each iteration filters 4 rows and pre-loads the next 4.
filt_blk2d_fp16x16_loop_neon
    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1              ;flags consumed by the bne at loop end

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7

    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    vqrshrn.u16     d21, q14, #7
    vld1.u8         {d5, d6, d7}, [r0], r1

    vst1.u8         {d14, d15, d16, d17}, [lr]!     ;store result
    vld1.u8         {d8, d9, d10}, [r0], r1
    vst1.u8         {d18, d19, d20, d21}, [lr]!
    vld1.u8         {d11, d12, d13}, [r0], r1

    bne             filt_blk2d_fp16x16_loop_neon

;First-pass filtering for rest 5 lines
    vld1.u8         {d14, d15, d16}, [r0], r1

    vmull.u8        q9, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q10, d3, d0
    vmull.u8        q11, d5, d0
    vmull.u8        q12, d6, d0
    vmull.u8        q13, d8, d0
    vmull.u8        q14, d9, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1

    vmlal.u8        q9, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q11, d5, d1
    vmlal.u8        q13, d8, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1

    vmlal.u8        q10, d3, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q12, d6, d1
    vmlal.u8        q14, d9, d1

    vmull.u8        q1, d11, d0
    vmull.u8        q2, d12, d0
    vmull.u8        q3, d14, d0
    vmull.u8        q4, d15, d0

    vext.8          d11, d11, d12, #1       ;construct src_ptr[1]
    vext.8          d14, d14, d15, #1

    vmlal.u8        q1, d11, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q3, d14, d1

    vext.8          d12, d12, d13, #1
    vext.8          d15, d15, d16, #1

    vmlal.u8        q2, d12, d1             ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q4, d15, d1

    vqrshrn.u16     d10, q9, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d11, q10, #7
    vqrshrn.u16     d12, q11, #7
    vqrshrn.u16     d13, q12, #7
    vqrshrn.u16     d14, q13, #7
    vqrshrn.u16     d15, q14, #7
    vqrshrn.u16     d16, q1, #7
    vqrshrn.u16     d17, q2, #7
    vqrshrn.u16     d18, q3, #7
    vqrshrn.u16     d19, q4, #7

    vst1.u8         {d10, d11, d12, d13}, [lr]!     ;store result
    vst1.u8         {d14, d15, d16, d17}, [lr]!
    vst1.u8         {d18, d19}, [lr]!

;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    sub             lr, lr, #272            ;rewind to the start of the scratch buffer

    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vld1.u8         {d22, d23}, [lr]!       ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]
    mov             r12, #4                 ;loop counter

;Each iteration consumes 5 rows (q11-q15) and emits 4; q15 is carried
;into q11 as the first row of the next iteration.
filt_blk2d_sp16x16_loop_neon
    vld1.u8         {d24, d25}, [lr]!
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [lr]!
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [lr]!
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [lr]!

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    subs            r12, r12, #1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    vst1.u8         {d4, d5}, [r4], r5
    vst1.u8         {d6, d7}, [r4], r5
    vmov            q11, q15
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_sp16x16_loop_neon

    add             sp, sp, #272            ;release the scratch buffer

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;--------------------
firstpass_bfilter16x16_only
    mov             r2, #4                  ;loop counter
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vdup.8          d1, d31[4]

;First Pass: output_height lines x output_width columns (16x16)
filt_blk2d_fpo16x16_loop_neon
    vld1.u8         {d2, d3, d4}, [r0], r1  ;load src data
    vld1.u8         {d5, d6, d7}, [r0], r1
    vld1.u8         {d8, d9, d10}, [r0], r1
    vld1.u8         {d11, d12, d13}, [r0], r1

    pld             [r0]
    pld             [r0, r1]
    pld             [r0, r1, lsl #1]

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d3, d0
    vmull.u8        q9, d5, d0
    vmull.u8        q10, d6, d0
    vmull.u8        q11, d8, d0
    vmull.u8        q12, d9, d0
    vmull.u8        q13, d11, d0
    vmull.u8        q14, d12, d0

    vext.8          d2, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d5, d6, #1
    vext.8          d8, d8, d9, #1
    vext.8          d11, d11, d12, #1

    vmlal.u8        q7, d2, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q9, d5, d1
    vmlal.u8        q11, d8, d1
    vmlal.u8        q13, d11, d1

    vext.8          d3, d3, d4, #1
    vext.8          d6, d6, d7, #1
    vext.8          d9, d9, d10, #1
    vext.8          d12, d12, d13, #1

    vmlal.u8        q8, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q10, d6, d1
    vmlal.u8        q12, d9, d1
    vmlal.u8        q14, d12, d1

    subs            r2, r2, #1

    vqrshrn.u16     d14, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d15, q8, #7
    vqrshrn.u16     d16, q9, #7
    vqrshrn.u16     d17, q10, #7
    vqrshrn.u16     d18, q11, #7
    vqrshrn.u16     d19, q12, #7
    vqrshrn.u16     d20, q13, #7
    vst1.u8         {d14, d15}, [r4], r5    ;store result
    vqrshrn.u16     d21, q14, #7

    vst1.u8         {d16, d17}, [r4], r5
    vst1.u8         {d18, d19}, [r4], r5
    vst1.u8         {d20, d21}, [r4], r5

    bne             filt_blk2d_fpo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

;---------------------
secondpass_bfilter16x16_only
;Second pass: 16x16
;secondpass_filter
    add             r3, r12, r3, lsl #3
    mov             r12, #4                 ;loop counter
    vld1.u32        {d31}, [r3]             ;load second_pass filter
    vld1.u8         {d22, d23}, [r0], r1    ;load src data

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

filt_blk2d_spo16x16_loop_neon
    vld1.u8         {d24, d25}, [r0], r1
    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {d26, d27}, [r0], r1
    vmull.u8        q2, d23, d0
    vld1.u8         {d28, d29}, [r0], r1
    vmull.u8        q3, d24, d0
    vld1.u8         {d30, d31}, [r0], r1

    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d24, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d25, d1
    vmlal.u8        q3, d26, d1
    vmlal.u8        q4, d27, d1
    vmlal.u8        q5, d28, d1
    vmlal.u8        q6, d29, d1
    vmlal.u8        q7, d30, d1
    vmlal.u8        q8, d31, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2, d3}, [r4], r5      ;store result
    subs            r12, r12, #1
    vst1.u8         {d4, d5}, [r4], r5
    vmov            q11, q15                ;last row becomes first row of next iteration
    vst1.u8         {d6, d7}, [r4], r5
    vst1.u8         {d8, d9}, [r4], r5

    bne             filt_blk2d_spo16x16_loop_neon

    vpop            {d8-d15}
    pop             {r4-r5,pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
|
||||
bifilter16_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,130 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict4x4_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_bilinear_predict4x4_neon
;
; Two-pass (horizontal, then vertical) 2-tap bilinear filter producing a
; 4x4 block. Either pass is skipped when its offset is zero.
;
; In:
;   r0          unsigned char *src_ptr
;   r1          int  src_pixels_per_line
;   r2          int  xoffset   (selects the first-pass tap pair, 8 bytes each)
;   r3          int  yoffset   (selects the second-pass tap pair)
;   [sp, #0]    unsigned char *dst_ptr   (held in r4)
;   [sp, #4]    int  dst_pitch           (held in lr)
;
; Stack: 8 bytes of core registers + 64 bytes for d8-d15.
; Preserves: r4 and d8-d15 (callee-saved under the AAPCS).
; Clobbers:  r0-r3, r12, lr, q0-q3, q8, q9, q13-q15, flags.
;-----------------------------------------------------------------------
|vp8_bilinear_predict4x4_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;q4-q7 are used below and are callee-saved

    adr             r12, bifilter4_coeff
    ldr             r4, [sp, #72]           ;dst_ptr   (8 + 64 bytes pushed above)
    ldr             lr, [sp, #76]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (5x4)
    vld1.u8         {d2}, [r0], r1          ;load src data
    add             r2, r12, r2, lsl #3     ;calculate Hfilter location (2coeffsx4bytes=8bytes)

    vld1.u8         {d3}, [r0], r1
    vld1.u32        {d31}, [r2]             ;first_pass filter

    vld1.u8         {d4}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0-d1)
    vld1.u8         {d5}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {d6}, [r0], r1

    vshr.u64        q4, q1, #8              ;construct src_ptr[1]
    vshr.u64        q5, q2, #8
    vshr.u64        d12, d6, #8

    vzip.32         d2, d3                  ;put 2-line data in 1 register (src_ptr[0])
    vzip.32         d4, d5
    vzip.32         d8, d9                  ;put 2-line data in 1 register (src_ptr[1])
    vzip.32         d10, d11

    vmull.u8        q7, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q8, d4, d0
    vmull.u8        q9, d6, d0

    vmlal.u8        q7, d8, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q8, d10, d1
    vmlal.u8        q9, d12, d1

    vqrshrn.u16     d28, q7, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d29, q8, #7
    vqrshrn.u16     d30, q9, #7

;Second pass: 4x4
;On entry d28 = rows 0,1; d29 = rows 2,3; low half of d30 = row 4.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3     ;calculate Vfilter location
    vld1.u32        {d31}, [r3]             ;load second_pass filter

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d28, d0
    vmull.u8        q2, d29, d0

    vext.8          d26, d28, d29, #4       ;construct src_ptr[pixel_step]
    vext.8          d27, d29, d30, #4

    vmlal.u8        q1, d26, d1
    vmlal.u8        q2, d27, d1

    add             r0, r4, lr              ;r0/r1/r2 = addresses of dst rows 1/2/3
    add             r1, r0, lr
    add             r2, r1, lr

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7

    vst1.32         {d2[0]}, [r4]           ;store result
    vst1.32         {d2[1]}, [r0]
    vst1.32         {d3[0]}, [r1]
    vst1.32         {d3[1]}, [r2]

    vpop            {d8-d15}
    pop             {r4, pc}

;--------------------
skip_firstpass_filter

    vld1.32         {d28[0]}, [r0], r1      ;load src data
    vld1.32         {d28[1]}, [r0], r1
    vld1.32         {d29[0]}, [r0], r1
    vld1.32         {d29[1]}, [r0], r1
    vld1.32         {d30[0]}, [r0], r1

    b               secondpass_filter

;---------------------
skip_secondpass_filter
    vst1.32         {d28[0]}, [r4], lr      ;store result
    vst1.32         {d28[1]}, [r4], lr
    vst1.32         {d29[0]}, [r4], lr
    vst1.32         {d29[1]}, [r4], lr

    vpop            {d8-d15}
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
|
||||
bifilter4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,135 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict8x4_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_bilinear_predict8x4_neon
;
; Two-pass (horizontal, then vertical) 2-tap bilinear filter producing a
; block 8 pixels wide and 4 rows high. Either pass is skipped when its
; offset is zero.
;
; In:
;   r0          unsigned char *src_ptr
;   r1          int  src_pixels_per_line
;   r2          int  xoffset   (selects the first-pass tap pair, 8 bytes each)
;   r3          int  yoffset   (selects the second-pass tap pair)
;   [sp, #0]    unsigned char *dst_ptr   (held in r4)
;   [sp, #4]    int  dst_pitch           (held in lr)
;
; Stack: 8 bytes of core registers + 64 bytes for d8-d15.
; Preserves: r4 and d8-d15 (callee-saved under the AAPCS).
; Clobbers:  r0-r3, r12, lr, q0-q3, q8-q13, d31, flags.
;-----------------------------------------------------------------------
|vp8_bilinear_predict8x4_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;q4-q7 are used below and are callee-saved

    adr             r12, bifilter8x4_coeff
    ldr             r4, [sp, #72]           ;dst_ptr   (8 + 64 bytes pushed above)
    ldr             lr, [sp, #76]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (5x8)
    add             r2, r12, r2, lsl #3     ;calculate filter location

    vld1.u8         {q1}, [r0], r1          ;load src data
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vld1.u8         {q5}, [r0], r1
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d23, q7, #7
    vqrshrn.u16     d24, q8, #7
    vqrshrn.u16     d25, q9, #7
    vqrshrn.u16     d26, q10, #7

;Second pass: 4x8
;On entry d22-d26 hold the 5 intermediate rows.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3
    add             r0, r4, lr              ;r0 = address of dst row 1

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr              ;r1 = address of dst row 2

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1

    add             r2, r1, lr              ;r2 = address of dst row 3

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7

    vst1.u8         {d2}, [r4]              ;store result
    vst1.u8         {d3}, [r0]
    vst1.u8         {d4}, [r1]
    vst1.u8         {d5}, [r2]

    vpop            {d8-d15}
    pop             {r4, pc}

;--------------------
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1

    b               secondpass_filter

;---------------------
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr

    vpop            {d8-d15}
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
|
||||
bifilter8x4_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,183 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_bilinear_predict8x8_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_bilinear_predict8x8_neon
;
; Two-pass (horizontal, then vertical) 2-tap bilinear filter producing an
; 8x8 block. Either pass is skipped when its offset is zero.
;
; In:
;   r0          unsigned char *src_ptr
;   r1          int  src_pixels_per_line
;   r2          int  xoffset   (selects the first-pass tap pair, 8 bytes each)
;   r3          int  yoffset   (selects the second-pass tap pair)
;   [sp, #0]    unsigned char *dst_ptr   (held in r4)
;   [sp, #4]    int  dst_pitch           (held in lr)
;
; Stack: 8 bytes of core registers + 64 bytes for d8-d15.
; Preserves: r4 and d8-d15 (callee-saved under the AAPCS).
; Clobbers:  r0-r3, r12, lr, q0-q3, q8-q15, flags.
;-----------------------------------------------------------------------
|vp8_bilinear_predict8x8_neon| PROC
    push            {r4, lr}
    vpush           {d8-d15}                ;q4-q7 are used below and are callee-saved

    adr             r12, bifilter8_coeff
    ldr             r4, [sp, #72]           ;dst_ptr   (8 + 64 bytes pushed above)
    ldr             lr, [sp, #76]           ;dst_pitch

    cmp             r2, #0                  ;skip first_pass filter if xoffset=0
    beq             skip_firstpass_filter

;First pass: output_height lines x output_width columns (9x8)
    add             r2, r12, r2, lsl #3     ;calculate filter location

    vld1.u8         {q1}, [r0], r1          ;load src data
    vld1.u32        {d31}, [r2]             ;load first_pass filter
    vld1.u8         {q2}, [r0], r1
    vdup.8          d0, d31[0]              ;first_pass filter (d0 d1)
    vld1.u8         {q3}, [r0], r1
    vdup.8          d1, d31[4]
    vld1.u8         {q4}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1

    vld1.u8         {q1}, [r0], r1          ;load src data
    vqrshrn.u16     d22, q6, #7             ;shift/round/saturate to u8
    vld1.u8         {q2}, [r0], r1
    vqrshrn.u16     d23, q7, #7
    vld1.u8         {q3}, [r0], r1
    vqrshrn.u16     d24, q8, #7
    vld1.u8         {q4}, [r0], r1
    vqrshrn.u16     d25, q9, #7

;first_pass filtering on the rest 5-line data
    vld1.u8         {q5}, [r0], r1

    vmull.u8        q6, d2, d0              ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q7, d4, d0
    vmull.u8        q8, d6, d0
    vmull.u8        q9, d8, d0
    vmull.u8        q10, d10, d0

    vext.8          d3, d2, d3, #1          ;construct src_ptr[1]
    vext.8          d5, d4, d5, #1
    vext.8          d7, d6, d7, #1
    vext.8          d9, d8, d9, #1
    vext.8          d11, d10, d11, #1

    vmlal.u8        q6, d3, d1              ;(src_ptr[1] * vp8_filter[1])
    vmlal.u8        q7, d5, d1
    vmlal.u8        q8, d7, d1
    vmlal.u8        q9, d9, d1
    vmlal.u8        q10, d11, d1

    vqrshrn.u16     d26, q6, #7             ;shift/round/saturate to u8
    vqrshrn.u16     d27, q7, #7
    vqrshrn.u16     d28, q8, #7
    vqrshrn.u16     d29, q9, #7
    vqrshrn.u16     d30, q10, #7

;Second pass: 8x8
;On entry d22-d30 hold the 9 intermediate rows.
secondpass_filter
    cmp             r3, #0                  ;skip second_pass filter if yoffset=0
    beq             skip_secondpass_filter

    add             r3, r12, r3, lsl #3
    add             r0, r4, lr              ;r0 = address of dst row 1

    vld1.u32        {d31}, [r3]             ;load second_pass filter
    add             r1, r0, lr              ;r1 = address of dst row 2

    vdup.8          d0, d31[0]              ;second_pass filter parameters (d0 d1)
    vdup.8          d1, d31[4]

    vmull.u8        q1, d22, d0             ;(src_ptr[0] * vp8_filter[0])
    vmull.u8        q2, d23, d0
    vmull.u8        q3, d24, d0
    vmull.u8        q4, d25, d0
    vmull.u8        q5, d26, d0
    vmull.u8        q6, d27, d0
    vmull.u8        q7, d28, d0
    vmull.u8        q8, d29, d0

    vmlal.u8        q1, d23, d1             ;(src_ptr[pixel_step] * vp8_filter[1])
    vmlal.u8        q2, d24, d1
    vmlal.u8        q3, d25, d1
    vmlal.u8        q4, d26, d1
    vmlal.u8        q5, d27, d1
    vmlal.u8        q6, d28, d1
    vmlal.u8        q7, d29, d1
    vmlal.u8        q8, d30, d1

    vqrshrn.u16     d2, q1, #7              ;shift/round/saturate to u8
    vqrshrn.u16     d3, q2, #7
    vqrshrn.u16     d4, q3, #7
    vqrshrn.u16     d5, q4, #7
    vqrshrn.u16     d6, q5, #7
    vqrshrn.u16     d7, q6, #7
    vqrshrn.u16     d8, q7, #7
    vqrshrn.u16     d9, q8, #7

    vst1.u8         {d2}, [r4]              ;store result (rows 0 and 1 via r4/r0)
    vst1.u8         {d3}, [r0]
    vst1.u8         {d4}, [r1], lr          ;rows 2-7 walk r1 down by dst_pitch
    vst1.u8         {d5}, [r1], lr
    vst1.u8         {d6}, [r1], lr
    vst1.u8         {d7}, [r1], lr
    vst1.u8         {d8}, [r1], lr
    vst1.u8         {d9}, [r1], lr

    vpop            {d8-d15}
    pop             {r4, pc}

;--------------------
skip_firstpass_filter
    vld1.u8         {d22}, [r0], r1         ;load src data
    vld1.u8         {d23}, [r0], r1
    vld1.u8         {d24}, [r0], r1
    vld1.u8         {d25}, [r0], r1
    vld1.u8         {d26}, [r0], r1
    vld1.u8         {d27}, [r0], r1
    vld1.u8         {d28}, [r0], r1
    vld1.u8         {d29}, [r0], r1
    vld1.u8         {d30}, [r0], r1

    b               secondpass_filter

;---------------------
skip_secondpass_filter
    vst1.u8         {d22}, [r4], lr         ;store result
    vst1.u8         {d23}, [r4], lr
    vst1.u8         {d24}, [r4], lr
    vst1.u8         {d25}, [r4], lr
    vst1.u8         {d26}, [r4], lr
    vst1.u8         {d27}, [r4], lr
    vst1.u8         {d28}, [r4], lr
    vst1.u8         {d29}, [r4], lr

    vpop            {d8-d15}
    pop             {r4, pc}

    ENDP
|
||||
|
||||
;-----------------
|
||||
|
||||
bifilter8_coeff
|
||||
DCD 128, 0, 112, 16, 96, 32, 80, 48, 64, 64, 48, 80, 32, 96, 16, 112
|
||||
|
||||
END
|
|
@ -0,0 +1,584 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_build_intra_predictors_mby_neon_func|
|
||||
EXPORT |vp8_build_intra_predictors_mby_s_neon_func|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_build_intra_predictors_mby_neon_func
;
; Builds a 16x16 luma intra predictor and writes it as 16 contiguous
; 16-byte rows starting at ypred_ptr.
;
; In:
;   r0          unsigned char *y_buffer
;   r1          unsigned char *ypred_ptr
;   r2          int  y_stride
;   r3          int  mode   (0 = DC, 1 = V, 2 = H, 3 = TM; any other value
;                            falls through to the DC path)
;   [sp, #0]    int  Up     (non-zero when the row above may be read)
;   [sp, #4]    int  Left   (non-zero when the column to the left may be read)
;
; Stack: 24 bytes of core registers + 64 bytes for d8-d15.
; Preserves: r4-r8 and d8-d15 (callee-saved under the AAPCS).
; Clobbers:  r0, r1, r3, r12, q0-q3, q8-q15, flags.
;-----------------------------------------------------------------------
|vp8_build_intra_predictors_mby_neon_func| PROC
    push            {r4-r8, lr}
    vpush           {d8-d15}            ; q4-q7 are used below and are callee-saved

    cmp             r3, #0
    beq             case_dc_pred
    cmp             r3, #1
    beq             case_v_pred
    cmp             r3, #2
    beq             case_h_pred
    cmp             r3, #3
    beq             case_tm_pred

case_dc_pred
    ldr             r4, [sp, #88]       ; Up   (24 + 64 bytes pushed above)
    ldr             r5, [sp, #92]       ; Left

    ; Default the DC average to 128
    mov             r12, #128
    vdup.u8         q0, r12

    ; Zero out running sum
    mov             r12, #0

    ; compute shift and jump
    adds            r7, r4, r5
    beq             skip_dc_pred_up_left

    ; Load above row, if it exists
    cmp             r4, #0
    beq             skip_dc_pred_up

    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1              ; pairwise-add down to two 64-bit partial sums
    vpaddl.u16      q3, q2
    vpaddl.u32      q4, q3

    ; Move back to integer registers
    vmov.32         r4, d8[0]
    vmov.32         r6, d9[0]

    add             r12, r4, r6

skip_dc_pred_up

    cmp             r5, #0
    beq             skip_dc_pred_left

    sub             r0, r0, #1

    ; Load left row, if it exists
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0]

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

skip_dc_pred_left
    add             r7, r7, #3          ; Shift = 3 + Up + Left
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; add rounding term 1 << (Shift - 1)
    mov             r5, r12, lsr r7     ; expected_dc

    vdup.u8         q0, r5

skip_dc_pred_up_left
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!

    vpop            {d8-d15}
    pop             {r4-r8,pc}

case_v_pred
    ; Copy down above row
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q0}, [r1]!

    vpop            {d8-d15}
    pop             {r4-r8,pc}

case_h_pred
    ; Load 4x yleft_col
    sub             r0, r0, #1

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    vpop            {d8-d15}
    pop             {r4-r8,pc}

case_tm_pred
    ; Load yabove_row
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]

    ; Load ytop_left
    sub             r3, r3, #1
    ldrb            r7, [r3]

    vdup.u16        q7, r7

    ; Compute yabove_row - ytop_left
    mov             r3, #1
    vdup.u8         q0, r3

    vmull.u8        q4, d16, d0         ; multiply by 1: widens the u8 row to 16 bits
    vmull.u8        q5, d17, d0

    vsub.s16        q4, q4, q7
    vsub.s16        q5, q5, q7

    ; Load 4x yleft_col
    sub             r0, r0, #1
    mov             r12, #4             ; 4 iterations x 4 rows

case_tm_pred_loop
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    vqadd.s16       q8, q0, q4          ; left pixel + (above - top_left)
    vqadd.s16       q9, q0, q5

    vqadd.s16       q10, q1, q4
    vqadd.s16       q11, q1, q5

    vqadd.s16       q12, q2, q4
    vqadd.s16       q13, q2, q5

    vqadd.s16       q14, q3, q4
    vqadd.s16       q15, q3, q5

    vqshrun.s16     d0, q8, #0          ; saturate signed 16-bit to unsigned 8-bit
    vqshrun.s16     d1, q9, #0

    vqshrun.s16     d2, q10, #0
    vqshrun.s16     d3, q11, #0

    vqshrun.s16     d4, q12, #0
    vqshrun.s16     d5, q13, #0

    vqshrun.s16     d6, q14, #0
    vqshrun.s16     d7, q15, #0

    vst1.u8         {q0}, [r1]!
    vst1.u8         {q1}, [r1]!
    vst1.u8         {q2}, [r1]!
    vst1.u8         {q3}, [r1]!

    subs            r12, r12, #1
    bne             case_tm_pred_loop

    vpop            {d8-d15}
    pop             {r4-r8,pc}

    ENDP
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_build_intra_predictors_mby_s_neon_func
;
; Builds a 16x16 luma intra prediction directly into the frame buffer.
;
; In:    r0      unsigned char *y_buffer
;        r1      unsigned char *ypred_ptr  (ignored: overwritten with
;                                           y_buffer on entry)
;        r2      int y_stride
;        r3      int mode   0 = DC, 1 = V, 2 = H, 3 = TM;
;                           any other value takes the DC path
;        [sp]    int Up     non-zero when the row above exists
;        [sp+4]  int Left   non-zero when the left column exists
;                NOTE(review): the DC shift is computed as 3 + Up + Left,
;                so both flags are presumably exactly 0 or 1 -- confirm
;                against the caller.
; Clobb: r0-r3, r12, q0-q3, q8-q15, flags (r4-r8 are saved/restored).
;        d8-d15 (q4-q7) are deliberately left untouched because the
;        AAPCS requires a callee to preserve them.
;-----------------------------------------------------------------------
|vp8_build_intra_predictors_mby_s_neon_func| PROC
    push            {r4-r8, lr}

    mov             r1, r0              ; output pointer = y_buffer (in place)

    cmp             r3, #0
    beq             case_dc_pred_s
    cmp             r3, #1
    beq             case_v_pred_s
    cmp             r3, #2
    beq             case_h_pred_s
    cmp             r3, #3
    beq             case_tm_pred_s
    ; an unrecognised mode falls through to the DC predictor

case_dc_pred_s
    ldr             r4, [sp, #24]       ; Up   (24 = six registers pushed above)
    ldr             r5, [sp, #28]       ; Left

    ; Default the DC average to 128
    mov             r12, #128
    vdup.u8         q0, r12

    ; Zero out running sum
    mov             r12, #0

    ; compute shift and jump: with neither neighbour, keep the default 128
    adds            r7, r4, r5
    beq             skip_dc_pred_up_left_s

    ; Load above row, if it exists
    cmp             r4, #0
    beq             skip_dc_pred_up_s

    sub             r6, r0, r2
    vld1.8          {q1}, [r6]
    vpaddl.u8       q2, q1              ; 16 x u8  -> 8 x u16
    vpaddl.u16      q3, q2              ;          -> 4 x u32
    vpaddl.u32      q8, q3              ;          -> 2 x u64 partial sums

    ; Move the two partial sums back to integer registers
    vmov.32         r4, d16[0]
    vmov.32         r6, d17[0]

    add             r12, r4, r6         ; r12 = sum of the 16 above pixels

skip_dc_pred_up_s

    cmp             r5, #0
    beq             skip_dc_pred_left_s

    sub             r0, r0, #1          ; r0 -> left neighbour of row 0

    ; Load left column, if it exists, and accumulate it four rows at a time
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0]

    add             r12, r12, r3
    add             r12, r12, r4
    add             r12, r12, r5
    add             r12, r12, r6

skip_dc_pred_left_s
    add             r7, r7, #3          ; shift = 3 + Up + Left
    sub             r4, r7, #1
    mov             r5, #1
    add             r12, r12, r5, lsl r4    ; sum += 1 << (shift - 1)  (rounding)
    mov             r5, r12, lsr r7         ; expected_dc

    vdup.u8         q0, r5

skip_dc_pred_up_left_s
    ; fill all 16 rows with the DC value
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2

    pop             {r4-r8,pc}

case_v_pred_s
    ; Copy down above row
    sub             r6, r0, r2
    vld1.8          {q0}, [r6]

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q0}, [r1], r2
    pop             {r4-r8,pc}

case_h_pred_s
    ; Each output row is its left neighbour replicated 16 times.
    ; Load 4x yleft_col, store 4 rows, repeated four times.
    sub             r0, r0, #1

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u8         q0, r3
    vdup.u8         q1, r4
    vdup.u8         q2, r5
    vdup.u8         q3, r6
    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    pop             {r4-r8,pc}

case_tm_pred_s
    ; pred[r][c] = clamp(yleft_col[r] + yabove_row[c] - ytop_left, 0, 255)

    ; Load yabove_row
    sub             r3, r0, r2
    vld1.8          {q8}, [r3]

    ; Load ytop_left
    sub             r3, r3, #1
    ldrb            r7, [r3]
    vdup.u8         d18, r7

    ; Compute yabove_row - ytop_left as signed 16-bit lanes.
    ; q12 = columns 0-7, q13 = columns 8-15; both stay live across the loop.
    vsubl.u8        q12, d16, d18
    vsubl.u8        q13, d17, d18

    ; Load 4x yleft_col per iteration, four iterations = 16 rows
    sub             r0, r0, #1
    mov             r12, #4

case_tm_pred_loop_s
    ldrb            r3, [r0], r2
    ldrb            r4, [r0], r2
    ldrb            r5, [r0], r2
    ldrb            r6, [r0], r2
    vdup.u16        q0, r3
    vdup.u16        q1, r4
    vdup.u16        q2, r5
    vdup.u16        q3, r6

    vqadd.s16       q8, q0, q12         ; row 0
    vqadd.s16       q9, q0, q13

    vqadd.s16       q10, q1, q12        ; row 1
    vqadd.s16       q11, q1, q13

    vqadd.s16       q14, q2, q12        ; row 2
    vqadd.s16       q15, q2, q13

    vqmovun.s16     d0, q8              ; saturate row 0 to u8 -> q0
    vqmovun.s16     d1, q9

    vqadd.s16       q8, q3, q12         ; row 3 (q8/q9 are free again)
    vqadd.s16       q9, q3, q13

    vqmovun.s16     d2, q10             ; row 1 -> q1
    vqmovun.s16     d3, q11

    vqmovun.s16     d4, q14             ; row 2 -> q2
    vqmovun.s16     d5, q15

    vqmovun.s16     d6, q8              ; row 3 -> q3
    vqmovun.s16     d7, q9

    vst1.u8         {q0}, [r1], r2
    vst1.u8         {q1}, [r1], r2
    vst1.u8         {q2}, [r1], r2
    vst1.u8         {q3}, [r1], r2

    subs            r12, r12, #1
    bne             case_tm_pred_loop_s

    pop             {r4-r8,pc}

    ENDP
|
||||
|
||||
|
||||
END
|
|
@ -0,0 +1,59 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem16x16_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;-----------------------------------------------------------------------
; void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride,
;                             unsigned char *dst, int dst_stride)
;
; Copies a 16x16 block of bytes, one 16-byte row per q register.
;
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Clobb: r0, r2, q0-q3, q8-q15
;        q4-q7 (d8-d15) are callee-saved under the AAPCS, so the rows
;        cycle through q0-q3 and q8-q15 only. Loads run two rows ahead
;        of the stores, so a register is always stored before reuse.
;-----------------------------------------------------------------------
|vp8_copy_mem16x16_neon| PROC

    vld1.u8     {q0}, [r0], r1          ; row 0
    vld1.u8     {q1}, [r0], r1
    vld1.u8     {q2}, [r0], r1
    vst1.u8     {q0}, [r2], r3
    vld1.u8     {q3}, [r0], r1
    vst1.u8     {q1}, [r2], r3
    vld1.u8     {q8}, [r0], r1          ; row 4
    vst1.u8     {q2}, [r2], r3
    vld1.u8     {q9}, [r0], r1
    vst1.u8     {q3}, [r2], r3
    vld1.u8     {q10}, [r0], r1
    vst1.u8     {q8}, [r2], r3
    vld1.u8     {q11}, [r0], r1
    vst1.u8     {q9}, [r2], r3
    vld1.u8     {q12}, [r0], r1         ; row 8
    vst1.u8     {q10}, [r2], r3
    vld1.u8     {q13}, [r0], r1
    vst1.u8     {q11}, [r2], r3
    vld1.u8     {q14}, [r0], r1
    vst1.u8     {q12}, [r2], r3
    vld1.u8     {q15}, [r0], r1
    vst1.u8     {q13}, [r2], r3
    vld1.u8     {q0}, [r0], r1          ; row 12 (q0-q3 already stored)
    vst1.u8     {q14}, [r2], r3
    vld1.u8     {q1}, [r0], r1
    vst1.u8     {q15}, [r2], r3
    vld1.u8     {q2}, [r0], r1
    vst1.u8     {q0}, [r2], r3
    vld1.u8     {q3}, [r0], r1          ; row 15
    vst1.u8     {q1}, [r2], r3
    vst1.u8     {q2}, [r2], r3
    vst1.u8     {q3}, [r2], r3

    bx          lr

    ENDP  ; |vp8_copy_mem16x16_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,34 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x4_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;-----------------------------------------------------------------------
; void vp8_copy_mem8x4_neon(unsigned char *src, int src_stride,
;                           unsigned char *dst, int dst_stride)
;
; Copies four rows of eight bytes.
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Clobb: r0, r2, d16-d19
;-----------------------------------------------------------------------
|vp8_copy_mem8x4_neon| PROC
    ; loads are kept one row ahead of the matching store
    vld1.8      {d16}, [r0], r1         ; row 0
    vld1.8      {d17}, [r0], r1         ; row 1
    vst1.8      {d16}, [r2], r3
    vld1.8      {d18}, [r0], r1         ; row 2
    vst1.8      {d17}, [r2], r3
    vld1.8      {d19}, [r0], r1         ; row 3
    vst1.8      {d18}, [r2], r3
    vst1.8      {d19}, [r2], r3

    bx          lr

    ENDP  ; |vp8_copy_mem8x4_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,43 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_copy_mem8x8_neon|
|
||||
; ARM
|
||||
; REQUIRE8
|
||||
; PRESERVE8
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;-----------------------------------------------------------------------
; void vp8_copy_mem8x8_neon(unsigned char *src, int src_stride,
;                           unsigned char *dst, int dst_stride)
;
; Copies eight rows of eight bytes.
; In:    r0 = src, r1 = src_stride, r2 = dst, r3 = dst_stride
; Clobb: r0, r2, d16-d23
;-----------------------------------------------------------------------
|vp8_copy_mem8x8_neon| PROC

    ; loads are kept one row ahead of the matching store
    vld1.8      {d16}, [r0], r1         ; row 0
    vld1.8      {d17}, [r0], r1
    vst1.8      {d16}, [r2], r3
    vld1.8      {d18}, [r0], r1
    vst1.8      {d17}, [r2], r3
    vld1.8      {d19}, [r0], r1
    vst1.8      {d18}, [r2], r3
    vld1.8      {d20}, [r0], r1         ; row 4
    vst1.8      {d19}, [r2], r3
    vld1.8      {d21}, [r0], r1
    vst1.8      {d20}, [r2], r3
    vld1.8      {d22}, [r0], r1
    vst1.8      {d21}, [r2], r3
    vld1.8      {d23}, [r0], r1         ; row 7
    vst1.8      {d22}, [r2], r3
    vst1.8      {d23}, [r2], r3

    bx          lr

    ENDP  ; |vp8_copy_mem8x8_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,54 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dc_only_idct_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;-----------------------------------------------------------------------
; void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
;                                int pred_stride, unsigned char *dst_ptr,
;                                int dst_stride)
;
; Adds the rounded DC term ((input_dc + 4) >> 3) to every pixel of a 4x4
; predictor block and writes the result, saturated to 0..255, to dst.
;
; In:    r0   = input_dc
;        r1   = pred_ptr
;        r2   = pred_stride
;        r3   = dst_ptr
;        [sp] = dst_stride
; Clobb: r0, r1, r3, r12, q0-q2, q8
;-----------------------------------------------------------------------
|vp8_dc_only_idct_add_neon| PROC
    ldr             r12, [sp]               ; dst_stride

    ; gather the four 4-byte predictor rows into q0 (d0 = rows 0-1, d1 = rows 2-3)
    vld1.32         {d0[0]}, [r1], r2
    vld1.32         {d0[1]}, [r1], r2
    vld1.32         {d1[0]}, [r1], r2
    vld1.32         {d1[1]}, [r1]

    add             r0, r0, #4              ; rounding
    asr             r0, r0, #3
    vdup.16         q8, r0                  ; dc in every 16-bit lane

    vaddw.u8        q1, q8, d0              ; dc + pred, rows 0-1
    vaddw.u8        q2, q8, d1              ; dc + pred, rows 2-3

    vqmovun.s16     d0, q1                  ; saturate back to u8
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r3], r12
    vst1.32         {d0[1]}, [r3], r12
    vst1.32         {d1[0]}, [r3], r12
    vst1.32         {d1[1]}, [r3]

    bx              lr

    ENDP
|
||||
|
||||
END
|
|
@ -0,0 +1,131 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dequant_idct_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; void vp8_dequant_idct_add_neon(short *input, short *dq,
;                                unsigned char *dest, int stride)
;
; Dequantises 16 coefficients (input[i] * dq[i]), runs the two-pass 4x4
; inverse DCT, adds the result to the 4x4 block at dest (saturating to
; 0..255) and clears the 32 bytes at input.
;
; In:    r0 = input, r1 = dq, r2 = dest, r3 = stride
; Clobb: r1-r3, r12, q0-q3, q8-q12, q14, q15
;        d8-d15 (q4-q7) are callee-saved under the AAPCS and are not
;        used; all temporaries live in q0-q3 / q8-q15.
;-----------------------------------------------------------------------
|vp8_dequant_idct_add_neon| PROC
    vld1.16         {q8, q9}, [r0]          ; 16 coefficients
    vld1.16         {q10, q11}, [r1]        ; 16 dequantisation factors

    add             r1, r2, r3              ; r1 = dest + stride
    lsl             r3, r3, #1              ; 2x stride

    ; destination rows: r2 walks rows 0 and 2, r1 walks rows 1 and 3
    vld1.32         {d24[0]}, [r2], r3
    vld1.32         {d24[1]}, [r1], r3
    vld1.32         {d25[0]}, [r2]
    vld1.32         {d25[1]}, [r1]

    adr             r12, cospi8sqrt2minus1  ; pointer to the first constant

    vmul.i16        q1, q8, q10             ; dequantised input for the idct
    vmul.i16        q2, q9, q11

    ; 8-byte load picks up both pool words:
    ; d0[0] = cospi8sqrt2minus1, d0[2] = sinpi8sqrt2
    vld1.16         {d0}, [r12]
    vswp            d3, d4                  ; q2(vp[4] vp[12])

    ; first pass
    ; sinpi8sqrt2 has its top bit set, so vqdmulh sees it as negative; the
    ; ">> 1" followed by "+ q2" below compensates for both the doubling and
    ; the sign.
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    ; transpose
    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    ; memset(input, 0, 32) -- 32bytes (stored further down)
    vmov.i16        q14, #0

    ; second pass
    vswp            d3, d4
    vqdmulh.s16     q3, q2, d0[2]
    vqdmulh.s16     q8, q2, d0[0]

    vqadd.s16       d20, d2, d3             ; a1
    vqsub.s16       d21, d2, d3             ; b1

    vmov            q15, q14

    vshr.s16        q3, q3, #1
    vshr.s16        q8, q8, #1

    vqadd.s16       q3, q3, q2
    vqadd.s16       q8, q8, q2

    vqsub.s16       d18, d6, d17            ; c1
    vqadd.s16       d19, d7, d16            ; d1

    vqadd.s16       d2, d20, d19
    vqadd.s16       d3, d21, d18
    vqsub.s16       d4, d21, d18
    vqsub.s16       d5, d20, d19

    vst1.16         {q14, q15}, [r0]        ; clear the coefficient block

    vrshr.s16       d2, d2, #3              ; (x + 4) >> 3
    vrshr.s16       d3, d3, #3
    vrshr.s16       d4, d4, #3
    vrshr.s16       d5, d5, #3

    vtrn.32         d2, d4
    vtrn.32         d3, d5
    vtrn.16         d2, d3
    vtrn.16         d4, d5

    vaddw.u8        q1, q1, d24             ; add destination rows 0-1
    vaddw.u8        q2, q2, d25             ; add destination rows 2-3

    sub             r2, r2, r3              ; rewind to rows 0 / 1
    sub             r1, r1, r3

    vqmovun.s16     d0, q1
    vqmovun.s16     d1, q2

    vst1.32         {d0[0]}, [r2], r3
    vst1.32         {d0[1]}, [r1], r3
    vst1.32         {d1[0]}, [r2]
    vst1.32         {d1[1]}, [r1]

    bx              lr

    ENDP           ; |vp8_dequant_idct_add_neon|
|
||||
|
||||
; Constant Pool
|
||||
cospi8sqrt2minus1 DCD 0x4e7b4e7b
|
||||
sinpi8sqrt2 DCD 0x8a8c8a8c
|
||||
|
||||
END
|
|
@ -0,0 +1,34 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_dequantize_b_loop_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; vp8_dequantize_b_loop_neon
;
; Element-wise product of two 16-entry short arrays: DQ[i] = Q[i] * DQC[i]
; (low 16 bits of each product).
;
; In:    r0 = short *Q
;        r1 = short *DQC
;        r2 = short *DQ   (output)
; Clobb: q0-q3
;        The products are formed in place in q0/q1 so that the
;        callee-saved q4/q5 (d8-d11) are not touched.
;-----------------------------------------------------------------------
|vp8_dequantize_b_loop_neon| PROC
    vld1.16         {q0, q1}, [r0]
    vld1.16         {q2, q3}, [r1]

    vmul.i16        q0, q0, q2
    vmul.i16        q1, q1, q3

    vst1.16         {q0, q1}, [r2]

    bx              lr

    ENDP
|
||||
|
||||
END
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_rtcd.h"
|
||||
|
||||
/* place these declarations here because we don't want to maintain them
|
||||
* outside of this scope
|
||||
*/
|
||||
void idct_dequant_full_2x_neon(short *q, short *dq,
|
||||
unsigned char *dst, int stride);
|
||||
void idct_dequant_0_2x_neon(short *q, short dq,
|
||||
unsigned char *dst, int stride);
|
||||
|
||||
|
||||
/* Dequantise, inverse-transform and add the luma residual of one macroblock.
 *
 * The blocks are handled as four rows of two pairs; each pair is two
 * horizontally adjacent 4x4 blocks (32 coefficients, 8 pixels wide).
 * The two eob bytes of a pair are read together as one short:
 *   - zero                  : nothing to add, the pair is skipped;
 *   - any bit of 0xfefe set : at least one block has an eob above 1,
 *                             so the full 2x IDCT is used;
 *   - otherwise             : only DC terms can be present and the
 *                             DC-only 2x routine is used with dq[0].
 */
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq,
                                       unsigned char *dst,
                                       int stride, char *eobs)
{
    int row;

    for (row = 0; row < 4; ++row)
    {
        int pair;

        for (pair = 0; pair < 2; ++pair)
        {
            short *coeffs = q + 32 * pair;
            unsigned char *out = dst + 8 * pair;
            const short eob_pair = ((short *)eobs)[pair];

            if (!eob_pair)
                continue;

            if (eob_pair & 0xfefe)
                idct_dequant_full_2x_neon (coeffs, dq, out, stride);
            else
                idct_dequant_0_2x_neon (coeffs, dq[0], out, stride);
        }

        q    += 64;
        dst  += 4*stride;
        eobs += 4;
    }
}
|
||||
|
||||
/* Dequantise, inverse-transform and add the chroma residual of one macroblock.
 *
 * Four pairs of horizontally adjacent 4x4 blocks are processed in order:
 * U top row, U bottom row, V top row, V bottom row.  Each pair owns 32
 * coefficients in q and two eob bytes, which are read together as one
 * short: zero skips the pair, any bit of 0xfefe selects the full 2x IDCT,
 * and anything else selects the DC-only 2x routine with dq[0].
 */
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq,
                                        unsigned char *dstu,
                                        unsigned char *dstv,
                                        int stride, char *eobs)
{
    int i;

    for (i = 0; i < 4; ++i)
    {
        /* pairs 0,1 go to the U plane, pairs 2,3 to the V plane;
         * odd pairs sit four pixel rows below the even ones */
        unsigned char *plane = (i < 2) ? dstu : dstv;
        unsigned char *out = plane + (i & 1) * 4*stride;
        const short eob_pair = ((short *)eobs)[i];

        if (eob_pair)
        {
            if (eob_pair & 0xfefe)
                idct_dequant_full_2x_neon (q, dq, out, stride);
            else
                idct_dequant_0_2x_neon (q, dq[0], out, stride);
        }

        q += 32;
    }
}
|
|
@ -0,0 +1,79 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license and patent
|
||||
; grant that can be found in the LICENSE file in the root of the source
|
||||
; tree. All contributing project authors may be found in the AUTHORS
|
||||
; file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |idct_dequant_0_2x_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; void idct_dequant_0_2x_neon(short *q, short dq,
;                             unsigned char *dst, int stride);
;
; DC-only dequant + IDCT + add for two horizontally adjacent 4x4 blocks.
; For each block the DC term is ((q_dc * dq) + 4) >> 3; it is added to
; all 16 pixels of that block with saturation to 0..255.  The two DC
; coefficients (q[0] and the short at byte offset 32) are cleared.
;
; In:    r0 = q, r1 = dq, r2 = dst, r3 = stride
; Clobb: r0, r2, r12, q0-q3, q8, q9
;        The right-hand block lives in q8/q9 rather than q4/q5, because
;        d8-d15 are callee-saved under the AAPCS.  All scalar work is
;        sequenced through r12, so no core registers need saving.
;-----------------------------------------------------------------------
|idct_dequant_0_2x_neon| PROC
    add             r12, r2, #4             ; r12 -> right-hand block
    vld1.32         {d2[0]}, [r2], r3       ; lo rows 0-1 -> d2
    vld1.32         {d16[0]}, [r12], r3     ; hi rows 0-1 -> d16
    vld1.32         {d2[1]}, [r2], r3
    vld1.32         {d16[1]}, [r12], r3
    vld1.32         {d4[0]}, [r2], r3       ; lo rows 2-3 -> d4
    vld1.32         {d18[0]}, [r12], r3     ; hi rows 2-3 -> d18
    vld1.32         {d4[1]}, [r2], r3
    vld1.32         {d18[1]}, [r12], r3

    ldrsh           r12, [r0]               ; lo q (sign-extended)
    mul             r12, r12, r1
    add             r12, r12, #4
    asr             r12, r12, #3
    vdup.16         q0, r12                 ; lo dc in every lane

    ldrsh           r12, [r0, #32]          ; hi q (sign-extended)
    mul             r12, r12, r1
    add             r12, r12, #4
    asr             r12, r12, #3
    vdup.16         q3, r12                 ; hi dc in every lane

    mov             r12, #0                 ; clear both consumed coefficients
    strh            r12, [r0]
    strh            r12, [r0, #32]

    vaddw.u8        q1, q0, d2              ; lo
    vaddw.u8        q2, q0, d4
    vaddw.u8        q8, q3, d16             ; hi
    vaddw.u8        q9, q3, d18

    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
    add             r0, r2, #4

    vqmovun.s16     d2, q1                  ; lo
    vqmovun.s16     d4, q2
    vqmovun.s16     d16, q8                 ; hi
    vqmovun.s16     d18, q9

    vst1.32         {d2[0]}, [r2], r3       ; lo
    vst1.32         {d16[0]}, [r0], r3      ; hi
    vst1.32         {d2[1]}, [r2], r3
    vst1.32         {d16[1]}, [r0], r3
    vst1.32         {d4[0]}, [r2], r3
    vst1.32         {d18[0]}, [r0], r3
    vst1.32         {d4[1]}, [r2]
    vst1.32         {d18[1]}, [r0]

    bx              lr

    ENDP           ; |idct_dequant_0_2x_neon|
|
||||
END
|
|
@ -0,0 +1,196 @@
|
|||
;
|
||||
; Copyright (c) 2010 The Webm project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |idct_dequant_full_2x_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
;-----------------------------------------------------------------------
; void idct_dequant_full_2x_neon(short *q, short *dq,
;                                unsigned char *dst, int stride);
;
; Dequant + full 4x4 inverse DCT + add for two horizontally adjacent
; blocks at once ("l" = left block, "r" = right block, 32 coefficients
; in total).  Both coefficient blocks are cleared afterwards.
;
; In:    r0 = q, r1 = dq, r2 = dst, r3 = stride
; Clobb: r0-r2, r12, q0-q3, q8-q11, q14, q15
;        The transform needs q4-q7 as well; d8-d15 are callee-saved
;        under the AAPCS, so they are saved on entry and restored
;        before returning (64 bytes of stack).
;-----------------------------------------------------------------------
|idct_dequant_full_2x_neon| PROC
    vpush           {d8-d15}

    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
    vld1.16         {q2, q3}, [r0]          ; l q
    add             r0, r0, #32
    vld1.16         {q4, q5}, [r0]          ; r q
    add             r12, r2, #4

    ; interleave the predictors
    vld1.32         {d28[0]}, [r2],  r3     ; l pre
    vld1.32         {d28[1]}, [r12], r3     ; r pre
    vld1.32         {d29[0]}, [r2],  r3
    vld1.32         {d29[1]}, [r12], r3
    vld1.32         {d30[0]}, [r2],  r3
    vld1.32         {d30[1]}, [r12], r3
    vld1.32         {d31[0]}, [r2],  r3
    vld1.32         {d31[1]}, [r12]

    adr             r1, cospi8sqrt2minus1   ; pointer to the first constant

    ; dequant: q[i] = q[i] * dq[i]
    vmul.i16        q2, q2, q0
    vmul.i16        q3, q3, q1
    vmul.i16        q4, q4, q0
    vmul.i16        q5, q5, q1

    ; d0[0] = cospi8sqrt2minus1, d0[2] = sinpi8sqrt2 (one 32-bit word each)
    vld1.16         {d0}, [r1]

    ; q2: l0r0  q3: l8r8
    ; q4: l4r4  q5: l12r12
    vswp            d5, d8
    vswp            d7, d10

    ; _CONSTANTS_ * 4,12 >> 16
    ; q6:  4 * sinpi : c1/temp1
    ; q7: 12 * sinpi : d1/temp2
    ; q8:  4 * cospi
    ; q9: 12 * cospi
    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
    vqdmulh.s16     q7, q5, d0[2]
    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
    vqdmulh.s16     q9, q5, d0[0]

    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8

    ; vqdmulh only accepts signed values. this was a problem because
    ; our constant had the high bit set, and was treated as a negative value.
    ; vqdmulh also doubles the value before it shifts by 16. we need to
    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
    ; so we can shift the constant without losing precision. this avoids
    ; shift again afterward, but also avoids the sign issue. win win!
    ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
    ; pre-shift it
    vshr.s16        q8, q8, #1
    vshr.s16        q9, q9, #1

    ; q4:  4 +  4 * cospi : d1/temp1
    ; q5: 12 + 12 * cospi : c1/temp2
    vqadd.s16       q4, q4, q8
    vqadd.s16       q5, q5, q9

    ; c1 = temp1 - temp2
    ; d1 = temp1 + temp2
    vqsub.s16       q2, q6, q5
    vqadd.s16       q3, q4, q7

    ; [0]: a1+d1
    ; [1]: b1+c1
    ; [2]: b1-c1
    ; [3]: a1-d1
    vqadd.s16       q4, q10, q3
    vqadd.s16       q5, q11, q2
    vqsub.s16       q6, q11, q2
    vqsub.s16       q7, q10, q3

    ; rotate
    vtrn.32         q4, q6
    vtrn.32         q5, q7
    vtrn.16         q4, q5
    vtrn.16         q6, q7

    ; idct loop 2
    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
    ; q6: l 2, 6,10,14 r 2, 6,10,14
    ; q7: l 3, 7,11,15 r 3, 7,11,15

    ; q8:  1 * sinpi : c1/temp1
    ; q9:  3 * sinpi : d1/temp2
    ; q10: 1 * cospi
    ; q11: 3 * cospi
    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
    vqdmulh.s16     q9, q7, d0[2]
    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
    vqdmulh.s16     q11, q7, d0[0]

    vqadd.s16       q2, q4, q6              ; a1 = 0 + 2
    vqsub.s16       q3, q4, q6              ; b1 = 0 - 2

    ; see note on shifting above
    vshr.s16        q10, q10, #1
    vshr.s16        q11, q11, #1

    ; q10: 1 + 1 * cospi : d1/temp1
    ; q11: 3 + 3 * cospi : c1/temp2
    vqadd.s16       q10, q5, q10
    vqadd.s16       q11, q7, q11

    ; q8: c1 = temp1 - temp2
    ; q9: d1 = temp1 + temp2
    vqsub.s16       q8, q8, q11
    vqadd.s16       q9, q10, q9

    ; a1+d1
    ; b1+c1
    ; b1-c1
    ; a1-d1
    vqadd.s16       q4, q2, q9
    vqadd.s16       q5, q3, q8
    vqsub.s16       q6, q3, q8
    vqsub.s16       q7, q2, q9

    ; +4 >> 3 (rounding)
    vrshr.s16       q4, q4, #3              ; lo
    vrshr.s16       q5, q5, #3
    vrshr.s16       q6, q6, #3              ; hi
    vrshr.s16       q7, q7, #3

    vtrn.32         q4, q6
    vtrn.32         q5, q7
    vtrn.16         q4, q5
    vtrn.16         q6, q7

    ; adding pre
    ; input is still packed. pre was read interleaved
    vaddw.u8        q4, q4, d28
    vaddw.u8        q5, q5, d29
    vaddw.u8        q6, q6, d30
    vaddw.u8        q7, q7, d31

    vmov.i16        q14, #0
    vmov            q15, q14
    vst1.16         {q14, q15}, [r0]        ; write over high input
    sub             r0, r0, #32
    vst1.16         {q14, q15}, [r0]        ; write over low input

    sub             r2, r2, r3, lsl #2      ; dst - 4*stride
    add             r1, r2, #4              ; hi

    ;saturate and narrow
    vqmovun.s16     d0, q4                  ; lo
    vqmovun.s16     d1, q5
    vqmovun.s16     d2, q6                  ; hi
    vqmovun.s16     d3, q7

    vst1.32         {d0[0]}, [r2], r3       ; lo
    vst1.32         {d0[1]}, [r1], r3       ; hi
    vst1.32         {d1[0]}, [r2], r3
    vst1.32         {d1[1]}, [r1], r3
    vst1.32         {d2[0]}, [r2], r3
    vst1.32         {d2[1]}, [r1], r3
    vst1.32         {d3[0]}, [r2]
    vst1.32         {d3[1]}, [r1]

    vpop            {d8-d15}
    bx              lr

    ENDP           ; |idct_dequant_full_2x_neon|
|
||||
|
||||
; Constant Pool
|
||||
cospi8sqrt2minus1 DCD 0x4e7b
|
||||
; because the lowest bit in 0x8a8c is 0, we can pre-shift this
|
||||
sinpi8sqrt2 DCD 0x4546
|
||||
|
||||
END
|
|
@ -0,0 +1,87 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
EXPORT |vp8_short_inv_walsh4x4_neon|
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY ; name this block of code
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_short_inv_walsh4x4_neon(short *input, short *mb_dqcoeff)
;
; 4x4 inverse Walsh-Hadamard transform.  The 16 results are scattered
; into mb_dqcoeff, one short every 32 bytes (stores 0..15 land at byte
; offsets 0, 32, 64, ... 480).
; The original prototype comment declares a short return type, but no
; instruction in this routine writes r0.
;
; In:    r0 = input   (the load carries a 128-bit alignment hint)
;        r1 = mb_dqcoeff
; Clobb: r1-r3, q0-q3, q8
;-----------------------------------------------------------------------
|vp8_short_inv_walsh4x4_neon| PROC

    ; read in all four lines of values: d0->d3
    vld1.i16    {q0-q1}, [r0@128]

    ; constants for the tail of the routine, set up while the load is in flight
    mov         r2, #64                 ; stride between stores on one pointer
    add         r3, r1, #32             ; second, interleaved output pointer
    vmov.i16    q8, #3                  ; rounding term for the final >> 3

    ; ---- first pass (columns) ----
    vadd.s16    d4, d0, d3              ; a = [0] + [12]
    vadd.s16    d6, d1, d2              ; b = [4] + [8]
    vsub.s16    d5, d0, d3              ; d = [0] - [12]
    vsub.s16    d7, d1, d2              ; c = [4] - [8]

    vadd.s16    q0, q2, q3              ; a+b d+c
    vsub.s16    q1, q2, q3              ; a-b d-c

    ; ---- 4x4 transpose ----
    vtrn.32     d0, d2                  ; d0:  0  1  8  9
                                        ; d2:  2  3 10 11
    vtrn.32     d1, d3                  ; d1:  4  5 12 13
                                        ; d3:  6  7 14 15

    vtrn.16     d0, d1                  ; d0:  0  4  8 12
                                        ; d1:  1  5  9 13
    vtrn.16     d2, d3                  ; d2:  2  6 10 14
                                        ; d3:  3  7 11 15

    ; ---- second pass (rows) ----
    vadd.s16    d4, d0, d3              ; a = [0] + [3]
    vadd.s16    d6, d1, d2              ; b = [1] + [2]
    vsub.s16    d5, d0, d3              ; d = [0] - [3]
    vsub.s16    d7, d1, d2              ; c = [1] - [2]

    vadd.s16    q0, q2, q3              ; a+b d+c
    vsub.s16    q1, q2, q3              ; a-b d-c

    vadd.i16    q0, q0, q8              ; e/f += 3
    vadd.i16    q1, q1, q8              ; g/h += 3

    vshr.s16    q0, q0, #3              ; e/f >> 3
    vshr.s16    q1, q1, #3              ; g/h >> 3

    ; ---- scatter: r1 and r3 alternate, each advancing 64 bytes ----
    vst1.i16    {d0[0]}, [r1], r2
    vst1.i16    {d1[0]}, [r3], r2
    vst1.i16    {d2[0]}, [r1], r2
    vst1.i16    {d3[0]}, [r3], r2

    vst1.i16    {d0[1]}, [r1], r2
    vst1.i16    {d1[1]}, [r3], r2
    vst1.i16    {d2[1]}, [r1], r2
    vst1.i16    {d3[1]}, [r3], r2

    vst1.i16    {d0[2]}, [r1], r2
    vst1.i16    {d1[2]}, [r3], r2
    vst1.i16    {d2[2]}, [r1], r2
    vst1.i16    {d3[2]}, [r3], r2

    vst1.i16    {d0[3]}, [r1], r2
    vst1.i16    {d1[3]}, [r3], r2
    vst1.i16    {d2[3]}, [r1]
    vst1.i16    {d3[3]}, [r3]

    bx          lr
    ENDP    ; |vp8_short_inv_walsh4x4_neon|
|
||||
|
||||
END
|
|
@ -0,0 +1,397 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_y_neon|
|
||||
EXPORT |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
ARM
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_horizontal_edge_y_neon
;
; Filters one horizontal edge across 16 pixels: loads the four rows
; above and the four rows at/below src, runs vp8_loop_filter_neon, and
; writes back p1, p0, q0 and q1.
;
; In:    r0   unsigned char *src    first row below the edge (q0)
;        r1   int            pitch
;        r2   unsigned char  blimit
;        r3   unsigned char  limit
;        [sp] unsigned char  thresh
; Uses:  r1 (doubled), r2, r3, r12, q0-q3, q8-q15
; Saves: d8-d15 (q4-q7), which AAPCS requires a callee to preserve
; Note:  every row is accessed with a 128-bit alignment hint (@128).
;-----------------------------------------------------------------------
|vp8_loop_filter_horizontal_edge_y_neon| PROC
    push        {lr}
    vpush       {d8-d15}                    ; q4-q7 are overwritten below

    vdup.u8     q0, r2                      ; q0 = blimit in every lane
    vdup.u8     q1, r3                      ; q1 = limit in every lane
    sub         r2, r0, r1, lsl #2          ; r2 = src - 4 * pitch (row p3)
    ldr         r3, [sp, #68]               ; thresh: 4 (lr) + 64 (d8-d15) above sp
    add         r12, r2, r1                 ; r12 = src - 3 * pitch (row p2)
    add         r1, r1, r1                  ; r1 = 2 * pitch: r2/r12 take alternate rows

    vdup.u8     q2, r3                      ; q2 = thresh in every lane

    vld1.u8     {q3}, [r2@128], r1          ; p3
    vld1.u8     {q4}, [r12@128], r1         ; p2
    vld1.u8     {q5}, [r2@128], r1          ; p1
    vld1.u8     {q6}, [r12@128], r1         ; p0
    vld1.u8     {q7}, [r2@128], r1          ; q0
    vld1.u8     {q8}, [r12@128], r1         ; q1
    vld1.u8     {q9}, [r2@128]              ; q2
    vld1.u8     {q10}, [r12@128]            ; q3

    ; rewind by 4 rows: r2 -> row p1, r12 -> row p0
    sub         r2, r2, r1, lsl #1
    sub         r12, r12, r1, lsl #1

    bl          vp8_loop_filter_neon        ; results come back in q5-q8

    vst1.u8     {q5}, [r2@128], r1          ; store op1
    vst1.u8     {q6}, [r12@128], r1         ; store op0
    vst1.u8     {q7}, [r2@128], r1          ; store oq0
    vst1.u8     {q8}, [r12@128], r1         ; store oq1

    vpop        {d8-d15}
    pop         {pc}
    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_horizontal_edge_uv_neon
;
; Filters one horizontal edge of the U and V planes together: 8 pixels
; of U go in the low half of each q register and 8 pixels of V in the
; high half, so a single vp8_loop_filter_neon call handles both.
;
; In:    r0     unsigned char *u      first U row below the edge
;        r1     int            pitch
;        r2     unsigned char  blimit
;        r3     unsigned char  limit
;        [sp]   unsigned char  thresh
;        [sp+4] unsigned char *v      first V row below the edge
; Uses:  r0, r2, r3, r12, q0-q3, q8-q15
; Saves: d8-d15 (q4-q7), which AAPCS requires a callee to preserve
; Note:  every row is accessed with a 64-bit alignment hint (@64).
;-----------------------------------------------------------------------
|vp8_loop_filter_horizontal_edge_uv_neon| PROC
    push        {lr}
    vpush       {d8-d15}                    ; q4-q7 are overwritten below

    vdup.u8     q0, r2                      ; q0 = blimit in every lane
    vdup.u8     q1, r3                      ; q1 = limit in every lane
    ldr         r12, [sp, #68]              ; thresh: 4 (lr) + 64 (d8-d15) above sp
    ldr         r2, [sp, #72]               ; v pointer, next stack slot
    vdup.u8     q2, r12                     ; q2 = thresh in every lane

    sub         r3, r0, r1, lsl #2          ; r3  = u - 4 * pitch (row p3)
    sub         r12, r2, r1, lsl #2         ; r12 = v - 4 * pitch (row p3)

    vld1.u8     {d6}, [r3@64], r1           ; p3 (u)
    vld1.u8     {d7}, [r12@64], r1          ; p3 (v)
    vld1.u8     {d8}, [r3@64], r1           ; p2 (u)
    vld1.u8     {d9}, [r12@64], r1          ; p2 (v)
    vld1.u8     {d10}, [r3@64], r1          ; p1 (u)
    vld1.u8     {d11}, [r12@64], r1         ; p1 (v)
    vld1.u8     {d12}, [r3@64], r1          ; p0 (u)
    vld1.u8     {d13}, [r12@64], r1         ; p0 (v)
    vld1.u8     {d14}, [r3@64], r1          ; q0 (u)
    vld1.u8     {d15}, [r12@64], r1         ; q0 (v)
    vld1.u8     {d16}, [r3@64], r1          ; q1 (u)
    vld1.u8     {d17}, [r12@64], r1         ; q1 (v)
    vld1.u8     {d18}, [r3@64], r1          ; q2 (u)
    vld1.u8     {d19}, [r12@64], r1         ; q2 (v)
    vld1.u8     {d20}, [r3@64]              ; q3 (u)
    vld1.u8     {d21}, [r12@64]             ; q3 (v)

    bl          vp8_loop_filter_neon        ; results come back in q5-q8

    ; step back two rows so the stores start at row p1
    sub         r0, r0, r1, lsl #1
    sub         r2, r2, r1, lsl #1

    vst1.u8     {d10}, [r0@64], r1          ; store u op1
    vst1.u8     {d11}, [r2@64], r1          ; store v op1
    vst1.u8     {d12}, [r0@64], r1          ; store u op0
    vst1.u8     {d13}, [r2@64], r1          ; store v op0
    vst1.u8     {d14}, [r0@64], r1          ; store u oq0
    vst1.u8     {d15}, [r2@64], r1          ; store v oq0
    vst1.u8     {d16}, [r0@64]              ; store u oq1
    vst1.u8     {d17}, [r2@64]              ; store v oq1

    vpop        {d8-d15}
    pop         {pc}
    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_vertical_edge_y_neon
;
; Filters one vertical edge over 16 rows: loads 8 bytes per row starting
; 4 columns left of src, transposes so each q register holds one pixel
; column (p3..q3) for all 16 rows, runs vp8_loop_filter_neon, and writes
; the 4 filtered columns (p1, p0, q0, q1) back row by row.
;
; In:    r0   unsigned char *src    first column right of the edge (q0)
;        r1   int            pitch
;        r2   unsigned char  blimit
;        r3   unsigned char  limit
;        [sp] unsigned char  thresh
; Uses:  r0, r1 (doubled), r2, r3, r12, q0-q3, q8-q15
; Saves: d8-d15 (q4-q7), which AAPCS requires a callee to preserve
;-----------------------------------------------------------------------
|vp8_loop_filter_vertical_edge_y_neon| PROC
    push        {lr}
    vpush       {d8-d15}                    ; q4-q7 are overwritten below

    vdup.u8     q0, r2                      ; q0 = blimit in every lane
    vdup.u8     q1, r3                      ; q1 = limit in every lane
    sub         r2, r0, #4                  ; r2 = src - 4: column p3 of row 0
    add         r1, r1, r1                  ; r1 = 2 * pitch: r2/r12 take alternate rows
    ldr         r3, [sp, #68]               ; thresh: 4 (lr) + 64 (d8-d15) above sp
    add         r12, r2, r1, asr #1         ; r12 = row 1 (r1 / 2 is the original pitch)

    vld1.u8     {d6}, [r2], r1              ; row 0
    vld1.u8     {d8}, [r12], r1             ; row 1
    vld1.u8     {d10}, [r2], r1             ; row 2
    vld1.u8     {d12}, [r12], r1            ; row 3
    vld1.u8     {d14}, [r2], r1             ; row 4
    vld1.u8     {d16}, [r12], r1            ; row 5
    vld1.u8     {d18}, [r2], r1             ; row 6
    vld1.u8     {d20}, [r12], r1            ; row 7

    vld1.u8     {d7}, [r2], r1              ; row 8: second group of 8 rows
    vld1.u8     {d9}, [r12], r1             ; row 9
    vld1.u8     {d11}, [r2], r1             ; row 10
    vld1.u8     {d13}, [r12], r1            ; row 11
    vld1.u8     {d15}, [r2], r1             ; row 12
    vld1.u8     {d17}, [r12], r1            ; row 13
    vld1.u8     {d19}, [r2]                 ; row 14
    vld1.u8     {d21}, [r12]                ; row 15

    ; transpose: afterwards q3..q10 hold columns p3..q3 for all 16 rows
    vtrn.32     q3, q7
    vtrn.32     q4, q8
    vtrn.32     q5, q9
    vtrn.32     q6, q10

    vdup.u8     q2, r3                      ; q2 = thresh in every lane

    vtrn.16     q3, q5
    vtrn.16     q4, q6
    vtrn.16     q7, q9
    vtrn.16     q8, q10

    vtrn.8      q3, q4
    vtrn.8      q5, q6
    vtrn.8      q7, q8
    vtrn.8      q9, q10

    bl          vp8_loop_filter_neon        ; results come back in q5-q8

    ; Regroup for vst4: d10-d13 = op1/op0/oq0/oq1 of rows 0-7,
    ;                   d14-d17 = op1/op0/oq0/oq1 of rows 8-15.
    vswp        d12, d11
    vswp        d16, d13

    sub         r0, r0, #2                  ; dst = column p1 of row 0

    vswp        d14, d12
    vswp        d16, d15

    add         r12, r0, r1, asr #1         ; r12 = dst of row 1

    ; store op1, op0, oq0, oq1: four bytes per row
    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r12], r1
    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r12], r1
    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r12], r1

    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r12], r1
    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r12], r1
    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r12]

    vpop        {d8-d15}
    pop         {pc}
    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_vertical_edge_uv_neon
;
; Filters one vertical edge of the U and V planes together (8 rows
; each). U rows go in the low half of each q register and V rows in the
; high half; after the transpose each q register holds one pixel column
; (p3..q3), so one vp8_loop_filter_neon call handles both planes.
;
; In:    r0     unsigned char *u      first U column right of the edge
;        r1     int            pitch
;        r2     unsigned char  blimit
;        r3     unsigned char  limit
;        [sp]   unsigned char  thresh
;        [sp+4] unsigned char *v      first V column right of the edge
; Uses:  r0, r2, r3, r12, q0-q3, q8-q15
; Saves: d8-d15 (q4-q7), which AAPCS requires a callee to preserve
;-----------------------------------------------------------------------
|vp8_loop_filter_vertical_edge_uv_neon| PROC
    push        {lr}
    vpush       {d8-d15}                    ; q4-q7 are overwritten below

    vdup.u8     q0, r2                      ; q0 = blimit in every lane
    sub         r12, r0, #4                 ; r12 = u - 4: column p3
    ldr         r2, [sp, #72]               ; v pointer: 4 (lr) + 64 (d8-d15) + 4
    vdup.u8     q1, r3                      ; q1 = limit in every lane
    sub         r3, r2, #4                  ; r3 = v - 4: column p3

    vld1.u8     {d6}, [r12], r1             ; u row 0
    vld1.u8     {d7}, [r3], r1              ; v row 0
    vld1.u8     {d8}, [r12], r1             ; u row 1
    vld1.u8     {d9}, [r3], r1              ; v row 1
    vld1.u8     {d10}, [r12], r1            ; u row 2
    vld1.u8     {d11}, [r3], r1             ; v row 2
    vld1.u8     {d12}, [r12], r1            ; u row 3
    vld1.u8     {d13}, [r3], r1             ; v row 3
    vld1.u8     {d14}, [r12], r1            ; u row 4
    vld1.u8     {d15}, [r3], r1             ; v row 4
    vld1.u8     {d16}, [r12], r1            ; u row 5
    vld1.u8     {d17}, [r3], r1             ; v row 5
    vld1.u8     {d18}, [r12], r1            ; u row 6
    vld1.u8     {d19}, [r3], r1             ; v row 6
    vld1.u8     {d20}, [r12]                ; u row 7
    vld1.u8     {d21}, [r3]                 ; v row 7

    ldr         r12, [sp, #68]              ; thresh: 4 (lr) + 64 (d8-d15) above sp

    ; transpose: afterwards q3..q10 hold columns p3..q3 (u low, v high)
    vtrn.32     q3, q7
    vtrn.32     q4, q8
    vtrn.32     q5, q9
    vtrn.32     q6, q10

    vdup.u8     q2, r12                     ; q2 = thresh in every lane

    vtrn.16     q3, q5
    vtrn.16     q4, q6
    vtrn.16     q7, q9
    vtrn.16     q8, q10

    vtrn.8      q3, q4
    vtrn.8      q5, q6
    vtrn.8      q7, q8
    vtrn.8      q9, q10

    bl          vp8_loop_filter_neon        ; results come back in q5-q8

    ; Regroup for vst4: d10-d13 = op1/op0/oq0/oq1 of the u rows,
    ;                   d14-d17 = op1/op0/oq0/oq1 of the v rows.
    vswp        d12, d11
    vswp        d16, d13
    vswp        d14, d12
    vswp        d16, d15

    sub         r0, r0, #2                  ; u dst = column p1
    sub         r2, r2, #2                  ; v dst = column p1

    ; store op1, op0, oq0, oq1: four bytes per row
    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]

    vpop        {d8-d15}
    pop         {pc}
    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_neon
;
; Shared arithmetic core of the loop filters in this file. The exported
; wrappers load (and, for vertical edges, transpose) the pixels, branch
; here, and then store the results. Works purely on NEON registers; no
; core register and no memory is touched.
;
; In:    q0  blimit     q1  limit      q2  thresh
;        q3  p3         q4  p2         q5  p1         q6  p0
;        q7  q0         q8  q1         q9  q2         q10 q3
; Out:   q5  op1        q6  op0        q7  oq0        q8  oq1
; Clobb: q0-q4, q9-q15
;-----------------------------------------------------------------------
|vp8_loop_filter_neon| PROC

    ; ---- filter mask: neighbouring-pixel differences ----
    vabd.u8     q11, q3, q4                 ; |p3 - p2|
    vabd.u8     q12, q4, q5                 ; |p2 - p1|
    vabd.u8     q13, q5, q6                 ; |p1 - p0|
    vabd.u8     q14, q8, q7                 ; |q1 - q0|
    vabd.u8     q3, q9, q8                  ; |q2 - q1|
    vabd.u8     q4, q10, q9                 ; |q3 - q2|

    vmax.u8     q11, q11, q12
    vmax.u8     q12, q13, q14
    vmax.u8     q3, q3, q4
    vmax.u8     q15, q11, q12

    vabd.u8     q9, q6, q7                  ; |p0 - q0|

    ; ---- high-edge-variance mask ----
    vcgt.u8     q13, q13, q2                ; |p1 - p0| > thresh
    vcgt.u8     q14, q14, q2                ; |q1 - q0| > thresh
    vmax.u8     q15, q15, q3                ; q15 = largest of the six differences

    vmov.u8     q10, #0x80                  ; sign-bit flip: unsigned <-> signed

    vabd.u8     q2, q5, q8                  ; |p1 - q1|
    vqadd.u8    q9, q9, q9                  ; |p0 - q0| * 2 (saturating)

    vcge.u8     q15, q1, q15                ; limit >= largest difference

    ; ---- filter: move the pixels to signed range ----
    veor        q7, q7, q10                 ; qs0
    vshr.u8     q2, q2, #1                  ; |p1 - q1| / 2
    veor        q6, q6, q10                 ; ps0

    veor        q5, q5, q10                 ; ps1
    vqadd.u8    q9, q9, q2                  ; |p0 - q0| * 2 + |p1 - q1| / 2

    veor        q8, q8, q10                 ; qs1

    vmov.u8     q10, #3                     ; 3 in every byte lane

    vsubl.s8    q2, d14, d12                ; qs0 - ps0, widened to 16 bits (low half)
    vsubl.s8    q11, d15, d13               ; (high half)

    vcge.u8     q9, q0, q9                  ; blimit >= |p0-q0|*2 + |p1-q1|/2

    vmovl.u8    q4, d20                     ; 3 widened to 16-bit lanes

    vqsub.s8    q1, q5, q8                  ; filter = clamp(ps1 - qs1)
    vorr        q14, q13, q14               ; hev = either variance test passed

    vmul.i16    q2, q2, q4                  ; 3 * (qs0 - ps0)
    vmul.i16    q11, q11, q4

    vand        q1, q1, q14                 ; filter &= hev
    vand        q15, q15, q9                ; mask = both limit tests passed

    vaddw.s8    q2, q2, d2                  ; filter + 3 * (qs0 - ps0)
    vaddw.s8    q11, q11, d3

    vmov.u8     q9, #4                      ; 4 in every byte lane

    ; filter = clamp(filter + 3 * (qs0 - ps0)), narrowed back to 8 bits
    vqmovn.s16  d2, q2
    vqmovn.s16  d3, q11
    vand        q1, q1, q15                 ; filter &= mask

    vqadd.s8    q2, q1, q10                 ; Filter2 = clamp(filter + 3)
    vqadd.s8    q1, q1, q9                  ; Filter1 = clamp(filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3

    vqadd.s8    q11, q6, q2                 ; clamp(ps0 + Filter2)
    vqsub.s8    q10, q7, q1                 ; clamp(qs0 - Filter1)

    ; ---- outer taps: (Filter1 + 1) >> 1, applied only where hev is clear ----
    vrshr.s8    q1, q1, #1
    vbic        q1, q1, q14                 ; filter &= ~hev
    vmov.u8     q0, #0x80                   ; sign-bit flip for the way back
    vqadd.s8    q13, q5, q1                 ; clamp(ps1 + filter)
    vqsub.s8    q12, q8, q1                 ; clamp(qs1 - filter)

    ; ---- back to unsigned range ----
    veor        q6, q11, q0                 ; op0
    veor        q7, q10, q0                 ; oq0
    veor        q5, q13, q0                 ; op1
    veor        q8, q12, q0                 ; oq1

    bx          lr
    ENDP        ; |vp8_loop_filter_neon|
|
||||
|
||||
;-----------------
|
||||
|
||||
END
|
|
@ -0,0 +1,117 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
;EXPORT |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
EXPORT |vp8_loop_filter_bhs_neon|
|
||||
EXPORT |vp8_loop_filter_mbhs_neon|
|
||||
ARM
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_simple_horizontal_edge_neon
;
; Simple loop filter across one horizontal edge, 16 pixels wide. Reads
; the two rows above and the two rows at/below s and rewrites only the
; two rows adjacent to the edge (p0 and q0).
;
; In:    r0  unsigned char *s   first row below the edge; same on return
;        r1  int            p   pitch; untouched
;        q1  limit in every byte lane; untouched
; Clobb: r3, q0, q2-q11, q13-q15
; Note:  q4-q7 (d8-d15) are overwritten here and not saved.
;        Both rows are accessed with a 128-bit alignment hint (@128).
;-----------------------------------------------------------------------
|vp8_loop_filter_simple_horizontal_edge_neon| PROC

    sub         r3, r0, r1, lsl #1          ; r3 = s - 2 * p

    vld1.u8     {q7}, [r0@128], r1          ; q0  (row s)
    vld1.u8     {q5}, [r3@128], r1          ; p1  (row s - 2p)
    vld1.u8     {q8}, [r0@128]              ; q1  (row s + p)
    vld1.u8     {q6}, [r3@128]              ; p0  (row s - p)

    vabd.u8     q15, q6, q7                 ; |p0 - q0|
    vabd.u8     q14, q5, q8                 ; |p1 - q1|

    vqadd.u8    q15, q15, q15               ; |p0 - q0| * 2 (saturating)
    vshr.u8     q14, q14, #1                ; |p1 - q1| / 2
    vmov.u8     q0, #0x80                   ; sign-bit flip: unsigned <-> signed
    vmov.s16    q13, #3                     ; 3 in every 16-bit lane
    vqadd.u8    q15, q15, q14               ; |p0 - q0| * 2 + |p1 - q1| / 2

    veor        q7, q7, q0                  ; qs0
    veor        q6, q6, q0                  ; ps0
    veor        q5, q5, q0                  ; ps1
    veor        q8, q8, q0                  ; qs1

    vcge.u8     q15, q1, q15                ; mask: limit >= |p0-q0|*2 + |p1-q1|/2

    vsubl.s8    q2, d14, d12                ; qs0 - ps0, widened to 16 bits (low half)
    vsubl.s8    q3, d15, d13                ; (high half)

    vqsub.s8    q4, q5, q8                  ; filter = clamp(ps1 - qs1)

    vmul.s16    q2, q2, q13                 ; 3 * (qs0 - ps0)
    vmul.s16    q3, q3, q13

    vmov.u8     q10, #0x03                  ; 3 in every byte lane
    vmov.u8     q9, #0x04                   ; 4 in every byte lane

    vaddw.s8    q2, q2, d8                  ; filter + 3 * (qs0 - ps0)
    vaddw.s8    q3, q3, d9

    vqmovn.s16  d8, q2                      ; clamp back to 8 bits
    vqmovn.s16  d9, q3

    vand        q14, q4, q15                ; filter &= mask

    vqadd.s8    q2, q14, q10                ; Filter2 = clamp(filter + 3)
    vqadd.s8    q3, q14, q9                 ; Filter1 = clamp(filter + 4)
    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
    vshr.s8     q4, q3, #3                  ; Filter1 >>= 3

    sub         r0, r0, r1                  ; undo the post-increment: r0 = s again

    vqadd.s8    q11, q6, q2                 ; clamp(ps0 + Filter2)
    vqsub.s8    q10, q7, q4                 ; clamp(qs0 - Filter1)

    veor        q6, q11, q0                 ; op0, back in unsigned range
    veor        q7, q10, q0                 ; oq0, back in unsigned range

    vst1.u8     {q6}, [r3@128]              ; store op0 (row s - p)
    vst1.u8     {q7}, [r0@128]              ; store oq0 (row s)

    bx          lr
    ENDP        ; |vp8_loop_filter_simple_horizontal_edge_neon|
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_bhs_neon
;
; Runs the simple horizontal-edge filter on the three edges at rows 4,
; 8 and 12 below y.
;
; In:    r0  unsigned char       *y
;        r1  int                  ystride
;        r2  const unsigned char *blimit   one byte is read from here
; Uses:  r0, r3, q0-q3, q8-q11, q13-q15
; Saves: r4, d8-d15. The filter helper overwrites q4-q7 (d8-d15), which
;        AAPCS requires an exported function to preserve.
;-----------------------------------------------------------------------
|vp8_loop_filter_bhs_neon| PROC
    push        {r4, lr}                    ; r4 unused here: keeps sp 8-byte aligned
    vpush       {d8-d15}

    ldrb        r3, [r2]                    ; load blim from mem
    vdup.s8     q1, r3                      ; q1 = blim in every lane

    ; the helper leaves r0, r1 and q1 unchanged, so r0 can simply be
    ; stepped on by 4 rows between calls
    add         r0, r0, r1, lsl #2          ; src = y_ptr + 4 * y_stride
    bl          vp8_loop_filter_simple_horizontal_edge_neon
    add         r0, r0, r1, lsl #2          ; src = y_ptr + 8 * y_stride
    bl          vp8_loop_filter_simple_horizontal_edge_neon
    add         r0, r0, r1, lsl #2          ; src = y_ptr + 12 * y_stride
    bl          vp8_loop_filter_simple_horizontal_edge_neon

    ; a tail branch cannot be used for the last call: d8-d15 must be
    ; restored after the helper has run
    vpop        {d8-d15}
    pop         {r4, pc}
    ENDP        ; |vp8_loop_filter_bhs_neon|
|
||||
|
||||
;-----------------------------------------------------------------------
; vp8_loop_filter_mbhs_neon
;
; Runs the simple horizontal-edge filter once, on the edge at y itself.
;
; In:    r0  unsigned char       *y
;        r1  int                  ystride
;        r2  const unsigned char *blimit   one byte is read from here
; Uses:  r3, q0-q3, q8-q11, q13-q15
; Saves: r4, d8-d15. The filter helper overwrites q4-q7 (d8-d15), which
;        AAPCS requires an exported function to preserve.
;-----------------------------------------------------------------------
|vp8_loop_filter_mbhs_neon| PROC
    push        {r4, lr}                    ; r4 unused here: keeps sp 8-byte aligned
    vpush       {d8-d15}

    ldrb        r3, [r2]                    ; load mblim from mem
    vdup.s8     q1, r3                      ; q1 = mblim in every lane

    ; called rather than tail-branched so d8-d15 can be restored afterwards
    bl          vp8_loop_filter_simple_horizontal_edge_neon

    vpop        {d8-d15}
    pop         {r4, pc}
    ENDP        ; |vp8_loop_filter_mbhs_neon|
|
||||
|
||||
END
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче