diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..ae616b28c --- /dev/null +++ b/.gitignore @@ -0,0 +1,62 @@ +*.a +*.asm.s +*.d +*.o +*~ +/*-*.mk +/*.asm +/*.doxy +/.bins +/.deps +/.docs +/.install-* +/.libs +/Makefile +/config.err +/config.mk +/decode_to_md5 +/decode_to_md5.c +/decode_to_md5.dox +/decode_with_drops +/decode_with_drops.c +/decode_with_drops.dox +/docs/ +/doxyfile +/error_resilient +/error_resilient.c +/error_resilient.dox +/force_keyframe +/force_keyframe.c +/force_keyframe.dox +/ivfdec +/ivfdec.dox +/ivfenc +/ivfenc.dox +/obj_int_extract +/postproc +/postproc.c +/postproc.dox +/samples.dox +/simple_decoder +/simple_decoder.c +/simple_decoder.dox +/simple_encoder +/simple_encoder.c +/simple_encoder.dox +/twopass_encoder +/twopass_encoder.c +/twopass_encoder.dox +/vp8_api1_migration.dox +/vp8_scalable_patterns +/vp8_scalable_patterns.c +/vp8_scalable_patterns.dox +/vp8_set_maps +/vp8_set_maps.c +/vp8_set_maps.dox +/vp8cx_set_ref +/vp8cx_set_ref.c +/vp8cx_set_ref.dox +/vpx_config.c +/vpx_config.h +/vpx_version.h +TAGS diff --git a/.mailmap b/.mailmap new file mode 100644 index 000000000..f77ff26e4 --- /dev/null +++ b/.mailmap @@ -0,0 +1,2 @@ +Adrian Grange +Johann Koenig diff --git a/AUTHORS b/AUTHORS index 9686ac13e..110e5e143 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,36 @@ -# Names should be added to this file like so: -# Name or Organization +# This file is automatically generated from the git commit history +# by tools/gen_authors.sh. +Aaron Watry +Adrian Grange +Alex Converse +Andres Mejia +Fabio Pedretti +Frank Galligan +Fredrik Söderquist +Fritz Koenig +Giuseppe Scrivano +Guillermo Ballester Valor +James Zern +Jan Kratochvil +Jeff Muizelaar +Jim Bankoski +Johann Koenig +John Koleszar +Justin Clift +Justin Lebar +Luca Barbato +Makoto Kato +Martin Ettl +Michael Kohler +Paul Wilkins +Pavol Rusnak +Philip Jägenstedt +Scott LaVarnway +Timothy B. Terriberry +Tom Finegan +Yaowu Xu +Yunqing Wang Google Inc. 
+The Mozilla Foundation +The Xiph.Org Foundation diff --git a/CHANGELOG b/CHANGELOG index d6c8ce8c4..b8da8f8e3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,176 @@ +2010-10-28 v0.9.5 "Aylesbury" + Our first named release, focused on a faster decoder, and a better encoder. + + - Upgrading: + This release incorporates backwards-incompatible changes to the + ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec. + + vpxdec + * The -q (quiet) option has been removed, and replaced with + -v (verbose). The output is quiet by default. Use -v to see + the version number of the binary. + + * The default behavior is now to write output to a single file + instead of individual frames. The -y option has been removed. + Y4M output is the default. + + * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12 + options must be specified. + + $ ivfdec -o OUTPUT INPUT + $ vpxdec --i420 -o OUTPUT INPUT + + * If an output file is not specified, the default is to write + Y4M to stdout. This makes piping more natural. + + $ ivfdec -y -o - INPUT | ... + $ vpxdec INPUT | ... + + * The output file has additional flexibility for formatting the + filename. It supports escape characters for constructing a + filename from the width, height, and sequence number. This + replaces the -p option. To get the equivalent: + + $ ivfdec -p frame INPUT + $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT + + vpxenc + * The output file must be specified with -o, rather than as the + last argument. + + $ ivfenc INPUT OUTPUT + $ vpxenc -o OUTPUT INPUT + + * The output defaults to webm. To get IVF output, use the --ivf + option. + + $ ivfenc INPUT OUTPUT.ivf + $ vpxenc -o OUTPUT.ivf --ivf INPUT + + + - Enhancements: + ivfenc and ivfdec have been renamed to vpxenc, vpxdec. 
+ vpxdec supports .webm input + vpxdec writes .y4m by default + vpxenc writes .webm output by default + vpxenc --psnr now shows the average/overall PSNR at the end + ARM platforms now support runtime cpu detection + vpxdec visualizations added for motion vectors, block modes, references + vpxdec now silent by default + vpxdec --progress shows frame-by-frame timing information + vpxenc supports the distinction between --fps and --timebase + NASM is now a supported assembler + configure: enable PIC for shared libs by default + configure: add --enable-small + configure: support for ppc32-linux-gcc + configure: support for sparc-solaris-gcc + + - Bugs: + Improve handling of invalid frames + Fix valgrind errors in the NEON loop filters. + Fix loopfilter delta zero transitions + Fix valgrind errors in vp8_sixtap_predict8x4_armv6(). + Build fixes for darwin-icc + + - Speed: + 20-40% (average 28%) improvement in libvpx decoder speed, + including: + Rewrite vp8_short_walsh4x4_sse2() + Optimizations on the loopfilters. + Miscellaneous improvements for Atom + Add 4-tap version of 2nd-pass ARMv6 MC filter. + Improved multithread utilization + Better instruction choices on x86 + reorder data to use wider instructions + Update NEON wide idcts + Make block access to frame buffer sequential + Improved subset block search + Bilinear subpixel optimizations for ssse3. 
+ Decrease memory footprint + + Encoder speed improvements (percentage gain not measured): + Skip unnecessary search of identical frames + Add SSE2 subtract functions + Improve bounds checking in vp8_diamond_search_sadx4() + Added vp8_fast_quantize_b_sse2 + + - Quality: + Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality + encoding mode, and up to 60% improvement on very noisy, still + or slow moving source video + + Motion compensated temporal filter for Alt-Ref Noise Reduction + Improved use of trellis quantization on 2nd order Y blocks + Tune effect of motion on KF/GF boost in two pass + Allow coefficient optimization for good quality speed 0. + Improved control of active min quantizer for two pass. + Enable ARFs for non-lagged compress + +2010-09-02 v0.9.2 + - Enhancements: + Disable frame dropping by default + Improved multithreaded performance + Improved Force Key Frame Behaviour + Increased rate control buffer level precision + Fix bug in 1st pass motion compensation + ivfenc: correct fixed kf interval, --disable-kf + - Speed: + Changed above and left context data layout + Rework idct calling structure. + Removed unnecessary MB_MODE_INFO copies + x86: SSSE3 sixtap prediction + Reworked IDCT to include reconstruction (add) step + Swap alt/gold/new/last frame buffer ptrs instead of copying. + Improve SSE2 loopfilter functions + Change bitreader to use a larger window. + Avoid loopfilter reinitialization when possible + - Quality: + Normalize quantizer's zero bin and rounding factors + Add trellis quantization. + Make the quantizer exact. + Updates to ARNR filtering algorithm + Fix breakout thresh computation for golden & AltRef frames + Redo the forward 4x4 dct + Improve the accuracy of forward walsh-hadamard transform + Further adjustment of RD behaviour with Q and Zbin. + - Build System: + Allow linking of libs built with MinGW to MSVC + Fix target auto-detection on mingw32 + Allow --cpu= to work for x86. 
+ configure: pass original arguments through to make dist + Fix builds without runtime CPU detection + msvs: fix install of codec sources + msvs: Change devenv.com command line for better msys support + msvs: Add vs9 targets. + Add x86_64-linux-icc target + - Bugs: + Potential crashes on older MinGW builds + Fix two-pass framerate for Y4M input. + Fixed simple loop filter, other crashes on ARM v6 + arm: fix missing dependency with --enable-shared + configure: support directories containing .o + Replace pinsrw (SSE) with MMX instructions + apple: include proper mach primitives + Fixed rate control bug with long key frame interval. + Fix DSO link errors on x86-64 when not using a version script + Fixed buffer selection for UV in AltRef filtering + + +2010-06-17 v0.9.1 + - Enhancements: + * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O + * Speed optimizations + - Bugfixes: + * Rate control + * Prevent out-of-bounds accesses on invalid data + - Build system updates: + * Detect toolchain to be used automatically for native builds + * Support building shared libraries + * Better autotools emulation (--prefix, --libdir, DESTDIR) + - Updated LICENSE + * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html + + 2010-05-18 v0.9.0 - Initial open source release. Welcome to WebM and VP8! diff --git a/LICENSE b/LICENSE index 6b0e86768..7a6f99547 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,20 @@ -Copyright (c) 2010, Google, Inc. - -All rights reserved. +Copyright (c) 2010, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: +modification, are permitted provided that the following conditions are +met: -- Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
+ * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. -- Neither the name of Google nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. + * Neither the name of Google nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -30,19 +28,3 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -Subject to the terms and conditions of the above License, Google -hereby grants to You a perpetual, worldwide, non-exclusive, -no-charge, royalty-free, irrevocable (except as stated in this -section) patent license to make, have made, use, offer to sell, sell, -import, and otherwise transfer this implementation of VP8, where such -license applies only to those patent claims, both currently owned by -Google and acquired in the future, licensable by Google that are -necessarily infringed by this implementation of VP8. 
If You or your -agent or exclusive licensee institute or order or agree to the -institution of patent litigation against any entity (including a -cross-claim or counterclaim in a lawsuit) alleging that this -implementation of VP8 or any code incorporated within this -implementation of VP8 constitutes direct or contributory patent -infringement, or inducement of patent infringement, then any rights -granted to You under this License for this implementation of VP8 -shall terminate as of the date such litigation is filed. diff --git a/PATENTS b/PATENTS new file mode 100644 index 000000000..4414d8385 --- /dev/null +++ b/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the WebM Project. + +Google hereby grants to you a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer, and otherwise run, modify and propagate the contents of this +implementation of VP8, where such license applies only to those patent +claims, both currently owned by Google and acquired in the future, +licensable by Google that are necessarily infringed by this +implementation of VP8. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of VP8 or any code incorporated within this +implementation of VP8 constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of VP8 +shall terminate as of the date such litigation is filed. 
diff --git a/README b/README index cfaf4ccd0..c1a76687f 100644 --- a/README +++ b/README @@ -9,18 +9,18 @@ COMPILING THE APPLICATIONS/LIBRARIES: the application. 1. Prerequisites - + * All x86 targets require the Yasm[1] assembler be installed. * All Windows builds require that Cygwin[2] be installed. * Building the documentation requires PHP[3] and Doxygen[4]. If you do not have these packages, you must pass --disable-install-docs to the configure script. - + [1]: http://www.tortall.net/projects/yasm [2]: http://www.cygwin.com [3]: http://php.net [4]: http://www.doxygen.org - + 2. Out-of-tree builds Out of tree builds are a supported method of building the application. For an out of tree build, the source tree is kept separate from the object @@ -89,7 +89,7 @@ COMPILING THE APPLICATIONS/LIBRARIES: toolchain, the following command could be used (note, POSIX SH syntax, adapt to your shell as necessary): - $ CROSS=mipsel-linux-uclibc- ../libvpx/src/configure + $ CROSS=mipsel-linux-uclibc- ../libvpx/configure In addition, the executables to be invoked can be overridden by specifying the environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be diff --git a/args.c b/args.c index f2ad697e3..782929022 100644 --- a/args.c +++ b/args.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -119,9 +120,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) char *long_val = def->has_val ? "=" : ""; if (def->short_name && def->long_name) - snprintf(option_text, 37, "-%s%s, --%s%s", - def->short_name, short_val, + { + char *comma = def->has_val ? "," : ", "; + + snprintf(option_text, 37, "-%s%s%s --%s%6s", + def->short_name, short_val, comma, def->long_name, long_val); + } else if (def->short_name) snprintf(option_text, 37, "-%s%s", def->short_name, short_val); diff --git a/args.h b/args.h index c063f5316..4fafcf8a4 100644 --- a/args.h +++ b/args.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/build/.gitattributes b/build/.gitattributes new file mode 100644 index 000000000..03db79bc0 --- /dev/null +++ b/build/.gitattributes @@ -0,0 +1,2 @@ +*-vs8/*.rules -crlf +*-msvs/*.rules -crlf diff --git a/build/arm-wince-vs8/.gitattributes b/build/arm-wince-vs8/.gitattributes deleted file mode 100644 index be1eeb95a..000000000 --- a/build/arm-wince-vs8/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -*.rules -crlf diff --git a/build/arm-wince-vs8/obj_int_extract.bat b/build/arm-wince-vs8/obj_int_extract.bat index 9013dbd71..a361fc346 100644 --- a/build/arm-wince-vs8/obj_int_extract.bat +++ b/build/arm-wince-vs8/obj_int_extract.bat @@ -1,10 +1,11 @@ @echo off -REM Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -REM -REM Use of this source code is governed by a BSD-style license and patent -REM grant that can be found in the LICENSE file in the root of the source -REM tree. All contributing project authors may be found in the AUTHORS -REM file in the root of the source tree. +REM Copyright (c) 2010 The WebM project authors. All Rights Reserved. +REM +REM Use of this source code is governed by a BSD-style license +REM that can be found in the LICENSE file in the root of the source +REM tree. An additional intellectual property rights grant can be found +REM in the file PATENTS. All contributing project authors may +REM be found in the AUTHORS file in the root of the source tree. 
echo on diff --git a/build/arm-wince-vs8/vpx_decoder.sln b/build/arm-wince-vs8/vpx.sln similarity index 98% rename from build/arm-wince-vs8/vpx_decoder.sln rename to build/arm-wince-vs8/vpx.sln index 226205761..3e49929f2 100644 --- a/build/arm-wince-vs8/vpx_decoder.sln +++ b/build/arm-wince-vs8/vpx.sln @@ -8,7 +8,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcproj", EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_int_extract", "obj_int_extract.vcproj", "{E1360C65-D375-4335-8057-7ED99CC3F9B2}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx_decoder", "vpx_decoder.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}" +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx", "vpx.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}" ProjectSection(ProjectDependencies) = postProject {E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2} EndProjectSection diff --git a/build/make/Makefile b/build/make/Makefile index 5ea1f229e..40fa6d50c 100755 --- a/build/make/Makefile +++ b/build/make/Makefile @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
## @@ -38,13 +39,8 @@ dist: @if [ -d "$(DIST_DIR)/src" ]; then \ mkdir -p "$(DIST_DIR)/build"; \ cd "$(DIST_DIR)/build"; \ - if [ "$(TGT_CC)" = "rvct" ] ; then \ - echo "../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC)"; \ - ../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC); \ - else \ - echo "../src/configure --target=$(TOOLCHAIN)"; \ - ../src/configure --target=$(TOOLCHAIN); \ - fi; \ + echo "Rerunning configure $(CONFIGURE_ARGS)"; \ + ../src/configure $(CONFIGURE_ARGS); \ $(if $(filter vs%,$(TGT_CC)),make NO_LAUNCH_DEVENV=1;) \ fi @if [ -d "$(DIST_DIR)" ]; then \ @@ -56,7 +52,6 @@ dist: fi -svnstat: ALL_TARGETS:=$(firstword $(ALL_TARGETS)) endif ifneq ($(target),) @@ -70,7 +65,7 @@ endif BUILD_ROOT?=. VPATH=$(SRC_PATH_BARE) CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) -ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) +ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ DIST_DIR?=dist HOSTCC?=gcc TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN))) @@ -219,6 +214,20 @@ $(1): $(qexec)$$(AR) $$(ARFLAGS) $$@ $$? endef +define so_template +# Not using a pattern rule here because we don't want to generate empty +# archives when they are listed as a dependency in files not responsible +# for creating them. +# +# This needs further abstraction for dealing with non-GNU linkers. +$(1): + $(if $(quiet),@echo " [LD] $$@") + $(qexec)$$(LD) -shared $$(LDFLAGS) \ + -Wl,--no-undefined -Wl,-soname,$$(SONAME) \ + -Wl,--version-script,$$(SO_VERSION_SCRIPT) -o $$@ \ + $$(filter %.o,$$?) 
$$(extralibs) +endef + define lipo_lib_template $(1): $(addsuffix /$(1),$(FAT_ARCHS)) $(if $(quiet),@echo " [LIPO] $$@") @@ -282,6 +291,7 @@ LIBS=$(call enabled,LIBS) .libs: $(LIBS) @touch $@ $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib)))) +$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib)))) INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS) ifeq ($(MAKECMDGOALS),dist) @@ -319,6 +329,7 @@ ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_def.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_proj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh + DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/yasm.rules DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh # # This isn't really ARCH_ARM dependent, it's dependant on whether we're @@ -347,12 +358,3 @@ INSTALL_TARGETS += .install-docs .install-srcs .install-libs .install-bins all-$(target): $(BUILD_TARGETS) install:: $(INSTALL_TARGETS) dist: $(INSTALL_TARGETS) - -# -# Development helper targets -# -ifneq ($(SRC_PATH_BARE),) -.PHONY: svnstat -svnstat: - svn stat $(SRC_PATH_BARE) -endif diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl index 6fcba8462..3dff048b5 100755 --- a/build/make/ads2gas.pl +++ b/build/make/ads2gas.pl @@ -1,11 +1,12 @@ #!/usr/bin/perl ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. 
An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl index 569c3e762..5014c61fb 100755 --- a/build/make/ads2gas_apple.pl +++ b/build/make/ads2gas_apple.pl @@ -1,11 +1,12 @@ #!/usr/bin/env perl ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh index dcaa82ca6..571e46ec3 100755 --- a/build/make/armlink_adapter.sh +++ b/build/make/armlink_adapter.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/build/make/configure.sh b/build/make/configure.sh index 1f4f24db9..d25d6400e 100755 --- a/build/make/configure.sh +++ b/build/make/configure.sh @@ -120,8 +120,8 @@ EOF show_targets() { while [ -n "$*" ]; do - if [ "${1%%-*}" == "${2%%-*}" ]; then - if [ "${2%%-*}" == "${3%%-*}" ]; then + if [ "${1%%-*}" = "${2%%-*}" ]; then + if [ "${2%%-*}" = "${3%%-*}" ]; then printf " %-24s %-24s %-24s\n" "$1" "$2" "$3" shift; shift; shift else @@ -255,9 +255,10 @@ TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h" TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c" TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o" TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x" +TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm" clean_temp_files() { - rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X} + rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM} } # @@ -322,6 +323,21 @@ check_add_ldflags() { add_ldflags "$@" } +check_asm_align() { + log check_asm_align "$@" + cat >${TMP_ASM} <<EOF +section .rodata +align 16 +EOF + log_file ${TMP_ASM} + check_cmd ${AS} ${ASFLAGS} -o ${TMP_O} ${TMP_ASM} + readelf -WS ${TMP_O} >${TMP_X} + log_file ${TMP_X} + if ! grep -q '\.rodata .* 16$' ${TMP_X}; then + die "${AS} ${ASFLAGS} does not support section alignment (nasm <=2.08?)" + fi +} + write_common_config_banner() { echo '# This file automatically generated by configure. Do not edit!' > config.mk echo "TOOLCHAIN := ${toolchain}" >> config.mk @@ -378,7 +394,7 @@ EOF fmt_deps = sed -e 's;^__image.axf;\$(dir \$@)\$(notdir \$<).o \$@;' #hide EOF else cat >> $1 << EOF -fmt_deps = sed -e 's;^\(.*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;' #hide +fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;' EOF fi @@ -395,8 +411,6 @@ EOF write_common_target_config_h() { cat > ${TMP_H} << EOF /* This file automatically generated by configure. Do not edit! 
*/ -#define INLINE ${INLINE} -#define FORCEINLINE ${FORCEINLINE:-${INLINE}} #define RESTRICT ${RESTRICT} EOF print_config_h ARCH "${TMP_H}" ${ARCH_LIST} @@ -442,13 +456,18 @@ process_common_cmdline() { disable builtin_libc alt_libc="${optval}" ;; + --as=*) + [ "${optval}" = yasm -o "${optval}" = nasm -o "${optval}" = auto ] \ + || die "Must be yasm, nasm or auto: ${optval}" + alt_as="${optval}" + ;; --prefix=*) prefix="${optval}" ;; --libdir=*) libdir="${optval}" ;; - --libc|--prefix|--libdir) + --libc|--as|--prefix|--libdir) die "Option ${opt} requires argument" ;; --help|-h) show_help @@ -475,7 +494,7 @@ post_process_common_cmdline() { prefix="${prefix%/}" libdir="${libdir:-${prefix}/lib}" libdir="${libdir%/}" - if [ "${libdir#${prefix}}" == "${libdir}" ]; then + if [ "${libdir#${prefix}}" = "${libdir}" ]; then die "Libdir ${libdir} must be a subdirectory of ${prefix}" fi } @@ -497,34 +516,47 @@ setup_gnu_toolchain() { process_common_toolchain() { if [ -z "$toolchain" ]; then - uname="$(uname -a)" + gcctarget="$(gcc -dumpmachine 2> /dev/null)" # detect tgt_isa - case "$uname" in - *x86_64*) + case "$gcctarget" in + *x86_64*|*amd64*) tgt_isa=x86_64 ;; *i[3456]86*) tgt_isa=x86 ;; + *powerpc64*) + tgt_isa=ppc64 + ;; + *powerpc*) + tgt_isa=ppc32 + ;; + *sparc*) + tgt_isa=sparc + ;; esac # detect tgt_os - case "$uname" in - *Darwin\ Kernel\ Version\ 8*) + case "$gcctarget" in + *darwin8*) tgt_isa=universal tgt_os=darwin8 ;; - *Darwin\ Kernel\ Version\ 9*) + *darwin9*) tgt_isa=universal tgt_os=darwin9 ;; - *Msys*|*Cygwin*) + *mingw32*|*cygwin*) + [ -z "$tgt_isa" ] && tgt_isa=x86 tgt_os=win32 ;; - *Linux*|*BSD*) + *linux*|*bsd*) tgt_os=linux ;; + *solaris2.10) + tgt_os=solaris + ;; esac if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then @@ -557,15 +589,18 @@ process_common_toolchain() { mips*) enable mips;; esac + # PIC is probably what we want when building shared libs + enabled shared && soft_enable pic + # Handle darwin variants case ${toolchain} in - *-darwin8-gcc) + 
*-darwin8-*) add_cflags "-isysroot /Developer/SDKs/MacOSX10.4u.sdk" add_cflags "-mmacosx-version-min=10.4" add_ldflags "-isysroot /Developer/SDKs/MacOSX10.4u.sdk" add_ldflags "-mmacosx-version-min=10.4" ;; - *-darwin9-gcc) + *-darwin9-*) add_cflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk" add_cflags "-mmacosx-version-min=10.5" add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk" @@ -573,6 +608,13 @@ process_common_toolchain() { ;; esac + # Handle Solaris variants. Solaris 10 needs -lposix4 + case ${toolchain} in + *-solaris-*) + add_extralibs -lposix4 + ;; + esac + # Process ARM architecture variants case ${toolchain} in arm*|iwmmxt*) @@ -756,8 +798,8 @@ process_common_toolchain() { link_with_cc=gcc setup_gnu_toolchain add_asflags -force_cpusubtype_ALL -I"\$(dir \$<)darwin" - add_cflags -maltivec -faltivec soft_enable altivec + enabled altivec && add_cflags -maltivec case "$tgt_os" in linux*) @@ -769,6 +811,7 @@ process_common_toolchain() { add_cflags ${darwin_arch} -m${bits} -fasm-blocks add_asflags ${darwin_arch} -force_cpusubtype_ALL -I"\$(dir \$<)darwin" add_ldflags ${darwin_arch} -m${bits} + enabled altivec && add_cflags -faltivec ;; esac ;; @@ -781,8 +824,12 @@ process_common_toolchain() { soft_enable sse2 soft_enable sse3 soft_enable ssse3 + soft_enable sse4_1 case ${tgt_os} in + win*) + enabled gcc && add_cflags -fno-common + ;; solaris*) CC=${CC:-${CROSS}gcc} LD=${LD:-${CROSS}gcc} @@ -790,6 +837,7 @@ process_common_toolchain() { ;; esac + AS="${alt_as:-${AS:-auto}}" case ${tgt_cc} in icc*) CC=${CC:-icc} @@ -797,16 +845,37 @@ process_common_toolchain() { setup_gnu_toolchain add_cflags -use-msasm -use-asm add_ldflags -i-static + enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3 + enabled x86_64 && AR=xiar + case ${tune_cpu} in + atom*) + tune_cflags="-x" + tune_cpu="SSE3_ATOM" + ;; + *) + tune_cflags="-march=" + ;; + esac ;; gcc*) add_cflags -m${bits} add_ldflags -m${bits} link_with_cc=gcc + tune_cflags="-march=" setup_gnu_toolchain 
;; esac - AS=yasm + case "${AS}" in + auto|"") + which nasm >/dev/null 2>&1 && AS=nasm + which yasm >/dev/null 2>&1 && AS=yasm + [ "${AS}" = auto -o -z "${AS}" ] \ + && die "Neither yasm nor nasm have been found" + ;; + esac + log_echo " using $AS" + [ "${AS##*/}" = nasm ] && add_asflags -Ox AS_SFX=.asm case ${tgt_os} in win*) @@ -815,7 +884,9 @@ process_common_toolchain() { ;; linux*|solaris*) add_asflags -f elf${bits} - enabled debug && add_asflags -g dwarf2 + enabled debug && [ "${AS}" = yasm ] && add_asflags -g dwarf2 + enabled debug && [ "${AS}" = nasm ] && add_asflags -g + [ "${AS##*/}" = nasm ] && check_asm_align ;; darwin*) add_asflags -f macho${bits} @@ -828,7 +899,7 @@ process_common_toolchain() { # enabled icc && ! enabled pic && add_cflags -fno-pic -mdynamic-no-pic enabled icc && ! enabled pic && add_cflags -fno-pic ;; - *) log "Warning: Unknown os $tgt_os while setting up yasm flags" + *) log "Warning: Unknown os $tgt_os while setting up $AS flags" ;; esac ;; @@ -859,9 +930,9 @@ process_common_toolchain() { enabled gcov && check_add_cflags -fprofile-arcs -ftest-coverage && check_add_ldflags -fprofile-arcs -ftest-coverage - enabled optimizations && check_add_cflags -O3 - if enabled rvct; then - enabled optimizations && check_add_cflags -Otime + if enabled optimizations; then + enabled rvct && check_add_cflags -Otime + enabled small && check_add_cflags -O2 || check_add_cflags -O3 fi # Position Independant Code (PIC) support, for building relocatable @@ -888,8 +959,8 @@ EOF # glibc needs these if enabled linux; then - add_cflags -D_LARGEFILE_SOURCE - add_cflags -D_FILE_OFFSET_BITS=64 + add_cflags -D_LARGEFILE_SOURCE + add_cflags -D_FILE_OFFSET_BITS=64 fi } diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh index c1118e1a8..7c6c5d565 100755 --- a/build/make/gen_asm_deps.sh +++ b/build/make/gen_asm_deps.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/build/make/gen_msvs_def.sh b/build/make/gen_msvs_def.sh index 68b240624..4defcc2e7 100755 --- a/build/make/gen_msvs_def.sh +++ b/build/make/gen_msvs_def.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh index 477dec7fe..584477f92 100755 --- a/build/make/gen_msvs_proj.sh +++ b/build/make/gen_msvs_proj.sh @@ -1,16 +1,18 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## self=$0 self_basename=${self##*/} +self_dirname=$(dirname "$0") EOL=$'\n' show_help() { @@ -204,7 +206,7 @@ for opt in "$@"; do ;; --ver=*) vs_ver="$optval" case $optval in - [78]) + [789]) ;; *) die Unrecognized Visual Studio Version in $opt ;; @@ -246,6 +248,8 @@ case "${vs_ver:-8}" in ;; 8) vs_ver_id="8.00" ;; + 9) vs_ver_id="9.00" + ;; esac [ -n "$name" ] || die "Project name (--name) must be specified!" 
@@ -291,8 +295,8 @@ case "$target" in x86*) platforms[0]="Win32" # these are only used by vs7 - asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} \$(InputPath)" - asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} \$(InputPath)" + asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} "\$(InputPath)"" + asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} "\$(InputPath)"" ;; arm*|iwmmx*) case "${name}" in @@ -342,19 +346,19 @@ generate_vcproj() { open_tag ToolFiles case "$target" in - x86*) $uses_asm && tag DefaultToolFile FileName="yasm.rules" + x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules" ;; arm*|iwmmx*) - if [ "$name" == "vpx_decoder" ];then + if [ "$name" == "vpx" ];then case "$target" in armv5*) - tag DefaultToolFile FileName="armasmv5.rules" + tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules" ;; armv6*) - tag DefaultToolFile FileName="armasmv6.rules" + tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv6.rules" ;; iwmmxt*) - tag DefaultToolFile FileName="armasmxscale.rules" + tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmxscale.rules" ;; esac fi @@ -374,7 +378,7 @@ generate_vcproj() { if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then case "$name" in - vpx_decoder) tag Tool \ + vpx) tag Tool \ Name="VCPreBuildEventTool" \ CommandLine="call obj_int_extract.bat \$(ConfigurationName)" tag Tool \ @@ -435,7 +439,7 @@ generate_vcproj() { Name="VCCLCompilerTool" \ Optimization="0" \ AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \ + PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ RuntimeLibrary="$debug_runtime" \ UsePrecompiledHeader="0" \ WarningLevel="3" \ @@ -508,7 +512,7 @@ generate_vcproj() { 
if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then case "$name" in - vpx_decoder) tag DeploymentTool \ + vpx) tag DeploymentTool \ ForceDirty="-1" \ RegisterOutput="0" ;; @@ -532,7 +536,7 @@ generate_vcproj() { if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then case "$name" in - vpx_decoder) tag Tool \ + vpx) tag Tool \ Name="VCPreBuildEventTool" \ CommandLine="call obj_int_extract.bat \$(ConfigurationName)" tag Tool \ @@ -593,7 +597,7 @@ generate_vcproj() { x86*) tag Tool \ Name="VCCLCompilerTool" \ AdditionalIncludeDirectories="$incs" \ - PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \ + PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \ RuntimeLibrary="$release_runtime" \ UsePrecompiledHeader="0" \ WarningLevel="3" \ @@ -670,7 +674,7 @@ generate_vcproj() { if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then case "$name" in - vpx_decoder) tag DeploymentTool \ + vpx) tag DeploymentTool \ ForceDirty="-1" \ RegisterOutput="0" ;; diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh index b670ec598..9cf090067 100755 --- a/build/make/gen_msvs_sln.sh +++ b/build/make/gen_msvs_sln.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. 
+## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## @@ -24,7 +25,7 @@ files. Options: --help Print this message --out=outfile Redirect output to a file - --ver=version Version (7,8) of visual studio to generate for + --ver=version Version (7,8,9) of visual studio to generate for --target=isa-os-cc Target specifier EOF exit 1 @@ -192,11 +193,11 @@ ${TAB}rm -rf "$platform"/"$config" ifneq (\$(found_devenv),) ifeq (\$(CONFIG_VS_VERSION),7) $nows_sln_config: $outfile -${TAB}devenv.com $outfile /build "$config" +${TAB}devenv.com $outfile -build "$config" else $nows_sln_config: $outfile -${TAB}devenv.com $outfile /build "$sln_config" +${TAB}devenv.com $outfile -build "$sln_config" endif else @@ -223,7 +224,7 @@ for opt in "$@"; do ;; --ver=*) vs_ver="$optval" case $optval in - [78]) + [789]) ;; *) die Unrecognized Visual Studio Version in $opt ;; @@ -234,7 +235,7 @@ for opt in "$@"; do 7) sln_vers="8.00" sln_vers_str="Visual Studio .NET 2003" ;; - 8) + [89]) ;; *) die "Unrecognized Visual Studio Version '$optval' in $opt" ;; @@ -256,6 +257,9 @@ case "${vs_ver:-8}" in 8) sln_vers="9.00" sln_vers_str="Visual Studio 2005" ;; + 9) sln_vers="10.00" + sln_vers_str="Visual Studio 2008" + ;; esac for f in "${file_list[@]}"; do diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c index d9afb9d30..e01870f27 100644 --- a/build/make/obj_int_extract.c +++ b/build/make/obj_int_extract.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/build/make/version.sh b/build/make/version.sh index 2bda70106..3efb956bb 100755 --- a/build/make/version.sh +++ b/build/make/version.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
## diff --git a/build/x86-msvs/yasm.rules b/build/x86-msvs/yasm.rules new file mode 100644 index 000000000..ee1fefbca --- /dev/null +++ b/build/x86-msvs/yasm.rules @@ -0,0 +1,115 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/configure b/configure index 210c69b8b..3022edcf1 100755 --- a/configure +++ b/configure @@ -23,22 +23,25 @@ Advanced options: ${toggle_libs} don't build libraries ${toggle_examples} don't build examples --libc=PATH path to alternate libc + --as={yasm|nasm|auto} use specified assembler [auto, yasm preferred] ${toggle_fast_unaligned} don't use unaligned accesses, even when supported by hardware [auto] ${toggle_codec_srcs} in/exclude codec library source code ${toggle_debug_libs} in/exclude debug version of libraries - ${toggle_eval_limit} enable limited evaluation build ${toggle_md5} support for output of checksum data ${toggle_static_msvcrt} use static MSVCRT (VS builds only) ${toggle_vp8} VP8 codec support ${toggle_psnr} output of PSNR data, if supported (encoders) ${toggle_mem_tracker} track memory usage - ${toggle_eval_limit} decoder limitted to 500 frames ${toggle_postproc} postprocessing ${toggle_multithread} multithreaded encoding and decoding. 
${toggle_spatial_resampling} spatial sampling (scaling) support ${toggle_realtime_only} enable this option while building for real-time encoding ${toggle_runtime_cpu_detect} runtime cpu detection + ${toggle_shared} shared library support + ${toggle_small} favor smaller size over speed + ${toggle_arm_asm_detok} assembly version of the detokenizer (ARM platforms only) + ${toggle_postproc_visualizer} macro block / block level visualizers Codecs: Codecs can be selectively enabled or disabled individually, or by family: @@ -95,9 +98,11 @@ all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8 all_platforms="${all_platforms} mips32-linux-gcc" all_platforms="${all_platforms} ppc32-darwin8-gcc" all_platforms="${all_platforms} ppc32-darwin9-gcc" +all_platforms="${all_platforms} ppc32-linux-gcc" all_platforms="${all_platforms} ppc64-darwin8-gcc" all_platforms="${all_platforms} ppc64-darwin9-gcc" all_platforms="${all_platforms} ppc64-linux-gcc" +all_platforms="${all_platforms} sparc-solaris-gcc" all_platforms="${all_platforms} x86-darwin8-gcc" all_platforms="${all_platforms} x86-darwin8-icc" all_platforms="${all_platforms} x86-darwin9-gcc" @@ -108,10 +113,13 @@ all_platforms="${all_platforms} x86-solaris-gcc" all_platforms="${all_platforms} x86-win32-gcc" all_platforms="${all_platforms} x86-win32-vs7" all_platforms="${all_platforms} x86-win32-vs8" +all_platforms="${all_platforms} x86-win32-vs9" all_platforms="${all_platforms} x86_64-darwin9-gcc" all_platforms="${all_platforms} x86_64-linux-gcc" +all_platforms="${all_platforms} x86_64-linux-icc" all_platforms="${all_platforms} x86_64-solaris-gcc" all_platforms="${all_platforms} x86_64-win64-vs8" +all_platforms="${all_platforms} x86_64-win64-vs9" all_platforms="${all_platforms} universal-darwin8-gcc" all_platforms="${all_platforms} universal-darwin9-gcc" all_platforms="${all_platforms} generic-gnu" @@ -192,6 +200,7 @@ ARCH_EXT_LIST=" sse2 sse3 ssse3 + sse4_1 altivec " @@ -231,11 +240,8 @@ CONFIG_LIST=" dequant_tokens 
dc_recon - new_tokens - eval_limit runtime_cpu_detect postproc - postproc_generic multithread psnr ${CODECS} @@ -245,6 +251,10 @@ CONFIG_LIST=" static_msvcrt spatial_resampling realtime_only + shared + small + arm_asm_detok + postproc_visualizer experimental ${EXPERIMENT_LIST} @@ -267,6 +277,7 @@ CMDLINE_SELECT=" libs examples libc + as fast_unaligned codec_srcs debug_libs @@ -274,10 +285,7 @@ CMDLINE_SELECT=" dequant_tokens dc_recon - new_tokens - eval_limit postproc - postproc_generic multithread psnr ${CODECS} @@ -286,6 +294,11 @@ CMDLINE_SELECT=" mem_tracker spatial_resampling realtime_only + shared + small + arm_asm_detok + postproc_visualizer + experimental " @@ -335,8 +348,6 @@ post_process_cmdline() { for c in ${CODECS}; do enabled ${c} && enable ${c##*_}s done - - } @@ -377,7 +388,6 @@ process_targets() { enabled codec_srcs && DIST_DIR="${DIST_DIR}-src" ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost" ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt" - enabled eval_limit && DIST_DIR="${DIST_DIR}-eval" ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs" DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}" case "${tgt_os}" in @@ -388,6 +398,12 @@ process_targets() { if [ -f "${source_path}/build/make/version.sh" ]; then local ver=`"$source_path/build/make/version.sh" --bare $source_path` DIST_DIR="${DIST_DIR}-${ver}" + ver=${ver%%-*} + VERSION_PATCH=${ver##*.} + ver=${ver%.*} + VERSION_MINOR=${ver##*.} + ver=${ver#v} + VERSION_MAJOR=${ver%.*} fi enabled child || cat <> config.mk ifeq (\$(MAKECMDGOALS),dist) @@ -396,6 +412,12 @@ else DIST_DIR?=\$(DESTDIR)${prefix} endif LIBSUBDIR=${libdir##${prefix}/} + +VERSION_MAJOR=${VERSION_MAJOR} +VERSION_MINOR=${VERSION_MINOR} +VERSION_PATCH=${VERSION_PATCH} + +CONFIGURE_ARGS=${CONFIGURE_ARGS} EOF enabled child || echo "CONFIGURE_ARGS?=${CONFIGURE_ARGS}" >> config.mk @@ -415,6 +437,12 @@ EOF } process_detect() { + if enabled shared; then + # Can only build shared libs on a subset of platforms. 
Doing this check + # here rather than at option parse time because the target auto-detect + # magic happens after the command line has been parsed. + enabled linux || die "--enable-shared only supported on ELF for now" + fi if [ -z "$CC" ]; then echo "Bypassing toolchain for environment detection." enable external_build @@ -515,7 +543,7 @@ process_toolchain() { enabled gcc || soft_disable ccache if enabled mips; then enable dequant_tokens - enable dc_recon + enable dc_recon fi # Enable the postbuild target if building for visual studio. @@ -524,13 +552,15 @@ process_toolchain() { enable solution vs_version=${tgt_cc##vs} all_targets="${all_targets} solution" - INLINE=__inline - FORCEINLINE=__forceinline ;; esac # Other toolchain specific defaults case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac + + if enabled postproc_visualizer; then + enabled postproc || die "postproc_visualizer requires postproc to be enabled" + fi } diff --git a/docs.mk b/docs.mk index 868bcb995..28df9d262 100644 --- a/docs.mk +++ b/docs.mk @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/example_xma.c b/example_xma.c index 753ca3c40..72eb47092 100644 --- a/example_xma.c +++ b/example_xma.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/examples.mk b/examples.mk index f05fd818e..a30205d31 100644 --- a/examples.mk +++ b/examples.mk @@ -1,28 +1,51 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## # List of examples to build. UTILS are files that are taken from the source # tree directly, and GEN_EXAMPLES are files that are created from the # examples folder. 
-UTILS-$(CONFIG_DECODERS) += ivfdec.c -ivfdec.SRCS += md5_utils.c md5_utils.h -ivfdec.SRCS += vpx_ports/vpx_timer.h -ivfdec.SRCS += vpx/vpx_integer.h -ivfdec.SRCS += args.c args.h vpx_ports/config.h -ivfdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 -ivfdec.DESCRIPTION = Full featured decoder -UTILS-$(CONFIG_ENCODERS) += ivfenc.c -ivfenc.SRCS += args.c args.h vpx_ports/config.h -ivfenc.SRCS += vpx_ports/mem_ops.h vpx_ports/mem_ops_aligned.h -ivfenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 -ivfenc.DESCRIPTION = Full featured encoder +UTILS-$(CONFIG_DECODERS) += vpxdec.c +vpxdec.SRCS += md5_utils.c md5_utils.h +vpxdec.SRCS += vpx_ports/vpx_timer.h +vpxdec.SRCS += vpx/vpx_integer.h +vpxdec.SRCS += args.c args.h vpx_ports/config.h +vpxdec.SRCS += tools_common.c tools_common.h +vpxdec.SRCS += nestegg/halloc/halloc.h +vpxdec.SRCS += nestegg/halloc/src/align.h +vpxdec.SRCS += nestegg/halloc/src/halloc.c +vpxdec.SRCS += nestegg/halloc/src/hlist.h +vpxdec.SRCS += nestegg/halloc/src/macros.h +vpxdec.SRCS += nestegg/include/nestegg/nestegg.h +vpxdec.SRCS += nestegg/src/nestegg.c +vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 +vpxdec.DESCRIPTION = Full featured decoder +UTILS-$(CONFIG_ENCODERS) += vpxenc.c +vpxenc.SRCS += args.c args.h y4minput.c y4minput.h +vpxenc.SRCS += tools_common.c tools_common.h +vpxenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h +vpxenc.SRCS += vpx_ports/mem_ops_aligned.h +vpxenc.SRCS += libmkv/EbmlIDs.h +vpxenc.SRCS += libmkv/EbmlWriter.c +vpxenc.SRCS += libmkv/EbmlWriter.h +vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 +vpxenc.DESCRIPTION = Full featured encoder + +# Clean up old ivfenc, ivfdec binaries. 
+ifeq ($(CONFIG_MSVS),yes) +CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe) +CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe) +else +CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe,} +CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe,} +endif # XMA example disabled for now, not used in VP8 #UTILS-$(CONFIG_DECODERS) += example_xma.c diff --git a/examples/decode_to_md5.txt b/examples/decode_to_md5.txt index 0599b135c..b3dd56876 100644 --- a/examples/decode_to_md5.txt +++ b/examples/decode_to_md5.txt @@ -26,21 +26,21 @@ is processed, then U, then V. It is important to honor the image's `stride` values. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_DX unsigned char md5_sum[16]; -md5_ctx_t md5; +MD5Context md5; int i; -md5_init(&md5); +MD5Init(&md5); for(plane=0; plane < 3; plane++) { unsigned char *buf =img->planes[plane]; for(y=0; yd_h >> (plane?1:0); y++) { - md5_update(&md5, buf, img->d_w >> (plane?1:0)); + MD5Update(&md5, buf, img->d_w >> (plane?1:0)); buf += img->stride[plane]; } } -md5_finalize(&md5, md5_sum); +MD5Final(md5_sum, &md5); for(i=0; i<16; i++) fprintf(outfile, "%02x",md5_sum[i]); fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h, diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c index 59f0c11ed..26b745d34 100644 --- a/examples/decoder_tmpl.c +++ b/examples/decoder_tmpl.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -60,8 +61,8 @@ int main(int argc, char **argv) { die("Failed to open %s for writing", argv[2]); /* Read file header */ - fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile); - if(!(file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I' + if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ + && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I' && file_hdr[3]=='F')) die("%s is not an IVF file.", argv[1]); diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt index 6da38c2a8..310c66d54 100644 --- a/examples/decoder_tmpl.txt +++ b/examples/decoder_tmpl.txt @@ -48,7 +48,7 @@ for(plane=0; plane < 3; plane++) { unsigned char *buf =img->planes[plane]; for(y=0; yd_h >> (plane?1:0); y++) { - fwrite(buf, 1, img->d_w >> (plane?1:0), outfile); + if(fwrite(buf, 1, img->d_w >> (plane?1:0), outfile)); buf += img->stride[plane]; } } diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c index 7b33e2b7c..d9e4d0317 100644 --- a/examples/encoder_tmpl.c +++ b/examples/encoder_tmpl.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -84,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile, mem_put_le32(header+24, frame_cnt); /* length */ mem_put_le32(header+28, 0); /* unused */ - fwrite(header, 1, 32, outfile); + if(fwrite(header, 1, 32, outfile)); } @@ -102,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile, mem_put_le32(header+4, pts&0xFFFFFFFF); mem_put_le32(header+8, pts >> 32); - fwrite(header, 1, 12, outfile); + if(fwrite(header, 1, 12, outfile)); } int main(int argc, char **argv) { diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt index 87055ca13..3273164da 100644 --- a/examples/encoder_tmpl.txt +++ b/examples/encoder_tmpl.txt @@ -61,8 +61,8 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME case VPX_CODEC_CX_FRAME_PKT: write_ivf_frame_header(outfile, pkt); - fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile); + if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile)); break; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME diff --git a/examples/gen_example_code.sh b/examples/gen_example_code.sh index f2e45c583..1133b4951 100755 --- a/examples/gen_example_code.sh +++ b/examples/gen_example_code.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/examples/gen_example_doxy.php b/examples/gen_example_doxy.php index 08beade5a..701bbd30d 100755 --- a/examples/gen_example_doxy.php +++ b/examples/gen_example_doxy.php @@ -1,11 +1,12 @@ #!/usr/bin/env php /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/examples/gen_example_text.sh b/examples/gen_example_text.sh index 0e1f796ea..9a8703d7f 100755 --- a/examples/gen_example_text.sh +++ b/examples/gen_example_text.sh @@ -1,11 +1,12 @@ #!/bin/bash ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig b/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig deleted file mode 100644 index b0ed7c99c..000000000 --- a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig +++ /dev/null @@ -1,2909 +0,0 @@ - -# -# Original Markdown -# Copyright (c) 2004-2006 John Gruber -# -# - - -define( 'MARKDOWN_VERSION', "1.0.1m" ); # Sat 21 Jun 2008 -define( 'MARKDOWNEXTRA_VERSION', "1.2.3" ); # Wed 31 Dec 2008 - - -# -# Global default settings: -# - -# Change to ">" for HTML output -@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />"); - -# Define the width of a tab for code blocks. -@define( 'MARKDOWN_TAB_WIDTH', 4 ); - -# Optional title attribute for footnote links and backlinks. -@define( 'MARKDOWN_FN_LINK_TITLE', "" ); -@define( 'MARKDOWN_FN_BACKLINK_TITLE', "" ); - -# Optional class attribute for footnote links and backlinks. -@define( 'MARKDOWN_FN_LINK_CLASS', "" ); -@define( 'MARKDOWN_FN_BACKLINK_CLASS', "" ); - - -# -# WordPress settings: -# - -# Change to false to remove Markdown from posts and/or comments. -@define( 'MARKDOWN_WP_POSTS', true ); -@define( 'MARKDOWN_WP_COMMENTS', true ); - - - -### Standard Function Interface ### - -@define( 'MARKDOWN_PARSER_CLASS', 'MarkdownExtra_Parser' ); - -function Markdown($text) { -# -# Initialize the parser and return the result of its transform method. -# - # Setup static parser variable. - static $parser; - if (!isset($parser)) { - $parser_class = MARKDOWN_PARSER_CLASS; - $parser = new $parser_class; - } - - # Transform text using parser. - return $parser->transform($text); -} - - -### WordPress Plugin Interface ### - -/* -Plugin Name: Markdown Extra -Plugin URI: http://www.michelf.com/projects/php-markdown/ -Description: Markdown syntax allows you to write using an easy-to-read, easy-to-write plain text format. 
Based on the original Perl version by John Gruber. More... -Version: 1.2.2 -Author: Michel Fortin -Author URI: http://www.michelf.com/ -*/ - -if (isset($wp_version)) { - # More details about how it works here: - # - - # Post content and excerpts - # - Remove WordPress paragraph generator. - # - Run Markdown on excerpt, then remove all tags. - # - Add paragraph tag around the excerpt, but remove it for the excerpt rss. - if (MARKDOWN_WP_POSTS) { - remove_filter('the_content', 'wpautop'); - remove_filter('the_content_rss', 'wpautop'); - remove_filter('the_excerpt', 'wpautop'); - add_filter('the_content', 'mdwp_MarkdownPost', 6); - add_filter('the_content_rss', 'mdwp_MarkdownPost', 6); - add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6); - add_filter('get_the_excerpt', 'trim', 7); - add_filter('the_excerpt', 'mdwp_add_p'); - add_filter('the_excerpt_rss', 'mdwp_strip_p'); - - remove_filter('content_save_pre', 'balanceTags', 50); - remove_filter('excerpt_save_pre', 'balanceTags', 50); - add_filter('the_content', 'balanceTags', 50); - add_filter('get_the_excerpt', 'balanceTags', 9); - } - - # Add a footnote id prefix to posts when inside a loop. - function mdwp_MarkdownPost($text) { - static $parser; - if (!$parser) { - $parser_class = MARKDOWN_PARSER_CLASS; - $parser = new $parser_class; - } - if (is_single() || is_page() || is_feed()) { - $parser->fn_id_prefix = ""; - } else { - $parser->fn_id_prefix = get_the_ID() . "."; - } - return $parser->transform($text); - } - - # Comments - # - Remove WordPress paragraph generator. - # - Remove WordPress auto-link generator. - # - Scramble important tags before passing them to the kses filter. - # - Run Markdown on excerpt then remove paragraph tags. 
- if (MARKDOWN_WP_COMMENTS) { - remove_filter('comment_text', 'wpautop', 30); - remove_filter('comment_text', 'make_clickable'); - add_filter('pre_comment_content', 'Markdown', 6); - add_filter('pre_comment_content', 'mdwp_hide_tags', 8); - add_filter('pre_comment_content', 'mdwp_show_tags', 12); - add_filter('get_comment_text', 'Markdown', 6); - add_filter('get_comment_excerpt', 'Markdown', 6); - add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); - - global $mdwp_hidden_tags, $mdwp_placeholders; - $mdwp_hidden_tags = explode(' ', - '

 
  • '); - $mdwp_placeholders = explode(' ', str_rot13( - 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. - 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); - } - - function mdwp_add_p($text) { - if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { - $text = '

    '.$text.'

    '; - $text = preg_replace('{\n{2,}}', "

    \n\n

    ", $text); - } - return $text; - } - - function mdwp_strip_p($t) { return preg_replace('{}i', '', $t); } - - function mdwp_hide_tags($text) { - global $mdwp_hidden_tags, $mdwp_placeholders; - return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); - } - function mdwp_show_tags($text) { - global $mdwp_hidden_tags, $mdwp_placeholders; - return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); - } -} - - -### bBlog Plugin Info ### - -function identify_modifier_markdown() { - return array( - 'name' => 'markdown', - 'type' => 'modifier', - 'nicename' => 'PHP Markdown Extra', - 'description' => 'A text-to-HTML conversion tool for web writers', - 'authors' => 'Michel Fortin and John Gruber', - 'licence' => 'GPL', - 'version' => MARKDOWNEXTRA_VERSION, - 'help' => 'Markdown syntax allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by John Gruber. More...', - ); -} - - -### Smarty Modifier Interface ### - -function smarty_modifier_markdown($text) { - return Markdown($text); -} - - -### Textile Compatibility Mode ### - -# Rename this file to "classTextile.php" and it can replace Textile everywhere. - -if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { - # Try to include PHP SmartyPants. Should be in the same directory. - @include_once 'smartypants.php'; - # Fake Textile class. It calls Markdown instead. - class Textile { - function TextileThis($text, $lite='', $encode='') { - if ($lite == '' && $encode == '') $text = Markdown($text); - if (function_exists('SmartyPants')) $text = SmartyPants($text); - return $text; - } - # Fake restricted version: restrictions are not supported for now. - function TextileRestricted($text, $lite='', $noimage='') { - return $this->TextileThis($text, $lite); - } - # Workaround to ensure compatibility with TextPattern 4.0.3. 
- function blockLite($text) { return $text; } - } -} - - - -# -# Markdown Parser Class -# - -class Markdown_Parser { - - # Regex to match balanced [brackets]. - # Needed to insert a maximum bracked depth while converting to PHP. - var $nested_brackets_depth = 6; - var $nested_brackets_re; - - var $nested_url_parenthesis_depth = 4; - var $nested_url_parenthesis_re; - - # Table of hash values for escaped characters: - var $escape_chars = '\`*_{}[]()>#+-.!'; - var $escape_chars_re; - - # Change to ">" for HTML output. - var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; - var $tab_width = MARKDOWN_TAB_WIDTH; - - # Change to `true` to disallow markup or entities. - var $no_markup = false; - var $no_entities = true; - - # Predefined urls and titles for reference links and images. - var $predef_urls = array(); - var $predef_titles = array(); - - - function Markdown_Parser() { - # - # Constructor function. Initialize appropriate member variables. - # - $this->_initDetab(); - $this->prepareItalicsAndBold(); - - $this->nested_brackets_re = - str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). - str_repeat('\])*', $this->nested_brackets_depth); - - $this->nested_url_parenthesis_re = - str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). - str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); - - $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; - - # Sort document, block, and span gamut in ascendent priority order. - asort($this->document_gamut); - asort($this->block_gamut); - asort($this->span_gamut); - } - - - # Internal hashes used during transformation. - var $urls = array(); - var $titles = array(); - var $html_hashes = array(); - - # Status flag to avoid invalid nesting. - var $in_anchor = false; - - - function setup() { - # - # Called before the transformation process starts to setup parser - # states. - # - # Clear global hashes. 
- $this->urls = $this->predef_urls; - $this->titles = $this->predef_titles; - $this->html_hashes = array(); - - $in_anchor = false; - } - - function teardown() { - # - # Called after the transformation process to clear any variable - # which may be taking up memory unnecessarly. - # - $this->urls = array(); - $this->titles = array(); - $this->html_hashes = array(); - } - - - function transform($text) { - # - # Main function. Performs some preprocessing on the input text - # and pass it through the document gamut. - # - $this->setup(); - - # Remove UTF-8 BOM and marker character in input, if present. - $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); - - # Standardize line endings: - # DOS to Unix and Mac to Unix - $text = preg_replace('{\r\n?}', "\n", $text); - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = $this->detab($text); - - # Turn block-level HTML blocks into hash entries - $text = $this->hashHTMLBlocks($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ ]*\n+/ . - $text = preg_replace('/^[ ]+$/m', '', $text); - - # Run document gamut methods. - foreach ($this->document_gamut as $method => $priority) { - $text = $this->$method($text); - } - - $this->teardown(); - - return $text . "\n"; - } - - var $document_gamut = array( - # Strip link definitions, store in hashes. - "stripLinkDefinitions" => 20, - - "runBasicBlockGamut" => 30, - ); - - - function stripLinkDefinitions($text) { - # - # Strips link definitions from text, stores the URLs and titles in - # hash references. - # - $less_than_tab = $this->tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - $text = preg_replace_callback('{ - ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 - [ ]* - \n? # maybe *one* newline - [ ]* - ? 
# url = $2 - [ ]* - \n? # maybe one newline - [ ]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.*?) # title = $3 - [")] - [ ]* - )? # title is optional - (?:\n+|\Z) - }xm', - array(&$this, '_stripLinkDefinitions_callback'), - $text); - return $text; - } - function _stripLinkDefinitions_callback($matches) { - $link_id = strtolower($matches[1]); - $this->urls[$link_id] = $matches[2]; - $this->titles[$link_id] =& $matches[3]; - return ''; # String that will replace the block - } - - - function hashHTMLBlocks($text) { - if ($this->no_markup) return $text; - - $less_than_tab = $this->tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

    s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - # - # * List "a" is made of tags which can be both inline or block-level. - # These will be treated block-level when the start tag is alone on - # its line, otherwise they're not matched here and will be taken as - # inline later. - # * List "b" is made of tags which are always block-level; - # - $block_tags_a_re = 'ins|del'; - $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. - 'script|noscript|form|fieldset|iframe|math'; - - # Regular expression for the content of a block tag. - $nested_tags_level = 4; - $attr = ' - (?> # optional tag attributes - \s # starts with whitespace - (?> - [^>"/]+ # text outside quotes - | - /+(?!>) # slash not followed by ">" - | - "[^"]*" # text inside double quotes (tolerate ">") - | - \'[^\']*\' # text inside single quotes (tolerate ">") - )* - )? - '; - $content = - str_repeat(' - (?> - [^<]+ # content without tag - | - <\2 # nested opening tag - '.$attr.' # attributes - (?> - /> - | - >', $nested_tags_level). # end of opening tag - '.*?'. # last level nested tag content - str_repeat(' - # closing nested tag - ) - | - <(?!/\2\s*> # other tags with a different name - ) - )*', - $nested_tags_level); - $content2 = str_replace('\2', '\3', $content); - - # First, look for nested blocks, e.g.: - #

    - #
    - # tags for inner block must be indented. - #
    - #
    - # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `
    ` and stop at the first `
    `. - $text = preg_replace_callback('{(?> - (?> - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - - # Match from `\n` to `\n`, handling nested tags - # in between. - - [ ]{0,'.$less_than_tab.'} - <('.$block_tags_b_re.')# start tag = $2 - '.$attr.'> # attributes followed by > and \n - '.$content.' # content, support nesting - # the matching end tag - [ ]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - - | # Special version for tags of group a. - - [ ]{0,'.$less_than_tab.'} - <('.$block_tags_a_re.')# start tag = $3 - '.$attr.'>[ ]*\n # attributes followed by > - '.$content2.' # content, support nesting - # the matching end tag - [ ]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - - | # Special case just for
    . It was easier to make a special - # case than to make the other regex more complicated. - - [ ]{0,'.$less_than_tab.'} - <(hr) # start tag = $2 - '.$attr.' # attributes - /?> # the matching end tag - [ ]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - - | # Special case for standalone HTML comments: - - [ ]{0,'.$less_than_tab.'} - (?s: - - ) - [ ]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - - | # PHP and ASP-style processor instructions ( - ) - [ ]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - - ) - )}Sxmi', - array(&$this, '_hashHTMLBlocks_callback'), - $text); - - return $text; - } - function _hashHTMLBlocks_callback($matches) { - $text = $matches[1]; - $key = $this->hashBlock($text); - return "\n\n$key\n\n"; - } - - - function hashPart($text, $boundary = 'X') { - # - # Called whenever a tag must be hashed when a function insert an atomic - # element in the text stream. Passing $text to through this function gives - # a unique text-token which will be reverted back when calling unhash. - # - # The $boundary argument specify what character should be used to surround - # the token. By convension, "B" is used for block elements that needs not - # to be wrapped into paragraph tags at the end, ":" is used for elements - # that are word separators and "X" is used in the general case. - # - # Swap back any tag hash found in $text so we do not have to `unhash` - # multiple times at the end. - $text = $this->unhash($text); - - # Then hash the block. - static $i = 0; - $key = "$boundary\x1A" . ++$i . $boundary; - $this->html_hashes[$key] = $text; - return $key; # String that will replace the tag. - } - - - function hashBlock($text) { - # - # Shortcut function for hashPart with block-level boundaries. - # - return $this->hashPart($text, 'B'); - } - - - var $block_gamut = array( - # - # These are all the transformations that form block-level - # tags like paragraphs, headers, and list items. 
- # - "doHeaders" => 10, - "doHorizontalRules" => 20, - - "doLists" => 40, - "doCodeBlocks" => 50, - "doBlockQuotes" => 60, - ); - - function runBlockGamut($text) { - # - # Run block gamut tranformations. - # - # We need to escape raw HTML in Markdown source before doing anything - # else. This need to be done for each block, and not only at the - # begining in the Markdown function since hashed blocks can be part of - # list items and could have been indented. Indented blocks would have - # been seen as a code block in a previous pass of hashHTMLBlocks. - $text = $this->hashHTMLBlocks($text); - - return $this->runBasicBlockGamut($text); - } - - function runBasicBlockGamut($text) { - # - # Run block gamut tranformations, without hashing HTML blocks. This is - # useful when HTML blocks are known to be already hashed, like in the first - # whole-document pass. - # - foreach ($this->block_gamut as $method => $priority) { - $text = $this->$method($text); - } - - # Finally form paragraph and restore hashed blocks. - $text = $this->formParagraphs($text); - - return $text; - } - - - function doHorizontalRules($text) { - # Do Horizontal Rules: - return preg_replace( - '{ - ^[ ]{0,3} # Leading space - ([-*_]) # $1: First marker - (?> # Repeated marker group - [ ]{0,2} # Zero, one, or two spaces. - \1 # Marker character - ){2,} # Group repeated at least twice - [ ]* # Tailing spaces - $ # End of line. - }mx', - "\n".$this->hashBlock("empty_element_suffix")."\n", - $text); - } - - - var $span_gamut = array( - # - # These are all the transformations that occur *within* block-level - # tags like paragraphs, headers, and list items. - # - # Process character escapes, code spans, and inline HTML - # in one shot. - "parseSpan" => -30, - - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. 
- "doImages" => 10, - "doAnchors" => 20, - - # Make links out of things like `` - # Must come after doAnchors, because you can use < and > - # delimiters in inline links like [this](). - "doAutoLinks" => 30, - "encodeAmpsAndAngles" => 40, - - "doItalicsAndBold" => 50, - "doHardBreaks" => 60, - ); - - function runSpanGamut($text) { - # - # Run span gamut tranformations. - # - foreach ($this->span_gamut as $method => $priority) { - $text = $this->$method($text); - } - - return $text; - } - - - function doHardBreaks($text) { - # Do hard breaks: - return preg_replace_callback('/ {2,}\n/', - array(&$this, '_doHardBreaks_callback'), $text); - } - function _doHardBreaks_callback($matches) { - return $this->hashPart("empty_element_suffix\n"); - } - - - function doAnchors($text) { - # - # Turn Markdown link shortcuts into XHTML tags. - # - if ($this->in_anchor) return $text; - $this->in_anchor = true; - - # - # First, handle reference-style links: [link text] [id] - # - $text = preg_replace_callback('{ - ( # wrap whole match in $1 - \[ - ('.$this->nested_brackets_re.') # link text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - ) - }xs', - array(&$this, '_doAnchors_reference_callback'), $text); - - # - # Next, inline-style links: [link text](url "optional title") - # - $text = preg_replace_callback('{ - ( # wrap whole match in $1 - \[ - ('.$this->nested_brackets_re.') # link text = $2 - \] - \( # literal paren - [ ]* - (?: - <(\S*)> # href = $3 - | - ('.$this->nested_url_parenthesis_re.') # href = $4 - ) - [ ]* - ( # $5 - ([\'"]) # quote char = $6 - (.*?) # Title = $7 - \6 # matching quote - [ ]* # ignore any spaces/tabs between closing quote and ) - )? 
# title is optional - \) - ) - }xs', - array(&$this, '_DoAnchors_inline_callback'), $text); - - # - # Last, handle reference-style shortcuts: [link text] - # These must come last in case you've also got [link test][1] - # or [link test](/foo) - # -// $text = preg_replace_callback('{ -// ( # wrap whole match in $1 -// \[ -// ([^\[\]]+) # link text = $2; can\'t contain [ or ] -// \] -// ) -// }xs', -// array(&$this, '_doAnchors_reference_callback'), $text); - - $this->in_anchor = false; - return $text; - } - function _doAnchors_reference_callback($matches) { - $whole_match = $matches[1]; - $link_text = $matches[2]; - $link_id =& $matches[3]; - - if ($link_id == "") { - # for shortcut links like [this][] or [this]. - $link_id = $link_text; - } - - # lower-case and turn embedded newlines into spaces - $link_id = strtolower($link_id); - $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); - - if (isset($this->urls[$link_id])) { - $url = $this->urls[$link_id]; - $url = $this->encodeAttribute($url); - - $result = "titles[$link_id] ) ) { - $title = $this->titles[$link_id]; - $title = $this->encodeAttribute($title); - $result .= " title=\"$title\""; - } - - $link_text = $this->runSpanGamut($link_text); - $result .= ">$link_text"; - $result = $this->hashPart($result); - } - else { - $result = $whole_match; - } - return $result; - } - function _doAnchors_inline_callback($matches) { - $whole_match = $matches[1]; - $link_text = $this->runSpanGamut($matches[2]); - $url = $matches[3] == '' ? $matches[4] : $matches[3]; - $title =& $matches[7]; - - $url = $this->encodeAttribute($url); - - $result = "encodeAttribute($title); - $result .= " title=\"$title\""; - } - - $link_text = $this->runSpanGamut($link_text); - $result .= ">$link_text"; - - return $this->hashPart($result); - } - - - function doImages($text) { - # - # Turn Markdown image shortcuts into tags. 
- # - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text = preg_replace_callback('{ - ( # wrap whole match in $1 - !\[ - ('.$this->nested_brackets_re.') # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }xs', - array(&$this, '_doImages_reference_callback'), $text); - - # - # Next, handle inline images: ![alt text](url "optional title") - # Don't forget: encode * and _ - # - $text = preg_replace_callback('{ - ( # wrap whole match in $1 - !\[ - ('.$this->nested_brackets_re.') # alt text = $2 - \] - \s? # One optional whitespace character - \( # literal paren - [ ]* - (?: - <(\S*)> # src url = $3 - | - ('.$this->nested_url_parenthesis_re.') # src url = $4 - ) - [ ]* - ( # $5 - ([\'"]) # quote char = $6 - (.*?) # title = $7 - \6 # matching quote - [ ]* - )? # title is optional - \) - ) - }xs', - array(&$this, '_doImages_inline_callback'), $text); - - return $text; - } - function _doImages_reference_callback($matches) { - $whole_match = $matches[1]; - $alt_text = $matches[2]; - $link_id = strtolower($matches[3]); - - if ($link_id == "") { - $link_id = strtolower($alt_text); # for shortcut links like ![this][]. - } - - $alt_text = $this->encodeAttribute($alt_text); - if (isset($this->urls[$link_id])) { - $url = $this->encodeAttribute($this->urls[$link_id]); - $result = "\"$alt_text\"";titles[$link_id])) { - $title = $this->titles[$link_id]; - $title = $this->encodeAttribute($title); - $result .= " title=\"$title\""; - } - $result .= $this->empty_element_suffix; - $result = $this->hashPart($result); - } - else { - # If there's no such link ID, leave intact: - $result = $whole_match; - } - - return $result; - } - function _doImages_inline_callback($matches) { - $whole_match = $matches[1]; - $alt_text = $matches[2]; - $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; - $title =& $matches[7]; - - $alt_text = $this->encodeAttribute($alt_text); - $url = $this->encodeAttribute($url); - $result = "\"$alt_text\"";encodeAttribute($title); - $result .= " title=\"$title\""; # $title already quoted - } - $result .= $this->empty_element_suffix; - - return $this->hashPart($result); - } - - - function doHeaders($text) { - # Setext-style headers: - # Header 1 - # ======== - # - # Header 2 - # -------- - # - $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', - array(&$this, '_doHeaders_callback_setext'), $text); - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - # - $text = preg_replace_callback('{ - ^(\#{1,6}) # $1 = string of #\'s - [ ]* - (.+?) # $2 = Header text - [ ]* - \#* # optional closing #\'s (not counted) - \n+ - }xm', - array(&$this, '_doHeaders_callback_atx'), $text); - - return $text; - } - function _doHeaders_callback_setext($matches) { - # Terrible hack to check we haven't found an empty list item. - if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) - return $matches[0]; - - $level = $matches[2]{0} == '=' ? 1 : 2; - $block = "".$this->runSpanGamut($matches[1]).""; - return "\n" . $this->hashBlock($block) . "\n\n"; - } - function _doHeaders_callback_atx($matches) { - $level = strlen($matches[1]); - $block = "".$this->runSpanGamut($matches[2]).""; - return "\n" . $this->hashBlock($block) . "\n\n"; - } - - - function doLists($text) { - # - # Form HTML ordered (numbered) and unordered (bulleted) lists. 
- # - $less_than_tab = $this->tab_width - 1; - - # Re-usable patterns to match list item bullets and number markers: - $marker_ul_re = '[*+-]'; - $marker_ol_re = '\d+[.]'; - $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; - - $markers_relist = array($marker_ul_re, $marker_ol_re); - - foreach ($markers_relist as $marker_re) { - # Re-usable pattern to match any entirel ul or ol list: - $whole_list_re = ' - ( # $1 = whole list - ( # $2 - [ ]{0,'.$less_than_tab.'} - ('.$marker_re.') # $3 = first list item marker - [ ]+ - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ ]* - '.$marker_re.'[ ]+ - ) - ) - ) - '; // mx - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _ProcessListItems(). - - if ($this->list_level) { - $text = preg_replace_callback('{ - ^ - '.$whole_list_re.' - }mx', - array(&$this, '_doLists_callback'), $text); - } - else { - $text = preg_replace_callback('{ - (?:(?<=\n)\n|\A\n?) # Must eat the newline - '.$whole_list_re.' - }mx', - array(&$this, '_doLists_callback'), $text); - } - } - - return $text; - } - function _doLists_callback($matches) { - # Re-usable patterns to match list item bullets and number markers: - $marker_ul_re = '[*+-]'; - $marker_ol_re = '\d+[.]'; - $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; - - $list = $matches[1]; - $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol"; - - $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); - - $list .= "\n"; - $result = $this->processListItems($list, $marker_any_re); - - $result = $this->hashBlock("<$list_type>\n" . $result . ""); - return "\n". $result ."\n\n"; - } - - var $list_level = 0; - - function processListItems($list_str, $marker_any_re) { - # - # Process the contents of a single ordered or unordered list, splitting it - # into individual list items. - # - # The $this->list_level global keeps track of when we're inside a list. 
- # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - - $this->list_level++; - - # trim trailing blank lines: - $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); - - $list_str = preg_replace_callback('{ - (\n)? # leading line = $1 - (^[ ]*) # leading whitespace = $2 - ('.$marker_any_re.' # list marker and space = $3 - (?:[ ]+|(?=\n)) # space only required if item is not empty - ) - ((?s:.*?)) # list item text = $4 - (?:(\n+(?=\n))|\n) # tailing blank line = $5 - (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) - }xm', - array(&$this, '_processListItems_callback'), $list_str); - - $this->list_level--; - return $list_str; - } - function _processListItems_callback($matches) { - $item = $matches[4]; - $leading_line =& $matches[1]; - $leading_space =& $matches[2]; - $marker_space = $matches[3]; - $tailing_blank_line =& $matches[5]; - - if ($leading_line || $tailing_blank_line || - preg_match('/\n{2,}/', $item)) - { - # Replace marker with the appropriate whitespace indentation - $item = $leading_space . str_repeat(' ', strlen($marker_space)) . 
$item; - $item = $this->runBlockGamut($this->outdent($item)."\n"); - } - else { - # Recursion for sub-lists: - $item = $this->doLists($this->outdent($item)); - $item = preg_replace('/\n+$/', '', $item); - $item = $this->runSpanGamut($item); - } - - return "
  • " . $item . "
  • \n"; - } - - - function doCodeBlocks($text) { - # - # Process Markdown `
    ` blocks.
    -  #
    -    $text = preg_replace_callback('{
    -        (?:\n\n|\A\n?)
    -        (             # $1 = the code block -- one or more lines, starting with a space/tab
    -          (?>
    -          [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
    -          .*\n+
    -          )+
    -        )
    -        ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
    -      }xm',
    -      array(&$this, '_doCodeBlocks_callback'), $text);
    -
    -    return $text;
    -  }
    -  function _doCodeBlocks_callback($matches) {
    -    $codeblock = $matches[1];
    -
    -    $codeblock = $this->outdent($codeblock);
    -    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    -
    -    # trim leading newlines and trailing newlines
    -    $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
    -
    -    $codeblock = "
    $codeblock\n
    "; - return "\n\n".$this->hashBlock($codeblock)."\n\n"; - } - - - function makeCodeSpan($code) { - # - # Create a code span markup for $code. Called from handleSpanToken. - # - $code = htmlspecialchars(trim($code), ENT_NOQUOTES); - return $this->hashPart("$code"); - } - - - var $em_relist = array( - '' => '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(?em_relist as $em => $em_re) { - foreach ($this->strong_relist as $strong => $strong_re) { - # Construct list of allowed token expressions. - $token_relist = array(); - if (isset($this->em_strong_relist["$em$strong"])) { - $token_relist[] = $this->em_strong_relist["$em$strong"]; - } - $token_relist[] = $em_re; - $token_relist[] = $strong_re; - - # Construct master expression from list. - $token_re = '{('. implode('|', $token_relist) .')}'; - $this->em_strong_prepared_relist["$em$strong"] = $token_re; - } - } - } - - function doItalicsAndBold($text) { - $token_stack = array(''); - $text_stack = array(''); - $em = ''; - $strong = ''; - $tree_char_em = false; - - while (1) { - # - # Get prepared regular expression for seraching emphasis tokens - # in current context. - # - $token_re = $this->em_strong_prepared_relist["$em$strong"]; - - # - # Each loop iteration seach for the next emphasis token. - # Each token is then passed to handleSpanToken. - # - $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); - $text_stack[0] .= $parts[0]; - $token =& $parts[1]; - $text =& $parts[2]; - - if (empty($token)) { - # Reached end of text span: empty stack without emitting. - # any more emphasis. - while ($token_stack[0]) { - $text_stack[1] .= array_shift($token_stack); - $text_stack[0] .= array_shift($text_stack); - } - break; - } - - $token_len = strlen($token); - if ($tree_char_em) { - # Reached closing marker while inside a three-char emphasis. - if ($token_len == 3) { - # Three-char closing marker, close em and strong. 
- array_shift($token_stack); - $span = array_shift($text_stack); - $span = $this->runSpanGamut($span); - $span = "$span"; - $text_stack[0] .= $this->hashPart($span); - $em = ''; - $strong = ''; - } else { - # Other closing marker: close one em or strong and - # change current token state to match the other - $token_stack[0] = str_repeat($token{0}, 3-$token_len); - $tag = $token_len == 2 ? "strong" : "em"; - $span = $text_stack[0]; - $span = $this->runSpanGamut($span); - $span = "<$tag>$span"; - $text_stack[0] = $this->hashPart($span); - $$tag = ''; # $$tag stands for $em or $strong - } - $tree_char_em = false; - } else if ($token_len == 3) { - if ($em) { - # Reached closing marker for both em and strong. - # Closing strong marker: - for ($i = 0; $i < 2; ++$i) { - $shifted_token = array_shift($token_stack); - $tag = strlen($shifted_token) == 2 ? "strong" : "em"; - $span = array_shift($text_stack); - $span = $this->runSpanGamut($span); - $span = "<$tag>$span"; - $text_stack[0] .= $this->hashPart($span); - $$tag = ''; # $$tag stands for $em or $strong - } - } else { - # Reached opening three-char emphasis marker. Push on token - # stack; will be handled by the special condition above. 
- $em = $token{0}; - $strong = "$em$em"; - array_unshift($token_stack, $token); - array_unshift($text_stack, ''); - $tree_char_em = true; - } - } else if ($token_len == 2) { - if ($strong) { - # Unwind any dangling emphasis marker: - if (strlen($token_stack[0]) == 1) { - $text_stack[1] .= array_shift($token_stack); - $text_stack[0] .= array_shift($text_stack); - } - # Closing strong marker: - array_shift($token_stack); - $span = array_shift($text_stack); - $span = $this->runSpanGamut($span); - $span = "$span"; - $text_stack[0] .= $this->hashPart($span); - $strong = ''; - } else { - array_unshift($token_stack, $token); - array_unshift($text_stack, ''); - $strong = $token; - } - } else { - # Here $token_len == 1 - if ($em) { - if (strlen($token_stack[0]) == 1) { - # Closing emphasis marker: - array_shift($token_stack); - $span = array_shift($text_stack); - $span = $this->runSpanGamut($span); - $span = "$span"; - $text_stack[0] .= $this->hashPart($span); - $em = ''; - } else { - $text_stack[0] .= $token; - } - } else { - array_unshift($token_stack, $token); - array_unshift($text_stack, ''); - $em = $token; - } - } - } - return $text_stack[0]; - } - - - function doBlockQuotes($text) { - $text = preg_replace_callback('/ - ( # Wrap whole match in $1 - (?> - ^[ ]*>[ ]? # ">" at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - /xm', - array(&$this, '_doBlockQuotes_callback'), $text); - - return $text; - } - function _doBlockQuotes_callback($matches) { - $bq = $matches[1]; - # trim one level of quoting - trim whitespace-only lines - $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); - $bq = $this->runBlockGamut($bq); # recurse - - $bq = preg_replace('/^/m', " ", $bq); - # These leading spaces cause problem with
     content, 
    -    # so we need to fix that:
    -    $bq = preg_replace_callback('{(\s*
    .+?
    )}sx', - array(&$this, '_DoBlockQuotes_callback2'), $bq); - - return "\n". $this->hashBlock("
    \n$bq\n
    ")."\n\n"; - } - function _doBlockQuotes_callback2($matches) { - $pre = $matches[1]; - $pre = preg_replace('/^ /m', '', $pre); - return $pre; - } - - - function formParagraphs($text) { - # - # Params: - # $text - string to process with html

    tags - # - # Strip leading and trailing lines: - $text = preg_replace('/\A\n+|\n+\z/', '', $text); - - $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); - - # - # Wrap

    tags and unhashify HTML blocks - # - foreach ($grafs as $key => $value) { - if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { - # Is a paragraph. - $value = $this->runSpanGamut($value); - $value = preg_replace('/^([ ]*)/', "

    ", $value); - $value .= "

    "; - $grafs[$key] = $this->unhash($value); - } - else { - # Is a block. - # Modify elements of @grafs in-place... - $graf = $value; - $block = $this->html_hashes[$graf]; - $graf = $block; -// if (preg_match('{ -// \A -// ( # $1 =
    tag -//
    ]* -// \b -// markdown\s*=\s* ([\'"]) # $2 = attr quote char -// 1 -// \2 -// [^>]* -// > -// ) -// ( # $3 = contents -// .* -// ) -// (
    ) # $4 = closing tag -// \z -// }xs', $block, $matches)) -// { -// list(, $div_open, , $div_content, $div_close) = $matches; -// -// # We can't call Markdown(), because that resets the hash; -// # that initialization code should be pulled into its own sub, though. -// $div_content = $this->hashHTMLBlocks($div_content); -// -// # Run document gamut methods on the content. -// foreach ($this->document_gamut as $method => $priority) { -// $div_content = $this->$method($div_content); -// } -// -// $div_open = preg_replace( -// '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); -// -// $graf = $div_open . "\n" . $div_content . "\n" . $div_close; -// } - $grafs[$key] = $graf; - } - } - - return implode("\n\n", $grafs); - } - - - function encodeAttribute($text) { - # - # Encode text for a double-quoted HTML attribute. This function - # is *not* suitable for attributes enclosed in single quotes. - # - $text = $this->encodeAmpsAndAngles($text); - $text = str_replace('"', '"', $text); - return $text; - } - - - function encodeAmpsAndAngles($text) { - # - # Smart processing for ampersands and angle brackets that need to - # be encoded. Valid character entities are left alone unless the - # no-entities mode is set. - # - if ($this->no_entities) { - $text = str_replace('&', '&', $text); - } else { - # Ampersand-encoding based entirely on Nat Irons's Amputator - # MT plugin: - $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', - '&', $text);; - } - # Encode remaining <'s - $text = str_replace('<', '<', $text); - - return $text; - } - - - function doAutoLinks($text) { - $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', - array(&$this, '_doAutoLinks_url_callback'), $text); - - # Email addresses: - $text = preg_replace_callback('{ - < - (?:mailto:)? 
- ( - [-.\w\x80-\xFF]+ - \@ - [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ - ) - > - }xi', - array(&$this, '_doAutoLinks_email_callback'), $text); - - return $text; - } - function _doAutoLinks_url_callback($matches) { - $url = $this->encodeAttribute($matches[1]); - $link = "$url"; - return $this->hashPart($link); - } - function _doAutoLinks_email_callback($matches) { - $address = $matches[1]; - $link = $this->encodeEmailAddress($address); - return $this->hashPart($link); - } - - - function encodeEmailAddress($addr) { - # - # Input: an email address, e.g. "foo@example.com" - # - # Output: the email address as a mailto link, with each character - # of the address encoded as either a decimal or hex entity, in - # the hopes of foiling most address harvesting spam bots. E.g.: - # - #

    foo@exampl - # e.com

    - # - # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. - # With some optimizations by Milian Wolff. - # - $addr = "mailto:" . $addr; - $chars = preg_split('/(? $char) { - $ord = ord($char); - # Ignore non-ascii chars. - if ($ord < 128) { - $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. - # roughly 10% raw, 45% hex, 45% dec - # '@' *must* be encoded. I insist. - if ($r > 90 && $char != '@') /* do nothing */; - else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; - else $chars[$key] = '&#'.$ord.';'; - } - } - - $addr = implode('', $chars); - $text = implode('', array_slice($chars, 7)); # text without `mailto:` - $addr = "$text"; - - return $addr; - } - - - function parseSpan($str) { - # - # Take the string $str and parse it into tokens, hashing embeded HTML, - # escaped characters and handling code spans. - # - $output = ''; - - $span_re = '{ - ( - \\\\'.$this->escape_chars_re.' - | - (?no_markup ? '' : ' - | - # comment - | - <\?.*?\?> | <%.*?%> # processing instruction - | - <[/!$]?[-a-zA-Z0-9:]+ # regular tags - (?> - \s - (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* - )? - > - ').' - ) - }xs'; - - while (1) { - # - # Each loop iteration seach for either the next tag, the next - # openning code span marker, or the next escaped character. - # Each token is then passed to handleSpanToken. - # - $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); - - # Create token from text preceding tag. - if ($parts[0] != "") { - $output .= $parts[0]; - } - - # Check if we reach the end. - if (isset($parts[1])) { - $output .= $this->handleSpanToken($parts[1], $parts[2]); - $str = $parts[2]; - } - else { - break; - } - } - - return $output; - } - - - function handleSpanToken($token, &$str) { - # - # Handle $token provided by parseSpan by determining its nature and - # returning the corresponding value that should replace it. - # - switch ($token{0}) { - case "\\": - return $this->hashPart("&#". ord($token{1}). 
";"); - case "`": - # Search for end marker in remaining text. - if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', - $str, $matches)) - { - $str = $matches[2]; - $codespan = $this->makeCodeSpan($matches[1]); - return $this->hashPart($codespan); - } - return $token; // return as text since no ending marker found. - default: - return $this->hashPart($token); - } - } - - - function outdent($text) { - # - # Remove one level of line-leading tabs or spaces - # - return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); - } - - - # String length function for detab. `_initDetab` will create a function to - # hanlde UTF-8 if the default function does not exist. - var $utf8_strlen = 'mb_strlen'; - - function detab($text) { - # - # Replace tabs with the appropriate amount of space. - # - # For each line we separate the line in blocks delemited by - # tab characters. Then we reconstruct every line by adding the - # appropriate number of space between each blocks. - - $text = preg_replace_callback('/^.*\t.*$/m', - array(&$this, '_detab_callback'), $text); - - return $text; - } - function _detab_callback($matches) { - $line = $matches[0]; - $strlen = $this->utf8_strlen; # strlen function for UTF-8. - - # Split in blocks. - $blocks = explode("\t", $line); - # Add each blocks to the line. - $line = $blocks[0]; - unset($blocks[0]); # Do not add first block twice. - foreach ($blocks as $block) { - # Calculate amount of space, insert spaces, insert block. - $amount = $this->tab_width - - $strlen($line, 'UTF-8') % $this->tab_width; - $line .= str_repeat(" ", $amount) . $block; - } - return $line; - } - function _initDetab() { - # - # Check for the availability of the function in the `utf8_strlen` property - # (initially `mb_strlen`). If the function is not available, create a - # function that will loosely count the number of UTF-8 characters with a - # regular expression. 
- # - if (function_exists($this->utf8_strlen)) return; - $this->utf8_strlen = create_function('$text', 'return preg_match_all( - "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", - $text, $m);'); - } - - - function unhash($text) { - # - # Swap back in all the tags hashed by _HashHTMLBlocks. - # - return preg_replace_callback('/(.)\x1A[0-9]+\1/', - array(&$this, '_unhash_callback'), $text); - } - function _unhash_callback($matches) { - return $this->html_hashes[$matches[0]]; - } - -} - - -# -# Markdown Extra Parser Class -# - -class MarkdownExtra_Parser extends Markdown_Parser { - - # Prefix for footnote ids. - var $fn_id_prefix = ""; - - # Optional title attribute for footnote links and backlinks. - var $fn_link_title = MARKDOWN_FN_LINK_TITLE; - var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; - - # Optional class attribute for footnote links and backlinks. - var $fn_link_class = MARKDOWN_FN_LINK_CLASS; - var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; - - # Predefined abbreviations. - var $predef_abbr = array(); - - - function MarkdownExtra_Parser() { - # - # Constructor function. Initialize the parser object. - # - # Add extra escapable characters before parent constructor - # initialize the table. - $this->escape_chars .= ':|'; - - # Insert extra document, block, and span transformations. - # Parent constructor will do the sorting. - $this->document_gamut += array( - "doFencedCodeBlocks" => 5, - "stripFootnotes" => 15, - "stripAbbreviations" => 25, - "appendFootnotes" => 50, - ); - $this->block_gamut += array( - "doFencedCodeBlocks" => 5, - "doTables" => 15, - "doDefLists" => 45, - ); - $this->span_gamut += array( - "doFootnotes" => 5, - "doAbbreviations" => 70, - ); - - parent::Markdown_Parser(); - } - - - # Extra variables used during extra transformations. - var $footnotes = array(); - var $footnotes_ordered = array(); - var $abbr_desciptions = array(); - var $abbr_word_re = ''; - - # Give the current footnote number. 
- var $footnote_counter = 1; - - - function setup() { - # - # Setting up Extra-specific variables. - # - parent::setup(); - - $this->footnotes = array(); - $this->footnotes_ordered = array(); - $this->abbr_desciptions = array(); - $this->abbr_word_re = ''; - $this->footnote_counter = 1; - - foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { - if ($this->abbr_word_re) - $this->abbr_word_re .= '|'; - $this->abbr_word_re .= preg_quote($abbr_word); - $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); - } - } - - function teardown() { - # - # Clearing Extra-specific variables. - # - $this->footnotes = array(); - $this->footnotes_ordered = array(); - $this->abbr_desciptions = array(); - $this->abbr_word_re = ''; - - parent::teardown(); - } - - - ### HTML Block Parser ### - - # Tags that are always treated as block tags: - var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; - - # Tags treated as block tags only if the opening tag is alone on it's line: - var $context_block_tags_re = 'script|noscript|math|ins|del'; - - # Tags where markdown="1" default to span mode: - var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; - - # Tags which must not have their contents modified, no matter where - # they appear: - var $clean_tags_re = 'script|math'; - - # Tags that do not need to be closed. - var $auto_close_tags_re = 'hr|img'; - - - function hashHTMLBlocks($text) { - # - # Hashify HTML Blocks and "clean tags". - # - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap

    s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded. - # - # This works by calling _HashHTMLBlocks_InMarkdown, which then calls - # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" - # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back - # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. - # These two functions are calling each other. It's recursive! - # - # - # Call the HTML-in-Markdown hasher. - # - list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); - - return $text; - } - function _hashHTMLBlocks_inMarkdown($text, $indent = 0, - $enclosing_tag_re = '', $span = false) - { - # - # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. - # - # * $indent is the number of space to be ignored when checking for code - # blocks. This is important because if we don't take the indent into - # account, something like this (which looks right) won't work as expected: - # - #

    - #
    - # Hello World. <-- Is this a Markdown code block or text? - #
    <-- Is this a Markdown code block or a real tag? - #
    - # - # If you don't like this, just don't indent the tag on which - # you apply the markdown="1" attribute. - # - # * If $enclosing_tag_re is not empty, stops at the first unmatched closing - # tag with that name. Nested tags supported. - # - # * If $span is true, text inside must treated as span. So any double - # newline will be replaced by a single newline so that it does not create - # paragraphs. - # - # Returns an array of that form: ( processed text , remaining text ) - # - if ($text === '') return array('', ''); - - # Regex to check for the presense of newlines around a block tag. - $newline_before_re = '/(?:^\n?|\n\n)*$/'; - $newline_after_re = - '{ - ^ # Start of text following the tag. - (?>[ ]*)? # Optional comment. - [ ]*\n # Must be followed by newline. - }xs'; - - # Regex to match any tag. - $block_tag_re = - '{ - ( # $2: Capture hole tag. - # Tag name. - '.$this->block_tags_re.' | - '.$this->context_block_tags_re.' | - '.$this->clean_tags_re.' | - (?!\s)'.$enclosing_tag_re.' - ) - (?: - (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. - (?> - ".*?" | # Double quotes (can contain `>`) - \'.*?\' | # Single quotes (can contain `>`) - .+? # Anything but quotes and `>`. - )*? - )? - > # End of tag. - | - # HTML Comment - | - <\?.*?\?> | <%.*?%> # Processing instruction - | - # CData Block - | - # Code span marker - `+ - '. ( !$span ? ' # If not in span. - | - # Indented code block - (?> ^[ ]*\n? | \n[ ]*\n ) - [ ]{'.($indent+4).'}[^\n]* \n - (?> - (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n - )* - | - # Fenced code block marker - (?> ^ | \n ) - [ ]{'.($indent).'}~~~+[ ]*\n - ' : '' ). ' # End (if not is span). - ) - }xs'; - - - $depth = 0; # Current depth inside the tag tree. - $parsed = ""; # Parsed text that will be returned. - - # - # Loop through every tag until we find the closing tag of the parent - # or loop until reaching the end of text if no parent tag specified. 
- # - do { - # - # Split the text using the first $tag_match pattern found. - # Text before pattern will be first in the array, text after - # pattern will be at the end, and between will be any catches made - # by the pattern. - # - $parts = preg_split($block_tag_re, $text, 2, - PREG_SPLIT_DELIM_CAPTURE); - - # If in Markdown span mode, add a empty-string span-level hash - # after each newline to prevent triggering any block element. - if ($span) { - $void = $this->hashPart("", ':'); - $newline = "$void\n"; - $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; - } - - $parsed .= $parts[0]; # Text before current tag. - - # If end of $text has been reached. Stop loop. - if (count($parts) < 3) { - $text = ""; - break; - } - - $tag = $parts[1]; # Tag to handle. - $text = $parts[2]; # Remaining text after current tag. - $tag_re = preg_quote($tag); # For use in a regular expression. - - # - # Check for: Code span marker - # - if ($tag{0} == "`") { - # Find corresponding end marker. - $tag_re = preg_quote($tag); - if (preg_match('{^(?>.+?|\n(?!\n))*?(?.*\n)+?'.$tag_re.' *\n}', $text, - $matches)) - { - # End marker found: pass text unchanged until marker. - $parsed .= $tag . $matches[0]; - $text = substr($text, strlen($matches[0])); - } - else { - # No end marker: just skip it. - $parsed .= $tag; - } - } - } - # - # Check for: Opening Block level tag or - # Opening Context Block tag (like ins and del) - # used as a block tag (tag is alone on it's line). - # - else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || - ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && - preg_match($newline_before_re, $parsed) && - preg_match($newline_after_re, $text) ) - ) - { - # Need to parse tag and following text using the HTML parser. - list($block_text, $text) = - $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); - - # Make sure it stays outside of any paragraph by adding newlines. 
- $parsed .= "\n\n$block_text\n\n"; - } - # - # Check for: Clean tag (like script, math) - # HTML Comments, processing instructions. - # - else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || - $tag{1} == '!' || $tag{1} == '?') - { - # Need to parse tag and following text using the HTML parser. - # (don't check for markdown attribute) - list($block_text, $text) = - $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); - - $parsed .= $block_text; - } - # - # Check for: Tag with same name as enclosing tag. - # - else if ($enclosing_tag_re !== '' && - # Same name as enclosing tag. - preg_match('{^= 0); - - return array($parsed, $text); - } - function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { - # - # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. - # - # * Calls $hash_method to convert any blocks. - # * Stops when the first opening tag closes. - # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. - # (it is not inside clean tags) - # - # Returns an array of that form: ( processed text , remaining text ) - # - if ($text === '') return array('', ''); - - # Regex to match `markdown` attribute inside of a tag. - $markdown_attr_re = ' - { - \s* # Eat whitespace before the `markdown` attribute - markdown - \s*=\s* - (?> - (["\']) # $1: quote delimiter - (.*?) # $2: attribute value - \1 # matching delimiter - | - ([^\s>]*) # $3: unquoted attribute value - ) - () # $4: make $3 always defined (avoid warnings) - }xs'; - - # Regex to match any tag. - $tag_re = '{ - ( # $2: Capture hole tag. - - ".*?" | # Double quotes (can contain `>`) - \'.*?\' | # Single quotes (can contain `>`) - .+? # Anything but quotes and `>`. - )*? - )? - > # End of tag. - | - # HTML Comment - | - <\?.*?\?> | <%.*?%> # Processing instruction - | - # CData Block - ) - }xs'; - - $original_text = $text; # Save original text in case of faliure. - - $depth = 0; # Current depth inside the tag tree. 
- $block_text = ""; # Temporary text holder for current text. - $parsed = ""; # Parsed text that will be returned. - - # - # Get the name of the starting tag. - # (This pattern makes $base_tag_name_re safe without quoting.) - # - if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) - $base_tag_name_re = $matches[1]; - - # - # Loop through every tag until we find the corresponding closing tag. - # - do { - # - # Split the text using the first $tag_match pattern found. - # Text before pattern will be first in the array, text after - # pattern will be at the end, and between will be any catches made - # by the pattern. - # - $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); - - if (count($parts) < 3) { - # - # End of $text reached with unbalenced tag(s). - # In that case, we return original text unchanged and pass the - # first character as filtered to prevent an infinite loop in the - # parent function. - # - return array($original_text{0}, substr($original_text, 1)); - } - - $block_text .= $parts[0]; # Text before current tag. - $tag = $parts[1]; # Tag to handle. - $text = $parts[2]; # Remaining text after current tag. - - # - # Check for: Auto-close tag (like
    ) - # Comments and Processing Instructions. - # - if (preg_match('{^auto_close_tags_re.')\b}', $tag) || - $tag{1} == '!' || $tag{1} == '?') - { - # Just add the tag to the block as if it was text. - $block_text .= $tag; - } - else { - # - # Increase/decrease nested tag count. Only do so if - # the tag's name match base tag's. - # - if (preg_match('{^mode = $attr_m[2] . $attr_m[3]; - $span_mode = $this->mode == 'span' || $this->mode != 'block' && - preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); - - # Calculate indent before tag. - if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { - $strlen = $this->utf8_strlen; - $indent = $strlen($matches[1], 'UTF-8'); - } else { - $indent = 0; - } - - # End preceding block with this tag. - $block_text .= $tag; - $parsed .= $this->$hash_method($block_text); - - # Get enclosing tag name for the ParseMarkdown function. - # (This pattern makes $tag_name_re safe without quoting.) - preg_match('/^<([\w:$]*)\b/', $tag, $matches); - $tag_name_re = $matches[1]; - - # Parse the content using the HTML-in-Markdown parser. - list ($block_text, $text) - = $this->_hashHTMLBlocks_inMarkdown($text, $indent, - $tag_name_re, $span_mode); - - # Outdent markdown text. - if ($indent > 0) { - $block_text = preg_replace("/^[ ]{1,$indent}/m", "", - $block_text); - } - - # Append tag content to parsed text. - if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; - else $parsed .= "$block_text"; - - # Start over a new block. - $block_text = ""; - } - else $block_text .= $tag; - } - - } while ($depth > 0); - - # - # Hash last block text that wasn't processed inside the loop. - # - $parsed .= $this->$hash_method($block_text); - - return array($parsed, $text); - } - - - function hashClean($text) { - # - # Called whenever a tag must be hashed when a function insert a "clean" tag - # in $text, it pass through this function and is automaticaly escaped, - # blocking invalid nested overlap. 
- # - return $this->hashPart($text, 'C'); - } - - - function doHeaders($text) { - # - # Redefined to add id attribute support. - # - # Setext-style headers: - # Header 1 {#header1} - # ======== - # - # Header 2 {#header2} - # -------- - # - $text = preg_replace_callback( - '{ - (^.+?) # $1: Header text - (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $2: Id attribute - [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer - }mx', - array(&$this, '_doHeaders_callback_setext'), $text); - - # atx-style headers: - # # Header 1 {#header1} - # ## Header 2 {#header2} - # ## Header 2 with closing hashes ## {#header3} - # ... - # ###### Header 6 {#header2} - # - $text = preg_replace_callback('{ - ^(\#{1,6}) # $1 = string of #\'s - [ ]* - (.+?) # $2 = Header text - [ ]* - \#* # optional closing #\'s (not counted) - (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute - [ ]* - \n+ - }xm', - array(&$this, '_doHeaders_callback_atx'), $text); - - return $text; - } - function _doHeaders_attr($attr) { - if (empty($attr)) return ""; - return " id=\"$attr\""; - } - function _doHeaders_callback_setext($matches) { - if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) - return $matches[0]; - $level = $matches[3]{0} == '=' ? 1 : 2; - $attr = $this->_doHeaders_attr($id =& $matches[2]); - $block = "".$this->runSpanGamut($matches[1]).""; - return "\n" . $this->hashBlock($block) . "\n\n"; - } - function _doHeaders_callback_atx($matches) { - $level = strlen($matches[1]); - $attr = $this->_doHeaders_attr($id =& $matches[3]); - $block = "".$this->runSpanGamut($matches[2]).""; - return "\n" . $this->hashBlock($block) . "\n\n"; - } - - - function doTables($text) { - # - # Form HTML tables. - # - $less_than_tab = $this->tab_width - 1; - # - # Find tables with leading pipe. - # - # | Header 1 | Header 2 - # | -------- | -------- - # | Cell 1 | Cell 2 - # | Cell 3 | Cell 4 - # - $text = preg_replace_callback(' - { - ^ # Start of a line - [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 
- [|] # Optional leading pipe (present) - (.+) \n # $1: Header row (at least one pipe) - - [ ]{0,'.$less_than_tab.'} # Allowed whitespace. - [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline - - ( # $3: Cells - (?> - [ ]* # Allowed whitespace. - [|] .* \n # Row content. - )* - ) - (?=\n|\Z) # Stop at final double newline. - }xm', - array(&$this, '_doTable_leadingPipe_callback'), $text); - - # - # Find tables without leading pipe. - # - # Header 1 | Header 2 - # -------- | -------- - # Cell 1 | Cell 2 - # Cell 3 | Cell 4 - # - $text = preg_replace_callback(' - { - ^ # Start of a line - [ ]{0,'.$less_than_tab.'} # Allowed whitespace. - (\S.*[|].*) \n # $1: Header row (at least one pipe) - - [ ]{0,'.$less_than_tab.'} # Allowed whitespace. - ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline - - ( # $3: Cells - (?> - .* [|] .* \n # Row content - )* - ) - (?=\n|\Z) # Stop at final double newline. - }xm', - array(&$this, '_DoTable_callback'), $text); - - return $text; - } - function _doTable_leadingPipe_callback($matches) { - $head = $matches[1]; - $underline = $matches[2]; - $content = $matches[3]; - - # Remove leading pipe for each row. - $content = preg_replace('/^ *[|]/m', '', $content); - - return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); - } - function _doTable_callback($matches) { - $head = $matches[1]; - $underline = $matches[2]; - $content = $matches[3]; - - # Remove any tailing pipes for each line. - $head = preg_replace('/[|] *$/m', '', $head); - $underline = preg_replace('/[|] *$/m', '', $underline); - $content = preg_replace('/[|] *$/m', '', $content); - - # Reading alignement from header underline. 
- $separators = preg_split('/ *[|] */', $underline); - foreach ($separators as $n => $s) { - if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; - else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; - else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; - else $attr[$n] = ''; - } - - # Parsing span elements, including code spans, character escapes, - # and inline HTML tags, so that pipes inside those gets ignored. - $head = $this->parseSpan($head); - $headers = preg_split('/ *[|] */', $head); - $col_count = count($headers); - - # Write column headers. - $text = "\n"; - $text .= "\n"; - $text .= "\n"; - foreach ($headers as $n => $header) - $text .= " ".$this->runSpanGamut(trim($header))."\n"; - $text .= "\n"; - $text .= "\n"; - - # Split content by row. - $rows = explode("\n", trim($content, "\n")); - - $text .= "\n"; - foreach ($rows as $row) { - # Parsing span elements, including code spans, character escapes, - # and inline HTML tags, so that pipes inside those gets ignored. - $row = $this->parseSpan($row); - - # Split row by cell. - $row_cells = preg_split('/ *[|] */', $row, $col_count); - $row_cells = array_pad($row_cells, $col_count, ''); - - $text .= "\n"; - foreach ($row_cells as $n => $cell) - $text .= " ".$this->runSpanGamut(trim($cell))."\n"; - $text .= "\n"; - } - $text .= "\n"; - $text .= "
    "; - - return $this->hashBlock($text) . "\n"; - } - - - function doDefLists($text) { - # - # Form HTML definition lists. - # - $less_than_tab = $this->tab_width - 1; - - # Re-usable pattern to match any entire dl list: - $whole_list_re = '(?> - ( # $1 = whole list - ( # $2 - [ ]{0,'.$less_than_tab.'} - ((?>.*\S.*\n)+) # $3 = defined term - \n? - [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another term - [ ]{0,'.$less_than_tab.'} - (?: \S.*\n )+? # defined term - \n? - [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition - ) - (?! # Negative lookahead for another definition - [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition - ) - ) - ) - )'; // mx - - $text = preg_replace_callback('{ - (?>\A\n?|(?<=\n\n)) - '.$whole_list_re.' - }mx', - array(&$this, '_doDefLists_callback'), $text); - - return $text; - } - function _doDefLists_callback($matches) { - # Re-usable patterns to match list item bullets and number markers: - $list = $matches[1]; - - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $result = trim($this->processDefListItems($list)); - $result = "
    \n" . $result . "\n
    "; - return $this->hashBlock($result) . "\n\n"; - } - - - function processDefListItems($list_str) { - # - # Process the contents of a single definition list, splitting it - # into individual term and definition list items. - # - $less_than_tab = $this->tab_width - 1; - - # trim trailing blank lines: - $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); - - # Process definition terms. - $list_str = preg_replace_callback('{ - (?>\A\n?|\n\n+) # leading line - ( # definition terms = $1 - [ ]{0,'.$less_than_tab.'} # leading whitespace - (?![:][ ]|[ ]) # negative lookahead for a definition - # mark (colon) or more whitespace. - (?> \S.* \n)+? # actual term (not whitespace). - ) - (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed - # with a definition mark. - }xm', - array(&$this, '_processDefListItems_callback_dt'), $list_str); - - # Process actual definitions. - $list_str = preg_replace_callback('{ - \n(\n+)? # leading line = $1 - ( # marker space = $2 - [ ]{0,'.$less_than_tab.'} # whitespace before colon - [:][ ]+ # definition mark (colon) - ) - ((?s:.+?)) # definition text = $3 - (?= \n+ # stop at next definition mark, - (?: # next term or end of text - [ ]{0,'.$less_than_tab.'} [:][ ] | -
    | \z - ) - ) - }xm', - array(&$this, '_processDefListItems_callback_dd'), $list_str); - - return $list_str; - } - function _processDefListItems_callback_dt($matches) { - $terms = explode("\n", trim($matches[1])); - $text = ''; - foreach ($terms as $term) { - $term = $this->runSpanGamut(trim($term)); - $text .= "\n
    " . $term . "
    "; - } - return $text . "\n"; - } - function _processDefListItems_callback_dd($matches) { - $leading_line = $matches[1]; - $marker_space = $matches[2]; - $def = $matches[3]; - - if ($leading_line || preg_match('/\n{2,}/', $def)) { - # Replace marker with the appropriate whitespace indentation - $def = str_repeat(' ', strlen($marker_space)) . $def; - $def = $this->runBlockGamut($this->outdent($def . "\n\n")); - $def = "\n". $def ."\n"; - } - else { - $def = rtrim($def); - $def = $this->runSpanGamut($this->outdent($def)); - } - - return "\n
    " . $def . "
    \n"; - } - - - function doFencedCodeBlocks($text) { - # - # Adding the fenced code block syntax to regular Markdown: - # - # ~~~ - # Code block - # ~~~ - # - $less_than_tab = $this->tab_width; - - $text = preg_replace_callback('{ - (?:\n|\A) - # 1: Opening marker - ( - ~{3,} # Marker: three tilde or more. - ) - [ ]* \n # Whitespace and newline following marker. - - # 2: Content - ( - (?> - (?!\1 [ ]* \n) # Not a closing marker. - .*\n+ - )+ - ) - - # Closing marker. - \1 [ ]* \n - }xm', - array(&$this, '_doFencedCodeBlocks_callback'), $text); - - return $text; - } - function _doFencedCodeBlocks_callback($matches) { - $codeblock = $matches[2]; - $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); - $codeblock = preg_replace_callback('/^\n+/', - array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock); - $codeblock = "
    $codeblock
    "; - return "\n\n".$this->hashBlock($codeblock)."\n\n"; - } - function _doFencedCodeBlocks_newlines($matches) { - return str_repeat("empty_element_suffix", - strlen($matches[0])); - } - - - # - # Redefining emphasis markers so that emphasis by underscore does not - # work in the middle of a word. - # - var $em_relist = array( - '' => '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(? '(?:(? '(?<=\S)(? '(?<=\S)(? tags - # - # Strip leading and trailing lines: - $text = preg_replace('/\A\n+|\n+\z/', '', $text); - - $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); - - # - # Wrap

    tags and unhashify HTML blocks - # - foreach ($grafs as $key => $value) { - $value = trim($this->runSpanGamut($value)); - - # Check if this should be enclosed in a paragraph. - # Clean tag hashes & block tag hashes are left alone. - $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); - - if ($is_p) { - $value = "

    $value

    "; - } - $grafs[$key] = $value; - } - - # Join grafs in one text, then unhash HTML tags. - $text = implode("\n\n", $grafs); - - # Finish by removing any tag hashes still present in $text. - $text = $this->unhash($text); - - return $text; - } - - - ### Footnotes - - function stripFootnotes($text) { - # - # Strips link definitions from text, stores the URLs and titles in - # hash references. - # - $less_than_tab = $this->tab_width - 1; - - # Link defs are in the form: [^id]: url "optional title" - $text = preg_replace_callback('{ - ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 - [ ]* - \n? # maybe *one* newline - ( # text = $2 (no blank lines allowed) - (?: - .+ # actual text - | - \n # newlines but - (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. - (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed - # by non-indented content - )* - ) - }xm', - array(&$this, '_stripFootnotes_callback'), - $text); - return $text; - } - function _stripFootnotes_callback($matches) { - $note_id = $this->fn_id_prefix . $matches[1]; - $this->footnotes[$note_id] = $this->outdent($matches[2]); - return ''; # String that will replace the block - } - - - function doFootnotes($text) { - # - # Replace footnote references in $text [^id] with a special text-token - # which will be replaced by the actual footnote marker in appendFootnotes. - # - if (!$this->in_anchor) { - $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); - } - return $text; - } - - - function appendFootnotes($text) { - # - # Append footnote list to text. - # - $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', - array(&$this, '_appendFootnotes_callback'), $text); - - if (!empty($this->footnotes_ordered)) { - $text .= "\n\n"; - $text .= "
    \n"; - $text .= "fn_backlink_class != "") { - $class = $this->fn_backlink_class; - $class = $this->encodeAttribute($class); - $attr .= " class=\"$class\""; - } - if ($this->fn_backlink_title != "") { - $title = $this->fn_backlink_title; - $title = $this->encodeAttribute($title); - $attr .= " title=\"$title\""; - } - $num = 0; - - while (!empty($this->footnotes_ordered)) { - $footnote = reset($this->footnotes_ordered); - $note_id = key($this->footnotes_ordered); - unset($this->footnotes_ordered[$note_id]); - - $footnote .= "\n"; # Need to append newline before parsing. - $footnote = $this->runBlockGamut("$footnote\n"); - $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', - array(&$this, '_appendFootnotes_callback'), $footnote); - - $attr = str_replace("%%", ++$num, $attr); - $note_id = $this->encodeAttribute($note_id); - - # Add backlink to last paragraph; create new paragraph if needed. - $backlink = ""; - if (preg_match('{

    $}', $footnote)) { - $footnote = substr($footnote, 0, -4) . " $backlink

    "; - } else { - $footnote .= "\n\n

    $backlink

    "; - } - - $text .= "
  • \n"; - $text .= $footnote . "\n"; - $text .= "
  • \n\n"; - } - - $text .= "\n"; - $text .= "
    "; - } - return $text; - } - function _appendFootnotes_callback($matches) { - $node_id = $this->fn_id_prefix . $matches[1]; - - # Create footnote marker only if it has a corresponding footnote *and* - # the footnote hasn't been used by another marker. - if (isset($this->footnotes[$node_id])) { - # Transfert footnote content to the ordered list. - $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; - unset($this->footnotes[$node_id]); - - $num = $this->footnote_counter++; - $attr = " rel=\"footnote\""; - if ($this->fn_link_class != "") { - $class = $this->fn_link_class; - $class = $this->encodeAttribute($class); - $attr .= " class=\"$class\""; - } - if ($this->fn_link_title != "") { - $title = $this->fn_link_title; - $title = $this->encodeAttribute($title); - $attr .= " title=\"$title\""; - } - - $attr = str_replace("%%", $num, $attr); - $node_id = $this->encodeAttribute($node_id); - - return - "". - "$num". - ""; - } - - return "[^".$matches[1]."]"; - } - - - ### Abbreviations ### - - function stripAbbreviations($text) { - # - # Strips abbreviations from text, stores titles in hash references. - # - $less_than_tab = $this->tab_width - 1; - - # Link defs are in the form: [id]*: url "optional title" - $text = preg_replace_callback('{ - ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 - (.*) # text = $2 (no blank lines allowed) - }xm', - array(&$this, '_stripAbbreviations_callback'), - $text); - return $text; - } - function _stripAbbreviations_callback($matches) { - $abbr_word = $matches[1]; - $abbr_desc = $matches[2]; - if ($this->abbr_word_re) - $this->abbr_word_re .= '|'; - $this->abbr_word_re .= preg_quote($abbr_word); - $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); - return ''; # String that will replace the block - } - - - function doAbbreviations($text) { - # - # Find defined abbreviations in text and wrap them in elements. 
- # - if ($this->abbr_word_re) { - // cannot use the /x modifier because abbr_word_re may - // contain significant spaces: - $text = preg_replace_callback('{'. - '(?abbr_word_re.')'. - '(?![\w\x1A])'. - '}', - array(&$this, '_doAbbreviations_callback'), $text); - } - return $text; - } - function _doAbbreviations_callback($matches) { - $abbr = $matches[0]; - if (isset($this->abbr_desciptions[$abbr])) { - $desc = $this->abbr_desciptions[$abbr]; - if (empty($desc)) { - return $this->hashPart("$abbr"); - } else { - $desc = $this->encodeAttribute($desc); - return $this->hashPart("$abbr"); - } - } else { - return $matches[0]; - } - } - -} - - -/* - -PHP Markdown Extra -================== - -Description ------------ - -This is a PHP port of the original Markdown formatter written in Perl -by John Gruber. This special "Extra" version of PHP Markdown features -further enhancements to the syntax for making additional constructs -such as tables and definition list. - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like
    and as well). - -For more information about Markdown's syntax, see: - - - - -Bugs ----- - -To file bug reports please send email to: - - - -Please include with your report: (1) the example input; (2) the output you -expected; (3) the output Markdown actually produced. - - -Version History ---------------- - -See the readme file for detailed release notes for this version. - - -Copyright and License ---------------------- - -PHP Markdown & Extra -Copyright (c) 2004-2008 Michel Fortin - -All rights reserved. - -Based on Markdown -Copyright (c) 2003-2006 John Gruber - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. 
In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -*/ -?> \ No newline at end of file diff --git a/ivfdec.c b/ivfdec.c deleted file mode 100644 index 8ee60b6fa..000000000 --- a/ivfdec.c +++ /dev/null @@ -1,593 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -/* This is a simple program that reads ivf files and decodes them - * using the new interface. Decoded frames are output as YV12 raw. 
- */ -#include -#include -#include -#include -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx_config.h" -#include "vpx/vpx_decoder.h" -#include "vpx_ports/vpx_timer.h" -#if CONFIG_VP8_DECODER -#include "vpx/vp8dx.h" -#endif -#if CONFIG_MD5 -#include "md5_utils.h" -#endif - -static const char *exec_name; - -static const struct -{ - char const *name; - const vpx_codec_iface_t *iface; - unsigned int fourcc; - unsigned int fourcc_mask; -} ifaces[] = -{ -#if CONFIG_VP8_DECODER - {"vp8", &vpx_codec_vp8_dx_algo, 0x00385056, 0x00FFFFFF}, -#endif -}; - -#include "args.h" -static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, - "Codec to use"); -static const arg_def_t prefixarg = ARG_DEF("p", "prefix", 1, - "Prefix to use when saving frames"); -static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0, - "Output file is YV12 "); -static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0, - "Output file is I420 (default)"); -static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0, - "Synonym for --yv12"); -static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0, - "Don't process the decoded frames"); -static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0, - "Show progress after each frame decodes"); -static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1, - "Stop decoding after n frames"); -static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0, - "Postprocess decoded frames"); -static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0, - "Show timing summary"); -static const arg_def_t outputfile = ARG_DEF("o", "output-raw-file", 1, - "Output raw yv12 file instead of images"); -static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1, - "Max threads to use"); -static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, - "Suppress version string"); - -#if CONFIG_MD5 -static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0, - "Compute the MD5 sum of the decoded frame"); -#endif -static const arg_def_t *all_args[] = -{ - 
&codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg, - &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile, - &threadsarg, &quietarg, -#if CONFIG_MD5 - &md5arg, -#endif - NULL -}; - -#if CONFIG_VP8_DECODER -static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1, - "Enable VP8 postproc add noise"); -static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0, - "Enable VP8 deblocking"); -static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1, - "Enable VP8 demacroblocking, w/ level"); -static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1, - "Enable VP8 visible debug info"); - - -static const arg_def_t *vp8_pp_args[] = -{ - &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info, - NULL -}; -#endif - -static void usage_exit() -{ - int i; - - fprintf(stderr, "Usage: %s filename\n\n" - "Options:\n", exec_name); - arg_show_usage(stderr, all_args); -#if CONFIG_VP8_DECODER - fprintf(stderr, "\nvp8 Postprocessing Options:\n"); - arg_show_usage(stderr, vp8_pp_args); -#endif - fprintf(stderr, "\nIncluded decoders:\n\n"); - - for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) - fprintf(stderr, " %-6s - %s\n", - ifaces[i].name, - vpx_codec_iface_name(ifaces[i].iface)); - - exit(EXIT_FAILURE); -} - -void die(const char *fmt, ...) 
-{ - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - usage_exit(); -} - -static unsigned int mem_get_le16(const void *vmem) -{ - unsigned int val; - const unsigned char *mem = (const unsigned char *)vmem; - - val = mem[1] << 8; - val |= mem[0]; - return val; -} - -static unsigned int mem_get_le32(const void *vmem) -{ - unsigned int val; - const unsigned char *mem = (const unsigned char *)vmem; - - val = mem[3] << 24; - val |= mem[2] << 16; - val |= mem[1] << 8; - val |= mem[0]; - return val; -} - -#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t)) -#define RAW_FRAME_HDR_SZ (sizeof(uint32_t)) -static int read_frame(FILE *infile, - uint8_t **buf, - uint32_t *buf_sz, - uint32_t *buf_alloc_sz, - int is_ivf) -{ - char raw_hdr[IVF_FRAME_HDR_SZ]; - uint32_t new_buf_sz; - - /* For both the raw and ivf formats, the frame size is the first 4 bytes - * of the frame header. We just need to special case on the header - * size. - */ - if (fread(raw_hdr, is_ivf ? 
IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, - infile) != 1) - { - if (!feof(infile)) - fprintf(stderr, "Failed to read frame size\n"); - - new_buf_sz = 0; - } - else - { - new_buf_sz = mem_get_le32(raw_hdr); - - if (new_buf_sz > 256 * 1024 * 1024) - { - fprintf(stderr, "Error: Read invalid frame size (%u)\n", - new_buf_sz); - new_buf_sz = 0; - } - - if (!is_ivf && new_buf_sz > 256 * 1024) - fprintf(stderr, "Warning: Read invalid frame size (%u)" - " - not a raw file?\n", new_buf_sz); - - if (new_buf_sz > *buf_alloc_sz) - { - uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz); - - if (new_buf) - { - *buf = new_buf; - *buf_alloc_sz = 2 * new_buf_sz; - } - else - { - fprintf(stderr, "Failed to allocate compressed data buffer\n"); - new_buf_sz = 0; - } - } - } - - *buf_sz = new_buf_sz; - - if (*buf_sz) - { - if (fread(*buf, 1, *buf_sz, infile) != *buf_sz) - { - fprintf(stderr, "Failed to read full frame\n"); - return 1; - } - - return 0; - } - - return 1; -} - -void *out_open(const char *out_fn, int do_md5) -{ - void *out = NULL; - - if (do_md5) - { -#if CONFIG_MD5 - md5_ctx_t *md5_ctx = out = malloc(sizeof(md5_ctx_t)); - (void)out_fn; - md5_init(md5_ctx); -#endif - } - else - { - FILE *outfile = out = strcmp("-", out_fn) ? 
fopen(out_fn, "wb") : stdout; - - if (!outfile) - { - fprintf(stderr, "Failed to output file"); - exit(EXIT_FAILURE); - } - } - - return out; -} - -void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5) -{ - if (do_md5) - { -#if CONFIG_MD5 - md5_update(out, buf, len); -#endif - } - else - { - fwrite(buf, 1, len, out); - } -} - -void out_close(void *out, const char *out_fn, int do_md5) -{ - if (do_md5) - { -#if CONFIG_MD5 - uint8_t md5[16]; - int i; - - md5_finalize(out, md5); - free(out); - - for (i = 0; i < 16; i++) - printf("%02x", md5[i]); - - printf(" %s\n", out_fn); -#endif - } - else - { - fclose(out); - } -} - -unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc) -{ - char raw_hdr[32]; - int is_ivf = 0; - - if (fread(raw_hdr, 1, 32, infile) == 32) - { - if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K' - && raw_hdr[2] == 'I' && raw_hdr[3] == 'F') - { - is_ivf = 1; - - if (mem_get_le16(raw_hdr + 4) != 0) - fprintf(stderr, "Error: Unrecognized IVF version! This file may not" - " decode properly."); - - *fourcc = mem_get_le32(raw_hdr + 8); - } - } - - if (!is_ivf) - rewind(infile); - - return is_ivf; -} - -int main(int argc, const char **argv_) -{ - vpx_codec_ctx_t decoder; - char *prefix = NULL, *fn = NULL; - int i; - uint8_t *buf = NULL; - uint32_t buf_sz = 0, buf_alloc_sz = 0; - FILE *infile; - int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0; - int stop_after = 0, postproc = 0, summary = 0, quiet = 0; - vpx_codec_iface_t *iface = NULL; - unsigned int is_ivf, fourcc; - unsigned long dx_time = 0; - struct arg arg; - char **argv, **argi, **argj; - const char *fn2 = 0; - void *out = NULL; - vpx_codec_dec_cfg_t cfg = {0}; -#if CONFIG_VP8_DECODER - vp8_postproc_cfg_t vp8_pp_cfg = {0}; -#endif - - /* Parse command line */ - exec_name = argv_[0]; - argv = argv_dup(argc - 1, argv_ + 1); - - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - memset(&arg, 0, sizeof(arg)); - arg.argv_step = 1; - - 
if (arg_match(&arg, &codecarg, argi)) - { - int j, k = -1; - - for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++) - if (!strcmp(ifaces[j].name, arg.val)) - k = j; - - if (k >= 0) - iface = ifaces[k].iface; - else - die("Error: Unrecognized argument (%s) to --codec\n", - arg.val); - } - else if (arg_match(&arg, &outputfile, argi)) - fn2 = arg.val; - else if (arg_match(&arg, &prefixarg, argi)) - prefix = strdup(arg.val); - else if (arg_match(&arg, &use_yv12, argi)) - flipuv = 1; - else if (arg_match(&arg, &use_i420, argi)) - flipuv = 0; - else if (arg_match(&arg, &flipuvarg, argi)) - flipuv = 1; - else if (arg_match(&arg, &noblitarg, argi)) - noblit = 1; - else if (arg_match(&arg, &progressarg, argi)) - progress = 1; - else if (arg_match(&arg, &limitarg, argi)) - stop_after = arg_parse_uint(&arg); - else if (arg_match(&arg, &postprocarg, argi)) - postproc = 1; - else if (arg_match(&arg, &md5arg, argi)) - do_md5 = 1; - else if (arg_match(&arg, &summaryarg, argi)) - summary = 1; - else if (arg_match(&arg, &threadsarg, argi)) - cfg.threads = arg_parse_uint(&arg); - else if (arg_match(&arg, &quietarg, argi)) - quiet = 1; - -#if CONFIG_VP8_DECODER - else if (arg_match(&arg, &addnoise_level, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE; - vp8_pp_cfg.noise_level = arg_parse_uint(&arg); - } - else if (arg_match(&arg, &demacroblock_level, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK; - vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg); - } - else if (arg_match(&arg, &deblock, argi)) - { - postproc = 1; - vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK; - } - else if (arg_match(&arg, &pp_debug_info, argi)) - { - unsigned int level = arg_parse_uint(&arg); - - postproc = 1; - vp8_pp_cfg.post_proc_flag &= ~0x7; - - if (level) - vp8_pp_cfg.post_proc_flag |= 8 << (level - 1); - } - -#endif - else - argj++; - } - - /* Check for unrecognized options */ - for (argi = argv; *argi; argi++) - if (argi[0][0] == '-' && strlen(argi[0]) 
> 1) - die("Error: Unrecognized option %s\n", *argi); - - /* Handle non-option arguments */ - fn = argv[0]; - - if (!fn) - usage_exit(); - - if (!prefix) - prefix = strdup("img"); - - /* Open file */ - infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin; - - if (!infile) - { - fprintf(stderr, "Failed to open file"); - return EXIT_FAILURE; - } - - if (fn2) - out = out_open(fn2, do_md5); - - is_ivf = file_is_ivf(infile, &fourcc); - - if (is_ivf) - { - /* Try to determine the codec from the fourcc. */ - for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++) - if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc) - { - vpx_codec_iface_t *ivf_iface = ifaces[i].iface; - - if (iface && iface != ivf_iface) - fprintf(stderr, "Notice -- IVF header indicates codec: %s\n", - ifaces[i].name); - else - iface = ivf_iface; - - break; - } - } - - if (vpx_codec_dec_init(&decoder, iface ? iface : ifaces[0].iface, &cfg, - postproc ? VPX_CODEC_USE_POSTPROC : 0)) - { - fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - - if (!quiet) - fprintf(stderr, "%s\n", decoder.name); - -#if CONFIG_VP8_DECODER - - if (vp8_pp_cfg.post_proc_flag - && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) - { - fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - -#endif - - /* Decode file */ - while (!read_frame(infile, &buf, &buf_sz, &buf_alloc_sz, is_ivf)) - { - vpx_codec_iter_t iter = NULL; - vpx_image_t *img; - struct vpx_usec_timer timer; - - vpx_usec_timer_start(&timer); - - if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0)) - { - const char *detail = vpx_codec_error_detail(&decoder); - fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder)); - - if (detail) - fprintf(stderr, " Additional information: %s\n", detail); - - goto fail; - } - - vpx_usec_timer_mark(&timer); - dx_time += vpx_usec_timer_elapsed(&timer); - - ++frame_in; - - if (progress) - 
fprintf(stderr, "decoded frame %d.\n", frame_in); - - if ((img = vpx_codec_get_frame(&decoder, &iter))) - ++frame_out; - - if (!noblit) - { - if (img) - { - unsigned int y; - char out_fn[128+24]; - uint8_t *buf; - const char *sfx = flipuv ? "yv12" : "i420"; - - if (!fn2) - { - sprintf(out_fn, "%s-%dx%d-%04d.%s", - prefix, img->d_w, img->d_h, frame_in, sfx); - out = out_open(out_fn, do_md5); - } - - buf = img->planes[VPX_PLANE_Y]; - - for (y = 0; y < img->d_h; y++) - { - out_put(out, buf, img->d_w, do_md5); - buf += img->stride[VPX_PLANE_Y]; - } - - buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U]; - - for (y = 0; y < (1 + img->d_h) / 2; y++) - { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); - buf += img->stride[VPX_PLANE_U]; - } - - buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V]; - - for (y = 0; y < (1 + img->d_h) / 2; y++) - { - out_put(out, buf, (1 + img->d_w) / 2, do_md5); - buf += img->stride[VPX_PLANE_V]; - } - - if (!fn2) - out_close(out, out_fn, do_md5); - } - } - - if (stop_after && frame_in >= stop_after) - break; - } - - if (summary) - { - fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\n", - frame_in, frame_out, dx_time, (float)frame_out * 1000000.0 / (float)dx_time); - } - -fail: - - if (vpx_codec_destroy(&decoder)) - { - fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder)); - return EXIT_FAILURE; - } - - if (fn2) - out_close(out, fn2, do_md5); - - free(buf); - fclose(infile); - free(prefix); - free(argv); - - return EXIT_SUCCESS; -} diff --git a/ivfenc.c b/ivfenc.c deleted file mode 100644 index b7a9f2ece..000000000 --- a/ivfenc.c +++ /dev/null @@ -1,1067 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -/* This is a simple program that encodes YV12 files and generates ivf - * files using the new interface. - */ -#if defined(_MSC_VER) -#define USE_POSIX_MMAP 0 -#else -#define USE_POSIX_MMAP 1 -#endif - -#include -#include -#include -#include -#include "vpx/vpx_encoder.h" -#if USE_POSIX_MMAP -#include -#include -#include -#include -#include -#endif -#include "vpx_config.h" -#include "vpx/vp8cx.h" -#include "vpx_ports/mem_ops.h" -#include "vpx_ports/vpx_timer.h" - -static const char *exec_name; - -static const struct codec_item -{ - char const *name; - const vpx_codec_iface_t *iface; - unsigned int fourcc; -} codecs[] = -{ -#if CONFIG_EXPERIMENTAL && CONFIG_VP8_ENCODER - {"vp8x", &vpx_codec_vp8x_cx_algo, 0x78385056}, -#endif -#if CONFIG_VP8_ENCODER - {"vp8", &vpx_codec_vp8_cx_algo, 0x30385056}, -#endif -}; - -static void usage_exit(); - -void die(const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vprintf(fmt, ap); - printf("\n"); - usage_exit(); -} - -static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s) -{ - if (ctx->err) - { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - - if (detail) - printf(" %s\n", detail); - - exit(EXIT_FAILURE); - } -} - -/* This structure is used to abstract the different ways of handling - * first pass statistics. 
- */ -typedef struct -{ - vpx_fixed_buf_t buf; - int pass; - FILE *file; - char *buf_ptr; - size_t buf_alloc_sz; -} stats_io_t; - -int stats_open_file(stats_io_t *stats, const char *fpf, int pass) -{ - int res; - - stats->pass = pass; - - if (pass == 0) - { - stats->file = fopen(fpf, "wb"); - stats->buf.sz = 0; - stats->buf.buf = NULL, - res = (stats->file != NULL); - } - else - { -#if 0 -#elif USE_POSIX_MMAP - struct stat stat_buf; - int fd; - - fd = open(fpf, O_RDONLY); - stats->file = fdopen(fd, "rb"); - fstat(fd, &stat_buf); - stats->buf.sz = stat_buf.st_size; - stats->buf.buf = mmap(NULL, stats->buf.sz, PROT_READ, MAP_PRIVATE, - fd, 0); - res = (stats->buf.buf != NULL); -#else - size_t nbytes; - - stats->file = fopen(fpf, "rb"); - - if (fseek(stats->file, 0, SEEK_END)) - { - fprintf(stderr, "First-pass stats file must be seekable!\n"); - exit(EXIT_FAILURE); - } - - stats->buf.sz = stats->buf_alloc_sz = ftell(stats->file); - rewind(stats->file); - - stats->buf.buf = malloc(stats->buf_alloc_sz); - - if (!stats->buf.buf) - { - fprintf(stderr, "Failed to allocate first-pass stats buffer (%d bytes)\n", - stats->buf_alloc_sz); - exit(EXIT_FAILURE); - } - - nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file); - res = (nbytes == stats->buf.sz); -#endif - } - - return res; -} - -int stats_open_mem(stats_io_t *stats, int pass) -{ - int res; - stats->pass = pass; - - if (!pass) - { - stats->buf.sz = 0; - stats->buf_alloc_sz = 64 * 1024; - stats->buf.buf = malloc(stats->buf_alloc_sz); - } - - stats->buf_ptr = stats->buf.buf; - res = (stats->buf.buf != NULL); - return res; -} - - -void stats_close(stats_io_t *stats) -{ - if (stats->file) - { - if (stats->pass == 1) - { -#if 0 -#elif USE_POSIX_MMAP - munmap(stats->buf.buf, stats->buf.sz); -#else - free(stats->buf.buf); -#endif - } - - fclose(stats->file); - stats->file = NULL; - } - else - { - if (stats->pass == 1) - free(stats->buf.buf); - } -} - -void stats_write(stats_io_t *stats, const void *pkt, size_t len) 
-{ - if (stats->file) - { - fwrite(pkt, 1, len, stats->file); - } - else - { - if (stats->buf.sz + len > stats->buf_alloc_sz) - { - size_t new_sz = stats->buf_alloc_sz + 64 * 1024; - char *new_ptr = realloc(stats->buf.buf, new_sz); - - if (new_ptr) - { - stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf); - stats->buf.buf = new_ptr; - stats->buf_alloc_sz = new_sz; - } /* else ... */ - } - - memcpy(stats->buf_ptr, pkt, len); - stats->buf.sz += len; - stats->buf_ptr += len; - } -} - -vpx_fixed_buf_t stats_get(stats_io_t *stats) -{ - return stats->buf; -} - -#define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */ -static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf) -{ - int plane = 0; - - if (is_ivf) - { - char junk[IVF_FRAME_HDR_SZ]; - - /* Skip the frame header. We know how big the frame should be. See - * write_ivf_frame_header() for documentation on the frame header - * layout. - */ - fread(junk, 1, IVF_FRAME_HDR_SZ, f); - } - - for (plane = 0; plane < 3; plane++) - { - unsigned char *ptr; - int w = (plane ? (1 + img->d_w) / 2 : img->d_w); - int h = (plane ? (1 + img->d_h) / 2 : img->d_h); - int r; - - /* Determine the correct plane based on the image format. The for-loop - * always counts in Y,U,V order, but this may not match the order of - * the data on disk. - */ - switch (plane) - { - case 1: - ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U]; - break; - case 2: - ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V]; - break; - default: - ptr = img->planes[plane]; - } - - for (r = 0; r < h; r++) - { - fread(ptr, 1, w, f); - ptr += img->stride[plane]; - } - } - - return !feof(f); -} - - -#define IVF_FILE_HDR_SZ (32) -unsigned int file_is_ivf(FILE *infile, - unsigned int *fourcc, - unsigned int *width, - unsigned int *height) -{ - char raw_hdr[IVF_FILE_HDR_SZ]; - int is_ivf = 0; - - /* See write_ivf_file_header() for more documentation on the file header - * layout. 
- */ - if (fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ) - { - if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K' - && raw_hdr[2] == 'I' && raw_hdr[3] == 'F') - { - is_ivf = 1; - - if (mem_get_le16(raw_hdr + 4) != 0) - fprintf(stderr, "Error: Unrecognized IVF version! This file may not" - " decode properly."); - - *fourcc = mem_get_le32(raw_hdr + 8); - } - } - - if (is_ivf) - { - *width = mem_get_le16(raw_hdr + 12); - *height = mem_get_le16(raw_hdr + 14); - } - else - rewind(infile); - - return is_ivf; -} - - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - unsigned int fourcc, - int frame_cnt) -{ - char header[32]; - - if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header + 4, 0); /* version */ - mem_put_le16(header + 6, 32); /* headersize */ - mem_put_le32(header + 8, fourcc); /* headersize */ - mem_put_le16(header + 12, cfg->g_w); /* width */ - mem_put_le16(header + 14, cfg->g_h); /* height */ - mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header + 24, frame_cnt); /* length */ - mem_put_le32(header + 28, 0); /* unused */ - - fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, pkt->data.frame.sz); - mem_put_le32(header + 4, pts & 0xFFFFFFFF); - mem_put_le32(header + 8, pts >> 32); - - fwrite(header, 1, 12, outfile); -} - -#include "args.h" - -static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0, - "Input file is YV12 "); -static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0, - "Input file is I420 (default)"); -static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, - 
"Codec to use"); -static const arg_def_t passes = ARG_DEF("p", "passes", 1, - "Number of passes (1/2)"); -static const arg_def_t pass_arg = ARG_DEF(NULL, "pass", 1, - "Pass to execute (1/2)"); -static const arg_def_t fpf_name = ARG_DEF(NULL, "fpf", 1, - "First pass statistics file name"); -static const arg_def_t limit = ARG_DEF(NULL, "limit", 1, - "Stop encoding after n input frames"); -static const arg_def_t deadline = ARG_DEF("d", "deadline", 1, - "Deadline per frame (usec)"); -static const arg_def_t best_dl = ARG_DEF(NULL, "best", 0, - "Use Best Quality Deadline"); -static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, - "Use Good Quality Deadline"); -static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, - "Use Realtime Quality Deadline"); -static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, - "Show encoder parameters"); -static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, - "Show PSNR in status line"); -static const arg_def_t *main_args[] = -{ - &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl, - &verbosearg, &psnrarg, - NULL -}; - -static const arg_def_t usage = ARG_DEF("u", "usage", 1, - "Usage profile number to use"); -static const arg_def_t threads = ARG_DEF("t", "threads", 1, - "Max number of threads to use"); -static const arg_def_t profile = ARG_DEF(NULL, "profile", 1, - "Bitstream profile number to use"); -static const arg_def_t width = ARG_DEF("w", "width", 1, - "Frame width"); -static const arg_def_t height = ARG_DEF("h", "height", 1, - "Frame height"); -static const arg_def_t timebase = ARG_DEF(NULL, "timebase", 1, - "Stream timebase (frame duration)"); -static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1, - "Enable error resiliency features"); -static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1, - "Max number of frames to lag"); - -static const arg_def_t *global_args[] = -{ - &use_yv12, &use_i420, &usage, &threads, &profile, - &width, &height, 
&timebase, &error_resilient, - &lag_in_frames, NULL -}; - -static const arg_def_t dropframe_thresh = ARG_DEF(NULL, "drop-frame", 1, - "Temporal resampling threshold (buf %)"); -static const arg_def_t resize_allowed = ARG_DEF(NULL, "resize-allowed", 1, - "Spatial resampling enabled (bool)"); -static const arg_def_t resize_up_thresh = ARG_DEF(NULL, "resize-up", 1, - "Upscale threshold (buf %)"); -static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1, - "Downscale threshold (buf %)"); -static const arg_def_t end_usage = ARG_DEF(NULL, "end-usage", 1, - "VBR=0 | CBR=1"); -static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1, - "Bitrate (kbps)"); -static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1, - "Minimum (best) quantizer"); -static const arg_def_t max_quantizer = ARG_DEF(NULL, "max-q", 1, - "Maximum (worst) quantizer"); -static const arg_def_t undershoot_pct = ARG_DEF(NULL, "undershoot-pct", 1, - "Datarate undershoot (min) target (%)"); -static const arg_def_t overshoot_pct = ARG_DEF(NULL, "overshoot-pct", 1, - "Datarate overshoot (max) target (%)"); -static const arg_def_t buf_sz = ARG_DEF(NULL, "buf-sz", 1, - "Client buffer size (ms)"); -static const arg_def_t buf_initial_sz = ARG_DEF(NULL, "buf-initial-sz", 1, - "Client initial buffer size (ms)"); -static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1, - "Client optimal buffer size (ms)"); -static const arg_def_t *rc_args[] = -{ - &dropframe_thresh, &resize_allowed, &resize_up_thresh, &resize_down_thresh, - &end_usage, &target_bitrate, &min_quantizer, &max_quantizer, - &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz, - NULL -}; - - -static const arg_def_t bias_pct = ARG_DEF(NULL, "bias-pct", 1, - "CBR/VBR bias (0=CBR, 100=VBR)"); -static const arg_def_t minsection_pct = ARG_DEF(NULL, "minsection-pct", 1, - "GOP min bitrate (% of target)"); -static const arg_def_t maxsection_pct = ARG_DEF(NULL, "maxsection-pct", 
1, - "GOP max bitrate (% of target)"); -static const arg_def_t *rc_twopass_args[] = -{ - &bias_pct, &minsection_pct, &maxsection_pct, NULL -}; - - -static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1, - "Minimum keyframe interval (frames)"); -static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1, - "Maximum keyframe interval (frames)"); -static const arg_def_t *kf_args[] = -{ - &kf_min_dist, &kf_max_dist, NULL -}; - - -#if CONFIG_VP8_ENCODER -static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1, - "Noise sensitivity (frames to blur)"); -static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1, - "Filter sharpness (0-7)"); -static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1, - "Motion detection threshold"); -#endif - -#if CONFIG_VP8_ENCODER -static const arg_def_t cpu_used = ARG_DEF(NULL, "cpu-used", 1, - "CPU Used (-16..16)"); -#endif - - -#if CONFIG_VP8_ENCODER -static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1, - "Number of token partitions to use, log2"); -static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1, - "Enable automatic alt reference frames"); -static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, - "alt_ref Max Frames"); -static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1, - "alt_ref Strength"); -static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1, - "alt_ref Type"); - -static const arg_def_t *vp8_args[] = -{ - &cpu_used, &auto_altref, &noise_sens, &sharpness, &static_thresh, - &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, NULL -}; -static const int vp8_arg_ctrl_map[] = -{ - VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, - VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, - VP8E_SET_TOKEN_PARTITIONS, - VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH , VP8E_SET_ARNR_TYPE, 0 -}; -#endif - -static const arg_def_t *no_args[] = { NULL }; - -static void 
usage_exit() -{ - int i; - - printf("Usage: %s src_filename dst_filename\n", exec_name); - - printf("\n_options:\n"); - arg_show_usage(stdout, main_args); - printf("\n_encoder Global Options:\n"); - arg_show_usage(stdout, global_args); - printf("\n_rate Control Options:\n"); - arg_show_usage(stdout, rc_args); - printf("\n_twopass Rate Control Options:\n"); - arg_show_usage(stdout, rc_twopass_args); - printf("\n_keyframe Placement Options:\n"); - arg_show_usage(stdout, kf_args); -#if CONFIG_VP8_ENCODER - printf("\n_vp8 Specific Options:\n"); - arg_show_usage(stdout, vp8_args); -#endif - printf("\n" - "Included encoders:\n" - "\n"); - - for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++) - printf(" %-6s - %s\n", - codecs[i].name, - vpx_codec_iface_name(codecs[i].iface)); - - exit(EXIT_FAILURE); -} - -#define ARG_CTRL_CNT_MAX 10 - - -int main(int argc, const char **argv_) -{ - vpx_codec_ctx_t encoder; - const char *in_fn = NULL, *out_fn = NULL, *stats_fn = NULL; - int i; - FILE *infile, *outfile; - vpx_codec_enc_cfg_t cfg; - vpx_codec_err_t res; - int pass, one_pass_only = 0; - stats_io_t stats; - vpx_image_t raw; - const struct codec_item *codec = codecs; - int frame_avail, got_data; - - struct arg arg; - char **argv, **argi, **argj; - int arg_usage = 0, arg_passes = 1, arg_deadline = 0; - int arg_ctrls[ARG_CTRL_CNT_MAX][2], arg_ctrl_cnt = 0; - int arg_limit = 0; - static const arg_def_t **ctrl_args = no_args; - static const int *ctrl_args_map = NULL; - int verbose = 0, show_psnr = 0; - int arg_use_i420 = 1; - unsigned long cx_time = 0; - unsigned int is_ivf, fourcc; - - exec_name = argv_[0]; - - if (argc < 3) - usage_exit(); - - - /* First parse the codec and usage values, because we want to apply other - * parameters on top of the default configuration provided by the codec. 
- */ - argv = argv_dup(argc - 1, argv_ + 1); - - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - arg.argv_step = 1; - - if (arg_match(&arg, &codecarg, argi)) - { - int j, k = -1; - - for (j = 0; j < sizeof(codecs) / sizeof(codecs[0]); j++) - if (!strcmp(codecs[j].name, arg.val)) - k = j; - - if (k >= 0) - codec = codecs + k; - else - die("Error: Unrecognized argument (%s) to --codec\n", - arg.val); - - } - else if (arg_match(&arg, &passes, argi)) - { - arg_passes = arg_parse_uint(&arg); - - if (arg_passes < 1 || arg_passes > 2) - die("Error: Invalid number of passes (%d)\n", arg_passes); - } - else if (arg_match(&arg, &pass_arg, argi)) - { - one_pass_only = arg_parse_uint(&arg); - - if (one_pass_only < 1 || one_pass_only > 2) - die("Error: Invalid pass selected (%d)\n", one_pass_only); - } - else if (arg_match(&arg, &fpf_name, argi)) - stats_fn = arg.val; - else if (arg_match(&arg, &usage, argi)) - arg_usage = arg_parse_uint(&arg); - else if (arg_match(&arg, &deadline, argi)) - arg_deadline = arg_parse_uint(&arg); - else if (arg_match(&arg, &best_dl, argi)) - arg_deadline = VPX_DL_BEST_QUALITY; - else if (arg_match(&arg, &good_dl, argi)) - arg_deadline = VPX_DL_GOOD_QUALITY; - else if (arg_match(&arg, &rt_dl, argi)) - arg_deadline = VPX_DL_REALTIME; - else if (arg_match(&arg, &use_yv12, argi)) - { - arg_use_i420 = 0; - } - else if (arg_match(&arg, &use_i420, argi)) - { - arg_use_i420 = 1; - } - else if (arg_match(&arg, &verbosearg, argi)) - verbose = 1; - else if (arg_match(&arg, &limit, argi)) - arg_limit = arg_parse_uint(&arg); - else if (arg_match(&arg, &psnrarg, argi)) - show_psnr = 1; - else - argj++; - } - - /* Ensure that --passes and --pass are consistent. If --pass is set and --passes=2, - * ensure --fpf was set. 
- */ - if (one_pass_only) - { - /* DWIM: Assume the user meant passes=2 if pass=2 is specified */ - if (one_pass_only > arg_passes) - { - printf("Warning: Assuming --pass=%d implies --passes=%d\n", - one_pass_only, one_pass_only); - arg_passes = one_pass_only; - } - - if (arg_passes == 2 && !stats_fn) - die("Must specify --fpf when --pass=%d and --passes=2\n", one_pass_only); - } - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(codec->iface, &cfg, arg_usage); - - if (res) - { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - - /* Now parse the remainder of the parameters. */ - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - arg.argv_step = 1; - - if (0); - else if (arg_match(&arg, &threads, argi)) - cfg.g_threads = arg_parse_uint(&arg); - else if (arg_match(&arg, &profile, argi)) - cfg.g_profile = arg_parse_uint(&arg); - else if (arg_match(&arg, &width, argi)) - cfg.g_w = arg_parse_uint(&arg); - else if (arg_match(&arg, &height, argi)) - cfg.g_h = arg_parse_uint(&arg); - else if (arg_match(&arg, &timebase, argi)) - cfg.g_timebase = arg_parse_rational(&arg); - else if (arg_match(&arg, &error_resilient, argi)) - cfg.g_error_resilient = arg_parse_uint(&arg); - else if (arg_match(&arg, &lag_in_frames, argi)) - cfg.g_lag_in_frames = arg_parse_uint(&arg); - else if (arg_match(&arg, &dropframe_thresh, argi)) - cfg.rc_dropframe_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_allowed, argi)) - cfg.rc_resize_allowed = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_up_thresh, argi)) - cfg.rc_resize_up_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_down_thresh, argi)) - cfg.rc_resize_down_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &resize_down_thresh, argi)) - cfg.rc_resize_down_thresh = arg_parse_uint(&arg); - else if (arg_match(&arg, &end_usage, argi)) - cfg.rc_end_usage = arg_parse_uint(&arg); - else if 
(arg_match(&arg, &target_bitrate, argi)) - cfg.rc_target_bitrate = arg_parse_uint(&arg); - else if (arg_match(&arg, &min_quantizer, argi)) - cfg.rc_min_quantizer = arg_parse_uint(&arg); - else if (arg_match(&arg, &max_quantizer, argi)) - cfg.rc_max_quantizer = arg_parse_uint(&arg); - else if (arg_match(&arg, &undershoot_pct, argi)) - cfg.rc_undershoot_pct = arg_parse_uint(&arg); - else if (arg_match(&arg, &overshoot_pct, argi)) - cfg.rc_overshoot_pct = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_sz, argi)) - cfg.rc_buf_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_initial_sz, argi)) - cfg.rc_buf_initial_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &buf_optimal_sz, argi)) - cfg.rc_buf_optimal_sz = arg_parse_uint(&arg); - else if (arg_match(&arg, &bias_pct, argi)) - { - cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg); - - if (arg_passes < 2) - printf("Warning: option %s ignored in one-pass mode.\n", - arg.name); - } - else if (arg_match(&arg, &minsection_pct, argi)) - { - cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg); - - if (arg_passes < 2) - printf("Warning: option %s ignored in one-pass mode.\n", - arg.name); - } - else if (arg_match(&arg, &maxsection_pct, argi)) - { - cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg); - - if (arg_passes < 2) - printf("Warning: option %s ignored in one-pass mode.\n", - arg.name); - } - else if (arg_match(&arg, &kf_min_dist, argi)) - cfg.kf_min_dist = arg_parse_uint(&arg); - else if (arg_match(&arg, &kf_max_dist, argi)) - cfg.kf_max_dist = arg_parse_uint(&arg); - else - argj++; - } - - /* Handle codec specific options */ -#if CONFIG_VP8_ENCODER - - if (codec->iface == &vpx_codec_vp8_cx_algo || - codec->iface == &vpx_codec_vp8x_cx_algo) - { - ctrl_args = vp8_args; - ctrl_args_map = vp8_arg_ctrl_map; - } - -#endif - - for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) - { - int match = 0; - - arg.argv_step = 1; - - for (i = 0; ctrl_args[i]; i++) - { - if (arg_match(&arg, 
ctrl_args[i], argi)) - { - match = 1; - - if (arg_ctrl_cnt < ARG_CTRL_CNT_MAX) - { - arg_ctrls[arg_ctrl_cnt][0] = ctrl_args_map[i]; - arg_ctrls[arg_ctrl_cnt][1] = arg_parse_int(&arg); - arg_ctrl_cnt++; - } - } - } - - if (!match) - argj++; - } - - /* Check for unrecognized options */ - for (argi = argv; *argi; argi++) - if (argi[0][0] == '-') - die("Error: Unrecognized option %s\n", *argi); - - /* Handle non-option arguments */ - in_fn = argv[0]; - out_fn = argv[1]; - - if (!in_fn || !out_fn) - usage_exit(); - - /* Parse certain options from the input file, if possible */ - infile = fopen(in_fn, "rb"); - - if (!infile) - { - printf("Failed to open input file"); - return EXIT_FAILURE; - } - - is_ivf = file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h); - - if (is_ivf) - { - switch (fourcc) - { - case 0x32315659: - arg_use_i420 = 0; - break; - case 0x30323449: - arg_use_i420 = 1; - break; - default: - printf("Unsupported fourcc (%08x) in IVF\n", fourcc); - return EXIT_FAILURE; - } - } - - fclose(infile); - - -#define SHOW(field) printf(" %-28s = %d\n", #field, cfg.field) - - if (verbose) - { - printf("Codec: %s\n", vpx_codec_iface_name(codec->iface)); - printf("Source file: %s Format: %s\n", in_fn, arg_use_i420 ? 
"I420" : "YV12"); - printf("Destination file: %s\n", out_fn); - printf("Encoder parameters:\n"); - - SHOW(g_usage); - SHOW(g_threads); - SHOW(g_profile); - SHOW(g_w); - SHOW(g_h); - SHOW(g_timebase.num); - SHOW(g_timebase.den); - SHOW(g_error_resilient); - SHOW(g_pass); - SHOW(g_lag_in_frames); - SHOW(rc_dropframe_thresh); - SHOW(rc_resize_allowed); - SHOW(rc_resize_up_thresh); - SHOW(rc_resize_down_thresh); - SHOW(rc_end_usage); - SHOW(rc_target_bitrate); - SHOW(rc_min_quantizer); - SHOW(rc_max_quantizer); - SHOW(rc_undershoot_pct); - SHOW(rc_overshoot_pct); - SHOW(rc_buf_sz); - SHOW(rc_buf_initial_sz); - SHOW(rc_buf_optimal_sz); - SHOW(rc_2pass_vbr_bias_pct); - SHOW(rc_2pass_vbr_minsection_pct); - SHOW(rc_2pass_vbr_maxsection_pct); - SHOW(kf_mode); - SHOW(kf_min_dist); - SHOW(kf_max_dist); - } - - vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12, - cfg.g_w, cfg.g_h, 1); - - // This was added so that ivfenc will create monotically increasing - // timestamps. Since we create new timestamps for alt-reference frames - // we need to make room in the series of timestamps. Since there can - // only be 1 alt-ref frame ( current bitstream) multiplying by 2 - // gives us enough room. - cfg.g_timebase.den *= 2; - - memset(&stats, 0, sizeof(stats)); - - for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++) - { - int frames_in = 0, frames_out = 0; - unsigned long nbytes = 0; - - infile = fopen(in_fn, "rb"); - - if (!infile) - { - printf("Failed to open input file"); - return EXIT_FAILURE; - } - - outfile = fopen(out_fn, "wb"); - - if (!outfile) - { - printf("Failed to open output file"); - return EXIT_FAILURE; - } - - if (stats_fn) - { - if (!stats_open_file(&stats, stats_fn, pass)) - { - printf("Failed to open statistics store\n"); - return EXIT_FAILURE; - } - } - else - { - if (!stats_open_mem(&stats, pass)) - { - printf("Failed to open statistics store\n"); - return EXIT_FAILURE; - } - } - - cfg.g_pass = arg_passes == 2 - ? 
pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS - : VPX_RC_ONE_PASS; -#if VPX_ENCODER_ABI_VERSION > (1 + VPX_CODEC_ABI_VERSION) - - if (pass) - { - cfg.rc_twopass_stats_in = stats_get(&stats); - } - -#endif - - write_ivf_file_header(outfile, &cfg, codec->fourcc, 0); - - - /* Construct Encoder Context */ - if (cfg.kf_min_dist == cfg.kf_max_dist) - cfg.kf_mode = VPX_KF_FIXED; - - vpx_codec_enc_init(&encoder, codec->iface, &cfg, - show_psnr ? VPX_CODEC_USE_PSNR : 0); - ctx_exit_on_error(&encoder, "Failed to initialize encoder"); - - /* Note that we bypass the vpx_codec_control wrapper macro because - * we're being clever to store the control IDs in an array. Real - * applications will want to make use of the enumerations directly - */ - for (i = 0; i < arg_ctrl_cnt; i++) - { - if (vpx_codec_control_(&encoder, arg_ctrls[i][0], arg_ctrls[i][1])) - printf("Error: Tried to set control %d = %d\n", - arg_ctrls[i][0], arg_ctrls[i][1]); - - ctx_exit_on_error(&encoder, "Failed to control codec"); - } - - frame_avail = 1; - got_data = 0; - - while (frame_avail || got_data) - { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - struct vpx_usec_timer timer; - - if (!arg_limit || frames_in < arg_limit) - { - frame_avail = read_frame(infile, &raw, is_ivf); - - if (frame_avail) - frames_in++; - - printf("\rPass %d/%d frame %4d/%-4d %7ldB \033[K", pass + 1, - arg_passes, frames_in, frames_out, nbytes); - } - else - frame_avail = 0; - - vpx_usec_timer_start(&timer); - - // since we halved our timebase we need to double the timestamps - // and duration we pass in. - vpx_codec_encode(&encoder, frame_avail ? 
&raw : NULL, (frames_in - 1) * 2, - 2, 0, arg_deadline); - vpx_usec_timer_mark(&timer); - cx_time += vpx_usec_timer_elapsed(&timer); - ctx_exit_on_error(&encoder, "Failed to encode frame"); - got_data = 0; - - while ((pkt = vpx_codec_get_cx_data(&encoder, &iter))) - { - got_data = 1; - - switch (pkt->kind) - { - case VPX_CODEC_CX_FRAME_PKT: - frames_out++; - printf(" %6luF", - (unsigned long)pkt->data.frame.sz); - write_ivf_frame_header(outfile, pkt); - fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile); - nbytes += pkt->data.raw.sz; - break; - case VPX_CODEC_STATS_PKT: - frames_out++; - printf(" %6luS", - (unsigned long)pkt->data.twopass_stats.sz); - stats_write(&stats, - pkt->data.twopass_stats.buf, - pkt->data.twopass_stats.sz); - nbytes += pkt->data.raw.sz; - break; - case VPX_CODEC_PSNR_PKT: - - if (show_psnr) - { - int i; - - for (i = 0; i < 4; i++) - printf("%.3lf ", pkt->data.psnr.psnr[i]); - } - - break; - default: - break; - } - } - - fflush(stdout); - } - - /* this bitrate calc is simplified and relies on the fact that this - * application uses 1/timebase for framerate. - */ - printf("\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s" - " %7lu %s (%.2f fps)\033[K", pass + 1, - arg_passes, frames_in, frames_out, nbytes, nbytes * 8 / frames_in, - nbytes * 8 *(int64_t)cfg.g_timebase.den/2/ cfg.g_timebase.num / frames_in, - cx_time > 9999999 ? cx_time / 1000 : cx_time, - cx_time > 9999999 ? 
"ms" : "us", - (float)frames_in * 1000000.0 / (float)cx_time); - - vpx_codec_destroy(&encoder); - - fclose(infile); - - if (!fseek(outfile, 0, SEEK_SET)) - write_ivf_file_header(outfile, &cfg, codec->fourcc, frames_out); - - fclose(outfile); - stats_close(&stats); - printf("\n"); - - if (one_pass_only) - break; - } - - vpx_img_free(&raw); - free(argv); - return EXIT_SUCCESS; -} diff --git a/libmkv/EbmlBufferWriter.c b/libmkv/EbmlBufferWriter.c new file mode 100644 index 000000000..d9b04a81a --- /dev/null +++ b/libmkv/EbmlBufferWriter.c @@ -0,0 +1,60 @@ +//#include +#include "EbmlBufferWriter.h" +#include "EbmlWriter.h" +//#include +//#include +//#include //_alloca +#include +#include +#include + +void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) +{ + unsigned char *src = glob->buf; + src += glob->offset; + memcpy(src, buffer_in, len); + glob->offset += len; +} + +static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q) +{ + while (q != p) + { + --q; + + unsigned long cbWritten; + memcpy(&(glob->buf[glob->offset]), q, 1); + glob->offset ++; + } +} + +void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len) +{ + //assert(buf); + + const unsigned char *const p = (const unsigned char *)(buffer_in); + const unsigned char *const q = p + len; + + _Serialize(glob, p, q); +} + + +void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) +{ + Ebml_WriteID(glob, class_id); + ebmlLoc->offset = glob->offset; + //todo this is always taking 8 bytes, this may need later optimization + unsigned long long unknownLen = 0x01FFFFFFFFFFFFFFLLU; + Ebml_Serialize(glob, (void *)&unknownLen, 8); //this is a key that says lenght unknown +} + +void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) +{ + unsigned long long size = glob->offset - ebmlLoc->offset - 8; + unsigned long long curOffset = glob->offset; + glob->offset = ebmlLoc->offset; + size |= 0x0100000000000000LLU; + 
Ebml_Serialize(glob, &size, 8); + glob->offset = curOffset; +} + diff --git a/libmkv/EbmlBufferWriter.h b/libmkv/EbmlBufferWriter.h new file mode 100644 index 000000000..ba0a9b3ab --- /dev/null +++ b/libmkv/EbmlBufferWriter.h @@ -0,0 +1,21 @@ +#ifndef EBMLBUFFERWRITER_HPP +#define EBMLBUFFERWRITER_HPP + +typedef struct +{ + unsigned long long offset; +} EbmlLoc; + +typedef struct +{ + unsigned char *buf; + unsigned int length; + unsigned int offset; +} EbmlGlobal; + + +void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id); +void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc); + + +#endif diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h new file mode 100644 index 000000000..429747063 --- /dev/null +++ b/libmkv/EbmlIDs.h @@ -0,0 +1,231 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+ + +#ifndef MKV_DEFS_HPP +#define MKV_DEFS_HPP 1 + +//Commenting out values not available in webm, but available in matroska + +enum mkv +{ + EBML = 0x1A45DFA3, + EBMLVersion = 0x4286, + EBMLReadVersion = 0x42F7, + EBMLMaxIDLength = 0x42F2, + EBMLMaxSizeLength = 0x42F3, + DocType = 0x4282, + DocTypeVersion = 0x4287, + DocTypeReadVersion = 0x4285, +// CRC_32 = 0xBF, + Void = 0xEC, + SignatureSlot = 0x1B538667, + SignatureAlgo = 0x7E8A, + SignatureHash = 0x7E9A, + SignaturePublicKey = 0x7EA5, + Signature = 0x7EB5, + SignatureElements = 0x7E5B, + SignatureElementList = 0x7E7B, + SignedElement = 0x6532, + //segment + Segment = 0x18538067, + //Meta Seek Information + SeekHead = 0x114D9B74, + Seek = 0x4DBB, + SeekID = 0x53AB, + SeekPosition = 0x53AC, + //Segment Information + Info = 0x1549A966, +// SegmentUID = 0x73A4, +// SegmentFilename = 0x7384, +// PrevUID = 0x3CB923, +// PrevFilename = 0x3C83AB, +// NextUID = 0x3EB923, +// NextFilename = 0x3E83BB, +// SegmentFamily = 0x4444, +// ChapterTranslate = 0x6924, +// ChapterTranslateEditionUID = 0x69FC, +// ChapterTranslateCodec = 0x69BF, +// ChapterTranslateID = 0x69A5, + TimecodeScale = 0x2AD7B1, + Segment_Duration = 0x4489, + DateUTC = 0x4461, +// Title = 0x7BA9, + MuxingApp = 0x4D80, + WritingApp = 0x5741, + //Cluster + Cluster = 0x1F43B675, + Timecode = 0xE7, +// SilentTracks = 0x5854, +// SilentTrackNumber = 0x58D7, +// Position = 0xA7, + PrevSize = 0xAB, + BlockGroup = 0xA0, + Block = 0xA1, +// BlockVirtual = 0xA2, +// BlockAdditions = 0x75A1, +// BlockMore = 0xA6, +// BlockAddID = 0xEE, +// BlockAdditional = 0xA5, + BlockDuration = 0x9B, +// ReferencePriority = 0xFA, + ReferenceBlock = 0xFB, +// ReferenceVirtual = 0xFD, +// CodecState = 0xA4, +// Slices = 0x8E, +// TimeSlice = 0xE8, + LaceNumber = 0xCC, +// FrameNumber = 0xCD, +// BlockAdditionID = 0xCB, +// MkvDelay = 0xCE, +// Cluster_Duration = 0xCF, + SimpleBlock = 0xA3, +// EncryptedBlock = 0xAF, + //Track + Tracks = 0x1654AE6B, + TrackEntry = 0xAE, + 
TrackNumber = 0xD7, + TrackUID = 0x73C5, + TrackType = 0x83, + FlagEnabled = 0xB9, + FlagDefault = 0x88, + FlagForced = 0x55AA, + FlagLacing = 0x9C, +// MinCache = 0x6DE7, +// MaxCache = 0x6DF8, + DefaultDuration = 0x23E383, +// TrackTimecodeScale = 0x23314F, +// TrackOffset = 0x537F, +// MaxBlockAdditionID = 0x55EE, + Name = 0x536E, + Language = 0x22B59C, + CodecID = 0x86, + CodecPrivate = 0x63A2, + CodecName = 0x258688, +// AttachmentLink = 0x7446, +// CodecSettings = 0x3A9697, +// CodecInfoURL = 0x3B4040, +// CodecDownloadURL = 0x26B240, +// CodecDecodeAll = 0xAA, +// TrackOverlay = 0x6FAB, +// TrackTranslate = 0x6624, +// TrackTranslateEditionUID = 0x66FC, +// TrackTranslateCodec = 0x66BF, +// TrackTranslateTrackID = 0x66A5, + //video + Video = 0xE0, + FlagInterlaced = 0x9A, +// StereoMode = 0x53B8, + PixelWidth = 0xB0, + PixelHeight = 0xBA, + PixelCropBottom = 0x54AA, + PixelCropTop = 0x54BB, + PixelCropLeft = 0x54CC, + PixelCropRight = 0x54DD, + DisplayWidth = 0x54B0, + DisplayHeight = 0x54BA, + DisplayUnit = 0x54B2, + AspectRatioType = 0x54B3, +// ColourSpace = 0x2EB524, +// GammaValue = 0x2FB523, + FrameRate = 0x2383E3, + //end video + //audio + Audio = 0xE1, + SamplingFrequency = 0xB5, + OutputSamplingFrequency = 0x78B5, + Channels = 0x9F, +// ChannelPositions = 0x7D7B, + BitDepth = 0x6264, + //end audio + //content encoding +// ContentEncodings = 0x6d80, +// ContentEncoding = 0x6240, +// ContentEncodingOrder = 0x5031, +// ContentEncodingScope = 0x5032, +// ContentEncodingType = 0x5033, +// ContentCompression = 0x5034, +// ContentCompAlgo = 0x4254, +// ContentCompSettings = 0x4255, +// ContentEncryption = 0x5035, +// ContentEncAlgo = 0x47e1, +// ContentEncKeyID = 0x47e2, +// ContentSignature = 0x47e3, +// ContentSigKeyID = 0x47e4, +// ContentSigAlgo = 0x47e5, +// ContentSigHashAlgo = 0x47e6, + //end content encoding + //Cueing Data + Cues = 0x1C53BB6B, + CuePoint = 0xBB, + CueTime = 0xB3, + CueTrackPositions = 0xB7, + CueTrack = 0xF7, + CueClusterPosition 
= 0xF1, + CueBlockNumber = 0x5378, +// CueCodecState = 0xEA, +// CueReference = 0xDB, +// CueRefTime = 0x96, +// CueRefCluster = 0x97, +// CueRefNumber = 0x535F, +// CueRefCodecState = 0xEB, + //Attachment +// Attachments = 0x1941A469, +// AttachedFile = 0x61A7, +// FileDescription = 0x467E, +// FileName = 0x466E, +// FileMimeType = 0x4660, +// FileData = 0x465C, +// FileUID = 0x46AE, +// FileReferral = 0x4675, + //Chapters +// Chapters = 0x1043A770, +// EditionEntry = 0x45B9, +// EditionUID = 0x45BC, +// EditionFlagHidden = 0x45BD, +// EditionFlagDefault = 0x45DB, +// EditionFlagOrdered = 0x45DD, +// ChapterAtom = 0xB6, +// ChapterUID = 0x73C4, +// ChapterTimeStart = 0x91, +// ChapterTimeEnd = 0x92, +// ChapterFlagHidden = 0x98, +// ChapterFlagEnabled = 0x4598, +// ChapterSegmentUID = 0x6E67, +// ChapterSegmentEditionUID = 0x6EBC, +// ChapterPhysicalEquiv = 0x63C3, +// ChapterTrack = 0x8F, +// ChapterTrackNumber = 0x89, +// ChapterDisplay = 0x80, +// ChapString = 0x85, +// ChapLanguage = 0x437C, +// ChapCountry = 0x437E, +// ChapProcess = 0x6944, +// ChapProcessCodecID = 0x6955, +// ChapProcessPrivate = 0x450D, +// ChapProcessCommand = 0x6911, +// ChapProcessTime = 0x6922, +// ChapProcessData = 0x6933, + //Tagging +// Tags = 0x1254C367, +// Tag = 0x7373, +// Targets = 0x63C0, +// TargetTypeValue = 0x68CA, +// TargetType = 0x63CA, +// Tagging_TrackUID = 0x63C5, +// Tagging_EditionUID = 0x63C9, +// Tagging_ChapterUID = 0x63C4, +// AttachmentUID = 0x63C6, +// SimpleTag = 0x67C8, +// TagName = 0x45A3, +// TagLanguage = 0x447A, +// TagDefault = 0x4484, +// TagString = 0x4487, +// TagBinary = 0x4485, +}; +#endif diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c new file mode 100644 index 000000000..9d564c177 --- /dev/null +++ b/libmkv/EbmlWriter.c @@ -0,0 +1,166 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + + +#include "EbmlWriter.h" +#include +#include +#include +#if defined(_MSC_VER) +#define LITERALU64(n) n +#else +#define LITERALU64(n) n##LLU +#endif + +void Ebml_WriteLen(EbmlGlobal *glob, long long val) +{ + //TODO check and make sure we are not > than 0x0100000000000000LLU + unsigned char size = 8; //size in bytes to output + unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size + + for (size = 1; size < 8; size ++) + { + if (val < minVal) + break; + + minVal = (minVal << 7); + } + + val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); + + Ebml_Serialize(glob, (void *) &val, size); +} + +void Ebml_WriteString(EbmlGlobal *glob, const char *str) +{ + const size_t size_ = strlen(str); + const unsigned long long size = size_; + Ebml_WriteLen(glob, size); + //TODO: it's not clear from the spec whether the nul terminator + //should be serialized too. For now we omit the null terminator. + Ebml_Write(glob, str, size); +} + +void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) +{ + const size_t strlen = wcslen(wstr); + + //TODO: it's not clear from the spec whether the nul terminator + //should be serialized too. For now we include it. 
+ const unsigned long long size = strlen; + + Ebml_WriteLen(glob, size); + Ebml_Write(glob, wstr, size); +} + +void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) +{ + if (class_id >= 0x01000000) + Ebml_Serialize(glob, (void *)&class_id, 4); + else if (class_id >= 0x00010000) + Ebml_Serialize(glob, (void *)&class_id, 3); + else if (class_id >= 0x00000100) + Ebml_Serialize(glob, (void *)&class_id, 2); + else + Ebml_Serialize(glob, (void *)&class_id, 1); +} +void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) +{ + unsigned char sizeSerialized = 8 | 0x80; + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &sizeSerialized, 1); + Ebml_Serialize(glob, &ui, 8); +} + +void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) +{ + unsigned char size = 8; //size in bytes to output + unsigned char sizeSerialized = 0; + unsigned long minVal; + + Ebml_WriteID(glob, class_id); + minVal = 0x7fLU; //mask to compare for byte size + + for (size = 1; size < 4; size ++) + { + if (ui < minVal) + { + break; + } + + minVal <<= 7; + } + + sizeSerialized = 0x80 | size; + Ebml_Serialize(glob, &sizeSerialized, 1); + Ebml_Serialize(glob, &ui, size); +} +//TODO: perhaps this is a poor name for this id serializer helper function +void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) +{ + int size; + for (size=4; size > 1; size--) + { + if (bin & 0x000000ff << ((size-1) * 8)) + break; + } + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, size); + Ebml_WriteID(glob, bin); +} + +void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) +{ + unsigned char len = 0x88; + + Ebml_WriteID(glob, class_id); + Ebml_Serialize(glob, &len, 1); + Ebml_Serialize(glob, &d, 8); +} + +void Ebml_WriteSigned16(EbmlGlobal *glob, short val) +{ + signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; + Ebml_Serialize(glob, &out, 3); +} + +void Ebml_SerializeString(EbmlGlobal *glob, 
unsigned long class_id, const char *s) +{ + Ebml_WriteID(glob, class_id); + Ebml_WriteString(glob, s); +} + +void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) +{ + Ebml_WriteID(glob, class_id); + Ebml_WriteUTF8(glob, s); +} + +void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) +{ + unsigned char size = 4; + Ebml_WriteID(glob, class_id); + Ebml_WriteLen(glob, data_length); + Ebml_Write(glob, data, data_length); +} + +void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) +{ + unsigned char tmp = 0; + unsigned long i = 0; + + Ebml_WriteID(glob, 0xEC); + Ebml_WriteLen(glob, vSize); + + for (i = 0; i < vSize; i++) + { + Ebml_Write(glob, &tmp, 1); + } +} + +//TODO Serialize Date diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h new file mode 100644 index 000000000..8c7fe7c66 --- /dev/null +++ b/libmkv/EbmlWriter.h @@ -0,0 +1,38 @@ +#ifndef EBMLWRITER_HPP +#define EBMLWRITER_HPP + +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. 
+ +//note: you must define write and serialize functions as well as your own EBML_GLOBAL +//These functions MUST be implemented +#include +#include "vpx/vpx_integer.h" + +typedef struct EbmlGlobal EbmlGlobal; +void Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long); +void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); +///// + + +void Ebml_WriteLen(EbmlGlobal *glob, long long val); +void Ebml_WriteString(EbmlGlobal *glob, const char *str); +void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); +void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); +void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui); +void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); +void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); +void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); +//TODO make this more generic to signed +void Ebml_WriteSigned16(EbmlGlobal *glob, short val); +void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); +void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); +void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); +void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); +//TODO need date function +#endif diff --git a/libmkv/Makefile b/libmkv/Makefile new file mode 100644 index 000000000..b53377b21 --- /dev/null +++ b/libmkv/Makefile @@ -0,0 +1,25 @@ +#Variables +CC=gcc +LINKER=gcc +FLAGS= + + +#Build Targets +EbmlWriter.o: EbmlWriter.c EbmlWriter.h + $(CC) $(FLAGS) -c EbmlWriter.c + +EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h + $(CC) $(FLAGS) -c EbmlBufferWriter.c + +MkvElement.o: MkvElement.c WebMElement.h + $(CC) $(FLAGS) -c MkvElement.c + +testlibmkv.o: testlibmkv.c + $(CC) $(FLAGS) -c testlibmkv.c + +testlibmkv: testlibmkv.o MkvElement.o EbmlBufferWriter.o 
EbmlWriter.o + $(LINKER) $(FLAGS) testlibmkv.o MkvElement.o EbmlBufferWriter.o EbmlWriter.o -o testlibmkv + +clean: + rm -rf *.o testlibmkv + \ No newline at end of file diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c new file mode 100644 index 000000000..25a90249a --- /dev/null +++ b/libmkv/WebMElement.c @@ -0,0 +1,220 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + + +#include "EbmlBufferWriter.h" +#include "EbmlIDs.h" +#include "WebMElement.h" +#include + +#define kVorbisPrivateMaxSize 4000 + +void writeHeader(EbmlGlobal *glob) +{ + EbmlLoc start; + Ebml_StartSubElement(glob, &start, EBML); + Ebml_SerializeUnsigned(glob, EBMLVersion, 1); + Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version + Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length + Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length + Ebml_SerializeString(glob, DocType, "webm"); //Doc Type + Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version + Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version + Ebml_EndSubElement(glob, &start); +} + +void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode, + int isKeyframe, unsigned char lacingFlag, int discardable, + unsigned char *data, unsigned long dataLength) +{ + Ebml_WriteID(glob, SimpleBlock); + unsigned long blockLength = 4 + dataLength; + blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF + Ebml_Serialize(glob, &blockLength, 4); + trackNumber |= 0x80; //TODO check track nubmer < 128 + Ebml_Write(glob, &trackNumber, 1); + //Ebml_WriteSigned16(glob, 
timeCode,2); //this is 3 bytes + Ebml_Serialize(glob, &timeCode, 2); + unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable; + Ebml_Write(glob, &flags, 1); + Ebml_Write(glob, data, dataLength); +} + +static UInt64 generateTrackID(unsigned int trackNumber) +{ + UInt64 t = time(NULL) * trackNumber; + UInt64 r = rand(); + r = r << 32; + r += rand(); + UInt64 rval = t ^ r; + return rval; +} + +void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, + char *codecId, unsigned int pixelWidth, unsigned int pixelHeight, + double frameRate) +{ + EbmlLoc start; + Ebml_StartSubElement(glob, &start, TrackEntry); + Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); + UInt64 trackID = generateTrackID(trackNumber); + Ebml_SerializeUnsigned(glob, TrackUID, trackID); + Ebml_SerializeString(glob, CodecName, "VP8"); //TODO shouldn't be fixed + + Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1 + Ebml_SerializeString(glob, CodecID, codecId); + { + EbmlLoc videoStart; + Ebml_StartSubElement(glob, &videoStart, Video); + Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); + Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); + Ebml_SerializeFloat(glob, FrameRate, frameRate); + Ebml_EndSubElement(glob, &videoStart); //Video + } + Ebml_EndSubElement(glob, &start); //Track Entry +} +void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, + char *codecId, double samplingFrequency, unsigned int channels, + unsigned char *private, unsigned long privateSize) +{ + EbmlLoc start; + Ebml_StartSubElement(glob, &start, TrackEntry); + Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); + UInt64 trackID = generateTrackID(trackNumber); + Ebml_SerializeUnsigned(glob, TrackUID, trackID); + Ebml_SerializeUnsigned(glob, TrackType, 2); //audio is always 2 + //I am using defaults for thesed required fields + /* Ebml_SerializeUnsigned(glob, FlagEnabled, 1); + Ebml_SerializeUnsigned(glob, 
FlagDefault, 1); + Ebml_SerializeUnsigned(glob, FlagForced, 1); + Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/ + Ebml_SerializeString(glob, CodecID, codecId); + Ebml_SerializeData(glob, CodecPrivate, private, privateSize); + + Ebml_SerializeString(glob, CodecName, "VORBIS"); //fixed for now + { + EbmlLoc AudioStart; + Ebml_StartSubElement(glob, &AudioStart, Audio); + Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency); + Ebml_SerializeUnsigned(glob, Channels, channels); + Ebml_EndSubElement(glob, &AudioStart); + } + Ebml_EndSubElement(glob, &start); +} +void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo, unsigned long timeCodeScale, double duration) +{ + Ebml_StartSubElement(ebml, startInfo, Info); + Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale); + Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); //Currently fixed to using milliseconds + Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1"); + Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1"); + Ebml_EndSubElement(ebml, startInfo); +} + +/* +void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc) +{ + Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067); +} + +void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc) +{ + Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74); +} +void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation) +{ + EbmlLoc ebmlLoc; + Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb); + Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID); + Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition); + Ebml_EndSubElement(ebml_out, ebmlLoc); +} + +void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation) +{ + Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID); + if (segmentInformation.filename != 0) + Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename); + Ebml_SerializeUnsigned(ebml_out, 
0x2AD7B1, segmentInformation.TimecodeScale); + Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration); + //TODO date + Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX"); + Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp); +} + +void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc) +{ + Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B); +} + +static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track) +{ + Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber); + Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID); + Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType); + Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0); + Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0); + Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0); + if (track.Language != 0) + Ebml_SerializeString(ebml_out, 0x22B59C, track.Language); + if (track.CodecID != 0) + Ebml_SerializeString(ebml_out, 0x86, track.CodecID); + if (track.CodecPrivate != 0) + Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength); + if (track.CodecName != 0) + Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName); +} + +void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video) +{ + EbmlLoc trackHeadLoc, videoHeadLoc; + Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); //start Track + Mkv_WriteGenericTrackData(ebml_out, track); + Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0); //start Video + Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 
1 :0); + Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth); + Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight); + Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth); + Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight); + Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit); + Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate); + Ebml_EndSubElement(ebml_out, videoHeadLoc); + Ebml_EndSubElement(ebml_out, trackHeadLoc); + +} + +void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video) +{ + EbmlLoc trackHeadLoc, audioHeadLoc; + Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); + Mkv_WriteGenericTrackData(ebml_out, track); + Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0); //start Audio + Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency); + Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels); + Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth); + Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio + Ebml_EndSubElement(ebml_out, trackHeadLoc); +} + +void Mkv_WriteEbmlClusterHead(Ebml& ebml_out, EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead) +{ + Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675); + Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode); +} + +void Mkv_WriteSimpleBlockHead(Ebml& ebml_out, EbmlLoc& ebmlLoc, SimpleBlockStruct& block) +{ + Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3); + Ebml_Write1UInt(ebml_out, block.TrackNumber); + Ebml_WriteSigned16(ebml_out,block.TimeCode); + unsigned char flags = 0x00 | (block.iskey ? 
0x80:0x00) | (block.lacing << 1) | block.discardable; + Ebml_Write1UInt(ebml_out, flags); //TODO this may be the wrong function + Ebml_Serialize(ebml_out, block.data, block.dataLength); + Ebml_EndSubElement(ebml_out,ebmlLoc); +} +*/ diff --git a/libmkv/WebMElement.h b/libmkv/WebMElement.h new file mode 100644 index 000000000..b4208f285 --- /dev/null +++ b/libmkv/WebMElement.h @@ -0,0 +1,35 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + + +#ifndef MKV_CONTEXT_HPP +#define MKV_CONTEXT_HPP 1 + +void writeSimpleBock(EbmlGlobal *ebml, unsigned char trackNumber, unsigned short timeCode, + int isKeyframe, unsigned char lacingFlag, int discardable, + unsigned char *data, unsigned long dataLength); + + +// these are helper functions +void writeHeader(EbmlGlobal *ebml); +void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo , unsigned long timeCodeScale, double duration); +//this function is a helper only, it assumes a lot of defaults +void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing, + char *codecId, unsigned int pixelWidth, unsigned int pixelHeight, + double frameRate); +void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, + char *codecId, double samplingFrequency, unsigned int channels, + unsigned char *private, unsigned long privateSize); + +void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode, + int isKeyframe, unsigned char lacingFlag, int discardable, + unsigned char *data, unsigned long dataLength); + + + +#endif \ No newline at end of file diff --git a/libmkv/testlibmkv.c b/libmkv/testlibmkv.c new file mode 
100644 index 000000000..7edfc4347 --- /dev/null +++ b/libmkv/testlibmkv.c @@ -0,0 +1,63 @@ +// Copyright (c) 2010 The WebM project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. + + + +#include "EbmlIDs.h" +#include "EbmlBufferWriter.h" +#include "WebMElement.h" + +#include <stdio.h> +int main(int argc, char *argv[]) +{ + //init the datatype we're using for ebml output + unsigned char data[8192]; + EbmlGlobal ebml; + ebml.buf = data; + ebml.offset = 0; + ebml.length = 8192; + + writeHeader(&ebml); + { + EbmlLoc startSegment; + Ebml_StartSubElement(&ebml, &startSegment, Segment); //segment + { + //segment info + EbmlLoc startInfo; + Ebml_StartSubElement(&ebml, &startInfo, Info); + Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv"); + Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv"); + Ebml_EndSubElement(&ebml, &startInfo); + } + + { + EbmlLoc trackStart; + Ebml_StartSubElement(&ebml, &trackStart, Tracks); + writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97); + //writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0); + Ebml_EndSubElement(&ebml, &trackStart); + } + + { + EbmlLoc clusterStart; + Ebml_StartSubElement(&ebml, &clusterStart, Cluster); //cluster + Ebml_SerializeUnsigned(&ebml, Timecode, 0); + + unsigned char someData[4] = {1, 2, 3, 4}; + writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4); + Ebml_EndSubElement(&ebml, &clusterStart); + } //end cluster + Ebml_EndSubElement(&ebml, &startSegment); + } + + //dump ebml stuff to the file + FILE *file_out = fopen("test.mkv", "wb"); + size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out); + fclose(file_out); + return 0; +} \ No newline at end of file diff --git
a/libs.doxy_template b/libs.doxy_template index eb37dfc18..02e290242 100644 --- a/libs.doxy_template +++ b/libs.doxy_template @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/libs.mk b/libs.mk index 544e71a2a..9ded3945a 100644 --- a/libs.mk +++ b/libs.mk @@ -1,15 +1,18 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
## ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm) +CODEC_SRCS-yes += libs.mk + include $(SRC_PATH_BARE)/vpx/vpx_codec.mk CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) @@ -58,7 +61,6 @@ CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) # This variable uses deferred expansion intentionally, since the results of # $(wildcard) may change during the course of the Make. VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) -CODEC_SRCS-yes += $(SRC_PATH_BARE)/libs.mk # to show up in the msvs workspace endif # The following pairs define a mapping of locations in the distribution @@ -89,9 +91,13 @@ ifeq ($(ARCH_X86)$(ARCH_X86_64),yes) CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm +CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c endif +CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm -CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports +CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com +CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc +CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec INSTALL-LIBS-yes += include/vpx/vpx_codec.h INSTALL-LIBS-yes += include/vpx/vpx_image.h @@ -129,7 +135,6 @@ ARM_ARCH=v6 endif obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c @cp $(SRC_PATH_BARE)/build/arm-wince-vs8/obj_int_extract.bat . - @cp $(SRC_PATH_BARE)/build/arm-wince-vs8/armasm$(ARM_ARCH).rules . 
@echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\ --exe\ @@ -142,7 +147,6 @@ obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat -PROJECTS-$(BUILD_LIBVPX) += armasm$(ARM_ARCH).rules endif vpx.def: $(call enabled,CODEC_EXPORTS) @@ -174,6 +178,31 @@ LIBVPX_OBJS=$(call objs,$(CODEC_SRCS)) OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS) LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) + +BUILD_LIBVPX_SO := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED)) +LIBVPX_SO := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH) +LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO) +$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver +$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm -pthread +$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR) +$(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver +LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ + libvpx.so libvpx.so.$(VERSION_MAJOR) \ + libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR)) + +libvpx.ver: $(call enabled,CODEC_EXPORTS) + @echo " [CREATE] $@" + $(qexec)echo "{ global:" > $@ + $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done + $(qexec)echo "local: *; };" >> $@ +CLEAN-OBJS += libvpx.ver + +$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)): + @echo " [LN] $@" + $(qexec)ln -sf $(LIBVPX_SO) $@ + +INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS) +INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO) endif LIBS-$(LIPO_LIBVPX) += libvpx.a diff --git a/mainpage.dox b/mainpage.dox index 3596ce0d7..49dff7b5b 100644 --- a/mainpage.dox +++ b/mainpage.dox @@ -11,7 +11,7 @@ source codec deployed on millions of computers and devices worldwide. 
This distribution of the WebM VP8 Codec SDK includes the following support: - + \if vp8_encoder - \ref vp8_encoder \endif \if vp8_decoder - \ref vp8_decoder \endif diff --git a/md5_utils.c b/md5_utils.c index 16c6f7e68..455d9cd2b 100644 --- a/md5_utils.c +++ b/md5_utils.c @@ -1,298 +1,253 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * This code implements the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + * + * Changed so as no longer to depend on Colin Plumb's `usual.h' header + * definitions + * - Ian Jackson . + * Still in the public domain. */ +#include <sys/types.h> /* for stupid systems */ -/* -Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -rights reserved. - -License to copy and use this software is granted provided that it -is identified as the "RSA Data Security, Inc. MD5 Message-Digest -Algorithm" in all material mentioning or referencing this software -or this function. - -License is also granted to make and use derivative works provided -that such works are identified as "derived from the RSA Data -Security, Inc.
MD5 Message-Digest Algorithm" in all material -mentioning or referencing the derived work. - -RSA Data Security, Inc. makes no representations concerning either -the merchantability of this software or the suitability of this -software for any particular purpose. It is provided "as is" -without express or implied warranty of any kind. - -These notices must be retained in any copies of any part of this -documentation and/or software. -*/ +#include <string.h> /* for memcpy() */ #include "md5_utils.h" -#include <string.h> -/* Constants for md5_transform routine. - */ -#define S11 7 -#define S12 12 -#define S13 17 -#define S14 22 -#define S21 5 -#define S22 9 -#define S23 14 -#define S24 20 -#define S31 4 -#define S32 11 -#define S33 16 -#define S34 23 -#define S41 6 -#define S42 10 -#define S43 15 -#define S44 21 - -static void md5_transform(uint32_t state[4], const uint8_t block[64]); -static void Encode(uint8_t *output, const uint32_t *input, unsigned int len); -static void Decode(uint32_t *output, const uint8_t *input, unsigned int len); -#define md5_memset memset -#define md5_memcpy memcpy - -static unsigned char PADDING[64] = +void +byteSwap(UWORD32 *buf, unsigned words) { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; + md5byte *p; -/* F, G, H and I are basic MD5 functions. - */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (z)) | ((y) & (~z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) -#define I(x, y, z) ((y) ^ ((x) | (~z))) + /* Only swap bytes for big endian machines */ + int i = 1; -/* ROTATE_LEFT rotates x left n bits. - */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) + if (*(char *)&i == 1) + return; -/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4. -Rotation is separate from addition to prevent recomputation.
- */ -#define FF(a, b, c, d, x, s, ac) { \ - (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define GG(a, b, c, d, x, s, ac) { \ - (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define HH(a, b, c, d, x, s, ac) { \ - (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } -#define II(a, b, c, d, x, s, ac) { \ - (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \ - (a) = ROTATE_LEFT ((a), (s)); \ - (a) += (b); \ - } + p = (md5byte *)buf; -/* MD5 initialization. Begins an MD5 operation, writing a new context. - */ -void md5_init(md5_ctx_t *context) -{ - context->count[0] = context->count[1] = 0; - /* Load magic initialization constants. - */ - context->state[0] = 0x67452301; - context->state[1] = 0xefcdab89; - context->state[2] = 0x98badcfe; - context->state[3] = 0x10325476; -} - -/* MD5 block update operation. Continues an MD5 message-digest - operation, processing another message block, and updating the - context. - */ -void md5_update(md5_ctx_t *context, const uint8_t *input, unsigned int input_len) -{ - unsigned int i, index, part_len; - - /* Compute number of bytes mod 64 */ - index = (unsigned int)((context->count[0] >> 3) & 0x3F); - - /* Update number of bits */ - if ((context->count[0] += ((uint32_t)input_len << 3)) - < ((uint32_t)input_len << 3)) - context->count[1]++; - - context->count[1] += ((uint32_t)input_len >> 29); - - part_len = 64 - index; - - /* Transform as many times as possible. 
*/ - if (input_len >= part_len) + do { - memcpy(&context->buffer[index], input, part_len); - md5_transform(context->state, context->buffer); - - for (i = part_len; i + 63 < input_len; i += 64) - md5_transform(context->state, &input[i]); - - index = 0; + *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 | + ((unsigned)p[1] << 8 | p[0]); + p += 4; } - else - i = 0; - - /* Buffer remaining input */ - memcpy(&context->buffer[index], &input[i], input_len - i); + while (--words); } -/* MD5 finalization. Ends an MD5 message-digest operation, writing the - the message digest and zeroizing the context. +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. */ -void md5_finalize(md5_ctx_t *context, uint8_t digest[16]) +void +MD5Init(struct MD5Context *ctx) { - unsigned char bits[8]; - unsigned int index, pad_len; + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; - /* Save number of bits */ - Encode(bits, context->count, 8); - - /* Pad out to 56 mod 64. - */ - index = (unsigned int)((context->count[0] >> 3) & 0x3f); - pad_len = (index < 56) ? (56 - index) : (120 - index); - md5_update(context, PADDING, pad_len); - - /* Append length (before padding) */ - md5_update(context, bits, 8); - /* Store state in digest */ - Encode(digest, context->state, 16); - - /* Zeroize sensitive information. - */ - memset(context, 0, sizeof(*context)); + ctx->bytes[0] = 0; + ctx->bytes[1] = 0; } -/* MD5 basic transformation. Transforms state based on block. +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. 
*/ -static void md5_transform(uint32_t state[4], const uint8_t block[64]) +void +MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) { - uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16]; + UWORD32 t; - Decode(x, block, 64); + /* Update byte count */ - /* Round 1 */ - FF(a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */ - FF(d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */ - FF(c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */ - FF(b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */ - FF(a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */ - FF(d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */ - FF(c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */ - FF(b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */ - FF(a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */ - FF(d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */ - FF(c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */ - FF(b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */ - FF(a, b, c, d, x[12], S11, 0x6b901122); /* 13 */ - FF(d, a, b, c, x[13], S12, 0xfd987193); /* 14 */ - FF(c, d, a, b, x[14], S13, 0xa679438e); /* 15 */ - FF(b, c, d, a, x[15], S14, 0x49b40821); /* 16 */ + t = ctx->bytes[0]; - /* Round 2 */ - GG(a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */ - GG(d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */ - GG(c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */ - GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */ - GG(a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */ - GG(d, a, b, c, x[10], S22, 0x2441453); /* 22 */ - GG(c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */ - GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */ - GG(a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */ - GG(d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */ - GG(c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */ - GG(b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */ - GG(a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */ - GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */ - GG(c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */ - GG(b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */ + if ((ctx->bytes[0] = t + len) < t) 
+ ctx->bytes[1]++; /* Carry from low to high */ - /* Round 3 */ - HH(a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */ - HH(d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */ - HH(c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */ - HH(b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */ - HH(a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */ - HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */ - HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */ - HH(b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */ - HH(a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */ - HH(d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */ - HH(c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */ - HH(b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */ - HH(a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */ - HH(d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */ - HH(c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */ - HH(b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */ + t = 64 - (t & 0x3f); /* Space available in ctx->in (at least 1) */ - /* Round 4 */ - II(a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */ - II(d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */ - II(c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */ - II(b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */ - II(a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */ - II(d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */ - II(c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */ - II(b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */ - II(a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */ - II(d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */ - II(c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */ - II(b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */ - II(a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */ - II(d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */ - II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */ - II(b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */ - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - - /* Zeroize sensitive information. - */ - memset(x, 0, sizeof(x)); -} - -/* Encodes input (uint32_t) into output (unsigned char). 
Assumes len is - a multiple of 4. - */ -static void Encode(uint8_t *output, const uint32_t *input, unsigned int len) -{ - unsigned int i, j; - - for (i = 0, j = 0; j < len; i++, j += 4) + if (t > len) { - output[j] = (unsigned char)(input[i] & 0xff); - output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); - output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); - output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); + memcpy((md5byte *)ctx->in + 64 - t, buf, len); + return; } + + /* First chunk is an odd size */ + memcpy((md5byte *)ctx->in + 64 - t, buf, t); + byteSwap(ctx->in, 16); + MD5Transform(ctx->buf, ctx->in); + buf += t; + len -= t; + + /* Process data in 64-byte chunks */ + while (len >= 64) + { + memcpy(ctx->in, buf, 64); + byteSwap(ctx->in, 16); + MD5Transform(ctx->buf, ctx->in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + memcpy(ctx->in, buf, len); } -/* Decodes input (unsigned char) into output (uint32_t). Assumes len is - a multiple of 4. +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) */ -static void Decode(uint32_t *output, const uint8_t *input, unsigned int len) +void +MD5Final(md5byte digest[16], struct MD5Context *ctx) { - unsigned int i, j; + int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */ + md5byte *p = (md5byte *)ctx->in + count; - for (i = 0, j = 0; j < len; i++, j += 4) - output[i] = ((uint32_t)input[j]) | (((uint32_t)input[j+1]) << 8) | - (((uint32_t)input[j+2]) << 16) | (((uint32_t)input[j+3]) << 24); + /* Set the first char of padding to 0x80. There is always room. 
*/ + *p++ = 0x80; + + /* Bytes of padding needed to make 56 bytes (-8..55) */ + count = 56 - 1 - count; + + if (count < 0) /* Padding forces an extra block */ + { + memset(p, 0, count + 8); + byteSwap(ctx->in, 16); + MD5Transform(ctx->buf, ctx->in); + p = (md5byte *)ctx->in; + count = 56; + } + + memset(p, 0, count); + byteSwap(ctx->in, 14); + + /* Append length in bits and transform */ + ctx->in[14] = ctx->bytes[0] << 3; + ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29; + MD5Transform(ctx->buf, ctx->in); + + byteSwap(ctx->buf, 4); + memcpy(digest, ctx->buf, 16); + memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ } + +#ifndef ASM_MD5 + +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f,w,x,y,z,in,s) \ + (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine.
+ */ +void +MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) +{ + register UWORD32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + 
MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +#endif diff --git a/md5_utils.h b/md5_utils.h index 6c0e93e52..5ca1b5f28 100644 --- a/md5_utils.h +++ b/md5_utils.h @@ -1,44 +1,42 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * This is the header file for the MD5 message-digest algorithm. + * The algorithm is due to Ron Rivest. This code was + * written by Colin Plumb in 1993, no copyright is claimed. + * This code is in the public domain; do with it what you wish. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Equivalent code is available from RSA Data Security, Inc. + * This code has been tested against that, and is equivalent, + * except that you don't need to include two pages of legalese + * with every copy. + * + * To compute the message digest of a chunk of bytes, declare an + * MD5Context structure, pass it to MD5Init, call MD5Update as + * needed on buffers full of bytes, and then call MD5Final, which + * will fill a supplied 16-byte array with the digest. + * + * Changed so as no longer to depend on Colin Plumb's `usual.h' + * header definitions + * - Ian Jackson . + * Still in the public domain. */ -/* -Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -rights reserved. +#ifndef MD5_H +#define MD5_H -License to copy and use this software is granted provided that it -is identified as the "RSA Data Security, Inc. MD5 Message-Digest -Algorithm" in all material mentioning or referencing this software -or this function. +#define md5byte unsigned char +#define UWORD32 unsigned int -License is also granted to make and use derivative works provided -that such works are identified as "derived from the RSA Data -Security, Inc. MD5 Message-Digest Algorithm" in all material -mentioning or referencing the derived work. - -RSA Data Security, Inc. makes no representations concerning either -the merchantability of this software or the suitability of this -software for any particular purpose. It is provided "as is" -without express or implied warranty of any kind. - -These notices must be retained in any copies of any part of this -documentation and/or software. -*/ -#include "vpx/vpx_integer.h" - -/* MD5 context. 
*/ -typedef struct +typedef struct MD5Context MD5Context; +struct MD5Context { - uint32_t state[4]; /* state (ABCD) */ - uint32_t count[2]; /* number of bits, modulo 2^64 (lsb first) */ - uint8_t buffer[64]; /* input buffer */ -} md5_ctx_t; + UWORD32 buf[4]; + UWORD32 bytes[2]; + UWORD32 in[16]; +}; -void md5_init(md5_ctx_t *ctx); -void md5_update(md5_ctx_t *ctx, const uint8_t *buf, unsigned int len); -void md5_finalize(md5_ctx_t *ctx, uint8_t md5[16]); +void MD5Init(struct MD5Context *context); +void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len); +void MD5Final(unsigned char digest[16], struct MD5Context *context); +void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]); + +#endif /* !MD5_H */ diff --git a/nestegg/.gitignore b/nestegg/.gitignore new file mode 100644 index 000000000..b2ba99c3f --- /dev/null +++ b/nestegg/.gitignore @@ -0,0 +1,40 @@ +*.lo +*.o +*.swp +*~ +.deps +.dirstamp +.libs +Makefile +Makefile.in +_stdint.h +aclocal.m4 +autom4te.cache +compile +config.guess +config.h +config.h.in +config.log +config.status +config.sub +configure +depcomp +docs/Doxyfile +docs/doxygen-build.stamp +docs/html +install-sh +libtool +ltmain.sh +m4/libtool.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/ltversion.m4 +m4/lt~obsolete.m4 +missing +nestegg-uninstalled.pc +nestegg.pc +src/.dirstamp +src/libnestegg.la +stamp-h1 +test/test +include/nestegg/nestegg-stdint.h diff --git a/nestegg/AUTHORS b/nestegg/AUTHORS new file mode 100644 index 000000000..8204f40f4 --- /dev/null +++ b/nestegg/AUTHORS @@ -0,0 +1 @@ +Matthew Gregan diff --git a/nestegg/INSTALL b/nestegg/INSTALL new file mode 100644 index 000000000..401df4184 --- /dev/null +++ b/nestegg/INSTALL @@ -0,0 +1,8 @@ +Build instructions for libnestegg +================================= + +0. Change directory into the source directory. +1. Run |autoreconf --install| to generate configure. +2. Run |./configure| to configure the build. +3. Run |make| to build. +4. Run |make check| to run the test suite. 
diff --git a/nestegg/LICENSE b/nestegg/LICENSE new file mode 100644 index 000000000..a67984a61 --- /dev/null +++ b/nestegg/LICENSE @@ -0,0 +1,13 @@ +Copyright © 2010 Mozilla Foundation + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/nestegg/Makefile.am b/nestegg/Makefile.am new file mode 100644 index 000000000..500699160 --- /dev/null +++ b/nestegg/Makefile.am @@ -0,0 +1,51 @@ +AUTOMAKE_OPTIONS = foreign 1.11 no-dist-gzip dist-bzip2 subdir-objects +ACLOCAL_AMFLAGS = -I m4 + +INCLUDES = -I$(top_srcdir)/include -I. 
-I$(top_srcdir)/halloc +AM_CFLAGS = -ansi -pedantic -Wall -Wextra -Wno-long-long -O0 -g + +SUBDIRS = docs + +EXTRA_DIST = \ + AUTHORS README LICENSE \ + nestegg-uninstalled.pc.in \ + m4/as-ac-expand.m4 \ + m4/pkg.m4 \ + m4/ax_create_stdint_h.m4 \ + halloc/src/halloc.c \ + halloc/halloc.h \ + halloc/src/align.h \ + halloc/src/hlist.h \ + halloc/src/macros.h + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = nestegg.pc + +nesteggincludedir = $(includedir)/nestegg +nestegginclude_HEADERS = include/nestegg/nestegg.h include/nestegg/nestegg-stdint.h + +lib_LTLIBRARIES = src/libnestegg.la + +src_libnestegg_la_SOURCES = \ + src/nestegg.c \ + halloc/src/halloc.c \ + halloc/halloc.h \ + halloc/src/align.h \ + halloc/src/hlist.h \ + halloc/src/macros.h + +check_PROGRAMS = test/test + +test_test_SOURCES = test/test.c +test_test_LDADD = src/libnestegg.la + +DISTCLEANFILES = include/nestegg/nestegg-stdint.h + +dist-hook: + find $(distdir) -type d -name '.git' | xargs rm -rf + +debug: + $(MAKE) all CFLAGS="@DEBUG@" + +profile: + $(MAKE) all CFLAGS="@PROFILE@" diff --git a/nestegg/README b/nestegg/README new file mode 100644 index 000000000..47c8237d2 --- /dev/null +++ b/nestegg/README @@ -0,0 +1,6 @@ +See INSTALL for build instructions. + +Licensed under an ISC-style license. See LICENSE for details. + +The source under the halloc/ directory is licensed under a BSD license. See +halloc/halloc.h for details. diff --git a/nestegg/TODO b/nestegg/TODO new file mode 100644 index 000000000..bf0cb04c4 --- /dev/null +++ b/nestegg/TODO @@ -0,0 +1,21 @@ +- Document when read, seek, tell callbacks are used. +- Add an automated testsuite. +- Test (and fix, if necessary) support for unknown sizes. +- Test (and fix, if necessary) support for large files. +- Read past unknown elements rather than seeking. +- Try to handle unknown elements with unknown sizes. +- Formalize handling of default element values. 
+- Try to resynchronize stream when read_block fails so that failure to parse + a single block can be treated as non-fatal. +- Make logging more useful to API users. +- Avoid reparsing Cues and ignore any SeekHead at end of file. +- Optionally build a Cue index as Clusters are parsed. +- Support seeking without Cues. +- Avoid building a list of Clusters as they are parsed and retain only the + last one parsed. +- Add an asynchronous error code to struct nestegg and ensure that API calls + continue to fail safely once a fatal error has been returned. +- Modify parser/data structures to provide a clean separation. Perhaps the + parser should return a generic tree of nodes that a second pass uses to + initialize the main data structures. +- Use pool allocator for all allocations. diff --git a/nestegg/configure.ac b/nestegg/configure.ac new file mode 100644 index 000000000..70f6e0d59 --- /dev/null +++ b/nestegg/configure.ac @@ -0,0 +1,124 @@ +dnl ------------------------------------------------ +dnl Initialization and Versioning +dnl ------------------------------------------------ + +AC_INIT(libnestegg,[0.1git]) + +AC_CANONICAL_HOST +AC_CANONICAL_TARGET + +AC_CONFIG_MACRO_DIR([m4]) + +AM_CONFIG_HEADER([config.h]) +AC_CONFIG_SRCDIR([src/nestegg.c]) +AM_INIT_AUTOMAKE + +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +dnl Library versioning +dnl CURRENT, REVISION, AGE +dnl - library source changed -> increment REVISION +dnl - interfaces added/removed/changed -> increment CURRENT, REVISION = 0 +dnl - interfaces added -> increment AGE +dnl - interfaces removed -> AGE = 0 + +NESTEGG_CURRENT=0 +NESTEGG_REVISION=0 +NESTEGG_AGE=1 +AC_SUBST(NESTEGG_CURRENT) +AC_SUBST(NESTEGG_REVISION) +AC_SUBST(NESTEGG_AGE) + + +dnl -------------------------------------------------- +dnl Check for programs +dnl -------------------------------------------------- + +dnl save $CFLAGS since AC_PROG_CC likes to insert "-g -O2" +dnl if $CFLAGS is blank +cflags_save="$CFLAGS" +AC_PROG_CC 
+AC_PROG_CPP +CFLAGS="$cflags_save" + +AM_PROG_CC_C_O +AC_LIBTOOL_WIN32_DLL +AM_PROG_LIBTOOL + +dnl Check for doxygen +AC_ARG_ENABLE([doc], + AS_HELP_STRING([--enable-doc], [Build API documentation]), + [ac_enable_doc=$enableval], [ac_enable_doc=auto]) + +if test "x$ac_enable_doc" != "xno"; then + AC_CHECK_PROG(HAVE_DOXYGEN, doxygen, true, false) + + if test "x$HAVE_DOXYGEN" = "xfalse" -a "x$ac_enable_doc" = "xyes"; then + AC_MSG_ERROR([*** API documentation explicitly requested but Doxygen not found]) + fi +else + HAVE_DOXYGEN=false +fi +AM_CONDITIONAL(HAVE_DOXYGEN,$HAVE_DOXYGEN) +if test $HAVE_DOXYGEN = "false"; then + AC_MSG_WARN([*** doxygen not found, API documentation will not be built]) +fi + +# Generate portable stdint.h replacement +AX_CREATE_STDINT_H(include/nestegg/nestegg-stdint.h) + +# Test whenever ld supports -version-script +AC_PROG_LD +AC_PROG_LD_GNU +AC_MSG_CHECKING([how to control symbol export]) + +dnl -------------------------------------------------- +dnl Do substitutions +dnl -------------------------------------------------- + +AC_SUBST(DEBUG) +AC_SUBST(PROFILE) + +AC_OUTPUT([ + Makefile + docs/Makefile + docs/Doxyfile + nestegg.pc + nestegg-uninstalled.pc +]) + +AS_AC_EXPAND(LIBDIR, ${libdir}) +AS_AC_EXPAND(INCLUDEDIR, ${includedir}) +AS_AC_EXPAND(BINDIR, ${bindir}) +AS_AC_EXPAND(DOCDIR, ${docdir}) + +if test $HAVE_DOXYGEN = "false"; then + doc_build="no" +else + doc_build="yes" +fi + +AC_MSG_RESULT([ +------------------------------------------------------------------------ + $PACKAGE $VERSION: Automatic configuration OK. + + General configuration: + + API Documentation: .......... ${doc_build} + + Installation paths: + + libnestegg: .................. ${LIBDIR} + C header files: .............. ${INCLUDEDIR}/nestegg + Documentation: ............... ${DOCDIR} + + Building: + + Type 'make' to compile $PACKAGE. + + Type 'make install' to install $PACKAGE. + + Example programs will be built but not installed. 
+------------------------------------------------------------------------ +]) + diff --git a/nestegg/docs/Doxyfile.in b/nestegg/docs/Doxyfile.in new file mode 100644 index 000000000..e0e9249aa --- /dev/null +++ b/nestegg/docs/Doxyfile.in @@ -0,0 +1,1551 @@ +# Doxyfile 1.6.2 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = @PACKAGE@ + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = @VERSION@ + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = . 
+ +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. 
Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. 
Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = YES + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
+# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it parses. +# With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this tag. +# The format is ext=language, where ext is a file extension, and language is one of +# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, +# Objective-C, Python, Fortran, VHDL, C, C++. 
For instance to make doxygen treat +# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. 
+ +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). 
The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = NO + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. 
+ +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. 
If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. 
If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by +# doxygen. The layout file controls the global structure of the generated output files +# in an output format independent way. To create the layout file that represents +# doxygen's defaults, run doxygen with the -l option. You can optionally specify a +# file name after the option, if omitted DoxygenLayout.xml will be used as the name +# of the layout file.
+ +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = YES + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text.
Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = @top_srcdir@/include/nestegg + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used.
+ +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used.
+ +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation.
+ +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. 
+ +ALPHABETICAL_INDEX = NO + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = NO + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. 
A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. 
+ +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER +# are set, an additional index file will be generated that can be used as input for +# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated +# HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. +# For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's +# filter section matches. +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file.
+ +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. 
+ +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be implemented using a PHP enabled web server instead of at the web client using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server based approach is that it scales better to large projects and allows full text search. The disadvantage is that it is more difficult to set up +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output.
+ +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf).
The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. 
+ +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. + +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. 
+ +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. 
+ +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. +# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. 
Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon.
Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. +# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). 
+ +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. 
You need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances.
+ +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot.
Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). 
+ +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/nestegg/docs/Makefile.am b/nestegg/docs/Makefile.am new file mode 100644 index 000000000..42cf8eec4 --- /dev/null +++ b/nestegg/docs/Makefile.am @@ -0,0 +1,38 @@ +doc_DATA = doxygen-build.stamp + +EXTRA_DIST = Doxyfile.in + +if HAVE_DOXYGEN +doxygen-build.stamp: Doxyfile + doxygen + touch doxygen-build.stamp +else +doxygen-build.stamp: + echo "*** Warning: Doxygen not found; documentation will not be built."
+ touch doxygen-build.stamp +endif + +dist_docdir = $(distdir)/libnestegg + +dist-hook: + if test -d html; then \ + mkdir $(dist_docdir); \ + echo -n "copying built documenation..."; \ + cp -rp html $(dist_docdir)/html; \ + echo "OK"; \ + fi + + +install-data-local: doxygen-build.stamp + $(mkinstalldirs) $(DESTDIR)$(docdir) + if test -d html; then \ + cp -rp html $(DESTDIR)$(docdir)/html; \ + fi + +uninstall-local: + rm -rf $(DESTDIR)$(docdir) + +clean-local: + if test -d html; then rm -rf html; fi + if test -f doxygen-build.stamp; then rm -f doxygen-build.stamp; fi + diff --git a/nestegg/halloc/README b/nestegg/halloc/README new file mode 100644 index 000000000..380fba2b8 --- /dev/null +++ b/nestegg/halloc/README @@ -0,0 +1,45 @@ +halloc 1.2.1 +============ + + Hierarchical memory heap interface - an extension to standard + malloc/free interface that simplifies tasks of memory disposal + when allocated structures exhibit hierarchical properties. + + http://swapped.cc/halloc += + To build libhalloc.a with GNU tools run + make + + To install in /usr/include and /usr/lib + make install + + To cleanup the build files + make clean += + halloc-1.2.1 + * fixed a double-free bug in _set_allocator() as per + Matthew Gregan comments + + * switched to using NULL instead of 0 where applicable + + halloc-1.2.0 + * added missing include to halloc.c + + * improved standard compliance thanks to the feedback + received from Stan Tobias. Two things were fixed - + + - hblock_t structure no longer uses zero-sized 'data' + array, which happened to be common, but non-standard + extension; + + - secondly, added the code to test the behaviour of + realloc(ptr, 0). Standard allows it NOT to act as + free(), in which case halloc will use its own version + of allocator calling free() when necessary.
+ + halloc-1.1.0 + * initial public release (rewrite of hhmalloc library) + +============================================================================= +Copyright (c) 2004-2010, Alex Pankratov (ap@swapped.cc). All rights reserved. + diff --git a/nestegg/halloc/halloc.h b/nestegg/halloc/halloc.h new file mode 100644 index 000000000..10af4e8d8 --- /dev/null +++ b/nestegg/halloc/halloc.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2004-2010 Alex Pankratov. All rights reserved. + * + * Hierarchical memory allocator, 1.2.1 + * http://swapped.cc/halloc + */ + +/* + * The program is distributed under terms of BSD license. + * You can obtain the copy of the license by visiting: + * + * http://www.opensource.org/licenses/bsd-license.php + */ + +#ifndef _LIBP_HALLOC_H_ +#define _LIBP_HALLOC_H_ + +#include /* size_t */ + +/* + * Core API + */ +void * halloc (void * block, size_t len); +void hattach(void * block, void * parent); + +/* + * standard malloc/free api + */ +void * h_malloc (size_t len); +void * h_calloc (size_t n, size_t len); +void * h_realloc(void * p, size_t len); +void h_free (void * p); +char * h_strdup (const char * str); + +/* + * the underlying allocator + */ +typedef void * (* realloc_t)(void * ptr, size_t len); + +extern realloc_t halloc_allocator; + +#endif + diff --git a/nestegg/halloc/src/align.h b/nestegg/halloc/src/align.h new file mode 100644 index 000000000..4c6e1831f --- /dev/null +++ b/nestegg/halloc/src/align.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2010 Alex Pankratov. All rights reserved. + * + * Hierarchical memory allocator, 1.2.1 + * http://swapped.cc/halloc + */ + +/* + * The program is distributed under terms of BSD license. 
+ * You can obtain the copy of the license by visiting: + * + * http://www.opensource.org/licenses/bsd-license.php + */ + +#ifndef _LIBP_ALIGN_H_ +#define _LIBP_ALIGN_H_ + +/* + * a type with the most strict alignment requirements + */ +union max_align +{ + char c; + short s; + long l; + int i; + float f; + double d; + void * v; + void (*q)(void); +}; + +typedef union max_align max_align_t; + +#endif + diff --git a/nestegg/halloc/src/halloc.c b/nestegg/halloc/src/halloc.c new file mode 100644 index 000000000..38fd6c11a --- /dev/null +++ b/nestegg/halloc/src/halloc.c @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2004-2010 Alex Pankratov. All rights reserved. + * + * Hierarchical memory allocator, 1.2.1 + * http://swapped.cc/halloc + */ + +/* + * The program is distributed under terms of BSD license. + * You can obtain the copy of the license by visiting: + * + * http://www.opensource.org/licenses/bsd-license.php + */ + +#include /* realloc */ +#include /* memset & co */ + +#include "../halloc.h" +#include "align.h" +#include "hlist.h" + +/* + * block control header + */ +typedef struct hblock +{ +#ifndef NDEBUG +#define HH_MAGIC 0x20040518L + long magic; +#endif + hlist_item_t siblings; /* 2 pointers */ + hlist_head_t children; /* 1 pointer */ + max_align_t data[1]; /* not allocated, see below */ + +} hblock_t; + +#define sizeof_hblock offsetof(hblock_t, data) + +/* + * + */ +realloc_t halloc_allocator = NULL; + +#define allocator halloc_allocator + +/* + * static methods + */ +static void _set_allocator(void); +static void * _realloc(void * ptr, size_t n); + +static int _relate(hblock_t * b, hblock_t * p); +static void _free_children(hblock_t * p); + +/* + * Core API + */ +void * halloc(void * ptr, size_t len) +{ + hblock_t * p; + + /* set up default allocator */ + if (! allocator) + { + _set_allocator(); + assert(allocator); + } + + /* calloc */ + if (! ptr) + { + if (! len) + return NULL; + + p = allocator(0, len + sizeof_hblock); + if (!
p) + return NULL; +#ifndef NDEBUG + p->magic = HH_MAGIC; +#endif + hlist_init(&p->children); + hlist_init_item(&p->siblings); + + return p->data; + } + + p = structof(ptr, hblock_t, data); + assert(p->magic == HH_MAGIC); + + /* realloc */ + if (len) + { + p = allocator(p, len + sizeof_hblock); + if (! p) + return NULL; + + hlist_relink(&p->siblings); + hlist_relink_head(&p->children); + + return p->data; + } + + /* free */ + _free_children(p); + hlist_del(&p->siblings); + allocator(p, 0); + + return NULL; +} + +void hattach(void * block, void * parent) +{ + hblock_t * b, * p; + + if (! block) + { + assert(! parent); + return; + } + + /* detach */ + b = structof(block, hblock_t, data); + assert(b->magic == HH_MAGIC); + + hlist_del(&b->siblings); + + if (! parent) + return; + + /* attach */ + p = structof(parent, hblock_t, data); + assert(p->magic == HH_MAGIC); + + /* sanity checks */ + assert(b != p); /* trivial */ + assert(! _relate(p, b)); /* heavy ! */ + + hlist_add(&p->children, &b->siblings); +} + +/* + * malloc/free api + */ +void * h_malloc(size_t len) +{ + return halloc(0, len); +} + +void * h_calloc(size_t n, size_t len) +{ + void * ptr = halloc(0, len*=n); + return ptr ? memset(ptr, 0, len) : NULL; +} + +void * h_realloc(void * ptr, size_t len) +{ + return halloc(ptr, len); +} + +void h_free(void * ptr) +{ + halloc(ptr, 0); +} + +char * h_strdup(const char * str) +{ + size_t len = strlen(str); + char * ptr = halloc(0, len + 1); + return ptr ? (ptr[len] = 0, memcpy(ptr, str, len)) : NULL; +} + +/* + * static stuff + */ +static void _set_allocator(void) +{ + void * p; + assert(! allocator); + + /* + * the purpose of the test below is to check the behaviour + * of realloc(ptr, 0), which is defined in the standard + * as an implementation-specific. if it returns zero, + * then it's equivalent to free(). 
it can however return + * non-zero, in which case it cannot be used for freeing + * memory blocks and we'll need to supply our own version + * + * Thanks to Stan Tobias for pointing this tricky part out. + */ + allocator = realloc; + if (! (p = malloc(1))) + /* hmm */ + return; + + if ((p = realloc(p, 0))) + { + /* realloc cannot be used as free() */ + allocator = _realloc; + free(p); + } +} + +static void * _realloc(void * ptr, size_t n) +{ + /* + * free'ing realloc() + */ + if (n) + return realloc(ptr, n); + free(ptr); + return NULL; +} + +static int _relate(hblock_t * b, hblock_t * p) +{ + hlist_item_t * i; + + if (!b || !p) + return 0; + + /* + * since there is no 'parent' pointer, which would've allowed + * O(log(n)) upward traversal, the check must use O(n) downward + * iteration of the entire hierarchy; and this can be VERY SLOW + */ + hlist_for_each(i, &p->children) + { + hblock_t * q = structof(i, hblock_t, siblings); + if (q == b || _relate(b, q)) + return 1; + } + return 0; +} + +static void _free_children(hblock_t * p) +{ + hlist_item_t * i, * tmp; + +#ifndef NDEBUG + /* + * this catches loops in hierarchy with almost zero + * overhead (compared to _relate() running time) + */ + assert(p && p->magic == HH_MAGIC); + p->magic = 0; +#endif + hlist_for_each_safe(i, tmp, &p->children) + { + hblock_t * q = structof(i, hblock_t, siblings); + _free_children(q); + allocator(q, 0); + } +} + diff --git a/nestegg/halloc/src/hlist.h b/nestegg/halloc/src/hlist.h new file mode 100644 index 000000000..2791f78c7 --- /dev/null +++ b/nestegg/halloc/src/hlist.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2004-2010 Alex Pankratov. All rights reserved. + * + * Hierarchical memory allocator, 1.2.1 + * http://swapped.cc/halloc + */ + +/* + * The program is distributed under terms of BSD license. 
+ * You can obtain the copy of the license by visiting: + * + * http://www.opensource.org/licenses/bsd-license.php + */ + +#ifndef _LIBP_HLIST_H_ +#define _LIBP_HLIST_H_ + +#include +#include "macros.h" /* static_inline */ + +/* + * weak double-linked list w/ tail sentinel + */ +typedef struct hlist_head hlist_head_t; +typedef struct hlist_item hlist_item_t; + +/* + * + */ +struct hlist_head +{ + hlist_item_t * next; +}; + +struct hlist_item +{ + hlist_item_t * next; + hlist_item_t ** prev; +}; + +/* + * shared tail sentinel + */ +struct hlist_item hlist_null; + +/* + * + */ +#define __hlist_init(h) { &hlist_null } +#define __hlist_init_item(i) { &hlist_null, &(i).next } + +static_inline void hlist_init(hlist_head_t * h); +static_inline void hlist_init_item(hlist_item_t * i); + +/* static_inline void hlist_purge(hlist_head_t * h); */ + +/* static_inline bool_t hlist_empty(const hlist_head_t * h); */ + +/* static_inline hlist_item_t * hlist_head(const hlist_head_t * h); */ + +/* static_inline hlist_item_t * hlist_next(const hlist_item_t * i); */ +/* static_inline hlist_item_t * hlist_prev(const hlist_item_t * i, + const hlist_head_t * h); */ + +static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i); + +/* static_inline void hlist_add_prev(hlist_item_t * l, hlist_item_t * i); */ +/* static_inline void hlist_add_next(hlist_item_t * l, hlist_item_t * i); */ + +static_inline void hlist_del(hlist_item_t * i); + +static_inline void hlist_relink(hlist_item_t * i); +static_inline void hlist_relink_head(hlist_head_t * h); + +#define hlist_for_each(i, h) \ + for (i = (h)->next; i != &hlist_null; i = i->next) + +#define hlist_for_each_safe(i, tmp, h) \ + for (i = (h)->next, tmp = i->next; \ + i!= &hlist_null; \ + i = tmp, tmp = i->next) + +/* + * static + */ +static_inline void hlist_init(hlist_head_t * h) +{ + assert(h); + h->next = &hlist_null; +} + +static_inline void hlist_init_item(hlist_item_t * i) +{ + assert(i); + i->prev = &i->next; + i->next = &hlist_null; 
+} + +static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i) +{ + hlist_item_t * next; + assert(h && i); + + next = i->next = h->next; + next->prev = &i->next; + h->next = i; + i->prev = &h->next; +} + +static_inline void hlist_del(hlist_item_t * i) +{ + hlist_item_t * next; + assert(i); + + next = i->next; + next->prev = i->prev; + *i->prev = next; + + hlist_init_item(i); +} + +static_inline void hlist_relink(hlist_item_t * i) +{ + assert(i); + *i->prev = i; + i->next->prev = &i->next; +} + +static_inline void hlist_relink_head(hlist_head_t * h) +{ + assert(h); + h->next->prev = &h->next; +} + +#endif + diff --git a/nestegg/halloc/src/macros.h b/nestegg/halloc/src/macros.h new file mode 100644 index 000000000..c36b516ee --- /dev/null +++ b/nestegg/halloc/src/macros.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2010 Alex Pankratov. All rights reserved. + * + * Hierarchical memory allocator, 1.2.1 + * http://swapped.cc/halloc + */ + +/* + * The program is distributed under terms of BSD license. + * You can obtain the copy of the license by visiting: + * + * http://www.opensource.org/licenses/bsd-license.php + */ + +#ifndef _LIBP_MACROS_H_ +#define _LIBP_MACROS_H_ + +#include /* offsetof */ + +/* + restore pointer to the structure by a pointer to its field + */ +#define structof(p,t,f) ((t*)(- offsetof(t,f) + (char*)(p))) + +/* + * redefine for the target compiler + */ +#ifdef _WIN32 +#define static_inline static __inline +#else +#define static_inline static __inline__ +#endif + + +#endif + diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h new file mode 100644 index 000000000..7447d141d --- /dev/null +++ b/nestegg/include/nestegg/nestegg.h @@ -0,0 +1,292 @@ +/* + * Copyright © 2010 Mozilla Foundation + * + * This program is made available under an ISC-style license. See the + * accompanying file LICENSE for details. 
+ */ +#ifndef NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 +#define NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 + +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @mainpage + + @section intro Introduction + + This is the documentation for the libnestegg C API. + libnestegg is a demultiplexing library for Matroska and WebMedia media files. + + @section example Example code + + @code + nestegg * demux_ctx; + nestegg_init(&demux_ctx, io, NULL); + + nestegg_packet * pkt; + while ((r = nestegg_read_packet(demux_ctx, &pkt)) > 0) { + unsigned int track; + + nestegg_packet_track(pkt, &track); + + // This example decodes the first track only. + if (track == 0) { + unsigned int chunk, chunks; + + nestegg_packet_count(pkt, &chunks); + + // Decode each chunk of data. + for (chunk = 0; chunk < chunks; ++chunk) { + unsigned char * data; + size_t data_size; + + nestegg_packet_data(pkt, chunk, &data, &data_size); + + example_codec_decode(codec_ctx, data, data_size); + } + } + + nestegg_free_packet(pkt); + } + + nestegg_destroy(demux_ctx); + @endcode +*/ + + +/** @file + The libnestegg C API. */ + +#define NESTEGG_TRACK_VIDEO 0 /**< Track is of type video. */ +#define NESTEGG_TRACK_AUDIO 1 /**< Track is of type audio. */ + +#define NESTEGG_CODEC_VP8 0 /**< Track uses Google On2 VP8 codec. */ +#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */ + +#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */ +#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */ +#define NESTEGG_SEEK_END 2 /**< Seek offset relative to end of stream. */ + +#define NESTEGG_LOG_DEBUG 1 /**< Debug level log message. */ +#define NESTEGG_LOG_INFO 10 /**< Informational level log message. */ +#define NESTEGG_LOG_WARNING 100 /**< Warning level log message. */ +#define NESTEGG_LOG_ERROR 1000 /**< Error level log message. */ +#define NESTEGG_LOG_CRITICAL 10000 /**< Critical level log message.
*/ + +typedef struct nestegg nestegg; /**< Opaque handle referencing the stream state. */ +typedef struct nestegg_packet nestegg_packet; /**< Opaque handle referencing a packet of data. */ + +/** User supplied IO context. */ +typedef struct { + /** User supplied read callback. + @param buffer Buffer to read data into. + @param length Length of supplied buffer in bytes. + @param userdata The #userdata supplied by the user. + @retval 1 Read succeeded. + @retval 0 End of stream. + @retval -1 Error. */ + int (* read)(void * buffer, size_t length, void * userdata); + + /** User supplied seek callback. + @param offset Offset within the stream to seek to. + @param whence Seek direction. One of #NESTEGG_SEEK_SET, + #NESTEGG_SEEK_CUR, or #NESTEGG_SEEK_END. + @param userdata The #userdata supplied by the user. + @retval 0 Seek succeeded. + @retval -1 Error. */ + int (* seek)(int64_t offset, int whence, void * userdata); + + /** User supplied tell callback. + @param userdata The #userdata supplied by the user. + @returns Current position within the stream. + @retval -1 Error. */ + int64_t (* tell)(void * userdata); + + /** User supplied pointer to be passed to the IO callbacks. */ + void * userdata; +} nestegg_io; + +/** Parameters specific to a video track. */ +typedef struct { + unsigned int width; /**< Width of the video frame in pixels. */ + unsigned int height; /**< Height of the video frame in pixels. */ + unsigned int display_width; /**< Display width of the video frame in pixels. */ + unsigned int display_height; /**< Display height of the video frame in pixels. */ + unsigned int crop_bottom; /**< Pixels to crop from the bottom of the frame. */ + unsigned int crop_top; /**< Pixels to crop from the top of the frame. */ + unsigned int crop_left; /**< Pixels to crop from the left of the frame. */ + unsigned int crop_right; /**< Pixels to crop from the right of the frame. */ +} nestegg_video_params; + +/** Parameters specific to an audio track. 
*/ +typedef struct { + double rate; /**< Sampling rate in Hz. */ + unsigned int channels; /**< Number of audio channels. */ + unsigned int depth; /**< Bits per sample. */ +} nestegg_audio_params; + +/** Logging callback function pointer. */ +typedef void (* nestegg_log)(nestegg * context, unsigned int severity, char const * format, ...); + +/** Initialize a nestegg context. During initialization the parser will + read forward in the stream processing all elements until the first + block of media is reached. All track metadata has been processed at this point. + @param context Storage for the new nestegg context. @see nestegg_destroy + @param io User supplied IO context. + @param callback Optional logging callback function pointer. May be NULL. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback); + +/** Destroy a nestegg context and free associated memory. + @param context #nestegg context to be freed. @see nestegg_init */ +void nestegg_destroy(nestegg * context); + +/** Query the duration of the media stream in nanoseconds. + @param context Stream context initialized by #nestegg_init. + @param duration Storage for the queried duration. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_duration(nestegg * context, uint64_t * duration); + +/** Query the tstamp scale of the media stream in nanoseconds. + Timecodes presented by nestegg have been scaled by this value + before presentation to the caller. + @param context Stream context initialized by #nestegg_init. + @param scale Storage for the queried scale factor. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_tstamp_scale(nestegg * context, uint64_t * scale); + +/** Query the number of tracks in the media stream. + @param context Stream context initialized by #nestegg_init. + @param tracks Storage for the queried track count. + @retval 0 Success. + @retval -1 Error. 
*/ +int nestegg_track_count(nestegg * context, unsigned int * tracks); + +/** Seek @a track to @a tstamp. Stream seek will terminate at the earliest + key point in the stream at or before @a tstamp. Other tracks in the + stream will output packets with unspecified but nearby timestamps. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @param tstamp Absolute timestamp in nanoseconds. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_track_seek(nestegg * context, unsigned int track, uint64_t tstamp); + +/** Query the type specified by @a track. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @retval #NESTEGG_TRACK_VIDEO Track type is video. + @retval #NESTEGG_TRACK_AUDIO Track type is audio. + @retval -1 Error. */ +int nestegg_track_type(nestegg * context, unsigned int track); + +/** Query the codec ID specified by @a track. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @retval #NESTEGG_CODEC_VP8 Track codec is VP8. + @retval #NESTEGG_CODEC_VORBIS Track codec is Vorbis. + @retval -1 Error. */ +int nestegg_track_codec_id(nestegg * context, unsigned int track); + +/** Query the number of codec initialization chunks for @a track. Each + chunk of data should be passed to the codec initialization functions in + the order returned. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @param count Storage for the queried chunk count. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_track_codec_data_count(nestegg * context, unsigned int track, + unsigned int * count); + +/** Get a pointer to chunk number @a item of codec initialization data for + @a track. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @param item Zero based chunk item number. 
+ @param data Storage for the queried data pointer. + The data is owned by the #nestegg context. + @param length Storage for the queried data size. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_track_codec_data(nestegg * context, unsigned int track, unsigned int item, + unsigned char ** data, size_t * length); + +/** Query the video parameters specified by @a track. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @param params Storage for the queried video parameters. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_track_video_params(nestegg * context, unsigned int track, + nestegg_video_params * params); + +/** Query the audio parameters specified by @a track. + @param context Stream context initialized by #nestegg_init. + @param track Zero based track number. + @param params Storage for the queried audio parameters. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_track_audio_params(nestegg * context, unsigned int track, + nestegg_audio_params * params); + +/** Read a packet of media data. A packet consists of one or more chunks of + data associated with a single track. nestegg_read_packet should be + called in a loop while the return value is 1 to drive the stream parser + forward. @see nestegg_free_packet + @param context Context returned by #nestegg_init. + @param packet Storage for the returned nestegg_packet. + @retval 1 Additional packets may be read in subsequent calls. + @retval 0 End of stream. + @retval -1 Error. */ +int nestegg_read_packet(nestegg * context, nestegg_packet ** packet); + +/** Destroy a nestegg_packet and free associated memory. + @param packet #nestegg_packet to be freed. @see nestegg_read_packet */ +void nestegg_free_packet(nestegg_packet * packet); + +/** Query the track number of @a packet. + @param packet Packet initialized by #nestegg_read_packet. + @param track Storage for the queried zero based track index. + @retval 0 Success. + @retval -1 Error. 
*/ +int nestegg_packet_track(nestegg_packet * packet, unsigned int * track); + +/** Query the time stamp in nanoseconds of @a packet. + @param packet Packet initialized by #nestegg_read_packet. + @param tstamp Storage for the queried timestamp in nanoseconds. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_packet_tstamp(nestegg_packet * packet, uint64_t * tstamp); + +/** Query the number of data chunks contained in @a packet. + @param packet Packet initialized by #nestegg_read_packet. + @param count Storage for the queried chunk count. + @retval 0 Success. + @retval -1 Error. */ +int nestegg_packet_count(nestegg_packet * packet, unsigned int * count); + +/** Get a pointer to chunk number @a item of packet data. + @param packet Packet initialized by #nestegg_read_packet. + @param item Zero based chunk item number. + @param data Storage for the queried data pointer. + The data is owned by the #nestegg_packet packet. + @param length Storage for the queried data size. + @retval 0 Success. + @retval -1 Error. 
*/ +int nestegg_packet_data(nestegg_packet * packet, unsigned int item, + unsigned char ** data, size_t * length); + +#ifdef __cplusplus +} +#endif + +#endif /* NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 */ diff --git a/nestegg/m4/as-ac-expand.m4 b/nestegg/m4/as-ac-expand.m4 new file mode 100644 index 000000000..d6c9e3306 --- /dev/null +++ b/nestegg/m4/as-ac-expand.m4 @@ -0,0 +1,43 @@ +dnl as-ac-expand.m4 0.2.0 +dnl autostars m4 macro for expanding directories using configure's prefix +dnl thomas@apestaart.org + +dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR) +dnl example +dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir) +dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local + +AC_DEFUN([AS_AC_EXPAND], +[ + EXP_VAR=[$1] + FROM_VAR=[$2] + + dnl first expand prefix and exec_prefix if necessary + prefix_save=$prefix + exec_prefix_save=$exec_prefix + + dnl if no prefix given, then use /usr/local, the default prefix + if test "x$prefix" = "xNONE"; then + prefix="$ac_default_prefix" + fi + dnl if no exec_prefix given, then use prefix + if test "x$exec_prefix" = "xNONE"; then + exec_prefix=$prefix + fi + + full_var="$FROM_VAR" + dnl loop until it doesn't change anymore + while true; do + new_full_var="`eval echo $full_var`" + if test "x$new_full_var" = "x$full_var"; then break; fi + full_var=$new_full_var + done + + dnl clean up + full_var=$new_full_var + AC_SUBST([$1], "$full_var") + + dnl restore prefix and exec_prefix + prefix=$prefix_save + exec_prefix=$exec_prefix_save +]) diff --git a/nestegg/m4/ax_create_stdint_h.m4 b/nestegg/m4/ax_create_stdint_h.m4 new file mode 100644 index 000000000..228105b11 --- /dev/null +++ b/nestegg/m4/ax_create_stdint_h.m4 @@ -0,0 +1,695 @@ +dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])] +dnl +dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the +dnl existence of an include file <stdint.h> that defines a set of +dnl typedefs, especially uint8_t,int32_t,uintptr_t. 
Many older +dnl installations will not provide this file, but some will have the +dnl very same definitions in <inttypes.h>. In other environments we can +dnl use the inet-types in <sys/types.h> which would define the typedefs +dnl int8_t and u_int8_t respectively. +dnl +dnl This macro will create a local "_stdint.h" or the headerfile given +dnl as an argument. In many cases that file will just "#include +dnl <stdint.h>" or "#include <inttypes.h>", while in other environments +dnl it will provide the set of basic 'stdint's definitions/typedefs: +dnl +dnl int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t +dnl int_least32_t.. int_fast32_t.. intmax_t +dnl +dnl which may or may not rely on the definitions of other files, or +dnl using the AC_CHECK_SIZEOF macro to determine the actual sizeof each +dnl type. +dnl +dnl if your header files require the stdint-types you will want to +dnl create an installable file mylib-int.h that all your other +dnl installable header may include. So if you have a library package +dnl named "mylib", just use +dnl +dnl AX_CREATE_STDINT_H(mylib-int.h) +dnl +dnl in configure.ac and go to install that very header file in +dnl Makefile.am along with the other headers (mylib.h) - and the +dnl mylib-specific headers can simply use "#include <mylib-int.h>" to +dnl obtain the stdint-types. +dnl +dnl Remember, if the system already had a valid <stdint.h>, the +dnl generated file will include it directly. No need for fuzzy +dnl HAVE_STDINT_H things... (oops, GCC 4.2.x has deliberately disabled +dnl its stdint.h for non-c99 compilation and the c99-mode is not the +dnl default. Therefore this macro will not use the compiler's stdint.h +dnl - please complain to the GCC developers). +dnl +dnl @category C +dnl @author Guido U. 
Draheim +dnl @version 2006-10-13 +dnl @license GPLWithACException + +AC_DEFUN([AX_CHECK_DATA_MODEL],[ + AC_CHECK_SIZEOF(char) + AC_CHECK_SIZEOF(short) + AC_CHECK_SIZEOF(int) + AC_CHECK_SIZEOF(long) + AC_CHECK_SIZEOF(void*) + ac_cv_char_data_model="" + ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_char" + ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_short" + ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_int" + ac_cv_long_data_model="" + ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_int" + ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_long" + ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_voidp" + AC_MSG_CHECKING([data model]) + case "$ac_cv_char_data_model/$ac_cv_long_data_model" in + 122/242) ac_cv_data_model="IP16" ; n="standard 16bit machine" ;; + 122/244) ac_cv_data_model="LP32" ; n="standard 32bit machine" ;; + 122/*) ac_cv_data_model="i16" ; n="unusual int16 model" ;; + 124/444) ac_cv_data_model="ILP32" ; n="standard 32bit unixish" ;; + 124/488) ac_cv_data_model="LP64" ; n="standard 64bit unixish" ;; + 124/448) ac_cv_data_model="LLP64" ; n="unusual 64bit unixish" ;; + 124/*) ac_cv_data_model="i32" ; n="unusual int32 model" ;; + 128/888) ac_cv_data_model="ILP64" ; n="unusual 64bit numeric" ;; + 128/*) ac_cv_data_model="i64" ; n="unusual int64 model" ;; + 222/*2) ac_cv_data_model="DSP16" ; n="strict 16bit dsptype" ;; + 333/*3) ac_cv_data_model="DSP24" ; n="strict 24bit dsptype" ;; + 444/*4) ac_cv_data_model="DSP32" ; n="strict 32bit dsptype" ;; + 666/*6) ac_cv_data_model="DSP48" ; n="strict 48bit dsptype" ;; + 888/*8) ac_cv_data_model="DSP64" ; n="strict 64bit dsptype" ;; + 222/*|333/*|444/*|666/*|888/*) : + ac_cv_data_model="iDSP" ; n="unusual dsptype" ;; + *) ac_cv_data_model="none" ; n="very unusual model" ;; + esac + AC_MSG_RESULT([$ac_cv_data_model ($ac_cv_long_data_model, $n)]) +]) + +dnl AX_CHECK_HEADER_STDINT_X([HEADERLIST][,ACTION-IF]) +AC_DEFUN([AX_CHECK_HEADER_STDINT_X],[ 
+AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[ + ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h) + AC_MSG_RESULT([(..)]) + for i in m4_ifval([$1],[$1],[stdint.h inttypes.h sys/inttypes.h sys/types.h]) + do + unset ac_cv_type_uintptr_t + unset ac_cv_type_uint64_t + AC_CHECK_TYPE(uintptr_t,[ac_cv_header_stdint_x=$i],continue,[#include <$i>]) + AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>]) + m4_ifvaln([$1],[$1]) break + done + AC_MSG_CHECKING([for stdint uintptr_t]) + ]) +]) + +AC_DEFUN([AX_CHECK_HEADER_STDINT_O],[ +AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[ + ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h) + AC_MSG_RESULT([(..)]) + for i in m4_ifval([$1],[$1],[inttypes.h sys/inttypes.h sys/types.h stdint.h]) + do + unset ac_cv_type_uint32_t + unset ac_cv_type_uint64_t + AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],continue,[#include <$i>]) + AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>]) + m4_ifvaln([$1],[$1]) break + break; + done + AC_MSG_CHECKING([for stdint uint32_t]) + ]) +]) + +AC_DEFUN([AX_CHECK_HEADER_STDINT_U],[ +AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[ + ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h) + AC_MSG_RESULT([(..)]) + for i in m4_ifval([$1],[$1],[sys/types.h inttypes.h sys/inttypes.h]) ; do + unset ac_cv_type_u_int32_t + unset ac_cv_type_u_int64_t + AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],continue,[#include <$i>]) + AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>]) + m4_ifvaln([$1],[$1]) break + break; + done + AC_MSG_CHECKING([for stdint u_int32_t]) + ]) +]) + +AC_DEFUN([AX_CREATE_STDINT_H], +[# ------ AX CREATE STDINT H ------------------------------------- +AC_MSG_CHECKING([for stdint types]) +ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)` +# try to shortcircuit - if the default include path of the compiler +# can find a "stdint.h" header then we assume that all 
compilers can. +AC_CACHE_VAL([ac_cv_header_stdint_t],[ +old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS="" +old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS="" +old_CFLAGS="$CFLAGS" ; CFLAGS="" +AC_TRY_COMPILE([#include ],[int_least32_t v = 0;], +[ac_cv_stdint_result="(assuming C99 compatible system)" + ac_cv_header_stdint_t="stdint.h"; ], +[ac_cv_header_stdint_t=""]) +if test "$GCC" = "yes" && test ".$ac_cv_header_stdint_t" = "."; then +CFLAGS="-std=c99" +AC_TRY_COMPILE([#include ],[int_least32_t v = 0;], +[AC_MSG_WARN(your GCC compiler has a defunct stdint.h for its default-mode)]) +fi +CXXFLAGS="$old_CXXFLAGS" +CPPFLAGS="$old_CPPFLAGS" +CFLAGS="$old_CFLAGS" ]) + +v="... $ac_cv_header_stdint_h" +if test "$ac_stdint_h" = "stdint.h" ; then + AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)]) +elif test "$ac_stdint_h" = "inttypes.h" ; then + AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)]) +elif test "_$ac_cv_header_stdint_t" = "_" ; then + AC_MSG_RESULT([(putting them into $ac_stdint_h)$v]) +else + ac_cv_header_stdint="$ac_cv_header_stdint_t" + AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)]) +fi + +if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit.. + +dnl .....intro message done, now do a few system checks..... 
+dnl btw, all old CHECK_TYPE macros do automatically "DEFINE" a type, +dnl therefore we use the autoconf implementation detail CHECK_TYPE_NEW +dnl instead that is triggered with 3 or more arguments (see types.m4) + +inttype_headers=`echo $2 | sed -e 's/,/ /g'` + +ac_cv_stdint_result="(no helpful system typedefs seen)" +AX_CHECK_HEADER_STDINT_X(dnl + stdint.h inttypes.h sys/inttypes.h $inttype_headers, + ac_cv_stdint_result="(seen uintptr_t$and64 in $i)") + +if test "_$ac_cv_header_stdint_x" = "_" ; then +AX_CHECK_HEADER_STDINT_O(dnl, + inttypes.h sys/inttypes.h stdint.h $inttype_headers, + ac_cv_stdint_result="(seen uint32_t$and64 in $i)") +fi + +if test "_$ac_cv_header_stdint_x" = "_" ; then +if test "_$ac_cv_header_stdint_o" = "_" ; then +AX_CHECK_HEADER_STDINT_U(dnl, + sys/types.h inttypes.h sys/inttypes.h $inttype_headers, + ac_cv_stdint_result="(seen u_int32_t$and64 in $i)") +fi fi + +dnl if there was no good C99 header file, do some typedef checks... +if test "_$ac_cv_header_stdint_x" = "_" ; then + AC_MSG_CHECKING([for stdint datatype model]) + AC_MSG_RESULT([(..)]) + AX_CHECK_DATA_MODEL +fi + +if test "_$ac_cv_header_stdint_x" != "_" ; then + ac_cv_header_stdint="$ac_cv_header_stdint_x" +elif test "_$ac_cv_header_stdint_o" != "_" ; then + ac_cv_header_stdint="$ac_cv_header_stdint_o" +elif test "_$ac_cv_header_stdint_u" != "_" ; then + ac_cv_header_stdint="$ac_cv_header_stdint_u" +else + ac_cv_header_stdint="stddef.h" +fi + +AC_MSG_CHECKING([for extra inttypes in chosen header]) +AC_MSG_RESULT([($ac_cv_header_stdint)]) +dnl see if int_least and int_fast types are present in _this_ header. 
+unset ac_cv_type_int_least32_t +unset ac_cv_type_int_fast32_t +AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>]) +AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>]) +AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>]) + +fi # shortcircut to system "stdint.h" +# ------------------ PREPARE VARIABLES ------------------------------ +if test "$GCC" = "yes" ; then +ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1` +else +ac_cv_stdint_message="using $CC" +fi + +AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl +$ac_cv_stdint_result]) + +dnl ----------------------------------------------------------------- +# ----------------- DONE inttypes.h checks START header ------------- +AC_CONFIG_COMMANDS([$ac_stdint_h],[ +AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h) +ac_stdint=$tmp/_stdint.h + +echo "#ifndef" $_ac_stdint_h >$ac_stdint +echo "#define" $_ac_stdint_h "1" >>$ac_stdint +echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint +echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint +echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint +if test "_$ac_cv_header_stdint_t" != "_" ; then +echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint +echo "#include " >>$ac_stdint +echo "#endif" >>$ac_stdint +echo "#endif" >>$ac_stdint +else + +cat >>$ac_stdint < +#else +#include + +/* .................... configured part ............................ 
*/ + +STDINT_EOF + +echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint +if test "_$ac_cv_header_stdint_x" != "_" ; then + ac_header="$ac_cv_header_stdint_x" + echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint +else + echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint +fi + +echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint +if test "_$ac_cv_header_stdint_o" != "_" ; then + ac_header="$ac_cv_header_stdint_o" + echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint +else + echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint +fi + +echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint +if test "_$ac_cv_header_stdint_u" != "_" ; then + ac_header="$ac_cv_header_stdint_u" + echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint +else + echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint +fi + +echo "" >>$ac_stdint + +if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then + echo "#include <$ac_header>" >>$ac_stdint + echo "" >>$ac_stdint +fi fi + +echo "/* which 64bit typedef has been found */" >>$ac_stdint +if test "$ac_cv_type_uint64_t" = "yes" ; then +echo "#define _STDINT_HAVE_UINT64_T" "1" >>$ac_stdint +else +echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint +fi +if test "$ac_cv_type_u_int64_t" = "yes" ; then +echo "#define _STDINT_HAVE_U_INT64_T" "1" >>$ac_stdint +else +echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint +fi +echo "" >>$ac_stdint + +echo "/* which type model has been detected */" >>$ac_stdint +if test "_$ac_cv_char_data_model" != "_" ; then +echo "#define _STDINT_CHAR_MODEL" "$ac_cv_char_data_model" >>$ac_stdint +echo "#define _STDINT_LONG_MODEL" "$ac_cv_long_data_model" >>$ac_stdint +else +echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint +echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint +fi +echo "" >>$ac_stdint + +echo "/* whether int_least types were 
detected */" >>$ac_stdint +if test "$ac_cv_type_int_least32_t" = "yes"; then +echo "#define _STDINT_HAVE_INT_LEAST32_T" "1" >>$ac_stdint +else +echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint +fi +echo "/* whether int_fast types were detected */" >>$ac_stdint +if test "$ac_cv_type_int_fast32_t" = "yes"; then +echo "#define _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint +else +echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint +fi +echo "/* whether intmax_t type was detected */" >>$ac_stdint +if test "$ac_cv_type_intmax_t" = "yes"; then +echo "#define _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint +else +echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint +fi +echo "" >>$ac_stdint + + cat >>$ac_stdint <= 199901L +#define _HAVE_UINT64_T +#define _HAVE_LONGLONG_UINT64_T +typedef long long int64_t; +typedef unsigned long long uint64_t; + +#elif !defined __STRICT_ANSI__ +#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__ +#define _HAVE_UINT64_T +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; + +#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__ +/* note: all ELF-systems seem to have loff-support which needs 64-bit */ +#if !defined _NO_LONGLONG +#define _HAVE_UINT64_T +#define _HAVE_LONGLONG_UINT64_T +typedef long long int64_t; +typedef unsigned long long uint64_t; +#endif + +#elif defined __alpha || (defined __mips && defined _ABIN32) +#if !defined _NO_LONGLONG +typedef long int64_t; +typedef unsigned long uint64_t; +#endif + /* compiler/cpu type to define int64_t */ +#endif +#endif +#endif + +#if defined _STDINT_HAVE_U_INT_TYPES +/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */ +typedef u_int8_t uint8_t; +typedef u_int16_t uint16_t; +typedef u_int32_t uint32_t; + +/* glibc compatibility */ +#ifndef __int8_t_defined +#define __int8_t_defined +#endif +#endif + +#ifdef _STDINT_NEED_INT_MODEL_T +/* we must guess all the basic types. 
Apart from byte-adressable system, */ +/* there a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */ +/* (btw, those nibble-addressable systems are way off, or so we assume) */ + +dnl /* have a look at "64bit and data size neutrality" at */ +dnl /* http://unix.org/version2/whatsnew/login_64bit.html */ +dnl /* (the shorthand "ILP" types always have a "P" part) */ + +#if defined _STDINT_BYTE_MODEL +#if _STDINT_LONG_MODEL+0 == 242 +/* 2:4:2 = IP16 = a normal 16-bit system */ +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned long uint32_t; +#ifndef __int8_t_defined +#define __int8_t_defined +typedef char int8_t; +typedef short int16_t; +typedef long int32_t; +#endif +#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL == 444 +/* 2:4:4 = LP32 = a 32-bit system derived from a 16-bit */ +/* 4:4:4 = ILP32 = a normal 32-bit system */ +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#ifndef __int8_t_defined +#define __int8_t_defined +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +#endif +#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488 +/* 4:8:4 = IP32 = a 32-bit system prepared for 64-bit */ +/* 4:8:8 = LP64 = a normal 64-bit system */ +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#ifndef __int8_t_defined +#define __int8_t_defined +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +#endif +/* this system has a "long" of 64bit */ +#ifndef _HAVE_UINT64_T +#define _HAVE_UINT64_T +typedef unsigned long uint64_t; +typedef long int64_t; +#endif +#elif _STDINT_LONG_MODEL+0 == 448 +/* LLP64 a 64-bit system derived from a 32-bit system */ +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#ifndef __int8_t_defined +#define __int8_t_defined +typedef char int8_t; +typedef short int16_t; +typedef int int32_t; +#endif +/* assuming the 
system has a "long long" */ +#ifndef _HAVE_UINT64_T +#define _HAVE_UINT64_T +#define _HAVE_LONGLONG_UINT64_T +typedef unsigned long long uint64_t; +typedef long long int64_t; +#endif +#else +#define _STDINT_NO_INT32_T +#endif +#else +#define _STDINT_NO_INT8_T +#define _STDINT_NO_INT32_T +#endif +#endif + +/* + * quote from SunOS-5.8 sys/inttypes.h: + * Use at your own risk. As of February 1996, the committee is squarely + * behind the fixed sized types; the "least" and "fast" types are still being + * discussed. The probability that the "fast" types may be removed before + * the standard is finalized is high enough that they are not currently + * implemented. + */ + +#if defined _STDINT_NEED_INT_LEAST_T +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +#ifdef _HAVE_UINT64_T +typedef int64_t int_least64_t; +#endif + +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +#ifdef _HAVE_UINT64_T +typedef uint64_t uint_least64_t; +#endif + /* least types */ +#endif + +#if defined _STDINT_NEED_INT_FAST_T +typedef int8_t int_fast8_t; +typedef int int_fast16_t; +typedef int32_t int_fast32_t; +#ifdef _HAVE_UINT64_T +typedef int64_t int_fast64_t; +#endif + +typedef uint8_t uint_fast8_t; +typedef unsigned uint_fast16_t; +typedef uint32_t uint_fast32_t; +#ifdef _HAVE_UINT64_T +typedef uint64_t uint_fast64_t; +#endif + /* fast types */ +#endif + +#ifdef _STDINT_NEED_INTMAX_T +#ifdef _HAVE_UINT64_T +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; +#else +typedef long intmax_t; +typedef unsigned long uintmax_t; +#endif +#endif + +#ifdef _STDINT_NEED_INTPTR_T +#ifndef __intptr_t_defined +#define __intptr_t_defined +/* we encourage using "long" to store pointer values, never use "int" ! 
*/ +#if _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484 +typedef unsigned int uintptr_t; +typedef int intptr_t; +#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444 +typedef unsigned long uintptr_t; +typedef long intptr_t; +#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T +typedef uint64_t uintptr_t; +typedef int64_t intptr_t; +#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */ +typedef unsigned long uintptr_t; +typedef long intptr_t; +#endif +#endif +#endif + +/* The ISO C99 standard specifies that in C++ implementations these + should only be defined if explicitly requested. */ +#if !defined __cplusplus || defined __STDC_CONSTANT_MACROS +#ifndef UINT32_C + +/* Signed. */ +# define INT8_C(c) c +# define INT16_C(c) c +# define INT32_C(c) c +# ifdef _HAVE_LONGLONG_UINT64_T +# define INT64_C(c) c ## L +# else +# define INT64_C(c) c ## LL +# endif + +/* Unsigned. */ +# define UINT8_C(c) c ## U +# define UINT16_C(c) c ## U +# define UINT32_C(c) c ## U +# ifdef _HAVE_LONGLONG_UINT64_T +# define UINT64_C(c) c ## UL +# else +# define UINT64_C(c) c ## ULL +# endif + +/* Maximal type. */ +# ifdef _HAVE_LONGLONG_UINT64_T +# define INTMAX_C(c) c ## L +# define UINTMAX_C(c) c ## UL +# else +# define INTMAX_C(c) c ## LL +# define UINTMAX_C(c) c ## ULL +# endif + + /* literalnumbers */ +#endif +#endif + +/* These limits are merily those of a two complement byte-oriented system */ + +/* Minimum of signed integral types. */ +# define INT8_MIN (-128) +# define INT16_MIN (-32767-1) +# define INT32_MIN (-2147483647-1) +# define INT64_MIN (-__INT64_C(9223372036854775807)-1) +/* Maximum of signed integral types. */ +# define INT8_MAX (127) +# define INT16_MAX (32767) +# define INT32_MAX (2147483647) +# define INT64_MAX (__INT64_C(9223372036854775807)) + +/* Maximum of unsigned integral types. 
*/ +# define UINT8_MAX (255) +# define UINT16_MAX (65535) +# define UINT32_MAX (4294967295U) +# define UINT64_MAX (__UINT64_C(18446744073709551615)) + +/* Minimum of signed integral types having a minimum size. */ +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# define INT_LEAST64_MIN INT64_MIN +/* Maximum of signed integral types having a minimum size. */ +# define INT_LEAST8_MAX INT8_MAX +# define INT_LEAST16_MAX INT16_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST64_MAX INT64_MAX + +/* Maximum of unsigned integral types having a minimum size. */ +# define UINT_LEAST8_MAX UINT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define UINT_LEAST64_MAX UINT64_MAX + + /* shortcircuit*/ +#endif + /* once */ +#endif +#endif +STDINT_EOF +fi + if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then + AC_MSG_NOTICE([$ac_stdint_h is unchanged]) + else + ac_dir=`AS_DIRNAME(["$ac_stdint_h"])` + AS_MKDIR_P(["$ac_dir"]) + rm -f $ac_stdint_h + mv $ac_stdint $ac_stdint_h + fi +],[# variables for create stdint.h replacement +PACKAGE="$PACKAGE" +VERSION="$VERSION" +ac_stdint_h="$ac_stdint_h" +_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h) +ac_cv_stdint_message="$ac_cv_stdint_message" +ac_cv_header_stdint_t="$ac_cv_header_stdint_t" +ac_cv_header_stdint_x="$ac_cv_header_stdint_x" +ac_cv_header_stdint_o="$ac_cv_header_stdint_o" +ac_cv_header_stdint_u="$ac_cv_header_stdint_u" +ac_cv_type_uint64_t="$ac_cv_type_uint64_t" +ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t" +ac_cv_char_data_model="$ac_cv_char_data_model" +ac_cv_long_data_model="$ac_cv_long_data_model" +ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t" +ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t" +ac_cv_type_intmax_t="$ac_cv_type_intmax_t" +]) +]) diff --git a/nestegg/m4/pkg.m4 b/nestegg/m4/pkg.m4 new file mode 100644 index 000000000..996e29454 --- /dev/null +++ b/nestegg/m4/pkg.m4 @@ -0,0 +1,157 @@ +# pkg.m4 - Macros 
to locate and utilise pkg-config. -*- Autoconf -*- +# +# Copyright © 2004 Scott James Remnant . +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# PKG_PROG_PKG_CONFIG([MIN-VERSION]) +# ---------------------------------- +AC_DEFUN([PKG_PROG_PKG_CONFIG], +[m4_pattern_forbid([^_?PKG_[A-Z_]+$]) +m4_pattern_allow([^PKG_CONFIG(_PATH)?$]) +AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl +if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then + AC_PATH_TOOL([PKG_CONFIG], [pkg-config]) +fi +if test -n "$PKG_CONFIG"; then + _pkg_min_version=m4_default([$1], [0.9.0]) + AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version]) + if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + PKG_CONFIG="" + fi + +fi[]dnl +])# PKG_PROG_PKG_CONFIG + +# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +# +# Check to see whether a particular set of modules exists. Similar +# to PKG_CHECK_MODULES(), but does not set variables or print errors. 
+# +# +# Similar to PKG_CHECK_MODULES, make sure that the first instance of +# this or PKG_CHECK_MODULES is called, or make sure to call +# PKG_CHECK_EXISTS manually +# -------------------------------------------------------------- +AC_DEFUN([PKG_CHECK_EXISTS], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +if test -n "$PKG_CONFIG" && \ + AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then + m4_ifval([$2], [$2], [:]) +m4_ifvaln([$3], [else + $3])dnl +fi]) + + +# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +# --------------------------------------------- +m4_define([_PKG_CONFIG], +[if test -n "$PKG_CONFIG"; then + if test -n "$$1"; then + pkg_cv_[]$1="$$1" + else + PKG_CHECK_EXISTS([$3], + [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`], + [pkg_failed=yes]) + fi +else + pkg_failed=untried +fi[]dnl +])# _PKG_CONFIG + +# _PKG_SHORT_ERRORS_SUPPORTED +# ----------------------------- +AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi[]dnl +])# _PKG_SHORT_ERRORS_SUPPORTED + + +# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +# [ACTION-IF-NOT-FOUND]) +# +# +# Note that if there is a possibility the first call to +# PKG_CHECK_MODULES might not happen, you should be sure to include an +# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac +# +# +# -------------------------------------------------------------- +AC_DEFUN([PKG_CHECK_MODULES], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl +AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl + +pkg_failed=no +AC_MSG_CHECKING([for $1]) + +_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) +_PKG_CONFIG([$1][_LIBS], [libs], [$2]) + +m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS +and $1[]_LIBS to avoid the need to call 
pkg-config. +See the pkg-config man page for more details.]) + +if test $pkg_failed = yes; then + _PKG_SHORT_ERRORS_SUPPORTED + if test $_pkg_short_errors_supported = yes; then + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"` + fi + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + + ifelse([$4], , [AC_MSG_ERROR(dnl +[Package requirements ($2) were not met: + +$$1_PKG_ERRORS + +Consider adjusting the PKG_CONFIG_PATH environment variable if you +installed software in a non-standard prefix. + +_PKG_TEXT +])], + [AC_MSG_RESULT([no]) + $4]) +elif test $pkg_failed = untried; then + ifelse([$4], , [AC_MSG_FAILURE(dnl +[The pkg-config script could not be found or is too old. Make sure it +is in your PATH or set the PKG_CONFIG environment variable to the full +path to pkg-config. + +_PKG_TEXT + +To get pkg-config, see .])], + [$4]) +else + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + AC_MSG_RESULT([yes]) + ifelse([$3], , :, [$3]) +fi[]dnl +])# PKG_CHECK_MODULES diff --git a/nestegg/nestegg-uninstalled.pc.in b/nestegg/nestegg-uninstalled.pc.in new file mode 100644 index 000000000..19bb680ac --- /dev/null +++ b/nestegg/nestegg-uninstalled.pc.in @@ -0,0 +1,13 @@ +# nestegg uninstalled pkg-config file + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: nestegg +Description: WebM/Matroska demuxer +Version: @VERSION@ +Conflicts: +Libs: -L${libdir} -lnestegg +Cflags: -I${includedir} diff --git a/nestegg/nestegg.pc.in b/nestegg/nestegg.pc.in new file mode 100644 index 000000000..32c09d79d --- /dev/null +++ b/nestegg/nestegg.pc.in @@ -0,0 +1,13 @@ +# nestegg installed pkg-config file + +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: nestegg +Description: WebM/Matroska demuxer +Version: @VERSION@ 
+Conflicts: +Libs: -L${libdir} -lnestegg +Cflags: -I${includedir} diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c new file mode 100644 index 000000000..63a0e83e5 --- /dev/null +++ b/nestegg/src/nestegg.c @@ -0,0 +1,1938 @@ +/* + * Copyright © 2010 Mozilla Foundation + * + * This program is made available under an ISC-style license. See the + * accompanying file LICENSE for details. + */ +#include +#include +#include + +#include "nestegg/halloc/halloc.h" +#include "nestegg/include/nestegg/nestegg.h" + +/* EBML Elements */ +#define ID_EBML 0x1a45dfa3 +#define ID_EBML_VERSION 0x4286 +#define ID_EBML_READ_VERSION 0x42f7 +#define ID_EBML_MAX_ID_LENGTH 0x42f2 +#define ID_EBML_MAX_SIZE_LENGTH 0x42f3 +#define ID_DOCTYPE 0x4282 +#define ID_DOCTYPE_VERSION 0x4287 +#define ID_DOCTYPE_READ_VERSION 0x4285 + +/* Global Elements */ +#define ID_VOID 0xec +#define ID_CRC32 0xbf + +/* WebMedia Elements */ +#define ID_SEGMENT 0x18538067 + +/* Seek Head Elements */ +#define ID_SEEK_HEAD 0x114d9b74 +#define ID_SEEK 0x4dbb +#define ID_SEEK_ID 0x53ab +#define ID_SEEK_POSITION 0x53ac + +/* Info Elements */ +#define ID_INFO 0x1549a966 +#define ID_TIMECODE_SCALE 0x2ad7b1 +#define ID_DURATION 0x4489 + +/* Cluster Elements */ +#define ID_CLUSTER 0x1f43b675 +#define ID_TIMECODE 0xe7 +#define ID_BLOCK_GROUP 0xa0 +#define ID_SIMPLE_BLOCK 0xa3 + +/* BlockGroup Elements */ +#define ID_BLOCK 0xa1 +#define ID_BLOCK_DURATION 0x9b +#define ID_REFERENCE_BLOCK 0xfb + +/* Tracks Elements */ +#define ID_TRACKS 0x1654ae6b +#define ID_TRACK_ENTRY 0xae +#define ID_TRACK_NUMBER 0xd7 +#define ID_TRACK_UID 0x73c5 +#define ID_TRACK_TYPE 0x83 +#define ID_FLAG_ENABLED 0xb9 +#define ID_FLAG_DEFAULT 0x88 +#define ID_FLAG_LACING 0x9c +#define ID_TRACK_TIMECODE_SCALE 0x23314f +#define ID_LANGUAGE 0x22b59c +#define ID_CODEC_ID 0x86 +#define ID_CODEC_PRIVATE 0x63a2 + +/* Video Elements */ +#define ID_VIDEO 0xe0 +#define ID_PIXEL_WIDTH 0xb0 +#define ID_PIXEL_HEIGHT 0xba +#define ID_PIXEL_CROP_BOTTOM 0x54aa 
+#define ID_PIXEL_CROP_TOP 0x54bb +#define ID_PIXEL_CROP_LEFT 0x54cc +#define ID_PIXEL_CROP_RIGHT 0x54dd +#define ID_DISPLAY_WIDTH 0x54b0 +#define ID_DISPLAY_HEIGHT 0x54ba + +/* Audio Elements */ +#define ID_AUDIO 0xe1 +#define ID_SAMPLING_FREQUENCY 0xb5 +#define ID_CHANNELS 0x9f +#define ID_BIT_DEPTH 0x6264 + +/* Cues Elements */ +#define ID_CUES 0x1c53bb6b +#define ID_CUE_POINT 0xbb +#define ID_CUE_TIME 0xb3 +#define ID_CUE_TRACK_POSITIONS 0xb7 +#define ID_CUE_TRACK 0xf7 +#define ID_CUE_CLUSTER_POSITION 0xf1 +#define ID_CUE_BLOCK_NUMBER 0x5378 + +/* EBML Types */ +enum ebml_type_enum { + TYPE_UNKNOWN, + TYPE_MASTER, + TYPE_UINT, + TYPE_FLOAT, + TYPE_INT, + TYPE_STRING, + TYPE_BINARY +}; + +#define LIMIT_STRING (1 << 20) +#define LIMIT_BINARY (1 << 24) +#define LIMIT_BLOCK (1 << 30) +#define LIMIT_FRAME (1 << 28) + +/* Field Flags */ +#define DESC_FLAG_NONE 0 +#define DESC_FLAG_MULTI (1 << 0) +#define DESC_FLAG_SUSPEND (1 << 1) +#define DESC_FLAG_OFFSET (1 << 2) + +/* Block Header Flags */ +#define BLOCK_FLAGS_LACING 6 + +/* Lacing Constants */ +#define LACING_NONE 0 +#define LACING_XIPH 1 +#define LACING_FIXED 2 +#define LACING_EBML 3 + +/* Track Types */ +#define TRACK_TYPE_VIDEO 1 +#define TRACK_TYPE_AUDIO 2 + +/* Track IDs */ +#define TRACK_ID_VP8 "V_VP8" +#define TRACK_ID_VORBIS "A_VORBIS" + +enum vint_mask { + MASK_NONE, + MASK_FIRST_BIT +}; + +struct ebml_binary { + unsigned char * data; + size_t length; +}; + +struct ebml_list_node { + struct ebml_list_node * next; + uint64_t id; + void * data; +}; + +struct ebml_list { + struct ebml_list_node * head; + struct ebml_list_node * tail; +}; + +struct ebml_type { + union ebml_value { + uint64_t u; + double f; + int64_t i; + char * s; + struct ebml_binary b; + } v; + enum ebml_type_enum type; + int read; +}; + +/* EBML Definitions */ +struct ebml { + struct ebml_type ebml_version; + struct ebml_type ebml_read_version; + struct ebml_type ebml_max_id_length; + struct ebml_type ebml_max_size_length; + struct 
ebml_type doctype; + struct ebml_type doctype_version; + struct ebml_type doctype_read_version; +}; + +/* Matroska Definitions */ +struct seek { + struct ebml_type id; + struct ebml_type position; +}; + +struct seek_head { + struct ebml_list seek; +}; + +struct info { + struct ebml_type timecode_scale; + struct ebml_type duration; +}; + +struct block_group { + struct ebml_type duration; + struct ebml_type reference_block; +}; + +struct cluster { + struct ebml_type timecode; + struct ebml_list block_group; +}; + +struct video { + struct ebml_type pixel_width; + struct ebml_type pixel_height; + struct ebml_type pixel_crop_bottom; + struct ebml_type pixel_crop_top; + struct ebml_type pixel_crop_left; + struct ebml_type pixel_crop_right; + struct ebml_type display_width; + struct ebml_type display_height; +}; + +struct audio { + struct ebml_type sampling_frequency; + struct ebml_type channels; + struct ebml_type bit_depth; +}; + +struct track_entry { + struct ebml_type number; + struct ebml_type uid; + struct ebml_type type; + struct ebml_type flag_enabled; + struct ebml_type flag_default; + struct ebml_type flag_lacing; + struct ebml_type track_timecode_scale; + struct ebml_type language; + struct ebml_type codec_id; + struct ebml_type codec_private; + struct video video; + struct audio audio; +}; + +struct tracks { + struct ebml_list track_entry; +}; + +struct cue_track_positions { + struct ebml_type track; + struct ebml_type cluster_position; + struct ebml_type block_number; +}; + +struct cue_point { + struct ebml_type time; + struct ebml_list cue_track_positions; +}; + +struct cues { + struct ebml_list cue_point; +}; + +struct segment { + struct ebml_list seek_head; + struct info info; + struct ebml_list cluster; + struct tracks tracks; + struct cues cues; +}; + +/* Misc.
*/ +struct pool_ctx { + char dummy; +}; + +struct list_node { + struct list_node * previous; + struct ebml_element_desc * node; + unsigned char * data; +}; + +struct saved_state { + int64_t stream_offset; + struct list_node * ancestor; + uint64_t last_id; + uint64_t last_size; +}; + +struct frame { + unsigned char * data; + size_t length; + struct frame * next; +}; + +/* Public (opaque) Structures */ +struct nestegg { + nestegg_io * io; + nestegg_log log; + struct pool_ctx * alloc_pool; + uint64_t last_id; + uint64_t last_size; + struct list_node * ancestor; + struct ebml ebml; + struct segment segment; + int64_t segment_offset; + unsigned int track_count; +}; + +struct nestegg_packet { + uint64_t track; + uint64_t timecode; + struct frame * frame; +}; + +/* Element Descriptor */ +struct ebml_element_desc { + char const * name; + uint64_t id; + enum ebml_type_enum type; + size_t offset; + unsigned int flags; + struct ebml_element_desc * children; + size_t size; + size_t data_offset; +}; + +#define E_FIELD(ID, TYPE, STRUCT, FIELD) \ + { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, NULL, 0, 0 } +#define E_MASTER(ID, TYPE, STRUCT, FIELD) \ + { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_MULTI, ne_ ## FIELD ## _elements, \ + sizeof(struct FIELD), 0 } +#define E_SINGLE_MASTER_O(ID, TYPE, STRUCT, FIELD) \ + { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_OFFSET, ne_ ## FIELD ## _elements, 0, \ + offsetof(STRUCT, FIELD ## _offset) } +#define E_SINGLE_MASTER(ID, TYPE, STRUCT, FIELD) \ + { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, ne_ ## FIELD ## _elements, 0, 0 } +#define E_SUSPEND(ID, TYPE) \ + { #ID, ID, TYPE, 0, DESC_FLAG_SUSPEND, NULL, 0, 0 } +#define E_LAST \ + { NULL, 0, 0, 0, DESC_FLAG_NONE, NULL, 0, 0 } + +/* EBML Element Lists */ +static struct ebml_element_desc ne_ebml_elements[] = { + E_FIELD(ID_EBML_VERSION, TYPE_UINT, struct ebml, ebml_version), + E_FIELD(ID_EBML_READ_VERSION, TYPE_UINT, struct ebml, ebml_read_version), + 
E_FIELD(ID_EBML_MAX_ID_LENGTH, TYPE_UINT, struct ebml, ebml_max_id_length), + E_FIELD(ID_EBML_MAX_SIZE_LENGTH, TYPE_UINT, struct ebml, ebml_max_size_length), + E_FIELD(ID_DOCTYPE, TYPE_STRING, struct ebml, doctype), + E_FIELD(ID_DOCTYPE_VERSION, TYPE_UINT, struct ebml, doctype_version), + E_FIELD(ID_DOCTYPE_READ_VERSION, TYPE_UINT, struct ebml, doctype_read_version), + E_LAST +}; + +/* WebMedia Element Lists */ +static struct ebml_element_desc ne_seek_elements[] = { + E_FIELD(ID_SEEK_ID, TYPE_BINARY, struct seek, id), + E_FIELD(ID_SEEK_POSITION, TYPE_UINT, struct seek, position), + E_LAST +}; + +static struct ebml_element_desc ne_seek_head_elements[] = { + E_MASTER(ID_SEEK, TYPE_MASTER, struct seek_head, seek), + E_LAST +}; + +static struct ebml_element_desc ne_info_elements[] = { + E_FIELD(ID_TIMECODE_SCALE, TYPE_UINT, struct info, timecode_scale), + E_FIELD(ID_DURATION, TYPE_FLOAT, struct info, duration), + E_LAST +}; + +static struct ebml_element_desc ne_block_group_elements[] = { + E_SUSPEND(ID_BLOCK, TYPE_BINARY), + E_FIELD(ID_BLOCK_DURATION, TYPE_UINT, struct block_group, duration), + E_FIELD(ID_REFERENCE_BLOCK, TYPE_INT, struct block_group, reference_block), + E_LAST +}; + +static struct ebml_element_desc ne_cluster_elements[] = { + E_FIELD(ID_TIMECODE, TYPE_UINT, struct cluster, timecode), + E_MASTER(ID_BLOCK_GROUP, TYPE_MASTER, struct cluster, block_group), + E_SUSPEND(ID_SIMPLE_BLOCK, TYPE_BINARY), + E_LAST +}; + +static struct ebml_element_desc ne_video_elements[] = { + E_FIELD(ID_PIXEL_WIDTH, TYPE_UINT, struct video, pixel_width), + E_FIELD(ID_PIXEL_HEIGHT, TYPE_UINT, struct video, pixel_height), + E_FIELD(ID_PIXEL_CROP_BOTTOM, TYPE_UINT, struct video, pixel_crop_bottom), + E_FIELD(ID_PIXEL_CROP_TOP, TYPE_UINT, struct video, pixel_crop_top), + E_FIELD(ID_PIXEL_CROP_LEFT, TYPE_UINT, struct video, pixel_crop_left), + E_FIELD(ID_PIXEL_CROP_RIGHT, TYPE_UINT, struct video, pixel_crop_right), + E_FIELD(ID_DISPLAY_WIDTH, TYPE_UINT, struct video, 
display_width), + E_FIELD(ID_DISPLAY_HEIGHT, TYPE_UINT, struct video, display_height), + E_LAST +}; + +static struct ebml_element_desc ne_audio_elements[] = { + E_FIELD(ID_SAMPLING_FREQUENCY, TYPE_FLOAT, struct audio, sampling_frequency), + E_FIELD(ID_CHANNELS, TYPE_UINT, struct audio, channels), + E_FIELD(ID_BIT_DEPTH, TYPE_UINT, struct audio, bit_depth), + E_LAST +}; + +static struct ebml_element_desc ne_track_entry_elements[] = { + E_FIELD(ID_TRACK_NUMBER, TYPE_UINT, struct track_entry, number), + E_FIELD(ID_TRACK_UID, TYPE_UINT, struct track_entry, uid), + E_FIELD(ID_TRACK_TYPE, TYPE_UINT, struct track_entry, type), + E_FIELD(ID_FLAG_ENABLED, TYPE_UINT, struct track_entry, flag_enabled), + E_FIELD(ID_FLAG_DEFAULT, TYPE_UINT, struct track_entry, flag_default), + E_FIELD(ID_FLAG_LACING, TYPE_UINT, struct track_entry, flag_lacing), + E_FIELD(ID_TRACK_TIMECODE_SCALE, TYPE_FLOAT, struct track_entry, track_timecode_scale), + E_FIELD(ID_LANGUAGE, TYPE_STRING, struct track_entry, language), + E_FIELD(ID_CODEC_ID, TYPE_STRING, struct track_entry, codec_id), + E_FIELD(ID_CODEC_PRIVATE, TYPE_BINARY, struct track_entry, codec_private), + E_SINGLE_MASTER(ID_VIDEO, TYPE_MASTER, struct track_entry, video), + E_SINGLE_MASTER(ID_AUDIO, TYPE_MASTER, struct track_entry, audio), + E_LAST +}; + +static struct ebml_element_desc ne_tracks_elements[] = { + E_MASTER(ID_TRACK_ENTRY, TYPE_MASTER, struct tracks, track_entry), + E_LAST +}; + +static struct ebml_element_desc ne_cue_track_positions_elements[] = { + E_FIELD(ID_CUE_TRACK, TYPE_UINT, struct cue_track_positions, track), + E_FIELD(ID_CUE_CLUSTER_POSITION, TYPE_UINT, struct cue_track_positions, cluster_position), + E_FIELD(ID_CUE_BLOCK_NUMBER, TYPE_UINT, struct cue_track_positions, block_number), + E_LAST +}; + +static struct ebml_element_desc ne_cue_point_elements[] = { + E_FIELD(ID_CUE_TIME, TYPE_UINT, struct cue_point, time), + E_MASTER(ID_CUE_TRACK_POSITIONS, TYPE_MASTER, struct cue_point, cue_track_positions), + E_LAST +}; + 
/* Allocate `size` zero-filled bytes from the C heap.
   Never returns NULL: the process is aborted if calloc() yields no memory. */
static void *
ne_alloc(size_t size)
{
  void * block = calloc(1, size);

  if (block == NULL)
    abort();

  return block;
}
length : sizeof(buf); + r = ne_io_read(io, buf, get); + if (r != 1) + break; + length -= get; + } + + return r; +} + +static int64_t +ne_io_tell(nestegg_io * io) +{ + return io->tell(io->userdata); +} + +static int +ne_bare_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length, enum vint_mask maskflag) +{ + int r; + unsigned char b; + size_t maxlen = 8; + unsigned int count = 1, mask = 1 << 7; + + r = ne_io_read(io, &b, 1); + if (r != 1) + return r; + + while (count < maxlen) { + if ((b & mask) != 0) + break; + mask >>= 1; + count += 1; + } + + if (length) + *length = count; + *value = b; + + if (maskflag == MASK_FIRST_BIT) + *value = b & ~mask; + + while (--count) { + r = ne_io_read(io, &b, 1); + if (r != 1) + return r; + *value <<= 8; + *value |= b; + } + + return 1; +} + +static int +ne_read_id(nestegg_io * io, uint64_t * value, uint64_t * length) +{ + return ne_bare_read_vint(io, value, length, MASK_NONE); +} + +static int +ne_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length) +{ + return ne_bare_read_vint(io, value, length, MASK_FIRST_BIT); +} + +static int +ne_read_svint(nestegg_io * io, int64_t * value, uint64_t * length) +{ + int r; + uint64_t uvalue; + uint64_t ulength; + int64_t svint_subtr[] = { + 0x3f, 0x1fff, + 0xfffff, 0x7ffffff, + 0x3ffffffffLL, 0x1ffffffffffLL, + 0xffffffffffffLL, 0x7fffffffffffffLL + }; + + r = ne_bare_read_vint(io, &uvalue, &ulength, MASK_FIRST_BIT); + if (r != 1) + return r; + *value = uvalue - svint_subtr[ulength - 1]; + if (length) + *length = ulength; + return r; +} + +static int +ne_read_uint(nestegg_io * io, uint64_t * val, uint64_t length) +{ + unsigned char b; + int r; + + if (length == 0 || length > 8) + return -1; + r = ne_io_read(io, &b, 1); + if (r != 1) + return r; + *val = b; + while (--length) { + r = ne_io_read(io, &b, 1); + if (r != 1) + return r; + *val <<= 8; + *val |= b; + } + return 1; +} + +static int +ne_read_int(nestegg_io * io, int64_t * val, uint64_t length) +{ + int r; + uint64_t 
uval, base; + + r = ne_read_uint(io, &uval, length); + if (r != 1) + return r; + + if (length < sizeof(int64_t)) { + base = 1; + base <<= length * 8 - 1; + if (uval >= base) { + base = 1; + base <<= length * 8; + } else { + base = 0; + } + *val = uval - base; + } else { + *val = (int64_t) uval; + } + + return 1; +} + +static int +ne_read_float(nestegg_io * io, double * val, uint64_t length) +{ + union { + uint64_t u; + float f; + double d; + } value; + int r; + + /* length == 10 not implemented */ + if (length != 4 && length != 8) + return -1; + r = ne_read_uint(io, &value.u, length); + if (r != 1) + return r; + if (length == 4) + *val = value.f; + else + *val = value.d; + return 1; +} + +static int +ne_read_string(nestegg * ctx, char ** val, uint64_t length) +{ + char * str; + int r; + + if (length == 0 || length > LIMIT_STRING) + return -1; + str = ne_pool_alloc(length + 1, ctx->alloc_pool); + r = ne_io_read(ctx->io, (unsigned char *) str, length); + if (r != 1) + return r; + str[length] = '\0'; + *val = str; + return 1; +} + +static int +ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length) +{ + if (length == 0 || length > LIMIT_BINARY) + return -1; + val->data = ne_pool_alloc(length, ctx->alloc_pool); + val->length = length; + return ne_io_read(ctx->io, val->data, length); +} + +static int +ne_get_uint(struct ebml_type type, uint64_t * value) +{ + if (!type.read) + return -1; + + assert(type.type == TYPE_UINT); + + *value = type.v.u; + + return 0; +} + +static int +ne_get_float(struct ebml_type type, double * value) +{ + if (!type.read) + return -1; + + assert(type.type == TYPE_FLOAT); + + *value = type.v.f; + + return 0; +} + +static int +ne_get_string(struct ebml_type type, char ** value) +{ + if (!type.read) + return -1; + + assert(type.type == TYPE_STRING); + + *value = type.v.s; + + return 0; +} + +static int +ne_get_binary(struct ebml_type type, struct ebml_binary * value) +{ + if (!type.read) + return -1; + + assert(type.type == 
TYPE_BINARY); + + *value = type.v.b; + + return 0; +} + +static int +ne_is_ancestor_element(uint64_t id, struct list_node * ancestor) +{ + struct ebml_element_desc * element; + + for (; ancestor; ancestor = ancestor->previous) + for (element = ancestor->node; element->id; ++element) + if (element->id == id) + return 1; + + return 0; +} + +static struct ebml_element_desc * +ne_find_element(uint64_t id, struct ebml_element_desc * elements) +{ + struct ebml_element_desc * element; + + for (element = elements; element->id; ++element) + if (element->id == id) + return element; + + return NULL; +} + +static void +ne_ctx_push(nestegg * ctx, struct ebml_element_desc * ancestor, void * data) +{ + struct list_node * item; + + item = ne_alloc(sizeof(*item)); + item->previous = ctx->ancestor; + item->node = ancestor; + item->data = data; + ctx->ancestor = item; +} + +static void +ne_ctx_pop(nestegg * ctx) +{ + struct list_node * item; + + item = ctx->ancestor; + ctx->ancestor = item->previous; + free(item); +} + +static int +ne_ctx_save(nestegg * ctx, struct saved_state * s) +{ + s->stream_offset = ne_io_tell(ctx->io); + if (s->stream_offset < 0) + return -1; + s->ancestor = ctx->ancestor; + s->last_id = ctx->last_id; + s->last_size = ctx->last_size; + return 0; +} + +static int +ne_ctx_restore(nestegg * ctx, struct saved_state * s) +{ + int r; + + r = ne_io_seek(ctx->io, s->stream_offset, NESTEGG_SEEK_SET); + if (r != 0) + return -1; + ctx->ancestor = s->ancestor; + ctx->last_id = s->last_id; + ctx->last_size = s->last_size; + return 0; +} + +static int +ne_peek_element(nestegg * ctx, uint64_t * id, uint64_t * size) +{ + int r; + + if (ctx->last_id && ctx->last_size) { + if (id) + *id = ctx->last_id; + if (size) + *size = ctx->last_size; + return 1; + } + + r = ne_read_id(ctx->io, &ctx->last_id, NULL); + if (r != 1) + return r; + + r = ne_read_vint(ctx->io, &ctx->last_size, NULL); + if (r != 1) + return r; + + if (id) + *id = ctx->last_id; + if (size) + *size = 
ctx->last_size; + + return 1; +} + +static int +ne_read_element(nestegg * ctx, uint64_t * id, uint64_t * size) +{ + int r; + + r = ne_peek_element(ctx, id, size); + if (r != 1) + return r; + + ctx->last_id = 0; + ctx->last_size = 0; + + return 1; +} + +static void +ne_read_master(nestegg * ctx, struct ebml_element_desc * desc) +{ + struct ebml_list * list; + struct ebml_list_node * node, * oldtail; + + assert(desc->type == TYPE_MASTER && desc->flags & DESC_FLAG_MULTI); + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "multi master element %llx (%s)", + desc->id, desc->name); + + list = (struct ebml_list *) (ctx->ancestor->data + desc->offset); + + node = ne_pool_alloc(sizeof(*node), ctx->alloc_pool); + node->id = desc->id; + node->data = ne_pool_alloc(desc->size, ctx->alloc_pool); + + oldtail = list->tail; + if (oldtail) + oldtail->next = node; + list->tail = node; + if (!list->head) + list->head = node; + + ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p", node->data); + + ne_ctx_push(ctx, desc->children, node->data); +} + +static void +ne_read_single_master(nestegg * ctx, struct ebml_element_desc * desc) +{ + assert(desc->type == TYPE_MASTER && !(desc->flags & DESC_FLAG_MULTI)); + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "single master element %llx (%s)", + desc->id, desc->name); + ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p (%u)", + ctx->ancestor->data + desc->offset, desc->offset); + + ne_ctx_push(ctx, desc->children, ctx->ancestor->data + desc->offset); +} + +static int +ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length) +{ + struct ebml_type * storage; + int r; + + storage = (struct ebml_type *) (ctx->ancestor->data + desc->offset); + + if (storage->read) { + ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) already read, skipping", + desc->id, desc->name); + return 0; + } + + storage->type = desc->type; + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) -> %p (%u)", + desc->id, desc->name, storage, desc->offset); + + r = -1; + + 
switch (desc->type) { + case TYPE_UINT: + r = ne_read_uint(ctx->io, &storage->v.u, length); + break; + case TYPE_FLOAT: + r = ne_read_float(ctx->io, &storage->v.f, length); + break; + case TYPE_INT: + r = ne_read_int(ctx->io, &storage->v.i, length); + break; + case TYPE_STRING: + r = ne_read_string(ctx, &storage->v.s, length); + break; + case TYPE_BINARY: + r = ne_read_binary(ctx, &storage->v.b, length); + break; + case TYPE_MASTER: + case TYPE_UNKNOWN: + assert(0); + break; + } + + if (r == 1) + storage->read = 1; + + return r; +} + +static int +ne_parse(nestegg * ctx, struct ebml_element_desc * top_level) +{ + int r; + int64_t * data_offset; + uint64_t id, size; + struct ebml_element_desc * element; + + /* loop until we need to return: + - hit suspend point + - parse complete + - error occurred */ + + /* loop over elements at current level reading them if sublevel found, + push ctx onto stack and continue if sublevel ended, pop ctx off stack + and continue */ + + if (!ctx->ancestor) + return -1; + + for (;;) { + r = ne_peek_element(ctx, &id, &size); + if (r != 1) + break; + + element = ne_find_element(id, ctx->ancestor->node); + if (element) { + if (element->flags & DESC_FLAG_SUSPEND) { + assert(element->type == TYPE_BINARY); + ctx->log(ctx, NESTEGG_LOG_DEBUG, "suspend parse at %llx", id); + r = 1; + break; + } + + r = ne_read_element(ctx, &id, &size); + if (r != 1) + break; + + if (element->flags & DESC_FLAG_OFFSET) { + data_offset = (int64_t *) (ctx->ancestor->data + element->data_offset); + *data_offset = ne_io_tell(ctx->io); + if (*data_offset < 0) { + r = -1; + break; + } + } + + if (element->type == TYPE_MASTER) { + if (element->flags & DESC_FLAG_MULTI) + ne_read_master(ctx, element); + else + ne_read_single_master(ctx, element); + continue; + } else { + r = ne_read_simple(ctx, element, size); + if (r < 0) + break; + } + } else if (ne_is_ancestor_element(id, ctx->ancestor->previous)) { + ctx->log(ctx, NESTEGG_LOG_DEBUG, "parent element %llx", id); + if 
/* Decode one Xiph-style lace size from the buffer at `*np`.

   Octets are summed until one different from 255 has been consumed; `*np`
   is advanced past every octet read.  No bound is applied, so the caller
   must guarantee the buffer contains a terminating octet below 255.

   Returns the accumulated size. */
static uint64_t
ne_xiph_lace_value(unsigned char ** np)
{
  unsigned char * cursor = *np;
  uint64_t total = 0;
  unsigned char octet;

  do {
    octet = *cursor++;
    total += octet;
  } while (octet == 255);

  *np = cursor;

  return total;
}
- 1] + slace; + sum += sizes[i]; + i += 1; + } + + if (*read + sum > block) + return -1; + + /* last frame is the remainder of the block */ + sizes[i] = block - *read - sum; + return 1; +} + +static uint64_t +ne_get_timecode_scale(nestegg * ctx) +{ + uint64_t scale; + + if (ne_get_uint(ctx->segment.info.timecode_scale, &scale) != 0) + scale = 1000000; + + return scale; +} + +static struct track_entry * +ne_find_track_entry(nestegg * ctx, unsigned int track) +{ + struct ebml_list_node * node; + unsigned int tracks = 0; + + node = ctx->segment.tracks.track_entry.head; + while (node) { + assert(node->id == ID_TRACK_ENTRY); + if (track == tracks) + return node->data; + tracks += 1; + node = node->next; + } + + return NULL; +} + +static int +ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_packet ** data) +{ + int r; + int64_t timecode, abs_timecode; + nestegg_packet * pkt; + struct cluster * cluster; + struct frame * f, * last; + struct track_entry * entry; + double track_scale; + uint64_t track, length, frame_sizes[256], cluster_tc, flags, frames, tc_scale, total; + unsigned int i, lacing; + size_t consumed = 0; + + *data = NULL; + + if (block_size > LIMIT_BLOCK) + return -1; + + r = ne_read_vint(ctx->io, &track, &length); + if (r != 1) + return r; + + if (track == 0 || track > ctx->track_count) + return -1; + + consumed += length; + + r = ne_read_int(ctx->io, &timecode, 2); + if (r != 1) + return r; + + consumed += 2; + + r = ne_read_uint(ctx->io, &flags, 1); + if (r != 1) + return r; + + consumed += 1; + + frames = 0; + + /* flags are different between block and simpleblock, but lacing is + encoded the same way */ + lacing = (flags & BLOCK_FLAGS_LACING) >> 1; + + switch (lacing) { + case LACING_NONE: + frames = 1; + break; + case LACING_XIPH: + case LACING_FIXED: + case LACING_EBML: + r = ne_read_uint(ctx->io, &frames, 1); + if (r != 1) + return r; + consumed += 1; + frames += 1; + } + + if (frames > 256) + return -1; + + switch (lacing) 
{ + case LACING_NONE: + frame_sizes[0] = block_size - consumed; + break; + case LACING_XIPH: + if (frames == 1) + return -1; + r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + if (r != 1) + return r; + break; + case LACING_FIXED: + if ((block_size - consumed) % frames) + return -1; + for (i = 0; i < frames; ++i) + frame_sizes[i] = (block_size - consumed) / frames; + break; + case LACING_EBML: + if (frames == 1) + return -1; + r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes); + if (r != 1) + return r; + break; + } + + /* sanity check unlaced frame sizes against total block size. */ + total = consumed; + for (i = 0; i < frames; ++i) + total += frame_sizes[i]; + if (total > block_size) + return -1; + + entry = ne_find_track_entry(ctx, track - 1); + if (!entry) + return -1; + + track_scale = 1.0; + + tc_scale = ne_get_timecode_scale(ctx); + + assert(ctx->segment.cluster.tail->id == ID_CLUSTER); + cluster = ctx->segment.cluster.tail->data; + if (ne_get_uint(cluster->timecode, &cluster_tc) != 0) + return -1; + + abs_timecode = timecode + cluster_tc; + if (abs_timecode < 0) + return -1; + + pkt = ne_alloc(sizeof(*pkt)); + pkt->track = track - 1; + pkt->timecode = abs_timecode * tc_scale * track_scale; + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu", + block_id == ID_BLOCK ? 
"" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames); + + last = NULL; + for (i = 0; i < frames; ++i) { + if (frame_sizes[i] > LIMIT_FRAME) { + nestegg_free_packet(pkt); + return -1; + } + f = ne_alloc(sizeof(*f)); + f->data = ne_alloc(frame_sizes[i]); + f->length = frame_sizes[i]; + r = ne_io_read(ctx->io, f->data, frame_sizes[i]); + if (r != 1) { + free(f->data); + free(f); + nestegg_free_packet(pkt); + return -1; + } + + if (!last) + pkt->frame = f; + else + last->next = f; + last = f; + } + + *data = pkt; + + return 1; +} + +static uint64_t +ne_buf_read_id(unsigned char const * p, size_t length) +{ + uint64_t id = 0; + + while (length--) { + id <<= 8; + id |= *p++; + } + + return id; +} + +static struct seek * +ne_find_seek_for_id(struct ebml_list_node * seek_head, uint64_t id) +{ + struct ebml_list * head; + struct ebml_list_node * seek; + struct ebml_binary binary_id; + struct seek * s; + + while (seek_head) { + assert(seek_head->id == ID_SEEK_HEAD); + head = seek_head->data; + seek = head->head; + + while (seek) { + assert(seek->id == ID_SEEK); + s = seek->data; + + if (ne_get_binary(s->id, &binary_id) == 0 && + ne_buf_read_id(binary_id.data, binary_id.length) == id) + return s; + + seek = seek->next; + } + + seek_head = seek_head->next; + } + + return NULL; +} + +static struct cue_point * +ne_find_cue_point_for_tstamp(struct ebml_list_node * cue_point, uint64_t scale, uint64_t tstamp) +{ + uint64_t time; + struct cue_point * c, * prev = NULL; + + while (cue_point) { + assert(cue_point->id == ID_CUE_POINT); + c = cue_point->data; + + if (!prev) + prev = c; + + if (ne_get_uint(c->time, &time) == 0 && time * scale > tstamp) + break; + + prev = cue_point->data; + cue_point = cue_point->next; + } + + return prev; +} + +static int +ne_is_suspend_element(uint64_t id) +{ + /* this could search the tree of elements for DESC_FLAG_SUSPEND */ + if (id == ID_SIMPLE_BLOCK || id == ID_BLOCK) + return 1; + return 0; +} + +static void +ne_null_log_callback(nestegg 
* ctx, unsigned int severity, char const * fmt, ...) +{ + if (ctx && severity && fmt) + return; +} + +int +nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback) +{ + int r; + uint64_t id, version, docversion; + struct ebml_list_node * track; + char * doctype; + nestegg * ctx = NULL; + + if (!(io.read && io.seek && io.tell)) + return -1; + + ctx = ne_alloc(sizeof(*ctx)); + + ctx->io = ne_alloc(sizeof(*ctx->io)); + *ctx->io = io; + ctx->log = callback; + ctx->alloc_pool = ne_pool_init(); + + if (!ctx->log) + ctx->log = ne_null_log_callback; + + r = ne_peek_element(ctx, &id, NULL); + if (r != 1) { + nestegg_destroy(ctx); + return -1; + } + + if (id != ID_EBML) { + nestegg_destroy(ctx); + return -1; + } + + ctx->log(ctx, NESTEGG_LOG_DEBUG, "ctx %p", ctx); + + ne_ctx_push(ctx, ne_top_level_elements, ctx); + + r = ne_parse(ctx, NULL); + + if (r != 1) { + nestegg_destroy(ctx); + return -1; + } + + if (ne_get_uint(ctx->ebml.ebml_read_version, &version) != 0) + version = 1; + if (version != 1) { + nestegg_destroy(ctx); + return -1; + } + + if (ne_get_string(ctx->ebml.doctype, &doctype) != 0) + doctype = "matroska"; + if (strcmp(doctype, "webm") != 0) { + nestegg_destroy(ctx); + return -1; + } + + if (ne_get_uint(ctx->ebml.doctype_read_version, &docversion) != 0) + docversion = 1; + if (docversion < 1 || docversion > 2) { + nestegg_destroy(ctx); + return -1; + } + + if (!ctx->segment.tracks.track_entry.head) { + nestegg_destroy(ctx); + return -1; + } + + track = ctx->segment.tracks.track_entry.head; + ctx->track_count = 0; + + while (track) { + ctx->track_count += 1; + track = track->next; + } + + *context = ctx; + + return 0; +} + +void +nestegg_destroy(nestegg * ctx) +{ + while (ctx->ancestor) + ne_ctx_pop(ctx); + ne_pool_destroy(ctx->alloc_pool); + free(ctx->io); + free(ctx); +} + +int +nestegg_duration(nestegg * ctx, uint64_t * duration) +{ + uint64_t tc_scale; + double unscaled_duration; + + if (ne_get_float(ctx->segment.info.duration, 
&unscaled_duration) != 0) + return -1; + + tc_scale = ne_get_timecode_scale(ctx); + + *duration = (uint64_t) (unscaled_duration * tc_scale); + return 0; +} + +int +nestegg_tstamp_scale(nestegg * ctx, uint64_t * scale) +{ + *scale = ne_get_timecode_scale(ctx); + return 0; +} + +int +nestegg_track_count(nestegg * ctx, unsigned int * tracks) +{ + *tracks = ctx->track_count; + return 0; +} + +int +nestegg_track_seek(nestegg * ctx, unsigned int track, uint64_t tstamp) +{ + int r; + struct cue_point * cue_point; + struct cue_track_positions * pos; + struct saved_state state; + struct seek * found; + uint64_t seek_pos, tc_scale, t, id; + struct ebml_list_node * node = ctx->segment.cues.cue_point.head; + + /* If there are no cues loaded, check for cues element in the seek head + and load it. */ + if (!node) { + found = ne_find_seek_for_id(ctx->segment.seek_head.head, ID_CUES); + if (!found) + return -1; + + if (ne_get_uint(found->position, &seek_pos) != 0) + return -1; + + /* Save old parser state. */ + r = ne_ctx_save(ctx, &state); + if (r != 0) + return -1; + + /* Seek and set up parser state for segment-level element (Cues). */ + r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET); + if (r != 0) + return -1; + ctx->last_id = 0; + ctx->last_size = 0; + + r = ne_read_element(ctx, &id, NULL); + if (r != 1) + return -1; + + if (id != ID_CUES) + return -1; + + ctx->ancestor = NULL; + ne_ctx_push(ctx, ne_top_level_elements, ctx); + ne_ctx_push(ctx, ne_segment_elements, &ctx->segment); + ne_ctx_push(ctx, ne_cues_elements, &ctx->segment.cues); + /* parser will run until end of cues element. */ + ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cue elements"); + r = ne_parse(ctx, ne_cues_elements); + while (ctx->ancestor) + ne_ctx_pop(ctx); + + /* Reset parser state to original state and seek back to old position. 
*/ + if (ne_ctx_restore(ctx, &state) != 0) + return -1; + + if (r < 0) + return -1; + } + + tc_scale = ne_get_timecode_scale(ctx); + + cue_point = ne_find_cue_point_for_tstamp(ctx->segment.cues.cue_point.head, tc_scale, tstamp); + if (!cue_point) + return -1; + + node = cue_point->cue_track_positions.head; + + seek_pos = 0; + + while (node) { + assert(node->id == ID_CUE_TRACK_POSITIONS); + pos = node->data; + if (ne_get_uint(pos->track, &t) == 0 && t - 1 == track) { + if (ne_get_uint(pos->cluster_position, &seek_pos) != 0) + return -1; + break; + } + node = node->next; + } + + /* Seek and set up parser state for segment-level element (Cluster). */ + r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET); + if (r != 0) + return -1; + ctx->last_id = 0; + ctx->last_size = 0; + + while (ctx->ancestor) + ne_ctx_pop(ctx); + + ne_ctx_push(ctx, ne_top_level_elements, ctx); + ne_ctx_push(ctx, ne_segment_elements, &ctx->segment); + ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cluster elements"); + r = ne_parse(ctx, NULL); + if (r != 1) + return -1; + + if (!ne_is_suspend_element(ctx->last_id)) + return -1; + + return 0; +} + +int +nestegg_track_type(nestegg * ctx, unsigned int track) +{ + struct track_entry * entry; + uint64_t type; + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (ne_get_uint(entry->type, &type) != 0) + return -1; + + if (type & TRACK_TYPE_VIDEO) + return NESTEGG_TRACK_VIDEO; + + if (type & TRACK_TYPE_AUDIO) + return NESTEGG_TRACK_AUDIO; + + return -1; +} + +int +nestegg_track_codec_id(nestegg * ctx, unsigned int track) +{ + char * codec_id; + struct track_entry * entry; + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (ne_get_string(entry->codec_id, &codec_id) != 0) + return -1; + + if (strcmp(codec_id, TRACK_ID_VP8) == 0) + return NESTEGG_CODEC_VP8; + + if (strcmp(codec_id, TRACK_ID_VORBIS) == 0) + return NESTEGG_CODEC_VORBIS; + + return -1; +} + +int 
+nestegg_track_codec_data_count(nestegg * ctx, unsigned int track, + unsigned int * count) +{ + struct track_entry * entry; + struct ebml_binary codec_private; + unsigned char * p; + + *count = 0; + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS) + return -1; + + if (ne_get_binary(entry->codec_private, &codec_private) != 0) + return -1; + + if (codec_private.length < 1) + return -1; + + p = codec_private.data; + *count = *p + 1; + + if (*count > 3) + return -1; + + return 0; +} + +int +nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item, + unsigned char ** data, size_t * length) +{ + struct track_entry * entry; + struct ebml_binary codec_private; + uint64_t sizes[3], total; + unsigned char * p; + unsigned int count, i; + + *data = NULL; + *length = 0; + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS) + return -1; + + if (ne_get_binary(entry->codec_private, &codec_private) != 0) + return -1; + + p = codec_private.data; + count = *p++ + 1; + + if (count > 3) + return -1; + + i = 0; + total = 0; + while (--count) { + sizes[i] = ne_xiph_lace_value(&p); + total += sizes[i]; + i += 1; + } + sizes[i] = codec_private.length - total - (p - codec_private.data); + + for (i = 0; i < item; ++i) { + if (sizes[i] > LIMIT_FRAME) + return -1; + p += sizes[i]; + } + *data = p; + *length = sizes[item]; + + return 0; +} + +int +nestegg_track_video_params(nestegg * ctx, unsigned int track, + nestegg_video_params * params) +{ + struct track_entry * entry; + uint64_t value; + + memset(params, 0, sizeof(*params)); + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_VIDEO) + return -1; + + if (ne_get_uint(entry->video.pixel_width, &value) != 0) + return -1; + params->width = value; + + if 
(ne_get_uint(entry->video.pixel_height, &value) != 0) + return -1; + params->height = value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_bottom, &value); + params->crop_bottom = value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_top, &value); + params->crop_top = value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_left, &value); + params->crop_left = value; + + value = 0; + ne_get_uint(entry->video.pixel_crop_right, &value); + params->crop_right = value; + + value = params->width; + ne_get_uint(entry->video.display_width, &value); + params->display_width = value; + + value = params->height; + ne_get_uint(entry->video.display_height, &value); + params->display_height = value; + + return 0; +} + +int +nestegg_track_audio_params(nestegg * ctx, unsigned int track, + nestegg_audio_params * params) +{ + struct track_entry * entry; + uint64_t value; + + memset(params, 0, sizeof(*params)); + + entry = ne_find_track_entry(ctx, track); + if (!entry) + return -1; + + if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_AUDIO) + return -1; + + params->rate = 8000; + ne_get_float(entry->audio.sampling_frequency, &params->rate); + + value = 1; + ne_get_uint(entry->audio.channels, &value); + params->channels = value; + + value = 16; + ne_get_uint(entry->audio.bit_depth, &value); + params->depth = value; + + return 0; +} + +int +nestegg_read_packet(nestegg * ctx, nestegg_packet ** pkt) +{ + int r; + uint64_t id, size; + + *pkt = NULL; + + for (;;) { + r = ne_peek_element(ctx, &id, &size); + if (r != 1) + return r; + + /* any suspend fields must be handled here */ + if (ne_is_suspend_element(id)) { + r = ne_read_element(ctx, &id, &size); + if (r != 1) + return r; + + /* the only suspend fields are blocks and simple blocks, which we + handle directly.
*/ + r = ne_read_block(ctx, id, size, pkt); + return r; + } + + r = ne_parse(ctx, NULL); + if (r != 1) + return r; + } + + return 1; +} + +void +nestegg_free_packet(nestegg_packet * pkt) +{ + struct frame * frame; + + while (pkt->frame) { + frame = pkt->frame; + pkt->frame = frame->next; + free(frame->data); + free(frame); + } + + free(pkt); +} + +int +nestegg_packet_track(nestegg_packet * pkt, unsigned int * track) +{ + *track = pkt->track; + return 0; +} + +int +nestegg_packet_tstamp(nestegg_packet * pkt, uint64_t * tstamp) +{ + *tstamp = pkt->timecode; + return 0; +} + +int +nestegg_packet_count(nestegg_packet * pkt, unsigned int * count) +{ + struct frame * f = pkt->frame; + + *count = 0; + + while (f) { + *count += 1; + f = f->next; + } + + return 0; +} + +int +nestegg_packet_data(nestegg_packet * pkt, unsigned int item, + unsigned char ** data, size_t * length) +{ + struct frame * f = pkt->frame; + unsigned int count = 0; + + *data = NULL; + *length = 0; + + while (f) { + if (count == item) { + *data = f->data; + *length = f->length; + return 0; + } + count += 1; + f = f->next; + } + + return -1; +} diff --git a/nestegg/test/test.c b/nestegg/test/test.c new file mode 100644 index 000000000..210b640c7 --- /dev/null +++ b/nestegg/test/test.c @@ -0,0 +1,248 @@ +/* + * Copyright © 2010 Mozilla Foundation + * + * This program is made available under an ISC-style license. See the + * accompanying file LICENSE for details. + */ +#include +#include +#include +#include +#include +#include "nestegg/nestegg.h" + +#undef DEBUG +#define SEEK_TEST + +static int +stdio_read(void * p, size_t length, void * fp) +{ + size_t r; + + r = fread(p, length, 1, fp); + if (r == 0 && feof(fp)) + return 0; + return r == 0 ? 
-1 : 1; +} + +static int +stdio_seek(int64_t offset, int whence, void * fp) +{ + return fseek(fp, offset, whence); +} + +static int64_t +stdio_tell(void * fp) +{ + return ftell(fp); +} + +static void +log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...) +{ + va_list ap; + char const * sev = NULL; + +#ifndef DEBUG + if (severity < NESTEGG_LOG_WARNING) + return; +#endif + + switch (severity) { + case NESTEGG_LOG_DEBUG: + sev = "debug: "; + break; + case NESTEGG_LOG_WARNING: + sev = "warning: "; + break; + case NESTEGG_LOG_CRITICAL: + sev = "critical:"; + break; + default: + sev = "unknown: "; + } + + fprintf(stderr, "%p %s ", (void *) ctx, sev); + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + + fprintf(stderr, "\n"); +} + +int +main(int argc, char * argv[]) +{ + FILE * fp; + int r, type; + nestegg * ctx; + nestegg_audio_params aparams; + nestegg_packet * pkt; + nestegg_video_params vparams; + size_t length, size; + uint64_t duration, tstamp, pkt_tstamp; + unsigned char * codec_data, * ptr; + unsigned int cnt, i, j, track, tracks, pkt_cnt, pkt_track; + unsigned int data_items = 0; + nestegg_io io = { + stdio_read, + stdio_seek, + stdio_tell, + NULL + }; + + if (argc != 2) + return EXIT_FAILURE; + + fp = fopen(argv[1], "rb"); + if (!fp) + return EXIT_FAILURE; + + io.userdata = fp; + + ctx = NULL; + r = nestegg_init(&ctx, io, log_callback); + if (r != 0) + return EXIT_FAILURE; + + nestegg_track_count(ctx, &tracks); + nestegg_duration(ctx, &duration); +#ifdef DEBUG + fprintf(stderr, "media has %u tracks and duration %fs\n", tracks, duration / 1e9); +#endif + + for (i = 0; i < tracks; ++i) { + type = nestegg_track_type(ctx, i); +#ifdef DEBUG + fprintf(stderr, "track %u: type: %d codec: %d", i, + type, nestegg_track_codec_id(ctx, i)); +#endif + nestegg_track_codec_data_count(ctx, i, &data_items); + for (j = 0; j < data_items; ++j) { + nestegg_track_codec_data(ctx, i, j, &codec_data, &length); +#ifdef DEBUG + fprintf(stderr, " (%p, 
%u)", codec_data, (unsigned int) length); +#endif + } + if (type == NESTEGG_TRACK_VIDEO) { + nestegg_track_video_params(ctx, i, &vparams); +#ifdef DEBUG + fprintf(stderr, " video: %ux%u (d: %ux%u %ux%ux%ux%u)", + vparams.width, vparams.height, + vparams.display_width, vparams.display_height, + vparams.crop_top, vparams.crop_left, vparams.crop_bottom, vparams.crop_right); +#endif + } else if (type == NESTEGG_TRACK_AUDIO) { + nestegg_track_audio_params(ctx, i, &aparams); +#ifdef DEBUG + fprintf(stderr, " audio: %.2fhz %u bit %u channels", + aparams.rate, aparams.depth, aparams.channels); +#endif + } +#ifdef DEBUG + fprintf(stderr, "\n"); +#endif + } + +#ifdef SEEK_TEST +#ifdef DEBUG + fprintf(stderr, "seek to middle\n"); +#endif + r = nestegg_track_seek(ctx, 0, duration / 2); + if (r == 0) { +#ifdef DEBUG + fprintf(stderr, "middle "); +#endif + r = nestegg_read_packet(ctx, &pkt); + if (r == 1) { + nestegg_packet_track(pkt, &track); + nestegg_packet_count(pkt, &cnt); + nestegg_packet_tstamp(pkt, &tstamp); +#ifdef DEBUG + fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt); +#endif + nestegg_free_packet(pkt); + } else { +#ifdef DEBUG + fprintf(stderr, "middle seek failed\n"); +#endif + } + } + +#ifdef DEBUG + fprintf(stderr, "seek to ~end\n"); +#endif + r = nestegg_track_seek(ctx, 0, duration - (duration / 10)); + if (r == 0) { +#ifdef DEBUG + fprintf(stderr, "end "); +#endif + r = nestegg_read_packet(ctx, &pkt); + if (r == 1) { + nestegg_packet_track(pkt, &track); + nestegg_packet_count(pkt, &cnt); + nestegg_packet_tstamp(pkt, &tstamp); +#ifdef DEBUG + fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt); +#endif + nestegg_free_packet(pkt); + } else { +#ifdef DEBUG + fprintf(stderr, "end seek failed\n"); +#endif + } + } + +#ifdef DEBUG + fprintf(stderr, "seek to ~start\n"); +#endif + r = nestegg_track_seek(ctx, 0, duration / 10); + if (r == 0) { +#ifdef DEBUG + fprintf(stderr, "start "); +#endif + r = nestegg_read_packet(ctx, 
&pkt); + if (r == 1) { + nestegg_packet_track(pkt, &track); + nestegg_packet_count(pkt, &cnt); + nestegg_packet_tstamp(pkt, &tstamp); +#ifdef DEBUG + fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt); +#endif + nestegg_free_packet(pkt); + } else { +#ifdef DEBUG + fprintf(stderr, "start seek failed\n"); +#endif + } + } +#endif + + while (nestegg_read_packet(ctx, &pkt) > 0) { + nestegg_packet_track(pkt, &pkt_track); + nestegg_packet_count(pkt, &pkt_cnt); + nestegg_packet_tstamp(pkt, &pkt_tstamp); + +#ifdef DEBUG + fprintf(stderr, "t %u pts %f frames %u: ", pkt_track, pkt_tstamp / 1e9, pkt_cnt); +#endif + + for (i = 0; i < pkt_cnt; ++i) { + nestegg_packet_data(pkt, i, &ptr, &size); +#ifdef DEBUG + fprintf(stderr, "%u ", (unsigned int) size); +#endif + } +#ifdef DEBUG + fprintf(stderr, "\n"); +#endif + + nestegg_free_packet(pkt); + } + + nestegg_destroy(ctx); + fclose(fp); + + return EXIT_SUCCESS; +} diff --git a/release.sh b/release.sh deleted file mode 100755 index 3b77dad72..000000000 --- a/release.sh +++ /dev/null @@ -1,209 +0,0 @@ -#!/bin/bash -## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. 
-## - - - -self=$0 - -for opt; do - case $opt in - --clean) clean=yes;; - -j*) jopt=$opt;; - *) echo "Unsupported option $opt"; exit 1;; - esac -done - -TAB=$'\t' -cat > release.mk << EOF -%\$(BUILD_SFX).tar.bz2: %/.done -${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball" -${TAB}@cd \$(dir \$<); tar -cf - \$(subst .tar.bz2,,\$@) | bzip2 > ../\$@ - -%\$(BUILD_SFX).zip: %/.done -${TAB}@echo "\$(subst .zip,,\$@): zip" -${TAB}@rm -f \$@; cd \$(dir \$<); zip -rq ../\$@ \$(subst .zip,,\$@) - -logs/%\$(BUILD_SFX).log.bz2: %/.done -${TAB}@echo "\$(subst .log.bz2,,\$(notdir \$@)): tarlog" -${TAB}@mkdir -p logs -${TAB}@cat \$< | bzip2 > \$@ - -%/.done: -${TAB}@mkdir -p \$(dir \$@) -${TAB}@echo "\$(dir \$@): configure \$(CONFIG_OPTS) \$(EXTRA_PATH)" -${TAB}@cd \$(dir \$@); export PATH=\$\$PATH\$(EXTRA_PATH); ../\$(SRC_ROOT)/configure \$(CONFIG_OPTS) >makelog.txt 2>&1 -${TAB}@echo "\$(dir \$@): make" -${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1 -${TAB}@echo "\$(dir \$@): test install" -${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install >>makelog.txt 2>&1 -${TAB}@cd \$(dir \$@)/dist/build; PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1 -${TAB}@echo "\$(dir \$@): install" -${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install DIST_DIR=\$(TGT) >>makelog.txt 2>&1 -${TAB}@touch \$@ - -#include release-deps.mk -EOF - -#[ -f release-deps.mk ] || \ -# find ${self%/*} -name .git -prune -o -type f -print0 \ -# | xargs -0 -n1 echo \ -# | sed -e 's; ;\\ ;g' | awk '{print "$(TGT)/.done: "$0}' > release-deps.mk - -build_config_list() { - for codec in $CODEC_LIST; do - for arch in $ARCH_LIST; do - if [ -n "$OS_LIST" ]; then - for os in $OS_LIST; do - CONFIGS="$CONFIGS vpx-${codec}-${arch}-${os}" - done - else - CONFIGS="$CONFIGS vpx-${codec}-${arch}" - fi - done - done -} - -CODEC_LIST="vp8 vp8cx vp8dx" -case `uname` in - Linux*) - ARCH_LIST="x86 x86_64" - OS_LIST="linux" - build_config_list - ARCH_LIST="armv5te armv6 
armv7" - OS_LIST="linux-gcc" - - ;; - CYGWIN*) - TAR_SFX=.zip - for vs in vs7 vs8; do - for arch in x86-win32 x86_64-win64; do - for msvcrt in md mt; do - case $vs,$arch in - vs7,x86_64-win64) continue ;; - esac - ARCH_LIST="$ARCH_LIST ${arch}${msvcrt}-${vs}" - done - done - done - ;; - Darwin*) - ARCH_LIST="universal" - OS_LIST="darwin8 darwin9" - ;; - sun_os*) - ARCH_LIST="x86 x86_64" - OS_LIST="solaris" - ;; -esac -build_config_list - -TAR_SFX=${TAR_SFX:-.tar.bz2} -ARM_TOOLCHAIN=/usr/local/google/csl-2009q3-67 -for cfg in $CONFIGS; do - full_cfg=$cfg - cfg=${cfg#vpx-} - opts= - rm -f makelog.txt - - case $cfg in - src-*) opts="$opts --enable-codec-srcs" - cfg=${cfg#src-} - ;; - eval-*) opts="$opts --enable-eval-limit" - cfg=${cfg#src-} - ;; - esac - - case $cfg in - # - # Linux - # - *x86-linux) - opts="$opts --target=x86-linux-gcc" ;; - *x86_64-linux) - opts="$opts --target=x86_64-linux-gcc" ;; - *arm*-linux-gcc) - armv=${cfg##*armv} - armv=${armv%%-*} - opts="$opts --target=armv${armv}-linux-gcc" ;; - *arm*-linux-rvct) - armv=${cfg##*armv} - armv=${armv%%-*} - opts="$opts --target=armv${armv}-linux-rvct" - opts="$opts --libc=${ARM_TOOLCHAIN}/arm-none-linux-gnueabi/libc" ;; - - - # - # Windows - # - # need --enable-debug-libs for now until we're smarter about - # building the debug/release from the customer installed - # environment - *-x86-win32*-vs*) - opts="$opts --target=x86-win32-vs${cfg##*-vs} --enable-debug-libs";; - *-x86_64-win64*-vs8) - opts="$opts --target=x86_64-win64-vs8 --enable-debug-libs" ;; - - # - # Darwin - # - *-universal-darwin*) - opts="$opts --target=universal-darwin${cfg##*-darwin}-gcc" ;; - - # - # Solaris - # - *x86-solaris) - opts="$opts --target=x86-solaris-gcc" ;; - *x86_64-solaris) - opts="$opts --target=x86_64-solaris-gcc" ;; - esac - - case $cfg in - *x86-linux | *x86-solaris) opts="$opts --enable-pic" ;; - esac - - case $cfg in - *-win[36][24]mt*) opts="$opts --enable-static-msvcrt" ;; - *-win[36][24]md*) opts="$opts 
--disable-static-msvcrt" ;; - esac - - opts="$opts --disable-codecs" - case $cfg in - vp8*) opts="$opts --enable-vp8" ;; - esac - case $cfg in - *cx-*) opts="${opts}-encoder" ;; - *dx-*) opts="${opts}-decoder" ;; - esac - opts="$opts --enable-postproc" - - [ "x${clean}" == "xyes" ] \ - && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \ - && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2 - - TGT=${full_cfg}${BUILD_SFX} - BUILD_TARGETS="logs/${TGT}.log.bz2 ${TGT}${TAR_SFX}" - echo "${BUILD_TARGETS}: CONFIG_OPTS=$opts" >>release.mk - echo "${BUILD_TARGETS}: TGT=${TGT}" >>release.mk - case $cfg in - *-arm*-linux-*) - echo "${BUILD_TARGETS}: EXTRA_PATH=:${ARM_TOOLCHAIN}/bin/" >>release.mk ;; - *-vs7) - echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ .NET\ 2003/Common7/IDE" >>release.mk ;; - *-vs8) - echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ 8/Common7/IDE" >>release.mk ;; - esac - MAKE_TGTS="$MAKE_TGTS ${TGT}${TAR_SFX} logs/${TGT}.log.bz2" -done - - -${MAKE:-make} ${jopt:--j3} -f release.mk \ - SRC_ROOT=${self%/*} BUILD_SFX=${BUILD_SFX} ${MAKE_TGTS} diff --git a/solution.mk b/solution.mk index 783c6f805..6d2c08d06 100644 --- a/solution.mk +++ b/solution.mk @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. 
All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## @@ -21,7 +22,7 @@ else vpx.sln: $(wildcard *.vcproj) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ - $(if $(filter %vpx.vcproj,$^),--dep=ivfdec:vpx) \ + $(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \ $(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \ --ver=$(CONFIG_VS_VERSION)\ --target=$(TOOLCHAIN)\ diff --git a/tools/gen_authors.sh b/tools/gen_authors.sh new file mode 100755 index 000000000..e1246f08a --- /dev/null +++ b/tools/gen_authors.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# Add organization names manually. + +cat <<EOF +# This file is automatically generated from the git commit history +# by tools/gen_authors.sh. + +$(git log --pretty=format:"%aN <%aE>" | sort | uniq) +Google Inc. +The Mozilla Foundation +The Xiph.Org Foundation +EOF diff --git a/tools_common.c b/tools_common.c new file mode 100644 index 000000000..d188bbe20 --- /dev/null +++ b/tools_common.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <stdio.h> +#include "tools_common.h" +#ifdef _WIN32 +#include <io.h> +#include <fcntl.h> +#endif + +FILE* set_binary_mode(FILE *stream) +{ + (void)stream; +#ifdef _WIN32 + _setmode(_fileno(stream), _O_BINARY); +#endif + return stream; +} diff --git a/tools_common.h b/tools_common.h new file mode 100644 index 000000000..80c974732 --- /dev/null +++ b/tools_common.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef TOOLS_COMMON_H +#define TOOLS_COMMON_H + +/* Sets a stdio stream into binary mode */ +FILE* set_binary_mode(FILE *stream); + +#endif diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c index 12d83aa1b..5ab8e29ab 100644 --- a/vp8/common/alloccommon.c +++ b/vp8/common/alloccommon.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -23,46 +24,39 @@ extern void vp8_init_scan_order_mask(); void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols) { int i; - vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1); + vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1)); for (i = 0; i < rows; i++) { vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO)); } } + void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) { + int i; + + for (i = 0; i < NUM_YV12_BUFFERS; i++) + vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); + vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); - vp8_yv12_de_alloc_frame_buffer(&oci->new_frame); - vp8_yv12_de_alloc_frame_buffer(&oci->last_frame); - vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame); - vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame); vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); - vpx_free(oci->above_context[Y1CONTEXT]); - vpx_free(oci->above_context[UCONTEXT]); - vpx_free(oci->above_context[VCONTEXT]); - vpx_free(oci->above_context[Y2CONTEXT]); + vpx_free(oci->above_context); vpx_free(oci->mip); - oci->above_context[Y1CONTEXT] = 0; - oci->above_context[UCONTEXT] = 0; - oci->above_context[VCONTEXT] = 0; - oci->above_context[Y2CONTEXT] = 0; + oci->above_context = 0; oci->mip = 0; - // Structure used to minitor GF useage - if (oci->gf_active_flags != 0) - vpx_free(oci->gf_active_flags); - - oci->gf_active_flags = 0; } int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) { + int i; + vp8_de_alloc_frame_buffers(oci); - // our internal buffers are always multiples of 16 + /* our internal buffers are always multiples of 16 */ if ((width & 0xf) != 0) width += 16 - (width & 0xf); @@ -70,37 +64,33 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) height += 16 - (height & 0xf); + for (i = 0; i < NUM_YV12_BUFFERS; i++) + { + oci->fb_idx_ref_cnt[0] = 0; + + if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) + { + vp8_de_alloc_frame_buffers(oci); + return 
ALLOC_FAILURE; + } + } + + oci->new_fb_idx = 0; + oci->lst_fb_idx = 1; + oci->gld_fb_idx = 2; + oci->alt_fb_idx = 3; + + oci->fb_idx_ref_cnt[0] = 1; + oci->fb_idx_ref_cnt[1] = 1; + oci->fb_idx_ref_cnt[2] = 1; + oci->fb_idx_ref_cnt[3] = 1; + if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); return ALLOC_FAILURE; } - - if (vp8_yv12_alloc_frame_buffer(&oci->new_frame, width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - if (vp8_yv12_alloc_frame_buffer(&oci->last_frame, width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) { vp8_de_alloc_frame_buffers(oci); @@ -122,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) oci->mi = oci->mip + oci->mode_info_stride + 1; - oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1); + oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); - if (!oci->above_context[Y1CONTEXT]) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - oci->above_context[UCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1); - - if (!oci->above_context[UCONTEXT]) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - oci->above_context[VCONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1); - - if (!oci->above_context[VCONTEXT]) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - 
oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols , 1); - - if (!oci->above_context[Y2CONTEXT]) + if (!oci->above_context) { vp8_de_alloc_frame_buffers(oci); return ALLOC_FAILURE; @@ -156,20 +122,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols); - // Structures used to minitor GF usage - if (oci->gf_active_flags != 0) - vpx_free(oci->gf_active_flags); - - oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1); - - if (!oci->gf_active_flags) - { - vp8_de_alloc_frame_buffers(oci); - return ALLOC_FAILURE; - } - - oci->gf_active_count = oci->mb_rows * oci->mb_cols; - return 0; } void vp8_setup_version(VP8_COMMON *cm) @@ -227,10 +179,10 @@ void vp8_create_common(VP8_COMMON *oci) oci->clr_type = REG_YUV; oci->clamp_type = RECON_CLAMP_REQUIRED; - // Initialise reference frame sign bias structure to defaults + /* Initialise reference frame sign bias structure to defaults */ vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); - // Default disable buffer to buffer copying + /* Default disable buffer to buffer copying */ oci->copy_buffer_to_gf = 0; oci->copy_buffer_to_arf = 0; } diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h index 73c7383c7..ea93c2522 100644 --- a/vp8/common/alloccommon.h +++ b/vp8/common/alloccommon.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c new file mode 100644 index 000000000..83921f807 --- /dev/null +++ b/vp8/common/arm/arm_systemdependent.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "g_common.h" +#include "pragmas.h" +#include "subpixel.h" +#include "loopfilter.h" +#include "recon.h" +#include "idct.h" +#include "onyxc_int.h" + +extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x); + +extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x); +extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x); + +void vp8_arch_arm_common_init(VP8_COMMON *ctx) /* Installs ARM-optimised routines: fills ctx->rtcd when CONFIG_RUNTIME_CPU_DETECT is set, then selects the intra-predictor function pointers. */ +{ +#if CONFIG_RUNTIME_CPU_DETECT + VP8_COMMON_RTCD *rtcd = &ctx->rtcd; + int flags = arm_cpu_caps(); + int has_edsp = flags & HAS_EDSP; /* NOTE(review): has_edsp is not read anywhere below */ + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + rtcd->flags = flags; + + /* Override default functions with fastest ones for this CPU. The HAVE_ARMV6 block runs before the HAVE_ARMV7 block, so when both has_media and has_neon are set the NEON assignments are the ones left in place.
*/ +#if HAVE_ARMV6 + if (has_media) + { + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6; + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6; + rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6; + rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6; + + rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6; + rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual; + rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_v6; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_v6; + + rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6; + rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6; + rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6; + rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6; + rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6; + rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6; + + rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6; + rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6; + rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6; + rtcd->recon.recon = vp8_recon_b_armv6; + rtcd->recon.recon2 = vp8_recon2b_armv6; + rtcd->recon.recon4 = vp8_recon4b_armv6; + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon; + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon; + rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon; + rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon; + + rtcd->idct.idct1 =
vp8_short_idct4x4llm_1_neon; + rtcd->idct.idct16 = vp8_short_idct4x4llm_neon; + rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon; + rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon; + + rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon; + rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon; + rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon; + rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon; + rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon; + rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon; + rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon; + rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_neon; + + rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon; + rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon; + rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon; + rtcd->recon.recon = vp8_recon_b_neon; + rtcd->recon.recon2 = vp8_recon2b_neon; + rtcd->recon.recon4 = vp8_recon4b_neon; + rtcd->recon.recon_mb = vp8_recon_mb_neon; + + } +#endif + +#endif /* CONFIG_RUNTIME_CPU_DETECT */ + +#if HAVE_ARMV6 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_media) +#endif /* when the test is compiled out, the block below runs unconditionally */ + { + vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; + vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; + } +#endif + +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_neon) +#endif /* when the test is compiled out, the block below runs unconditionally */ + { + vp8_build_intra_predictors_mby_ptr = + vp8_build_intra_predictors_mby_neon; + vp8_build_intra_predictors_mby_s_ptr = + vp8_build_intra_predictors_mby_s_neon; + } +#endif +} diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm index 4428cf8ff..09d7338d9 100644 --- a/vp8/common/arm/armv6/bilinearfilter_v6.asm +++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm index 00e97397c..fca91a0db 100644 --- a/vp8/common/arm/armv6/copymem16x16_v6.asm +++ b/vp8/common/arm/armv6/copymem16x16_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm index 94473ca65..d8362ef05 100644 --- a/vp8/common/arm/armv6/copymem8x4_v6.asm +++ b/vp8/common/arm/armv6/copymem8x4_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm index 7cfa53389..c6a60c610 100644 --- a/vp8/common/arm/armv6/copymem8x8_v6.asm +++ b/vp8/common/arm/armv6/copymem8x8_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm new file mode 100644 index 000000000..e0660e9fd --- /dev/null +++ b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm @@ -0,0 +1,67 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + EXPORT |vp8_dc_only_idct_add_v6| + + AREA |.text|, CODE, READONLY + +;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr, +; unsigned char *dst_ptr, int pitch, int stride) +; r0 input_dc +; r1 pred_ptr +; r2 dest_ptr +; r3 pitch +; sp stride + +|vp8_dc_only_idct_add_v6| PROC + stmdb sp!, {r4 - r7, lr} ; 5 registers saved, so the stack argument is at sp+20 + + add r0, r0, #4 ; input_dc += 4 + ldr r12, c0x0000FFFF ; mask that keeps the low halfword + ldr r4, [r1], r3 ; 4 pred bytes, then pred_ptr += pitch + ldr r6, [r1], r3 ; next 4 pred bytes, pred_ptr += pitch + and r0, r12, r0, asr #3 ; input_dc >> 3 + mask + ldr lr, [sp, #20] ; stride + orr r0, r0, r0, lsl #16 ; a1 | a1 + + uxtab16 r5, r0, r4 ; a1+2 | a1+0 + uxtab16 r4, r0, r4, ror #8 ; a1+3 | a1+1 + uxtab16 r7, r0, r6 ; same two steps for the second word + uxtab16 r6, r0, r6, ror #8 + usat16 r5, #8, r5 ; saturate each halfword to 0..255 + usat16 r4, #8, r4 + usat16 r7, #8, r7 + usat16 r6, #8, r6 + orr r5, r5, r4, lsl #8 ; interleave back into 4 bytes + orr r7, r7, r6, lsl #8 + ldr r4, [r1], r3 ; third pred word, pred_ptr += pitch + ldr r6, [r1] ; fourth pred word + str r5, [r2], lr ; store, then dst_ptr += stride + str r7, [r2], lr + + uxtab16 r5, r0, r4 ; repeat add/saturate/pack for words three and four + uxtab16 r4, r0, r4, ror #8 + uxtab16 r7, r0, r6 + uxtab16 r6, r0, r6, ror #8 + usat16 r5, #8, r5 + usat16 r4, #8, r4 + usat16 r7, #8, r7 + usat16 r6, #8, r6 + orr r5, r5, r4, lsl #8 + orr r7, r7, r6, lsl #8 + str r5, [r2], lr + str r7, [r2] ; last store, no post-increment + + ldmia sp!, {r4 - r7, pc} ; restore registers and return (saved lr goes to pc) + + ENDP ; |vp8_dc_only_idct_add_v6| + +; Constant Pool +c0x0000FFFF DCD 0x0000FFFF + END diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm index a7863fc94..03b5bccd7 100644 --- a/vp8/common/arm/armv6/filter_v6.asm +++ b/vp8/common/arm/armv6/filter_v6.asm @@ -1,15 +1,17 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vp8_filter_block2d_first_pass_armv6| EXPORT |vp8_filter_block2d_second_pass_armv6| + EXPORT |vp8_filter4_block2d_second_pass_armv6| EXPORT |vp8_filter_block2d_first_pass_only_armv6| EXPORT |vp8_filter_block2d_second_pass_only_armv6| @@ -191,6 +193,64 @@ ENDP +;--------------------------------- +; r0 short *src_ptr, +; r1 unsigned char *output_ptr, +; r2 unsigned int output_pitch, +; r3 unsigned int cnt, +; stack const short *vp8_filter +;--------------------------------- +|vp8_filter4_block2d_second_pass_armv6| PROC + stmdb sp!, {r4 - r11, lr} + + ldr r11, [sp, #36] ; vp8_filter address + mov r7, r3, lsl #16 ; height is top part of counter + + ldr r4, [r11] ; load up packed filter coefficients + add lr, r1, r3 ; save final destination pointer + ldr r5, [r11, #4] + ldr r6, [r11, #8] + + pkhbt r12, r5, r4 ; pack the filter differently + pkhbt r11, r6, r5 + mov r4, #0x40 ; rounding factor (for smlad{x}) + +|height_loop_2nd_4| + ldrd r8, [r0, #-4] ; load the data + orr r7, r7, r3, lsr #1 ; loop counter + +|width_loop_2nd_4| + ldr r10, [r0, #4]! 
+ smladx r6, r9, r12, r4 ; apply filter + pkhbt r8, r9, r8 + smlad r5, r8, r12, r4 + pkhbt r8, r10, r9 + smladx r6, r10, r11, r6 + sub r7, r7, #1 + smlad r5, r8, r11, r5 + + mov r8, r9 ; shift the data for the next loop + mov r9, r10 + + usat r6, #8, r6, asr #7 ; shift and clamp + usat r5, #8, r5, asr #7 + + strb r5, [r1], r2 ; the result is transposed back and stored + tst r7, #0xff + strb r6, [r1], r2 + + bne width_loop_2nd_4 + + subs r7, r7, #0x10000 + add r0, r0, #16 ; update src for next loop + sub r1, lr, r7, lsr #16 ; update dst for next loop + + bne height_loop_2nd_4 + + ldmia sp!, {r4 - r11, pc} + + ENDP + ;------------------------------------ ; r0 unsigned char *src_ptr ; r1 unsigned char *output_ptr, diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm index 25c5165ec..27215afcd 100644 --- a/vp8/common/arm/armv6/idct_v6.asm +++ b/vp8/common/arm/armv6/idct_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -14,8 +15,6 @@ EXPORT |vp8_short_idct4x4llm_v6_scott| EXPORT |vp8_short_idct4x4llm_v6_dual| - EXPORT |vp8_dc_only_idct_armv6| - AREA |.text|, CODE, READONLY ;******************************************************************************** @@ -343,34 +342,4 @@ loop2_dual ldmia sp!, {r4 - r11, pc} ; replace vars, return restore ENDP - -; sjl added 10/17/08 -;void dc_only_idct_armv6(short input_dc, short *output, int pitch) -|vp8_dc_only_idct_armv6| PROC - stmdb sp!, {r4 - r6, lr} - - add r0, r0, #0x4 - add r4, r1, r2 ; output + shortpitch - mov r0, r0, ASR #0x3 ;aka a1 - add r5, r1, r2, LSL #1 ; output + shortpitch * 2 - pkhbt r0, r0, r0, lsl #16 ; a1 | a1 - add r6, r5, r2 ; output + shortpitch * 3 - - str r0, [r1, #0] - str r0, [r1, #4] - - str r0, [r4, #0] - str r0, [r4, #4] - - str r0, [r5, #0] - str r0, [r5, #4] - - str r0, [r6, #0] - str r0, [r6, #4] - - - ldmia sp!, {r4 - r6, pc} - - ENDP ; |vp8_dc_only_idct_armv6| - END diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm index 87475681f..463bff0f5 100644 --- a/vp8/common/arm/armv6/iwalsh_v6.asm +++ b/vp8/common/arm/armv6/iwalsh_v6.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; - EXPORT |vp8_short_inv_walsh4x4_armv6| - EXPORT |vp8_short_inv_walsh4x4_1_armv6| + EXPORT |vp8_short_inv_walsh4x4_v6| + EXPORT |vp8_short_inv_walsh4x4_1_v6| ARM REQUIRE8 @@ -16,8 +17,8 @@ AREA |.text|, CODE, READONLY ; name this block of code -;short vp8_short_inv_walsh4x4_armv6(short *input, short *output) -|vp8_short_inv_walsh4x4_armv6| PROC +;short vp8_short_inv_walsh4x4_v6(short *input, short *output) +|vp8_short_inv_walsh4x4_v6| PROC stmdb sp!, {r4 - r11, lr} @@ -122,11 +123,11 @@ str r5, [r1] ldmia sp!, {r4 - r11, pc} - ENDP ; |vp8_short_inv_walsh4x4_armv6| + ENDP ; |vp8_short_inv_walsh4x4_v6| -;short vp8_short_inv_walsh4x4_1_armv6(short *input, short *output) -|vp8_short_inv_walsh4x4_1_armv6| PROC +;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output) +|vp8_short_inv_walsh4x4_1_v6| PROC ldrsh r2, [r0] ; [0] add r2, r2, #3 ; [0] + 3 @@ -144,7 +145,7 @@ str r2, [r1] bx lr - ENDP ; |vp8_short_inv_walsh4x4_1_armv6| + ENDP ; |vp8_short_inv_walsh4x4_1_v6| ; Constant Pool c0x00030003 DCD 0x00030003 diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm index c2b02dc0a..b6417dee6 100644 --- a/vp8/common/arm/armv6/loopfilter_v6.asm +++ b/vp8/common/arm/armv6/loopfilter_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm index 085ff80c9..99c7bcf2d 100644 --- a/vp8/common/arm/armv6/recon_v6.asm +++ b/vp8/common/arm/armv6/recon_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm index 15c6c7d16..013712036 100644 --- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm +++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -54,113 +55,87 @@ pstep RN r1 ;stack const char *thresh, ;stack int count -;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. +; All 16 elements in flimit are equal. So, in the code, only one load is needed +; for flimit. Same applies to limit. thresh is not used in simple loopfilter ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- |vp8_loop_filter_simple_horizontal_edge_armv6| PROC ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- stmdb sp!, {r4 - r11, lr} - sub src, src, pstep, lsl #1 ; move src pointer down by 2 lines - - ldr r12, [r3], #4 ; limit - ldr r3, [src], pstep ; p1 - - ldr r9, [sp, #36] ; count for 8-in-parallel - ldr r4, [src], pstep ; p0 - - ldr r7, [r2], #4 ; flimit - ldr r5, [src], pstep ; q0 + ldr r12, [r3] ; limit + ldr r3, [src, -pstep, lsl #1] ; p1 + ldr r4, [src, -pstep] ; p0 + ldr r5, [src] ; q0 + ldr r6, [src, pstep] ; q1 + ldr r7, [r2] ; flimit ldr r2, c0x80808080 - - ldr r6, [src] ; q1 - + ldr r9, [sp, #40] ; count for 8-in-parallel uadd8 r7, r7, r7 ; flimit * 2 - mov r9, r9, lsl #1 ; 4-in-parallel + mov r9, r9, lsl #1 ; double the count. we're doing 4 at a time uadd8 r12, r7, r12 ; flimit * 2 + limit + mov lr, #0 ; need 0 in a couple places |simple_hnext8| - ; vp8_simple_filter_mask() function + ; vp8_simple_filter_mask() uqsub8 r7, r3, r6 ; p1 - q1 uqsub8 r8, r6, r3 ; q1 - p1 uqsub8 r10, r4, r5 ; p0 - q0 uqsub8 r11, r5, r4 ; q0 - p0 orr r8, r8, r7 ; abs(p1 - q1) - ldr lr, c0x7F7F7F7F ; 01111111 mask orr r10, r10, r11 ; abs(p0 - q0) - and r8, lr, r8, lsr #1 ; abs(p1 - q1) / 2 uqadd8 r10, r10, r10 ; abs(p0 - q0) * 2 - mvn lr, #0 ; r10 == -1 + uhadd8 r8, r8, lr ; abs(p1 - q1) >> 1 uqadd8 r10, r10, r8 ; abs(p0 - q0)*2 + abs(p1 - q1)/2 - ; STALL waiting on r10 :( - uqsub8 r10, r10, r12 ; compare to flimit - mov r8, #0 - - usub8 r10, r8, r10 ; use usub8 instead of ssub8 - ; STALL (maybe?)
when are flags set? :/ - sel r10, lr, r8 ; filter mask: lr - + mvn r8, #0 + usub8 r10, r12, r10 ; compare to flimit. usub8 sets GE flags + sel r10, r8, lr ; filter mask: F or 0 cmp r10, #0 - beq simple_hskip_filter ; skip filtering + beq simple_hskip_filter ; skip filtering if all masks are 0x00 - ;vp8_simple_filter() function + ;vp8_simple_filter() eor r3, r3, r2 ; p1 offset to convert to a signed value eor r6, r6, r2 ; q1 offset to convert to a signed value eor r4, r4, r2 ; p0 offset to convert to a signed value eor r5, r5, r2 ; q0 offset to convert to a signed value - qsub8 r3, r3, r6 ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1) - qsub8 r6, r5, r4 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0)) - - qadd8 r3, r3, r6 - ldr r8, c0x03030303 ; r8 = 3 - - qadd8 r3, r3, r6 + qsub8 r3, r3, r6 ; vp8_filter = p1 - q1 + qsub8 r6, r5, r4 ; q0 - p0 + qadd8 r3, r3, r6 ; += q0 - p0 ldr r7, c0x04040404 + qadd8 r3, r3, r6 ; += q0 - p0 + ldr r8, c0x03030303 + qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0)) + ;STALL + and r3, r3, r10 ; vp8_filter &= mask - qadd8 r3, r3, r6 - and r3, r3, lr ; vp8_filter &= mask; + qadd8 r7 , r3 , r7 ; Filter1 = vp8_filter + 4 + qadd8 r8 , r3 , r8 ; Filter2 = vp8_filter + 3 - ;save bottom 3 bits so that we round one side +4 and the other +3 - qadd8 r8 , r3 , r8 ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3) - qadd8 r3 , r3 , r7 ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4) + shadd8 r7 , r7 , lr + shadd8 r8 , r8 , lr + shadd8 r7 , r7 , lr + shadd8 r8 , r8 , lr + shadd8 r7 , r7 , lr ; Filter1 >>= 3 + shadd8 r8 , r8 , lr ; Filter2 >>= 3 - mov r7, #0 - shadd8 r8 , r8 , r7 ; Filter2 >>= 3 - shadd8 r3 , r3 , r7 ; Filter1 >>= 3 - shadd8 r8 , r8 , r7 - shadd8 r3 , r3 , r7 - shadd8 r8 , r8 , r7 ; r8: Filter2 - shadd8 r3 , r3 , r7 ; r7: filter1 - - ;calculate output - sub src, src, pstep, lsl #1 - - qadd8 r4, r4, r8 ; u = vp8_signed_char_clamp(p0 + Filter2) - qsub8 r5 ,r5, r3 ; u = vp8_signed_char_clamp(q0 - Filter1) - 
eor r4, r4, r2 ; *op0 = u^0x80 - str r4, [src], pstep ; store op0 result + qsub8 r5 ,r5, r7 ; u = q0 - Filter1 + qadd8 r4, r4, r8 ; u = p0 + Filter2 eor r5, r5, r2 ; *oq0 = u^0x80 - str r5, [src], pstep ; store oq0 result + str r5, [src] ; store oq0 result + eor r4, r4, r2 ; *op0 = u^0x80 + str r4, [src, -pstep] ; store op0 result |simple_hskip_filter| - add src, src, #4 - sub src, src, pstep - sub src, src, pstep, lsl #1 - subs r9, r9, #1 + addne src, src, #4 ; next row - ;pld [src] - ;pld [src, pstep] - ;pld [src, pstep, lsl #1] - - ldrne r3, [src], pstep ; p1 - ldrne r4, [src], pstep ; p0 - ldrne r5, [src], pstep ; q0 - ldrne r6, [src] ; q1 + ldrne r3, [src, -pstep, lsl #1] ; p1 + ldrne r4, [src, -pstep] ; p0 + ldrne r5, [src] ; q0 + ldrne r6, [src, pstep] ; q1 bne simple_hnext8 @@ -173,9 +148,9 @@ pstep RN r1 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- stmdb sp!, {r4 - r11, lr} - ldr r12, [r2], #4 ; r12: flimit + ldr r12, [r2] ; r12: flimit ldr r2, c0x80808080 - ldr r7, [r3], #4 ; limit + ldr r7, [r3] ; limit ; load soure data to r7, r8, r9, r10 ldrh r3, [src, #-2] @@ -212,16 +187,14 @@ pstep RN r1 uqsub8 r10, r5, r4 ; q0 - p0 orr r7, r7, r8 ; abs(p1 - q1) orr r9, r9, r10 ; abs(p0 - q0) - ldr lr, c0x7F7F7F7F ; 0111 1111 mask - uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2 - and r7, lr, r7, lsr #1 ; abs(p1 - q1) / 2 mov r8, #0 + uqadd8 r9, r9, r9 ; abs(p0 - q0) * 2 + uhadd8 r7, r7, r8 ; abs(p1 - q1) / 2 uqadd8 r7, r7, r9 ; abs(p0 - q0)*2 + abs(p1 - q1)/2 mvn r10, #0 ; r10 == -1 - uqsub8 r7, r7, r12 ; compare to flimit - usub8 r7, r8, r7 - sel r7, r10, r8 ; filter mask: lr + usub8 r7, r12, r7 ; compare to flimit + sel lr, r10, r8 ; filter mask cmp lr, #0 beq simple_vskip_filter ; skip filtering @@ -232,35 +205,34 @@ pstep RN r1 eor r4, r4, r2 ; p0 offset to convert to a signed value eor r5, r5, r2 ; q0 offset to convert to a signed value - qsub8 r3, r3, r6 ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1) - qsub8 r6, r5, r4 ; vp8_filter = 
vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0)) + qsub8 r3, r3, r6 ; vp8_filter = p1 - q1 + qsub8 r6, r5, r4 ; q0 - p0 - qadd8 r3, r3, r6 - ldr r8, c0x03030303 ; r8 = 3 + qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 + ldr r9, c0x03030303 ; r9 = 3 - qadd8 r3, r3, r6 + qadd8 r3, r3, r6 ; vp8_filter += q0 - p0 ldr r7, c0x04040404 - qadd8 r3, r3, r6 + qadd8 r3, r3, r6 ; vp8_filter = p1-q1 + 3*(q0-p0)) + ;STALL and r3, r3, lr ; vp8_filter &= mask - ;save bottom 3 bits so that we round one side +4 and the other +3 - qadd8 r8 , r3 , r8 ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3) - qadd8 r3 , r3 , r7 ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4) + qadd8 r9 , r3 , r9 ; Filter2 = vp8_filter + 3 + qadd8 r3 , r3 , r7 ; Filter1 = vp8_filter + 4 - mov r7, #0 - shadd8 r8 , r8 , r7 ; Filter2 >>= 3 - shadd8 r3 , r3 , r7 ; Filter1 >>= 3 - shadd8 r8 , r8 , r7 - shadd8 r3 , r3 , r7 - shadd8 r8 , r8 , r7 ; r8: filter2 - shadd8 r3 , r3 , r7 ; r7: filter1 + shadd8 r9 , r9 , r8 + shadd8 r3 , r3 , r8 + shadd8 r9 , r9 , r8 + shadd8 r3 , r3 , r8 + shadd8 r9 , r9 , r8 ; Filter2 >>= 3 + shadd8 r3 , r3 , r8 ; Filter1 >>= 3 ;calculate output sub src, src, pstep, lsl #2 - qadd8 r4, r4, r8 ; u = vp8_signed_char_clamp(p0 + Filter2) - qsub8 r5, r5, r3 ; u = vp8_signed_char_clamp(q0 - Filter1) + qadd8 r4, r4, r9 ; u = p0 + Filter2 + qsub8 r5, r5, r3 ; u = q0 - Filter1 eor r4, r4, r2 ; *op0 = u^0x80 eor r5, r5, r2 ; *oq0 = u^0x80 @@ -285,10 +257,6 @@ pstep RN r1 |simple_vskip_filter| subs r11, r11, #1 - ;pld [src] - ;pld [src, pstep] - ;pld [src, pstep, lsl #1] - ; load soure data to r7, r8, r9, r10 ldrneh r3, [src, #-2] ldrneh r4, [src], pstep @@ -308,14 +276,12 @@ pstep RN r1 bne simple_vnext8 - ldmia sp!, {r4 - r12, pc} + ldmia sp!, {r4 - r11, pc} ENDP ; |vp8_loop_filter_simple_vertical_edge_armv6| ; Constant Pool c0x80808080 DCD 0x80808080 c0x03030303 DCD 0x03030303 c0x04040404 DCD 0x04040404 -c0x01010101 DCD 0x01010101 -c0x7F7F7F7F DCD 0x7F7F7F7F END diff --git 
a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm index 551d863e9..8b9939484 100644 --- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm +++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -24,10 +25,10 @@ ;and the result is stored in transpose. |vp8_sixtap_predict8x4_armv6| PROC stmdb sp!, {r4 - r11, lr} - sub sp, sp, #184 ;reserve space on stack for temporary storage: 20x(8+1) +4 + str r3, [sp, #-184]! 
;reserve space on stack for temporary storage, store yoffset cmp r2, #0 ;skip first_pass filter if xoffset=0 - str r3, [sp], #4 ;store yoffset + add lr, sp, #4 ;point to temporary buffer beq skip_firstpass_filter ;first-pass filter @@ -44,7 +45,6 @@ mov r2, #0x90000 ; height=9 is top part of counter sub r1, r1, #8 - mov lr, #20 |first_pass_hloop_v6| ldrb r6, [r0, #-5] ; load source data @@ -82,10 +82,10 @@ tst r2, #0xff ; test loop counter usat r11, #8, r11, asr #7 add r12, r12, #0x40 - strh r11, [sp], lr ; result is transposed and stored, which + strh r11, [lr], #20 ; result is transposed and stored, which usat r12, #8, r12, asr #7 - strh r12, [sp], lr + strh r12, [lr], #20 movne r11, r6 movne r12, r7 @@ -106,8 +106,7 @@ subs r2, r2, #0x10000 - mov r6, #158 - sub sp, sp, r6 + sub lr, lr, #158 add r0, r0, r1 ; move to next input line @@ -115,10 +114,7 @@ ;second pass filter secondpass_filter - mov r1, #18 - sub sp, sp, r1 ; 18+4 - - ldr r3, [sp, #-4] ; load back yoffset + ldr r3, [sp], #4 ; load back yoffset ldr r0, [sp, #216] ; load dst address from stack 180+36 ldr r1, [sp, #220] ; load dst stride from stack 180+40 @@ -191,30 +187,28 @@ skip_firstpass_filter sub r0, r0, r1, lsl #1 sub r1, r1, #8 mov r2, #9 - mov r3, #20 skip_firstpass_hloop ldrb r4, [r0], #1 ; load data subs r2, r2, #1 ldrb r5, [r0], #1 - strh r4, [sp], r3 ; store it to immediate buffer + strh r4, [lr], #20 ; store it to immediate buffer ldrb r6, [r0], #1 ; load data - strh r5, [sp], r3 + strh r5, [lr], #20 ldrb r7, [r0], #1 - strh r6, [sp], r3 + strh r6, [lr], #20 ldrb r8, [r0], #1 - strh r7, [sp], r3 + strh r7, [lr], #20 ldrb r9, [r0], #1 - strh r8, [sp], r3 + strh r8, [lr], #20 ldrb r10, [r0], #1 - strh r9, [sp], r3 + strh r9, [lr], #20 ldrb r11, [r0], #1 - strh r10, [sp], r3 + strh r10, [lr], #20 add r0, r0, r1 ; move to next input line - strh r11, [sp], r3 + strh r11, [lr], #20 - mov r4, #158 - sub sp, sp, r4 ; move over to next column + sub lr, lr, #158 ; move over to next column bne 
skip_firstpass_hloop b secondpass_filter diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c index bf972a3bc..65afb41a1 100644 --- a/vp8/common/arm/bilinearfilter_arm.c +++ b/vp8/common/arm/bilinearfilter_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -48,7 +49,7 @@ extern void vp8_filter_block2d_bil_second_pass_armv6 const short *vp8_filter ); -/* +#if 0 void vp8_filter_block2d_bil_first_pass_6 ( unsigned char *src_ptr, @@ -65,14 +66,14 @@ void vp8_filter_block2d_bil_first_pass_6 { for ( j=0; j> VP8_FILTER_SHIFT; src_ptr++; } - // Next row... + /* Next row... 
*/ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -95,7 +96,7 @@ void vp8_filter_block2d_bil_second_pass_6 { for ( j=0; jmbflim, lfi->lim, lfi->mbthr, 2); } -// Vertical MB Filtering +/* Vertical MB Filtering */ void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -93,7 +84,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); } -// Horizontal B Filtering +/* Horizontal B Filtering */ void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -121,7 +112,7 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); } -// Vertical B Filtering +/* Vertical B Filtering */ void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -151,8 +142,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig #endif #if HAVE_ARMV7 -// NEON loopfilter functions -// Horizontal MB filtering +/* NEON loopfilter functions */ +/* Horizontal MB filtering */ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -173,7 +164,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); } -// Vertical MB Filtering +/* Vertical MB Filtering */ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, 
unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -194,7 +185,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); } -// Horizontal B Filtering +/* Horizontal B Filtering */ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -219,7 +210,7 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); } -// Vertical B Filtering +/* Vertical B Filtering */ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h index 4bb49456d..cd62207d7 100644 --- a/vp8/common/arm/loopfilter_arm.h +++ b/vp8/common/arm/loopfilter_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -21,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6); extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6); extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_lf_normal_mb_v #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6 @@ -45,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6); #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon); @@ -56,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon); extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon); extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_lf_normal_mb_v #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon @@ -80,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon); #undef vp8_lf_simple_b_h #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon #endif +#endif #endif diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm index a2fea2bd6..bb72bad1f 100644 --- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. 
All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm index 74d2db5dc..6d4820b7e 100644 --- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm index 46ebb0e0b..b9f3ce034 100644 --- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. 
All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm index 80728d4f8..f7a7d1496 100644 --- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm +++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm index f42ac63c9..e3ea91fe6 100644 --- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm +++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. 
An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm index 89d5e1018..bda4b9654 100644 --- a/vp8/common/arm/neon/copymem16x16_neon.asm +++ b/vp8/common/arm/neon/copymem16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm index 302f734ff..35c0f6708 100644 --- a/vp8/common/arm/neon/copymem8x4_neon.asm +++ b/vp8/common/arm/neon/copymem8x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. 
An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm index 50d39ef66..1f5b9411b 100644 --- a/vp8/common/arm/neon/copymem8x8_neon.asm +++ b/vp8/common/arm/neon/copymem8x8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm new file mode 100644 index 000000000..49ba05fb0 --- /dev/null +++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm @@ -0,0 +1,49 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + + EXPORT |vp8_dc_only_idct_add_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr, +; unsigned char *dst_ptr, int pitch, int stride) +; r0 input_dc +; r1 pred_ptr +; r2 dst_ptr +; r3 pitch +; sp stride +|vp8_dc_only_idct_add_neon| PROC + add r0, r0, #4 + asr r0, r0, #3 + ldr r12, [sp] + vdup.16 q0, r0 + + vld1.32 {d2[0]}, [r1], r3 + vld1.32 {d2[1]}, [r1], r3 + vld1.32 {d4[0]}, [r1], r3 + vld1.32 {d4[1]}, [r1] + + vaddw.u8 q1, q0, d2 + vaddw.u8 q2, q0, d4 + + vqmovun.s16 d2, q1 + vqmovun.s16 d4, q2 + + vst1.32 {d2[0]}, [r2], r12 + vst1.32 {d2[1]}, [r2], r12 + vst1.32 {d4[0]}, [r2], r12 + vst1.32 {d4[1]}, [r2] + + bx lr + + ENDP + END diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm index 4fc744c96..663bf390e 100644 --- a/vp8/common/arm/neon/iwalsh_neon.asm +++ b/vp8/common/arm/neon/iwalsh_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; EXPORT |vp8_short_inv_walsh4x4_neon| EXPORT |vp8_short_inv_walsh4x4_1_neon| diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm new file mode 100644 index 000000000..bf0c35721 --- /dev/null +++ b/vp8/common/arm/neon/loopfilter_neon.asm @@ -0,0 +1,409 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp8_loop_filter_horizontal_edge_y_neon| + EXPORT |vp8_loop_filter_horizontal_edge_uv_neon| + EXPORT |vp8_loop_filter_vertical_edge_y_neon| + EXPORT |vp8_loop_filter_vertical_edge_uv_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; flimit, limit, and thresh should be positive numbers. +; All 16 elements in these variables are equal. 
+ +; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; int count) +; r0 unsigned char *src +; r1 int pitch +; r2 const signed char *flimit +; r3 const signed char *limit +; sp const signed char *thresh, +; sp+4 int count (unused) +|vp8_loop_filter_horizontal_edge_y_neon| PROC + stmdb sp!, {lr} + vld1.s8 {d0[], d1[]}, [r2] ; flimit + vld1.s8 {d2[], d3[]}, [r3] ; limit + sub r2, r0, r1, lsl #2 ; move src pointer down by 4 lines + ldr r12, [sp, #4] ; load thresh pointer + + vld1.u8 {q3}, [r2], r1 ; p3 + vld1.u8 {q4}, [r2], r1 ; p2 + vld1.u8 {q5}, [r2], r1 ; p1 + vld1.u8 {q6}, [r2], r1 ; p0 + vld1.u8 {q7}, [r2], r1 ; q0 + vld1.u8 {q8}, [r2], r1 ; q1 + vld1.u8 {q9}, [r2], r1 ; q2 + vld1.u8 {q10}, [r2] ; q3 + vld1.s8 {d4[], d5[]}, [r12] ; thresh + sub r0, r0, r1, lsl #1 + + bl vp8_loop_filter_neon + + vst1.u8 {q5}, [r0], r1 ; store op1 + vst1.u8 {q6}, [r0], r1 ; store op0 + vst1.u8 {q7}, [r0], r1 ; store oq0 + vst1.u8 {q8}, [r0], r1 ; store oq1 + + ldmia sp!, {pc} + ENDP ; |vp8_loop_filter_horizontal_edge_y_neon| + +; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; unsigned char *v) +; r0 unsigned char *u, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 unsigned char *v +|vp8_loop_filter_horizontal_edge_uv_neon| PROC + stmdb sp!, {lr} + vld1.s8 {d0[], d1[]}, [r2] ; flimit + vld1.s8 {d2[], d3[]}, [r3] ; limit + ldr r2, [sp, #8] ; load v ptr + + sub r3, r0, r1, lsl #2 ; move u pointer down by 4 lines + vld1.u8 {d6}, [r3], r1 ; p3 + vld1.u8 {d8}, [r3], r1 ; p2 + vld1.u8 {d10}, [r3], r1 ; p1 + vld1.u8 {d12}, [r3], r1 ; p0 + vld1.u8 {d14}, [r3], r1 ; q0 + vld1.u8 {d16}, [r3], r1 ; q1 + vld1.u8 {d18}, [r3], r1 ; q2 + vld1.u8 {d20}, [r3] ; q3 + + ldr r3, [sp, #4] ; load 
thresh pointer + + sub r12, r2, r1, lsl #2 ; move v pointer down by 4 lines + vld1.u8 {d7}, [r12], r1 ; p3 + vld1.u8 {d9}, [r12], r1 ; p2 + vld1.u8 {d11}, [r12], r1 ; p1 + vld1.u8 {d13}, [r12], r1 ; p0 + vld1.u8 {d15}, [r12], r1 ; q0 + vld1.u8 {d17}, [r12], r1 ; q1 + vld1.u8 {d19}, [r12], r1 ; q2 + vld1.u8 {d21}, [r12] ; q3 + + vld1.s8 {d4[], d5[]}, [r3] ; thresh + + bl vp8_loop_filter_neon + + sub r0, r0, r1, lsl #1 + sub r2, r2, r1, lsl #1 + + vst1.u8 {d10}, [r0], r1 ; store u op1 + vst1.u8 {d11}, [r2], r1 ; store v op1 + vst1.u8 {d12}, [r0], r1 ; store u op0 + vst1.u8 {d13}, [r2], r1 ; store v op0 + vst1.u8 {d14}, [r0], r1 ; store u oq0 + vst1.u8 {d15}, [r2], r1 ; store v oq0 + vst1.u8 {d16}, [r0] ; store u oq1 + vst1.u8 {d17}, [r2] ; store v oq1 + + ldmia sp!, {pc} + ENDP ; |vp8_loop_filter_horizontal_edge_uv_neon| + +; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; int count) +; r0 unsigned char *src, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 int count (unused) +|vp8_loop_filter_vertical_edge_y_neon| PROC + stmdb sp!, {lr} + vld1.s8 {d0[], d1[]}, [r2] ; flimit + vld1.s8 {d2[], d3[]}, [r3] ; limit + sub r2, r0, #4 ; src ptr down by 4 columns + sub r0, r0, #2 ; dst ptr + ldr r12, [sp, #4] ; load thresh pointer + + vld1.u8 {d6}, [r2], r1 ; load first 8-line src data + vld1.u8 {d8}, [r2], r1 + vld1.u8 {d10}, [r2], r1 + vld1.u8 {d12}, [r2], r1 + vld1.u8 {d14}, [r2], r1 + vld1.u8 {d16}, [r2], r1 + vld1.u8 {d18}, [r2], r1 + vld1.u8 {d20}, [r2], r1 + + vld1.s8 {d4[], d5[]}, [r12] ; thresh + + vld1.u8 {d7}, [r2], r1 ; load second 8-line src data + vld1.u8 {d9}, [r2], r1 + vld1.u8 {d11}, [r2], r1 + vld1.u8 {d13}, [r2], r1 + vld1.u8 {d15}, [r2], r1 + vld1.u8 {d17}, [r2], r1 + vld1.u8 {d19}, [r2], r1 + vld1.u8 {d21}, [r2] + + ;transpose to 8x16 matrix + vtrn.32 q3, q7 + 
vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 q6, q10 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + bl vp8_loop_filter_neon + + vswp d12, d11 + vswp d16, d13 + vswp d14, d12 + vswp d16, d15 + + ;store op1, op0, oq0, oq1 + vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 + vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1 + vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 + vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1 + vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1 + vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1 + vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 + vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1 + vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r0], r1 + vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r0], r1 + vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r0], r1 + vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r0], r1 + vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r0], r1 + vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1 + vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0], r1 + vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r0] + + ldmia sp!, {pc} + ENDP ; |vp8_loop_filter_vertical_edge_y_neon| + +; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; unsigned char *v) +; r0 unsigned char *u, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 unsigned char *v +|vp8_loop_filter_vertical_edge_uv_neon| PROC + stmdb sp!, {lr} + sub r12, r0, #4 ; move u pointer down by 4 columns + vld1.s8 {d0[], d1[]}, [r2] ; flimit + vld1.s8 {d2[], d3[]}, [r3] ; limit + + ldr r2, [sp, #8] ; load v ptr + + vld1.u8 {d6}, [r12], r1 ;load u data + vld1.u8 {d8}, [r12], r1 + vld1.u8 {d10}, [r12], r1 + vld1.u8 {d12}, [r12], r1 + vld1.u8 {d14}, [r12], r1 + vld1.u8 {d16}, [r12], r1 + vld1.u8 {d18}, [r12], r1 + vld1.u8 
{d20}, [r12] + + sub r3, r2, #4 ; move v pointer down by 4 columns + vld1.u8 {d7}, [r3], r1 ;load v data + vld1.u8 {d9}, [r3], r1 + vld1.u8 {d11}, [r3], r1 + vld1.u8 {d13}, [r3], r1 + vld1.u8 {d15}, [r3], r1 + vld1.u8 {d17}, [r3], r1 + vld1.u8 {d19}, [r3], r1 + vld1.u8 {d21}, [r3] + + ldr r12, [sp, #4] ; load thresh pointer + + ;transpose to 8x16 matrix + vtrn.32 q3, q7 + vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 q6, q10 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + vld1.s8 {d4[], d5[]}, [r12] ; thresh + + bl vp8_loop_filter_neon + + sub r0, r0, #2 + sub r2, r2, #2 + + vswp d12, d11 + vswp d16, d13 + vswp d14, d12 + vswp d16, d15 + + ;store op1, op0, oq0, oq1 + vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 + vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1 + vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1 + vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1 + vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 + vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], r1 + vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1 + vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1 + vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1 + vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1 + vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1 + vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1 + vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 + vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1 + vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0] + vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2] + + ldmia sp!, {pc} + ENDP ; |vp8_loop_filter_vertical_edge_uv_neon| + +; void vp8_loop_filter_neon(); +; This is a helper function for the loopfilters. The individual functions do the +; necessary load, transpose (if necessary) and store.
+ +; r0-r3 PRESERVE +; q0 flimit +; q1 limit +; q2 thresh +; q3 p3 +; q4 p2 +; q5 p1 +; q6 p0 +; q7 q0 +; q8 q1 +; q9 q2 +; q10 q3 +|vp8_loop_filter_neon| PROC + ldr r12, _lf_coeff_ + + ; vp8_filter_mask + vabd.u8 q11, q3, q4 ; abs(p3 - p2) + vabd.u8 q12, q4, q5 ; abs(p2 - p1) + vabd.u8 q13, q5, q6 ; abs(p1 - p0) + vabd.u8 q14, q8, q7 ; abs(q1 - q0) + vabd.u8 q3, q9, q8 ; abs(q2 - q1) + vabd.u8 q4, q10, q9 ; abs(q3 - q2) + vabd.u8 q9, q6, q7 ; abs(p0 - q0) + + vmax.u8 q11, q11, q12 + vmax.u8 q12, q13, q14 + vmax.u8 q3, q3, q4 + vmax.u8 q15, q11, q12 + + ; vp8_hevmask + vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 + vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 + vmax.u8 q15, q15, q3 + + vadd.u8 q0, q0, q0 ; flimit * 2 + vadd.u8 q0, q0, q1 ; flimit * 2 + limit + vcge.u8 q15, q1, q15 + + vabd.u8 q2, q5, q8 ; a = abs(p1 - q1) + vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2 + vshr.u8 q2, q2, #1 ; a = a / 2 + vqadd.u8 q9, q9, q2 ; a = b + a + vcge.u8 q9, q0, q9 ; (a > flimit * 2 + limit) * -1 + + vld1.u8 {q0}, [r12]! + + ; vp8_filter() function + ; convert to signed + veor q7, q7, q0 ; qs0 + veor q6, q6, q0 ; ps0 + veor q5, q5, q0 ; ps1 + veor q8, q8, q0 ; qs1 + + vld1.u8 {q10}, [r12]! + + vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) + vsubl.s8 q11, d15, d13 + + vmovl.u8 q4, d20 + + vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1) + vorr q14, q13, q14 ; vp8_hevmask + + vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) + vmul.i16 q11, q11, q4 + + vand q1, q1, q14 ; vp8_filter &= hev + vand q15, q15, q9 ; vp8_filter_mask + + vaddw.s8 q2, q2, d2 + vaddw.s8 q11, q11, d3 + + vld1.u8 {q9}, [r12]! 
+ + ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d2, q2 + vqmovn.s16 d3, q11 + vand q1, q1, q15 ; vp8_filter &= mask + + vqadd.s8 q2, q1, q10 ; Filter2 = clamp(vp8_filter+3) + vqadd.s8 q1, q1, q9 ; Filter1 = clamp(vp8_filter+4) + vshr.s8 q2, q2, #3 ; Filter2 >>= 3 + vshr.s8 q1, q1, #3 ; Filter1 >>= 3 + + vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + Filter2) + vqsub.s8 q10, q7, q1 ; u = clamp(qs0 - Filter1) + + ; outer tap adjustments: ++vp8_filter >> 1 + vrshr.s8 q1, q1, #1 + vbic q1, q1, q14 ; vp8_filter &= ~hev + + vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + vp8_filter) + vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - vp8_filter) + + veor q5, q13, q0 ; *op1 = u^0x80 + veor q6, q11, q0 ; *op0 = u^0x80 + veor q7, q10, q0 ; *oq0 = u^0x80 + veor q8, q12, q0 ; *oq1 = u^0x80 + + bx lr + ENDP ; |vp8_loop_filter_neon| + + AREA loopfilter_dat, DATA, READONLY +_lf_coeff_ + DCD lf_coeff +lf_coeff + DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 + DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 + DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 + DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101 + + END diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm deleted file mode 100644 index e3e8e8a72..000000000 --- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm +++ /dev/null @@ -1,205 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_loop_filter_horizontal_edge_uv_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal.
So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *u, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; stack(r5) unsigned char *v - -|vp8_loop_filter_horizontal_edge_uv_neon| PROC - sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines - vld1.s8 {d0[], d1[]}, [r2] ; flimit - - ldr r2, [sp, #4] ; load v ptr - ldr r12, [sp, #0] ; load thresh pointer - - sub r2, r2, r1, lsl #2 ; move v pointer down by 4 lines - - vld1.u8 {d6}, [r0], r1 ; p3 - vld1.u8 {d7}, [r2], r1 ; p3 - vld1.u8 {d8}, [r0], r1 ; p2 - vld1.u8 {d9}, [r2], r1 ; p2 - vld1.u8 {d10}, [r0], r1 ; p1 - vld1.u8 {d11}, [r2], r1 ; p1 - vld1.u8 {d12}, [r0], r1 ; p0 - vld1.u8 {d13}, [r2], r1 ; p0 - vld1.u8 {d14}, [r0], r1 ; q0 - vld1.u8 {d15}, [r2], r1 ; q0 - vld1.u8 {d16}, [r0], r1 ; q1 - vld1.u8 {d17}, [r2], r1 ; q1 - vld1.u8 {d18}, [r0], r1 ; q2 - vld1.u8 {d19}, [r2], r1 ; q2 - vld1.u8 {d20}, [r0], r1 ; q3 - vld1.u8 {d21}, [r2], r1 ; q3 - - vld1.s8 {d2[], d3[]}, [r3] ; limit - vld1.s8 {d4[], d5[]}, [r12] ; thresh - - ldr r12, _lfhuv_coeff_ - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q4, q10, q9 ; abs(q3 - q2) - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1 - vadd.u8 q0, q0, q0 ; flimit * 2 - vadd.u8 q0, q0, q1 ; flimit * 2 + limit - - vand q15, q15, q12 - vand q10, q10, q11 
- vand q3, q3, q4 - - vabd.u8 q2, q5, q8 ; abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2 - vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 - - vld1.u8 {q0}, [r12]! - - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value -;;;;;;;;;;;;;; - vld1.u8 {q10}, [r12]! - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vand q3, q3, q9 - vmovl.u8 q4, d20 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0) - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; vp8_filter &= hev - vand q15, q15, q3 ; q15: vp8_filter_mask - ;; - ;vld1.u8 {q4}, [r12]! ;no need 7 any more - - ;vqadd.s8 q1, q1, q2 - vaddw.s8 q2, q2, d2 - vaddw.s8 q11, q11, d3 - - vld1.u8 {q9}, [r12]! 
- ; - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q11 - ;; - - vand q1, q1, q15 ; vp8_filter &= mask - ;; -;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q1, q4 ; s = vp8_filter & 7 -; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4) - ;;;; -; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3 -; vceq.i8 q2, q2, q9 ; s = (s==4)*-1 - ;; -; ;calculate output -; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter) -; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; q10=3 - vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3) - vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q1, q1, #3 ; Filter1 >>= 3 - - ;calculate output - vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1 - - sub r0, r0, r1, lsl #2 - sub r0, r0, r1, lsl #1 - ; - - vbic q1, q1, q14 ; vp8_filter &= ~hev - - sub r2, r2, r1, lsl #2 - sub r2, r2, r1, lsl #1 - ;; - - vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter) - ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u) - vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter) - ; - - veor q5, q13, q0 ; *op1 = u^0x80 - veor q6, q11, q0 ; *op0 = u^0x80 - veor q7, q10, q0 ; *oq0 = u^0x80 - veor q8, q12, q0 ; *oq1 = u^0x80 - ; - - vst1.u8 {d10}, [r0], r1 ; store u op1 - vst1.u8 {d11}, [r2], r1 ; store v op1 - vst1.u8 {d12}, [r0], r1 ; store u op0 - vst1.u8 {d13}, [r2], r1 ; store v op0 - vst1.u8 {d14}, [r0], r1 ; store u oq0 - vst1.u8 {d15}, [r2], r1 ; store v oq0 - vst1.u8 {d16}, [r0], r1 ; store u oq1 - vst1.u8 {d17}, [r2], r1 ; store v oq1 - - bx lr - ENDP ; 
|vp8_loop_filter_horizontal_edge_uv_neon| - -;----------------- - AREA hloopfilteruv_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_lfhuv_coeff_ - DCD lfhuv_coeff -lfhuv_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101 - - END diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm deleted file mode 100644 index f11055d42..000000000 --- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm +++ /dev/null @@ -1,188 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_loop_filter_horizontal_edge_y_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. 
-; r0 unsigned char *s, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; //stack(r5) int count --unused - -|vp8_loop_filter_horizontal_edge_y_neon| PROC - sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines - ldr r12, [sp, #0] ; load thresh pointer - - vld1.u8 {q3}, [r0], r1 ; p3 - vld1.s8 {d0[], d1[]}, [r2] ; flimit - vld1.u8 {q4}, [r0], r1 ; p2 - vld1.s8 {d2[], d3[]}, [r3] ; limit - vld1.u8 {q5}, [r0], r1 ; p1 - vld1.s8 {d4[], d5[]}, [r12] ; thresh - vld1.u8 {q6}, [r0], r1 ; p0 - ldr r12, _lfhy_coeff_ - vld1.u8 {q7}, [r0], r1 ; q0 - - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vld1.u8 {q8}, [r0], r1 ; q1 - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vld1.u8 {q9}, [r0], r1 ; q2 - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vld1.u8 {q10}, [r0], r1 ; q3 - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q4, q10, q9 ; abs(q3 - q2) - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1 - vadd.u8 q0, q0, q0 ; flimit * 2 - vadd.u8 q0, q0, q1 ; flimit * 2 + limit - - vand q15, q15, q12 - vand q10, q10, q11 - vand q3, q3, q4 - - vabd.u8 q2, q5, q8 ; abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2 - vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 - - vld1.u8 {q0}, [r12]! 
- - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value -;;;;;;;;;;;;;; - vld1.u8 {q10}, [r12]! - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vand q3, q3, q9 - vmovl.u8 q4, d20 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0) - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; vp8_filter &= hev - vand q15, q15, q3 ; q15: vp8_filter_mask - ;; - ;vld1.u8 {q4}, [r12]! ;no need 7 any more - - ;vqadd.s8 q1, q1, q2 - vaddw.s8 q2, q2, d2 - vaddw.s8 q11, q11, d3 - - vld1.u8 {q9}, [r12]! - ; - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q11 - ;; - - vand q1, q1, q15 ; vp8_filter &= mask - ;; -;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q1, q4 ; s = vp8_filter & 7 -; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4) - ;;;; -; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3 -; vceq.i8 q2, q2, q9 ; s = (s==4)*-1 - ;; -; ;calculate output -; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter) -; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; q10=3 - vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3) - vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q1, q1, #3 ; Filter1 >>= 3 - - ;calculate output - vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - 
vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1 - - sub r0, r0, r1, lsl #2 - sub r0, r0, r1, lsl #1 - ; - - vbic q1, q1, q14 ; vp8_filter &= ~hev - ; - add r2, r1, r0 - - vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter) - ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u) - vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter) - - add r3, r2, r1 - - veor q5, q13, q0 ; *op1 = u^0x80 - veor q6, q11, q0 ; *op0 = u^0x80 - veor q7, q10, q0 ; *oq0 = u^0x80 - veor q8, q12, q0 ; *oq1 = u^0x80 - - add r12, r3, r1 - - vst1.u8 {q5}, [r0] ; store op1 - vst1.u8 {q6}, [r2] ; store op0 - vst1.u8 {q7}, [r3] ; store oq0 - vst1.u8 {q8}, [r12] ; store oq1 - - bx lr - ENDP ; |vp8_loop_filter_horizontal_edge_y_neon| - -;----------------- - AREA hloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_lfhy_coeff_ - DCD lfhy_coeff -lfhy_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101 - - END diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm index 6d74fab52..0b84dc750 100644 --- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm +++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm index 2bb6222b9..a793d095a 100644 --- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm +++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm deleted file mode 100644 index d79cc68a3..000000000 --- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm +++ /dev/null @@ -1,231 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_loop_filter_vertical_edge_uv_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *u, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; stack(r5) unsigned char *v - -|vp8_loop_filter_vertical_edge_uv_neon| PROC - sub r0, r0, #4 ; move u pointer down by 4 columns - vld1.s8 {d0[], d1[]}, [r2] ; flimit - - ldr r2, [sp, #4] ; load v ptr - ldr r12, [sp, #0] ; load thresh pointer - - sub r2, r2, #4 ; move v pointer down by 4 columns - - vld1.u8 {d6}, [r0], r1 ;load u data - vld1.u8 {d7}, [r2], r1 ;load v data - vld1.u8 {d8}, [r0], r1 - vld1.u8 {d9}, [r2], r1 - vld1.u8 {d10}, [r0], r1 - vld1.u8 {d11}, [r2], r1 - vld1.u8 {d12}, [r0], r1 - vld1.u8 {d13}, [r2], r1 - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d15}, [r2], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d17}, [r2], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d19}, [r2], r1 - vld1.u8 {d20}, [r0], r1 - vld1.u8 {d21}, [r2], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - vld1.s8 {d2[], d3[]}, [r3] ; limit - vld1.s8 {d4[], d5[]}, [r12] ; thresh - - ldr r12, _vlfuv_coeff_ - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q4, q10, q9 ; abs(q3 - q2) - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - vcge.u8 q15, q1, q11 ; 
(abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1 - vadd.u8 q0, q0, q0 ; flimit * 2 - vadd.u8 q0, q0, q1 ; flimit * 2 + limit - - vand q15, q15, q12 - vand q10, q10, q11 - vand q3, q3, q4 - - vabd.u8 q2, q5, q8 ; abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2 - vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 - - vld1.u8 {q0}, [r12]! - - vand q15, q15, q10 - - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value -;;;;;;;;;;;;;; - vld1.u8 {q10}, [r12]! - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vand q3, q3, q9 - vmovl.u8 q4, d20 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0) - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; vp8_filter &= hev - vand q15, q15, q3 ; q15: vp8_filter_mask - ;; - ;vld1.u8 {q4}, [r12]! ;no need 7 any more - - ;vqadd.s8 q1, q1, q2 - vaddw.s8 q2, q2, d2 - vaddw.s8 q11, q11, d3 - - vld1.u8 {q9}, [r12]! 
- ; - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q11 - ;; - - vand q1, q1, q15 ; vp8_filter &= mask - ;; -;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q1, q4 ; s = vp8_filter & 7 -; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4) - ;;;; -; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3 -; vceq.i8 q2, q2, q9 ; s = (s==4)*-1 - ;; -; ;calculate output -; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter) -; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; q10=3 - vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3) - vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q1, q1, #3 ; Filter1 >>= 3 - ;calculate output - vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1 - - sub r0, r0, r1, lsl #3 - add r0, r0, #2 - - vbic q1, q1, q14 ; vp8_filter &= ~hev - - sub r2, r2, r1, lsl #3 - add r2, r2, #2 - - vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter) - ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u) - vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter) - - veor q7, q10, q0 ; *oq0 = u^0x80 - veor q5, q13, q0 ; *op1 = u^0x80 - veor q6, q11, q0 ; *op0 = u^0x80 - veor q8, q12, q0 ; *oq1 = u^0x80 - - vswp d12, d11 - vswp d16, d13 - vswp d14, d12 - vswp d16, d15 - - ;store op1, op0, oq0, oq1 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 - vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2], r1 - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1 - vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r2], r1 - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 - vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r2], 
r1 - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1 - vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r2], r1 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1 - vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r2], r1 - vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1 - vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r2], r1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 - vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r2], r1 - vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1 - vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2], r1 - - bx lr - ENDP ; |vp8_loop_filter_vertical_edge_uv_neon| - -;----------------- - AREA vloopfilteruv_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_vlfuv_coeff_ - DCD vlfuv_coeff -vlfuv_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101 - - END diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm deleted file mode 100644 index 3a230a953..000000000 --- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm +++ /dev/null @@ -1,235 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_loop_filter_vertical_edge_y_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. 
So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *s, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; //stack(r5) int count --unused - -|vp8_loop_filter_vertical_edge_y_neon| PROC - sub r0, r0, #4 ; move src pointer down by 4 columns - ldr r12, [sp, #0] ; load thresh pointer - - vld1.u8 {d6}, [r0], r1 ; load first 8-line src data - vld1.s8 {d0[], d1[]}, [r2] ; flimit - vld1.u8 {d8}, [r0], r1 - vld1.s8 {d2[], d3[]}, [r3] ; limit - vld1.u8 {d10}, [r0], r1 - vld1.s8 {d4[], d5[]}, [r12] ; thresh - vld1.u8 {d12}, [r0], r1 - ldr r12, _vlfy_coeff_ - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d20}, [r0], r1 - - vld1.u8 {d7}, [r0], r1 ; load second 8-line src data - vld1.u8 {d9}, [r0], r1 - vld1.u8 {d11}, [r0], r1 - vld1.u8 {d13}, [r0], r1 - vld1.u8 {d15}, [r0], r1 - vld1.u8 {d17}, [r0], r1 - vld1.u8 {d19}, [r0], r1 - vld1.u8 {d21}, [r0], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q4, q10, q9 ; abs(q3 - q2) - vabd.u8 q9, q6, q7 ; abs(p0 - q0) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q4, q1, q4 ; (abs(q3 - q2) > limit)*-1 - 
vadd.u8 q0, q0, q0 ; flimit * 2 - vadd.u8 q0, q0, q1 ; flimit * 2 + limit - - vand q15, q15, q12 - vand q10, q10, q11 - vand q3, q3, q4 - - vabd.u8 q2, q5, q8 ; abs(p1 - q1) - vqadd.u8 q9, q9, q9 ; abs(p0 - q0) * 2 - vshr.u8 q2, q2, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q9, q9, q2 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q9, q0, q9 ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1 - - vld1.u8 {q0}, [r12]! - - vand q15, q15, q10 - - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value -;;;;;;;;;;;;;; - vld1.u8 {q10}, [r12]! - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q11, d15, d13 - - vand q3, q3, q9 - vmovl.u8 q4, d20 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vmul.i8 q2, q2, q10 ; 3 * ( qs0 - ps0) - vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) - vmul.i16 q11, q11, q4 - - vand q1, q1, q14 ; vp8_filter &= hev - vand q15, q15, q3 ; q15: vp8_filter_mask - ;; - ;vld1.u8 {q4}, [r12]! ;no need 7 any more - - ;vqadd.s8 q1, q1, q2 - vaddw.s8 q2, q2, d2 - vaddw.s8 q11, q11, d3 - - vld1.u8 {q9}, [r12]! 
- ; - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q11 - ;; - - vand q1, q1, q15 ; vp8_filter &= mask - ;; -;;;;;;;;;;;; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q1, q4 ; s = vp8_filter & 7 -; vqadd.s8 q1, q1, q9 ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4) - ;;;; -; vshr.s8 q1, q1, #3 ; vp8_filter >>= 3 -; vceq.i8 q2, q2, q9 ; s = (s==4)*-1 - ;; -; ;calculate output -; vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - vp8_filter) -; vqadd.s8 q11, q2, q1 ; u = vp8_signed_char_clamp(s + vp8_filter) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; q10=3 - vqadd.s8 q2, q1, q10 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3) - vqadd.s8 q1, q1, q9 ; Filter1 = vp8_signed_char_clamp(vp8_filter+4) - vshr.s8 q2, q2, #3 ; Filter2 >>= 3 - vshr.s8 q1, q1, #3 ; Filter1 >>= 3 - ;calculate output - vqadd.s8 q11, q6, q2 ; u = vp8_signed_char_clamp(ps0 + Filter2) - vqsub.s8 q10, q7, q1 ; u = vp8_signed_char_clamp(qs0 - Filter1) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vrshr.s8 q1, q1, #1 ;round/shift: vp8_filter += 1; vp8_filter >>= 1 - - sub r0, r0, r1, lsl #4 - add r0, r0, #2 - ; - - vbic q1, q1, q14 ; vp8_filter &= ~hev - add r2, r0, r1 - ; - - vqadd.s8 q13, q5, q1 ; u = vp8_signed_char_clamp(ps1 + vp8_filter) - ;vqadd.s8 q11, q6, q11 ; u = vp8_signed_char_clamp(ps0 + u) - vqsub.s8 q12, q8, q1 ; u = vp8_signed_char_clamp(qs1 - vp8_filter) - - veor q7, q10, q0 ; *oq0 = u^0x80 - veor q5, q13, q0 ; *op1 = u^0x80 - veor q6, q11, q0 ; *op0 = u^0x80 - veor q8, q12, q0 ; *oq1 = u^0x80 - add r3, r2, r1 - ; - vswp d12, d11 - vswp d16, d13 - add r12, r3, r1 - vswp d14, d12 - vswp d16, d15 - - ;store op1, op0, oq0, oq1 - vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0] - vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r2] - vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r3] - vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r12], r1 - add r0, r12, r1 - vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r12] - vst4.8 {d10[5], d11[5], 
d12[5], d13[5]}, [r0], r1 - add r2, r0, r1 - vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0] - vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r2], r1 - add r3, r2, r1 - vst4.8 {d14[0], d15[0], d16[0], d17[0]}, [r2] - vst4.8 {d14[1], d15[1], d16[1], d17[1]}, [r3], r1 - add r12, r3, r1 - vst4.8 {d14[2], d15[2], d16[2], d17[2]}, [r3] - vst4.8 {d14[3], d15[3], d16[3], d17[3]}, [r12], r1 - add r0, r12, r1 - vst4.8 {d14[4], d15[4], d16[4], d17[4]}, [r12] - vst4.8 {d14[5], d15[5], d16[5], d17[5]}, [r0], r1 - add r2, r0, r1 - vst4.8 {d14[6], d15[6], d16[6], d17[6]}, [r0] - vst4.8 {d14[7], d15[7], d16[7], d17[7]}, [r2] - - bx lr - ENDP ; |vp8_loop_filter_vertical_edge_y_neon| - -;----------------- - AREA vloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_vlfy_coeff_ - DCD vlfy_coeff -vlfy_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x01010101, 0x01010101, 0x01010101, 0x01010101 - - END diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm new file mode 100644 index 000000000..255dd5619 --- /dev/null +++ b/vp8/common/arm/neon/mbloopfilter_neon.asm @@ -0,0 +1,519 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon| + EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon| + EXPORT |vp8_mbloop_filter_vertical_edge_y_neon| + EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +; flimit, limit, and thresh should be positive numbers. +; All 16 elements in these variables are equal. + +; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; int count) +; r0 unsigned char *src, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 int count (unused) +|vp8_mbloop_filter_horizontal_edge_y_neon| PROC + stmdb sp!, {lr} + sub r0, r0, r1, lsl #2 ; move src pointer back 4 rows, to the p3 row + ldr r12, [sp, #4] ; load thresh pointer (first stack arg, offset by the 4-byte lr push) + + vld1.u8 {q3}, [r0], r1 ; p3 + vld1.s8 {d2[], d3[]}, [r3] ; limit + vld1.u8 {q4}, [r0], r1 ; p2 + vld1.s8 {d4[], d5[]}, [r12] ; thresh + vld1.u8 {q5}, [r0], r1 ; p1 + vld1.u8 {q6}, [r0], r1 ; p0 + vld1.u8 {q7}, [r0], r1 ; q0 + vld1.u8 {q8}, [r0], r1 ; q1 + vld1.u8 {q9}, [r0], r1 ; q2 + vld1.u8 {q10}, [r0], r1 ; q3 + + bl vp8_mbloop_filter_neon ; helper filters in place: results come back in q4-q9, flimit is read through r2 + + sub r0, r0, r1, lsl #3 ; rewind the 8 rows loaded above + add r0, r0, r1 ; r0 = p2 row + add r2, r0, r1 ; r2 = p1 row + add r3, r2, r1 ; r3 = p0 row + + vst1.u8 {q4}, [r0] ; store op2 + vst1.u8 {q5}, [r2] ; store op1 + vst1.u8 {q6}, [r3], r1 ; store op0 + add r12, r3, r1 ; r12 = q1 row (r3 was post-incremented to the q0 row) + vst1.u8 {q7}, [r3] ; store oq0 + vst1.u8 {q8}, [r12], r1 ; store oq1 + vst1.u8 {q9}, [r12] ; store oq2 + + ldmia sp!, {pc} + ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon| + +; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; unsigned char *v) +; r0 unsigned char *u, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 unsigned char *v 
+|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC + stmdb sp!, {lr} + sub r0, r0, r1, lsl #2 ; move u pointer back 4 rows, to the p3 row + vld1.s8 {d2[], d3[]}, [r3] ; limit + ldr r3, [sp, #8] ; load v ptr (second stack arg, offset by the 4-byte lr push) + ldr r12, [sp, #4] ; load thresh pointer (first stack arg, offset by the 4-byte lr push) + sub r3, r3, r1, lsl #2 ; move v pointer back 4 rows, to the p3 row + + vld1.u8 {d6}, [r0], r1 ; u p3 + vld1.u8 {d7}, [r3], r1 ; v p3 + vld1.u8 {d8}, [r0], r1 ; u p2 + vld1.u8 {d9}, [r3], r1 ; v p2 + vld1.u8 {d10}, [r0], r1 ; u p1 + vld1.u8 {d11}, [r3], r1 ; v p1 + vld1.u8 {d12}, [r0], r1 ; u p0 + vld1.u8 {d13}, [r3], r1 ; v p0 + vld1.u8 {d14}, [r0], r1 ; u q0 + vld1.u8 {d15}, [r3], r1 ; v q0 + vld1.u8 {d16}, [r0], r1 ; u q1 + vld1.u8 {d17}, [r3], r1 ; v q1 + vld1.u8 {d18}, [r0], r1 ; u q2 + vld1.u8 {d19}, [r3], r1 ; v q2 + vld1.u8 {d20}, [r0], r1 ; u q3 + vld1.u8 {d21}, [r3], r1 ; v q3 + + vld1.s8 {d4[], d5[]}, [r12] ; thresh + + bl vp8_mbloop_filter_neon ; helper filters in place: results come back in q4-q9, flimit is read through r2 + + sub r0, r0, r1, lsl #3 ; rewind u by the 8 rows loaded above + sub r3, r3, r1, lsl #3 ; rewind v by the 8 rows loaded above + + add r0, r0, r1 ; skip the p3 row: u now at the p2 row + add r3, r3, r1 ; skip the p3 row: v now at the p2 row + + vst1.u8 {d8}, [r0], r1 ; store u op2 + vst1.u8 {d9}, [r3], r1 ; store v op2 + vst1.u8 {d10}, [r0], r1 ; store u op1 + vst1.u8 {d11}, [r3], r1 ; store v op1 + vst1.u8 {d12}, [r0], r1 ; store u op0 + vst1.u8 {d13}, [r3], r1 ; store v op0 + vst1.u8 {d14}, [r0], r1 ; store u oq0 + vst1.u8 {d15}, [r3], r1 ; store v oq0 + vst1.u8 {d16}, [r0], r1 ; store u oq1 + vst1.u8 {d17}, [r3], r1 ; store v oq1 + vst1.u8 {d18}, [r0], r1 ; store u oq2 + vst1.u8 {d19}, [r3], r1 ; store v oq2 + + ldmia sp!, {pc} + ENDP ; |vp8_mbloop_filter_horizontal_edge_uv_neon| + +; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; int count) +; r0 unsigned char *src, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 int count (unused) +|vp8_mbloop_filter_vertical_edge_y_neon| PROC + stmdb sp!, {lr} + sub r0, r0, #4 ; move src pointer back 4 columns, to the p3 column + + vld1.u8 {d6}, [r0], r1 ; 
load first 8-line src data + ldr r12, [sp, #4] ; load thresh pointer (read before sp is lowered below) + vld1.u8 {d8}, [r0], r1 + sub sp, sp, #32 ; reserve 32 bytes to save q3 (p3) and q10 (q3) across the helper call + vld1.u8 {d10}, [r0], r1 + vld1.u8 {d12}, [r0], r1 + vld1.u8 {d14}, [r0], r1 + vld1.u8 {d16}, [r0], r1 + vld1.u8 {d18}, [r0], r1 + vld1.u8 {d20}, [r0], r1 + + vld1.u8 {d7}, [r0], r1 ; load second 8-line src data + vld1.u8 {d9}, [r0], r1 + vld1.u8 {d11}, [r0], r1 + vld1.u8 {d13}, [r0], r1 + vld1.u8 {d15}, [r0], r1 + vld1.u8 {d17}, [r0], r1 + vld1.u8 {d19}, [r0], r1 + vld1.u8 {d21}, [r0], r1 + + ;transpose to 8x16 matrix so that q3..q10 hold the p3..q3 columns of all 16 rows + vtrn.32 q3, q7 + vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 q6, q10 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + vld1.s8 {d4[], d5[]}, [r12] ; thresh + vld1.s8 {d2[], d3[]}, [r3] ; limit + mov r12, sp + vst1.u8 {q3}, [r12]! ; save p3 (the helper clobbers q3) + vst1.u8 {q10}, [r12]! ; save q3 (the helper clobbers q10) + + bl vp8_mbloop_filter_neon ; helper filters in place: results come back in q4-q9, flimit is read through r2 + + sub r0, r0, r1, lsl #4 ; rewind the 16 rows loaded above (r0 = row 0) + + add r2, r0, r1 ; r2 = row 1 + + add r3, r2, r1 ; r3 = row 2 + + vld1.u8 {q3}, [sp]! ; restore p3 + vld1.u8 {q10}, [sp]! ; restore q3 (the two post-increments also release the 32 bytes) 
+ + ;transpose to 16x8 matrix + vtrn.32 q3, q7 + vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 q6, q10 + add r12, r3, r1 ; r12 = row 3 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + ;store p3, op2, op1, op0, oq0, oq1, oq2, q3 (one 8-byte row per store) + vst1.8 {d6}, [r0] + vst1.8 {d8}, [r2] + vst1.8 {d10}, [r3] + vst1.8 {d12}, [r12], r1 + add r0, r12, r1 + vst1.8 {d14}, [r12] + vst1.8 {d16}, [r0], r1 + add r2, r0, r1 + vst1.8 {d18}, [r0] + vst1.8 {d20}, [r2], r1 + add r3, r2, r1 + vst1.8 {d7}, [r2] + vst1.8 {d9}, [r3], r1 + add r12, r3, r1 + vst1.8 {d11}, [r3] + vst1.8 {d13}, [r12], r1 + add r0, r12, r1 + vst1.8 {d15}, [r12] + vst1.8 {d17}, [r0], r1 + add r2, r0, r1 + vst1.8 {d19}, [r0] + vst1.8 {d21}, [r2] + + ldmia sp!, {pc} + ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon| + +; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch, +; const signed char *flimit, +; const signed char *limit, +; const signed char *thresh, +; unsigned char *v) +; r0 unsigned char *u, +; r1 int pitch, +; r2 const signed char *flimit, +; r3 const signed char *limit, +; sp const signed char *thresh, +; sp+4 unsigned char *v +|vp8_mbloop_filter_vertical_edge_uv_neon| PROC + stmdb sp!, {lr} + sub r0, r0, #4 ; move u pointer back 4 columns, to the p3 column + vld1.s8 {d2[], d3[]}, [r3] ; limit + ldr r3, [sp, #8] ; load v ptr (second stack arg, offset by the 4-byte lr push) + ldr r12, [sp, #4] ; load thresh pointer (first stack arg, offset by the 4-byte lr push) + + sub r3, r3, #4 ; move v pointer back 4 columns, to the p3 column + + vld1.u8 {d6}, [r0], r1 ;load u data + vld1.u8 {d7}, [r3], r1 ;load v data + vld1.u8 {d8}, [r0], r1 + vld1.u8 {d9}, [r3], r1 + vld1.u8 {d10}, [r0], r1 + vld1.u8 {d11}, [r3], r1 + vld1.u8 {d12}, [r0], r1 + vld1.u8 {d13}, [r3], r1 + vld1.u8 {d14}, [r0], r1 + vld1.u8 {d15}, [r3], r1 + vld1.u8 {d16}, [r0], r1 + vld1.u8 {d17}, [r3], r1 + vld1.u8 {d18}, [r0], r1 + vld1.u8 {d19}, [r3], r1 + vld1.u8 {d20}, [r0], r1 + vld1.u8 {d21}, [r3], r1 + + ;transpose to 8x16 matrix + vtrn.32 q3, q7 + vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 
q6, q10 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + sub sp, sp, #32 ; reserve 32 bytes to save q3 (p3) and q10 (q3) across the helper call + vld1.s8 {d4[], d5[]}, [r12] ; thresh + mov r12, sp + vst1.u8 {q3}, [r12]! ; save p3 (the helper clobbers q3) + vst1.u8 {q10}, [r12]! ; save q3 (the helper clobbers q10) + + bl vp8_mbloop_filter_neon ; helper filters in place: results come back in q4-q9, flimit is read through r2 + + sub r0, r0, r1, lsl #3 ; rewind u by the 8 rows loaded above + sub r3, r3, r1, lsl #3 ; rewind v by the 8 rows loaded above + + vld1.u8 {q3}, [sp]! ; restore p3 + vld1.u8 {q10}, [sp]! ; restore q3 (the two post-increments also release the 32 bytes) + + ;transpose to 16x8 matrix + vtrn.32 q3, q7 + vtrn.32 q4, q8 + vtrn.32 q5, q9 + vtrn.32 q6, q10 + + vtrn.16 q3, q5 + vtrn.16 q4, q6 + vtrn.16 q7, q9 + vtrn.16 q8, q10 + + vtrn.8 q3, q4 + vtrn.8 q5, q6 + vtrn.8 q7, q8 + vtrn.8 q9, q10 + + ;store p3, op2, op1, op0, oq0, oq1, oq2, q3 (one 8-byte row per store, u and v interleaved) + vst1.8 {d6}, [r0], r1 + vst1.8 {d7}, [r3], r1 + vst1.8 {d8}, [r0], r1 + vst1.8 {d9}, [r3], r1 + vst1.8 {d10}, [r0], r1 + vst1.8 {d11}, [r3], r1 + vst1.8 {d12}, [r0], r1 + vst1.8 {d13}, [r3], r1 + vst1.8 {d14}, [r0], r1 + vst1.8 {d15}, [r3], r1 + vst1.8 {d16}, [r0], r1 + vst1.8 {d17}, [r3], r1 + vst1.8 {d18}, [r0], r1 + vst1.8 {d19}, [r3], r1 + vst1.8 {d20}, [r0], r1 + vst1.8 {d21}, [r3], r1 + + ldmia sp!, {pc} + ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon| + +; void vp8_mbloop_filter_neon() +; This is a helper function for the macroblock loopfilters. The individual +; functions do the necessary load, transpose (if necessary), preserve (if +; necessary) and store. + +; TODO: +; The vertical filter writes p3/q3 back out because two 4 element writes are +; much simpler than ordering and writing two 3 element sets (or three 2 element +; sets, or whichever other combinations are possible). +; If we can preserve q3 and q10, the vertical filter will be able to avoid +; storing those values on the stack and reading them back after the filter. 
+ +; r0,r1 PRESERVE +; r2 flimit +; r3 PRESERVE +; q1 limit +; q2 thresh +; q3 p3 +; q4 p2 +; q5 p1 +; q6 p0 +; q7 q0 +; q8 q1 +; q9 q2 +; q10 q3 + +|vp8_mbloop_filter_neon| PROC + ldr r12, _mblf_coeff_ + + ; vp8_filter_mask + vabd.u8 q11, q3, q4 ; abs(p3 - p2) + vabd.u8 q12, q4, q5 ; abs(p2 - p1) + vabd.u8 q13, q5, q6 ; abs(p1 - p0) + vabd.u8 q14, q8, q7 ; abs(q1 - q0) + vabd.u8 q3, q9, q8 ; abs(q2 - q1) + vabd.u8 q0, q10, q9 ; abs(q3 - q2) + + vmax.u8 q11, q11, q12 + vmax.u8 q12, q13, q14 + vmax.u8 q3, q3, q0 + vmax.u8 q15, q11, q12 + + vabd.u8 q12, q6, q7 ; abs(p0 - q0) + + ; vp8_hevmask + vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh) * -1 + vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh) * -1 + vmax.u8 q15, q15, q3 + + vld1.s8 {d4[], d5[]}, [r2] ; flimit + + vld1.u8 {q0}, [r12]! + + vadd.u8 q2, q2, q2 ; flimit * 2 + vadd.u8 q2, q2, q1 ; flimit * 2 + limit + vcge.u8 q15, q1, q15 + + vabd.u8 q1, q5, q8 ; a = abs(p1 - q1) + vqadd.u8 q12, q12, q12 ; b = abs(p0 - q0) * 2 + vshr.u8 q1, q1, #1 ; a = a / 2 + vqadd.u8 q12, q12, q1 ; a = b + a + vcge.u8 q12, q2, q12 ; (a > flimit * 2 + limit) * -1 + + ; vp8_filter + ; convert to signed + veor q7, q7, q0 ; qs0 + veor q6, q6, q0 ; ps0 + veor q5, q5, q0 ; ps1 + veor q8, q8, q0 ; qs1 + veor q4, q4, q0 ; ps2 + veor q9, q9, q0 ; qs2 + + vorr q14, q13, q14 ; vp8_hevmask + + vsubl.s8 q2, d14, d12 ; qs0 - ps0 + vsubl.s8 q13, d15, d13 + + vqsub.s8 q1, q5, q8 ; vp8_filter = clamp(ps1-qs1) + + vadd.s16 q10, q2, q2 ; 3 * (qs0 - ps0) + vadd.s16 q11, q13, q13 + vand q15, q15, q12 ; vp8_filter_mask + + vadd.s16 q2, q2, q10 + vadd.s16 q13, q13, q11 + + vld1.u8 {q12}, [r12]! ; #3 + + vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0) + vaddw.s8 q13, q13, d3 + + vld1.u8 {q11}, [r12]! ; #4 + + ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) + vqmovn.s16 d2, q2 + vqmovn.s16 d3, q13 + + vand q1, q1, q15 ; vp8_filter &= mask + + vld1.u8 {q15}, [r12]! ; #63 + ; + vand q13, q1, q14 ; Filter2 &= hev + + vld1.u8 {d7}, [r12]! 
; #9 + + vqadd.s8 q2, q13, q11 ; Filter1 = clamp(Filter2+4) + vqadd.s8 q13, q13, q12 ; Filter2 = clamp(Filter2+3) + + vld1.u8 {d6}, [r12]! ; #18 + + vshr.s8 q2, q2, #3 ; Filter1 >>= 3 + vshr.s8 q13, q13, #3 ; Filter2 >>= 3 + + vmov q10, q15 + vmov q12, q15 + + vqsub.s8 q7, q7, q2 ; qs0 = clamp(qs0 - Filter1) + + vld1.u8 {d5}, [r12]! ; #27 + + vqadd.s8 q6, q6, q13 ; ps0 = clamp(ps0 + Filter2) + + vbic q1, q1, q14 ; vp8_filter &= ~hev + + ; roughly 1/7th difference across boundary + ; roughly 2/7th difference across boundary + ; roughly 3/7th difference across boundary + vmov q11, q15 + vmov q13, q15 + vmov q14, q15 + + vmlal.s8 q10, d2, d7 ; Filter2 * 9 + vmlal.s8 q11, d3, d7 + vmlal.s8 q12, d2, d6 ; Filter2 * 18 + vmlal.s8 q13, d3, d6 + vmlal.s8 q14, d2, d5 ; Filter2 * 27 + vmlal.s8 q15, d3, d5 + vqshrn.s16 d20, q10, #7 ; u = clamp((63 + Filter2 * 9)>>7) + vqshrn.s16 d21, q11, #7 + vqshrn.s16 d24, q12, #7 ; u = clamp((63 + Filter2 * 18)>>7) + vqshrn.s16 d25, q13, #7 + vqshrn.s16 d28, q14, #7 ; u = clamp((63 + Filter2 * 27)>>7) + vqshrn.s16 d29, q15, #7 + + vqsub.s8 q11, q9, q10 ; s = clamp(qs2 - u) + vqadd.s8 q10, q4, q10 ; s = clamp(ps2 + u) + vqsub.s8 q13, q8, q12 ; s = clamp(qs1 - u) + vqadd.s8 q12, q5, q12 ; s = clamp(ps1 + u) + vqsub.s8 q15, q7, q14 ; s = clamp(qs0 - u) + vqadd.s8 q14, q6, q14 ; s = clamp(ps0 + u) + veor q9, q11, q0 ; *oq2 = s^0x80 + veor q4, q10, q0 ; *op2 = s^0x80 + veor q8, q13, q0 ; *oq1 = s^0x80 + veor q5, q12, q0 ; *op1 = s^0x80 + veor q7, q15, q0 ; *oq0 = s^0x80 + veor q6, q14, q0 ; *op0 = s^0x80 + + bx lr + ENDP ; |vp8_mbloop_filter_neon| + + AREA mbloopfilter_dat, DATA, READONLY +_mblf_coeff_ + DCD mblf_coeff +mblf_coeff + DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 + DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 + DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 + DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f + DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 + DCD 0x1b1b1b1b, 0x1b1b1b1b + + END diff --git
a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm deleted file mode 100644 index 86eddaa2e..000000000 --- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm +++ /dev/null @@ -1,257 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_mbloop_filter_horizontal_edge_uv_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *u, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; stack(r5) unsigned char *v -|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC - sub r0, r0, r1, lsl #2 ; move u pointer down by 4 lines - vld1.s8 {d2[], d3[]}, [r3] ; limit - ldr r3, [sp, #4] ; load v ptr - ldr r12, [sp, #0] ; load thresh pointer - sub r3, r3, r1, lsl #2 ; move v pointer down by 4 lines - - vld1.u8 {d6}, [r0], r1 ; p3 - vld1.u8 {d7}, [r3], r1 ; p3 - vld1.u8 {d8}, [r0], r1 ; p2 - vld1.u8 {d9}, [r3], r1 ; p2 - vld1.u8 {d10}, [r0], r1 ; p1 - vld1.u8 {d11}, [r3], r1 ; p1 - vld1.u8 {d12}, [r0], r1 ; p0 - vld1.u8 {d13}, [r3], r1 ; p0 - vld1.u8 {d14}, [r0], r1 ; q0 - vld1.u8 {d15}, [r3], r1 ; q0 - vld1.u8 {d16}, [r0], r1 ; q1 - vld1.u8 {d17}, [r3], r1 ; q1 - vld1.u8 {d18}, [r0], r1 ; q2 - vld1.u8 {d19}, [r3], r1 ; q2 - vld1.u8 {d20}, [r0], r1 ; q3 - vld1.u8 {d21}, [r3], r1 ; q3 - - vld1.s8 {d4[], d5[]}, [r12] ; thresh - - ldr r12, _mbhlfuv_coeff_ - - ;vp8_filter_mask() function - ;vp8_hevmask() 
function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q0, q10, q9 ; abs(q3 - q2) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1 - - vand q15, q15, q12 - - vabd.u8 q12, q6, q7 ; abs(p0 - q0) - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vld1.s8 {d4[], d5[]}, [r2] ; flimit - - vand q10, q10, q11 - vand q3, q3, q0 - - vld1.u8 {q0}, [r12]! - - vadd.u8 q2, q2, q2 ; flimit * 2 - vadd.u8 q2, q2, q1 ; flimit * 2 + limit - - vabd.u8 q1, q5, q8 ; abs(p1 - q1) - vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2 - vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1 - - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value - veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value - veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value -;;;;;;;;;;;;; - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q13, d15, d13 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - - ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q11, q13, q13 - - vand q3, q3, q12 - - ;vadd.s8 q2, q2, q10 - vadd.s16 q2, q2, q10 - 
vadd.s16 q13, q13, q11 - - vld1.u8 {q12}, [r12]! ;#3 - - ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d3 - - vand q15, q15, q3 ; q15: vp8_filter_mask - vld1.u8 {q11}, [r12]! ;#4 - - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q13 - -;;;;;;;;;;;;;; - vand q1, q1, q15 ; vp8_filter &= mask - - vld1.u8 {q15}, [r12]! ;#63 - ; - vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev - - vld1.u8 {d7}, [r12]! ;#9 - ; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q13, q12 ; s = Filter2 & 7 - -; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4) -; vld1.u8 {d6}, [r12]! ;#18 - -; sub r0, r0, r1, lsl #3 -; sub r3, r3, r1, lsl #3 - -; vshr.s8 q13, q13, #3 ; Filter2 >>= 3 -; vceq.i8 q2, q2, q11 ; s = (s==4)*-1 - -; add r0, r0, r1 -; add r3, r3, r1 - -; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) -; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2) - -; vld1.u8 {d5}, [r12]! ;#27 -; vmov q10, q15 -; vmov q12, q15 - -; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4) - vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3) - - vld1.u8 {d6}, [r12]! ;#18 - - sub r0, r0, r1, lsl #3 - sub r3, r3, r1, lsl #3 - - vshr.s8 q2, q2, #3 ; Filter1 >>= 3 - vshr.s8 q13, q13, #3 ; Filter2 >>= 3 - - vmov q10, q15 - vmov q12, q15 - - vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) - - vld1.u8 {d5}, [r12]! 
;#27 - - add r0, r0, r1 - add r3, r3, r1 - - vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter - - ; roughly 1/7th difference across boundary - ; roughly 2/7th difference across boundary - ; roughly 3/7th difference across boundary - vmov q11, q15 - vmov q13, q15 - vmov q14, q15 - - vmlal.s8 q10, d2, d7 ; Filter2 * 9 - vmlal.s8 q11, d3, d7 - vmlal.s8 q12, d2, d6 ; Filter2 * 18 - vmlal.s8 q13, d3, d6 - vmlal.s8 q14, d2, d5 ; Filter2 * 27 - vmlal.s8 q15, d3, d5 - vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - vqshrn.s16 d21, q11, #7 - vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - vqshrn.s16 d25, q13, #7 - vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - vqshrn.s16 d29, q15, #7 - - vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u) - vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u) - vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u) - vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u) - vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u) - vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u) - veor q9, q11, q0 ; *oq2 = s^0x80 - veor q4, q10, q0 ; *op2 = s^0x80 - veor q8, q13, q0 ; *oq1 = s^0x80 - veor q5, q12, q0 ; *op2 = s^0x80 - veor q7, q15, q0 ; *oq0 = s^0x80 - veor q6, q14, q0 ; *op0 = s^0x80 - - vst1.u8 {d8}, [r0], r1 ; store u op2 - vst1.u8 {d9}, [r3], r1 ; store v op2 - vst1.u8 {d10}, [r0], r1 ; store u op1 - vst1.u8 {d11}, [r3], r1 ; store v op1 - vst1.u8 {d12}, [r0], r1 ; store u op0 - vst1.u8 {d13}, [r3], r1 ; store v op0 - vst1.u8 {d14}, [r0], r1 ; store u oq0 - vst1.u8 {d15}, [r3], r1 ; store v oq0 - vst1.u8 {d16}, [r0], r1 ; store u oq1 - vst1.u8 {d17}, [r3], r1 ; store v oq1 - vst1.u8 {d18}, [r0], r1 ; store u oq2 - vst1.u8 {d19}, [r3], r1 ; store v oq2 - - bx lr - ENDP ; 
|vp8_mbloop_filter_horizontal_edge_uv_neon| - -;----------------- - AREA mbhloopfilteruv_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_mbhlfuv_coeff_ - DCD mbhlfuv_coeff -mbhlfuv_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f - DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 - DCD 0x1b1b1b1b, 0x1b1b1b1b - - END diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm deleted file mode 100644 index 2ab0fc240..000000000 --- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm +++ /dev/null @@ -1,236 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_mbloop_filter_horizontal_edge_y_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. 
-; r0 unsigned char *s, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; //stack(r5) int count --unused -|vp8_mbloop_filter_horizontal_edge_y_neon| PROC - sub r0, r0, r1, lsl #2 ; move src pointer down by 4 lines - ldr r12, [sp, #0] ; load thresh pointer - - vld1.u8 {q3}, [r0], r1 ; p3 - vld1.s8 {d2[], d3[]}, [r3] ; limit - vld1.u8 {q4}, [r0], r1 ; p2 - vld1.s8 {d4[], d5[]}, [r12] ; thresh - vld1.u8 {q5}, [r0], r1 ; p1 - ldr r12, _mbhlfy_coeff_ - vld1.u8 {q6}, [r0], r1 ; p0 - - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vld1.u8 {q7}, [r0], r1 ; q0 - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vld1.u8 {q8}, [r0], r1 ; q1 - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vld1.u8 {q9}, [r0], r1 ; q2 - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vld1.u8 {q10}, [r0], r1 ; q3 - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q0, q10, q9 ; abs(q3 - q2) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1 - - vand q15, q15, q12 - - vabd.u8 q12, q6, q7 ; abs(p0 - q0) - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vld1.s8 {d4[], d5[]}, [r2] ; flimit - - vand q10, q10, q11 - vand q3, q3, q0 - - vld1.u8 {q0}, [r12]! 
- - vadd.u8 q2, q2, q2 ; flimit * 2 - vadd.u8 q2, q2, q1 ; flimit * 2 + limit - - vabd.u8 q1, q5, q8 ; abs(p1 - q1) - vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2 - vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1 - - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value - veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value - veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value -;;;;;;;;;;;;; - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q13, d15, d13 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - - ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q11, q13, q13 - - vand q3, q3, q12 - - ;vadd.s8 q2, q2, q10 - vadd.s16 q2, q2, q10 - vadd.s16 q13, q13, q11 - - vld1.u8 {q12}, [r12]! ;#3 - - ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d3 - - vand q15, q15, q3 ; q15: vp8_filter_mask - vld1.u8 {q11}, [r12]! ;#4 - - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q13 - -;;;;;;;;;;;;;; - vand q1, q1, q15 ; vp8_filter &= mask - - vld1.u8 {q15}, [r12]! ;#63 - ; - vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev - - vld1.u8 {d7}, [r12]! ;#9 - sub r0, r0, r1, lsl #3 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q13, q12 ; s = Filter2 & 7 - -; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4) -; vld1.u8 {d6}, [r12]! 
;#18 - -; add r0, r0, r1 -; add r2, r0, r1 - -; vshr.s8 q13, q13, #3 ; Filter2 >>= 3 -; vceq.i8 q2, q2, q11 ; s = (s==4)*-1 - -; add r3, r2, r1 - -; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) -; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2) - -; vld1.u8 {d5}, [r12]! ;#27 -; vmov q10, q15 -; vmov q12, q15 - -; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4) - vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3) - - vld1.u8 {d6}, [r12]! ;#18 - add r0, r0, r1 - add r2, r0, r1 - - vshr.s8 q2, q2, #3 ; Filter1 >>= 3 - vshr.s8 q13, q13, #3 ; Filter2 >>= 3 - - vmov q10, q15 - vmov q12, q15 - - vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) - - vld1.u8 {d5}, [r12]! ;#27 - add r3, r2, r1 - - vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter - - ; roughly 1/7th difference across boundary - ; roughly 2/7th difference across boundary - ; roughly 3/7th difference across boundary - vmov q11, q15 - vmov q13, q15 - vmov q14, q15 - - vmlal.s8 q10, d2, d7 ; Filter2 * 9 - vmlal.s8 q11, d3, d7 - vmlal.s8 q12, d2, d6 ; Filter2 * 18 - vmlal.s8 q13, d3, d6 - vmlal.s8 q14, d2, d5 ; Filter2 * 27 - vmlal.s8 q15, d3, d5 - vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - vqshrn.s16 d21, q11, #7 - vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - vqshrn.s16 d25, q13, #7 - vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - vqshrn.s16 d29, q15, #7 - - vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u) - vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u) - vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u) - vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u) - vqsub.s8 
q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u) - vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u) - veor q9, q11, q0 ; *oq2 = s^0x80 - veor q4, q10, q0 ; *op2 = s^0x80 - veor q5, q12, q0 ; *op2 = s^0x80 - veor q6, q14, q0 ; *op0 = s^0x80 - veor q8, q13, q0 ; *oq1 = s^0x80 - veor q7, q15, q0 ; *oq0 = s^0x80 - - vst1.u8 {q4}, [r0] ; store op2 - vst1.u8 {q5}, [r2] ; store op1 - vst1.u8 {q6}, [r3], r1 ; store op0 - add r12, r3, r1 - vst1.u8 {q7}, [r3] ; store oq0 - vst1.u8 {q8}, [r12], r1 ; store oq1 - vst1.u8 {q9}, [r12] ; store oq2 - - bx lr - ENDP ; |vp8_mbloop_filter_horizontal_edge_y_neon| - -;----------------- - AREA mbhloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_mbhlfy_coeff_ - DCD mbhlfy_coeff -mbhlfy_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f - DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 - DCD 0x1b1b1b1b, 0x1b1b1b1b - - END diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm deleted file mode 100644 index ad5afba34..000000000 --- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm +++ /dev/null @@ -1,296 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp8_mbloop_filter_vertical_edge_uv_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *u, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; stack(r5) unsigned char *v -|vp8_mbloop_filter_vertical_edge_uv_neon| PROC - sub r0, r0, #4 ; move src pointer down by 4 columns - vld1.s8 {d2[], d3[]}, [r3] ; limit - ldr r3, [sp, #4] ; load v ptr - ldr r12, [sp, #0] ; load thresh pointer - - sub r3, r3, #4 ; move v pointer down by 4 columns - - vld1.u8 {d6}, [r0], r1 ;load u data - vld1.u8 {d7}, [r3], r1 ;load v data - vld1.u8 {d8}, [r0], r1 - vld1.u8 {d9}, [r3], r1 - vld1.u8 {d10}, [r0], r1 - vld1.u8 {d11}, [r3], r1 - vld1.u8 {d12}, [r0], r1 - vld1.u8 {d13}, [r3], r1 - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d15}, [r3], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d17}, [r3], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d19}, [r3], r1 - vld1.u8 {d20}, [r0], r1 - vld1.u8 {d21}, [r3], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - sub sp, sp, #32 - vld1.s8 {d4[], d5[]}, [r12] ; thresh - vst1.u8 {q3}, [sp]! - ldr r12, _mbvlfuv_coeff_ - vst1.u8 {q10}, [sp]! 
- - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q0, q10, q9 ; abs(q3 - q2) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1 - - vand q15, q15, q12 - - vabd.u8 q12, q6, q7 ; abs(p0 - q0) - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vld1.s8 {d4[], d5[]}, [r2] ; flimit - - vand q10, q10, q11 - vand q3, q3, q0 - - vld1.u8 {q0}, [r12]! - - vadd.u8 q2, q2, q2 ; flimit * 2 - vadd.u8 q2, q2, q1 ; flimit * 2 + limit - - vabd.u8 q1, q5, q8 ; abs(p1 - q1) - vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2 - vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1 - - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value - veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value - veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value -;;;;;;;;;;;;; - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q13, d15, d13 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - - ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q11, q13, q13 - - vand q3, q3, q12 - - 
;vadd.s8 q2, q2, q10 - vadd.s16 q2, q2, q10 - vadd.s16 q13, q13, q11 - - vld1.u8 {q12}, [r12]! ;#3 - - ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d3 - - vand q15, q15, q3 ; q15: vp8_filter_mask - vld1.u8 {q11}, [r12]! ;#4 - - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q13 - -;;;;;;;;;;;;;; - vand q1, q1, q15 ; vp8_filter &= mask - - vld1.u8 {q15}, [r12]! ;#63 - ; - vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev - - vld1.u8 {d7}, [r12]! ;#9 - ; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q13, q12 ; s = Filter2 & 7 - -; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4) -; vld1.u8 {d6}, [r12]! ;#18 - -; sub r0, r0, r1, lsl #3 -; sub r3, r3, r1, lsl #3 -; sub sp, sp, #32 - -; vshr.s8 q13, q13, #3 ; Filter2 >>= 3 -; vceq.i8 q2, q2, q11 ; s = (s==4)*-1 - -; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) -; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2) - -; vld1.u8 {d5}, [r12]! ;#27 -; vmov q10, q15 -; vmov q12, q15 - -; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4) - vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3) - - vld1.u8 {d6}, [r12]! ;#18 - - sub r0, r0, r1, lsl #3 - sub r3, r3, r1, lsl #3 - - vshr.s8 q2, q2, #3 ; Filter1 >>= 3 - vshr.s8 q13, q13, #3 ; Filter2 >>= 3 - - vmov q10, q15 - vmov q12, q15 - - vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) - - vld1.u8 {d5}, [r12]! 
;#27 - - sub sp, sp, #32 - - vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter - - ; roughly 1/7th difference across boundary - ; roughly 2/7th difference across boundary - ; roughly 3/7th difference across boundary - vmov q11, q15 - vmov q13, q15 - vmov q14, q15 - - vmlal.s8 q10, d2, d7 ; Filter2 * 9 - vmlal.s8 q11, d3, d7 - vmlal.s8 q12, d2, d6 ; Filter2 * 18 - vmlal.s8 q13, d3, d6 - vmlal.s8 q14, d2, d5 ; Filter2 * 27 - vmlal.s8 q15, d3, d5 - vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - vqshrn.s16 d21, q11, #7 - vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - vqshrn.s16 d25, q13, #7 - vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - vqshrn.s16 d29, q15, #7 - - vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u) - vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u) - vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u) - vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u) - vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u) - vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u) - veor q9, q11, q0 ; *oq2 = s^0x80 - veor q4, q10, q0 ; *op2 = s^0x80 - veor q8, q13, q0 ; *oq1 = s^0x80 - veor q5, q12, q0 ; *op2 = s^0x80 - veor q7, q15, q0 ; *oq0 = s^0x80 - vld1.u8 {q3}, [sp]! - veor q6, q14, q0 ; *op0 = s^0x80 - vld1.u8 {q10}, [sp]! 
- - ;transpose to 16x8 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - ;store op2, op1, op0, oq0, oq1, oq2 - vst1.8 {d6}, [r0], r1 - vst1.8 {d7}, [r3], r1 - vst1.8 {d8}, [r0], r1 - vst1.8 {d9}, [r3], r1 - vst1.8 {d10}, [r0], r1 - vst1.8 {d11}, [r3], r1 - vst1.8 {d12}, [r0], r1 - vst1.8 {d13}, [r3], r1 - vst1.8 {d14}, [r0], r1 - vst1.8 {d15}, [r3], r1 - vst1.8 {d16}, [r0], r1 - vst1.8 {d17}, [r3], r1 - vst1.8 {d18}, [r0], r1 - vst1.8 {d19}, [r3], r1 - vst1.8 {d20}, [r0], r1 - vst1.8 {d21}, [r3], r1 - - bx lr - ENDP ; |vp8_mbloop_filter_vertical_edge_uv_neon| - -;----------------- - AREA mbvloopfilteruv_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_mbvlfuv_coeff_ - DCD mbvlfuv_coeff -mbvlfuv_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f - DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 - DCD 0x1b1b1b1b, 0x1b1b1b1b - - END diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm deleted file mode 100644 index 60e517519..000000000 --- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm +++ /dev/null @@ -1,303 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_mbloop_filter_vertical_edge_y_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit -;are equal. So, in the code, only one load is needed -;for flimit. Same way applies to limit and thresh. -; r0 unsigned char *s, -; r1 int p, //pitch -; r2 const signed char *flimit, -; r3 const signed char *limit, -; stack(r4) const signed char *thresh, -; //stack(r5) int count --unused -|vp8_mbloop_filter_vertical_edge_y_neon| PROC - sub r0, r0, #4 ; move src pointer down by 4 columns - - vld1.u8 {d6}, [r0], r1 ; load first 8-line src data - ldr r12, [sp, #0] ; load thresh pointer - vld1.u8 {d8}, [r0], r1 - sub sp, sp, #32 - vld1.u8 {d10}, [r0], r1 - vld1.u8 {d12}, [r0], r1 - vld1.u8 {d14}, [r0], r1 - vld1.u8 {d16}, [r0], r1 - vld1.u8 {d18}, [r0], r1 - vld1.u8 {d20}, [r0], r1 - - vld1.u8 {d7}, [r0], r1 ; load second 8-line src data - vld1.u8 {d9}, [r0], r1 - vld1.u8 {d11}, [r0], r1 - vld1.u8 {d13}, [r0], r1 - vld1.u8 {d15}, [r0], r1 - vld1.u8 {d17}, [r0], r1 - vld1.u8 {d19}, [r0], r1 - vld1.u8 {d21}, [r0], r1 - - ;transpose to 8x16 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - vld1.s8 {d2[], d3[]}, [r3] ; limit - vst1.u8 {q3}, [sp]! - vld1.s8 {d4[], d5[]}, [r12] ; thresh - ldr r12, _mbvlfy_coeff_ - vst1.u8 {q10}, [sp]! 
- - ;vp8_filter_mask() function - ;vp8_hevmask() function - vabd.u8 q11, q3, q4 ; abs(p3 - p2) - vabd.u8 q12, q4, q5 ; abs(p2 - p1) - vabd.u8 q13, q5, q6 ; abs(p1 - p0) - vabd.u8 q14, q8, q7 ; abs(q1 - q0) - vabd.u8 q3, q9, q8 ; abs(q2 - q1) - vabd.u8 q0, q10, q9 ; abs(q3 - q2) - - vcge.u8 q15, q1, q11 ; (abs(p3 - p2) > limit)*-1 - vcge.u8 q12, q1, q12 ; (abs(p2 - p1) > limit)*-1 - vcge.u8 q10, q1, q13 ; (abs(p1 - p0) > limit)*-1 - vcge.u8 q11, q1, q14 ; (abs(q1 - q0) > limit)*-1 - vcge.u8 q3, q1, q3 ; (abs(q2 - q1) > limit)*-1 - vcge.u8 q0, q1, q0 ; (abs(q3 - q2) > limit)*-1 - - vand q15, q15, q12 - - vabd.u8 q12, q6, q7 ; abs(p0 - q0) - - vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 - vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 - - vld1.s8 {d4[], d5[]}, [r2] ; flimit - - vand q10, q10, q11 - vand q3, q3, q0 - - vld1.u8 {q0}, [r12]! - - vadd.u8 q2, q2, q2 ; flimit * 2 - vadd.u8 q2, q2, q1 ; flimit * 2 + limit - - vabd.u8 q1, q5, q8 ; abs(p1 - q1) - vqadd.u8 q12, q12, q12 ; abs(p0 - q0) * 2 - vshr.u8 q1, q1, #1 ; abs(p1 - q1) / 2 - vqadd.u8 q12, q12, q1 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 - vcge.u8 q12, q2, q12 ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1 - - vand q15, q15, q10 - - ;vp8_filter() function - veor q7, q7, q0 ; qs0: q0 offset to convert to a signed value - veor q6, q6, q0 ; ps0: p0 offset to convert to a signed value - veor q5, q5, q0 ; ps1: p1 offset to convert to a signed value - veor q8, q8, q0 ; qs1: q1 offset to convert to a signed value - veor q4, q4, q0 ; ps2: p2 offset to convert to a signed value - veor q9, q9, q0 ; qs2: q2 offset to convert to a signed value -;;;;;;;;;;;;; - vorr q14, q13, q14 ; q14: vp8_hevmask - - ;vqsub.s8 q2, q7, q6 ; ( qs0 - ps0) - vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) - vsubl.s8 q13, d15, d13 - - vqsub.s8 q1, q5, q8 ; vp8_filter = vp8_signed_char_clamp(ps1-qs1) - - ;vadd.s8 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q10, q2, q2 ; 3 * ( qs0 - ps0) - vadd.s16 q11, q13, q13 - - vand q3, q3, q12 - - 
;vadd.s8 q2, q2, q10 - vadd.s16 q2, q2, q10 - vadd.s16 q13, q13, q11 - - vld1.u8 {q12}, [r12]! ;#3 - - ;vqadd.s8 q1, q1, q2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q2, q2, d2 ; vp8_filter + 3 * ( qs0 - ps0) - vaddw.s8 q13, q13, d3 - - vand q15, q15, q3 ; q15: vp8_filter_mask - vld1.u8 {q11}, [r12]! ;#4 - - vqmovn.s16 d2, q2 ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0)) - vqmovn.s16 d3, q13 - -;;;;;;;;;;;;;; - vand q1, q1, q15 ; vp8_filter &= mask - - vld1.u8 {q15}, [r12]! ;#63 - ; - vand q13, q1, q14 ; Filter2: q13; Filter2 &= hev - - vld1.u8 {d7}, [r12]! ;#9 - ; - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7 -; vand q2, q13, q12 ; s = Filter2 & 7 - -; vqadd.s8 q13, q13, q11 ; Filter2 = vp8_signed_char_clamp(Filter2+4) -; vld1.u8 {d6}, [r12]! ;#18 - -; sub r0, r0, r1, lsl #4 -; sub sp, sp, #32 -; add r2, r0, r1 - -; vshr.s8 q13, q13, #3 ; Filter2 >>= 3 -; vceq.i8 q2, q2, q11 ; s = (s==4)*-1 - -; add r3, r2, r1 - -; vqsub.s8 q7, q7, q13 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2) -; vqadd.s8 q11, q2, q13 ; u = vp8_signed_char_clamp(s + Filter2) - -; vld1.u8 {d5}, [r12]! ;#27 -; vmov q10, q15 -; vmov q12, q15 - -; vqadd.s8 q6, q6, q11 ; ps0 = vp8_signed_char_clamp(ps0 + u) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - vqadd.s8 q2, q13, q11 ; Filter1 = vp8_signed_char_clamp(Filter2+4) - vqadd.s8 q13, q13, q12 ; Filter2 = vp8_signed_char_clamp(Filter2+3) - - vld1.u8 {d6}, [r12]! ;#18 - sub r0, r0, r1, lsl #4 - sub sp, sp, #32 - - add r2, r0, r1 - - vshr.s8 q2, q2, #3 ; Filter1 >>= 3 - vshr.s8 q13, q13, #3 ; Filter2 >>= 3 - - vmov q10, q15 - vmov q12, q15 - - vqsub.s8 q7, q7, q2 ; qs0 = vp8_signed_char_clamp(qs0 - Filter1) - - vld1.u8 {d5}, [r12]! 
;#27 - add r3, r2, r1 - - vqadd.s8 q6, q6, q13 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - vbic q1, q1, q14 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter - - ; roughly 1/7th difference across boundary - ; roughly 2/7th difference across boundary - ; roughly 3/7th difference across boundary - vmov q11, q15 - vmov q13, q15 - vmov q14, q15 - - vmlal.s8 q10, d2, d7 ; Filter2 * 9 - vmlal.s8 q11, d3, d7 - vmlal.s8 q12, d2, d6 ; Filter2 * 18 - vmlal.s8 q13, d3, d6 - vmlal.s8 q14, d2, d5 ; Filter2 * 27 - vmlal.s8 q15, d3, d5 - vqshrn.s16 d20, q10, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) - vqshrn.s16 d21, q11, #7 - vqshrn.s16 d24, q12, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) - vqshrn.s16 d25, q13, #7 - vqshrn.s16 d28, q14, #7 ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) - vqshrn.s16 d29, q15, #7 - - vqsub.s8 q11, q9, q10 ; s = vp8_signed_char_clamp(qs2 - u) - vqadd.s8 q10, q4, q10 ; s = vp8_signed_char_clamp(ps2 + u) - vqsub.s8 q13, q8, q12 ; s = vp8_signed_char_clamp(qs1 - u) - vqadd.s8 q12, q5, q12 ; s = vp8_signed_char_clamp(ps1 + u) - vqsub.s8 q15, q7, q14 ; s = vp8_signed_char_clamp(qs0 - u) - vqadd.s8 q14, q6, q14 ; s = vp8_signed_char_clamp(ps0 + u) - veor q9, q11, q0 ; *oq2 = s^0x80 - veor q4, q10, q0 ; *op2 = s^0x80 - veor q8, q13, q0 ; *oq1 = s^0x80 - veor q5, q12, q0 ; *op2 = s^0x80 - veor q7, q15, q0 ; *oq0 = s^0x80 - vld1.u8 {q3}, [sp]! - veor q6, q14, q0 ; *op0 = s^0x80 - vld1.u8 {q10}, [sp]! 
- - ;transpose to 16x8 matrix - vtrn.32 q3, q7 - vtrn.32 q4, q8 - vtrn.32 q5, q9 - vtrn.32 q6, q10 - add r12, r3, r1 - - vtrn.16 q3, q5 - vtrn.16 q4, q6 - vtrn.16 q7, q9 - vtrn.16 q8, q10 - - vtrn.8 q3, q4 - vtrn.8 q5, q6 - vtrn.8 q7, q8 - vtrn.8 q9, q10 - - ;store op2, op1, op0, oq0, oq1, oq2 - vst1.8 {d6}, [r0] - vst1.8 {d8}, [r2] - vst1.8 {d10}, [r3] - vst1.8 {d12}, [r12], r1 - add r0, r12, r1 - vst1.8 {d14}, [r12] - vst1.8 {d16}, [r0], r1 - add r2, r0, r1 - vst1.8 {d18}, [r0] - vst1.8 {d20}, [r2], r1 - add r3, r2, r1 - vst1.8 {d7}, [r2] - vst1.8 {d9}, [r3], r1 - add r12, r3, r1 - vst1.8 {d11}, [r3] - vst1.8 {d13}, [r12], r1 - add r0, r12, r1 - vst1.8 {d15}, [r12] - vst1.8 {d17}, [r0], r1 - add r2, r0, r1 - vst1.8 {d19}, [r0] - vst1.8 {d21}, [r2] - - bx lr - ENDP ; |vp8_mbloop_filter_vertical_edge_y_neon| - -;----------------- - AREA mbvloopfiltery_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 16 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_mbvlfy_coeff_ - DCD mbvlfy_coeff -mbvlfy_coeff - DCD 0x80808080, 0x80808080, 0x80808080, 0x80808080 - DCD 0x03030303, 0x03030303, 0x03030303, 0x03030303 - DCD 0x04040404, 0x04040404, 0x04040404, 0x04040404 - DCD 0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f - DCD 0x09090909, 0x09090909, 0x12121212, 0x12121212 - DCD 0x1b1b1b1b, 0x1b1b1b1b - - END diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm index b9ba1cbc3..3f1a30f48 100644 --- a/vp8/common/arm/neon/recon16x16mb_neon.asm +++ b/vp8/common/arm/neon/recon16x16mb_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm index 25aaf8c8e..99b251c91 100644 --- a/vp8/common/arm/neon/recon2b_neon.asm +++ b/vp8/common/arm/neon/recon2b_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm index a4f5b806b..991727746 100644 --- a/vp8/common/arm/neon/recon4b_neon.asm +++ b/vp8/common/arm/neon/recon4b_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/recon_neon.c b/vp8/common/arm/neon/recon_neon.c new file mode 100644 index 000000000..f7930ee5f --- /dev/null +++ b/vp8/common/arm/neon/recon_neon.c @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include "recon.h" +#include "blockd.h" + +extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr); + +void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +{ + unsigned char *pred_ptr = &x->predictor[0]; + short *diff_ptr = &x->diff[0]; + unsigned char *dst_ptr = x->dst.y_buffer; + unsigned char *udst_ptr = x->dst.u_buffer; + unsigned char *vdst_ptr = x->dst.v_buffer; + int ystride = x->dst.y_stride; + /*int uv_stride = x->dst.uv_stride;*/ + + vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr); +} diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm index 16d85a0d5..288c0ef01 100644 --- a/vp8/common/arm/neon/reconb_neon.asm +++ b/vp8/common/arm/neon/reconb_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm index 4873e447f..fd7002e7a 100644 --- a/vp8/common/arm/neon/save_neon_reg.asm +++ b/vp8/common/arm/neon/save_neon_reg.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm index 7d06ff908..d7bdbae75 100644 --- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm +++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm index ffecfbfbc..d77a2879e 100644 --- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm +++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm index 9f5f0d2ce..e434a709c 100644 --- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm +++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm index c23a9dbd1..3d22d775a 100644 --- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm index 18e19f958..1dd6b1b37 100644 --- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm index d27485e6c..37255c758 100644 --- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm +++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c deleted file mode 100644 index 130059e64..000000000 --- a/vp8/common/arm/recon_arm.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include "recon.h" -#include "blockd.h" - -extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr); - -/* -void vp8_recon16x16mby(MACROBLOCKD *x) -{ - int i; - for(i=0;i<16;i+=4) - { - //vp8_recon4b(&x->block[i]); - BLOCKD *b = &x->block[i]; - vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -} -*/ -void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - BLOCKD *b = &x->block[0]; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - //b = &x->block[4]; - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - //b = &x->block[8]; - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - //b = &x->block[12]; - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); -} - -#if HAVE_ARMV7 -void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - unsigned char *pred_ptr = &x->predictor[0]; - short *diff_ptr = &x->diff[0]; - unsigned char *dst_ptr = x->dst.y_buffer; - unsigned char *udst_ptr = x->dst.u_buffer; - unsigned char *vdst_ptr = x->dst.v_buffer; - int ystride = x->dst.y_stride; - //int uv_stride = x->dst.uv_stride; - - vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr); -} - -#else -/* -void vp8_recon16x16mb(MACROBLOCKD *x) -{ - int i; - - for(i=0;i<16;i+=4) - { -// vp8_recon4b(&x->block[i]); - BLOCKD *b = &x->block[i]; - vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - - } - for(i=16;i<24;i+=2) - { -// vp8_recon2b(&x->block[i]); - BLOCKD *b = &x->block[i]; - vp8_recon2b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } -} -*/ -void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t 
*rtcd, MACROBLOCKD *x) -{ - BLOCKD *b = &x->block[0]; - - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 4; - RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 4; - - //b = &x->block[16]; - - RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b++; - b++; - RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b++; - b++; - RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b++; - b++; - RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); -} -#endif diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h index fd9f85eea..b46b7fc7d 100644 --- a/vp8/common/arm/recon_arm.h +++ b/vp8/common/arm/recon_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -20,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6); extern prototype_copy_block(vp8_copy_mem8x4_v6); extern prototype_copy_block(vp8_copy_mem16x16_v6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_recon #define vp8_recon_recon vp8_recon_b_armv6 @@ -38,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6); #undef vp8_recon_copy16x16 #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6 #endif +#endif #if HAVE_ARMV7 extern prototype_recon_block(vp8_recon_b_neon); @@ -48,6 +51,9 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon); extern prototype_copy_block(vp8_copy_mem8x4_neon); extern prototype_copy_block(vp8_copy_mem16x16_neon); +extern prototype_recon_macroblock(vp8_recon_mb_neon); + +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_recon_recon #define vp8_recon_recon vp8_recon_b_neon @@ -65,6 +71,10 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon); #undef vp8_recon_copy16x16 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon + +#undef vp8_recon_recon_mb +#define vp8_recon_recon_mb vp8_recon_mb_neon +#endif #endif #endif diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c deleted file mode 100644 index 334d35236..000000000 --- a/vp8/common/arm/reconintra4x4_arm.c +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "vpx_ports/config.h" -#include "recon.h" -#include "vpx_mem/vpx_mem.h" -#include "reconintra.h" - -void vp8_predict_intra4x4(BLOCKD *x, - int b_mode, - unsigned char *predictor) -{ - int i, r, c; - - unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride; - unsigned char Left[4]; - unsigned char top_left = Above[-1]; - - Left[0] = (*(x->base_dst))[x->dst - 1]; - Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride]; - Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride]; - Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride]; - - switch (b_mode) - { - case B_DC_PRED: - { - int expected_dc = 0; - - for (i = 0; i < 4; i++) - { - expected_dc += Above[i]; - expected_dc += Left[i]; - } - - expected_dc = (expected_dc + 4) >> 3; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - predictor[c] = expected_dc; - } - - predictor += 16; - } - } - break; - case B_TM_PRED: - { - // prediction similar to true_motion prediction - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - int pred = Above[c] - top_left + Left[r]; - - if (pred < 0) - pred = 0; - - if (pred > 255) - pred = 255; - - predictor[c] = pred; - } - - predictor += 16; - } - } - break; - - case B_VE_PRED: - { - - unsigned int ap[4]; - ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2; - ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2; - ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2; - ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - - predictor[c] = ap[c]; - } - - predictor += 16; - } - - } - break; - - - case B_HE_PRED: - { - - unsigned int lp[4]; - lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2; - lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2; - lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2; - lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2; - - for (r = 0; r < 4; r++) - { - for (c = 0; c < 4; c++) - { - predictor[c] = lp[r]; - } - - predictor += 16; 
- } - } - break; - case B_LD_PRED: - { - unsigned char *ptr = Above; - predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; - predictor[0 * 16 + 1] = - predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; - predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; - - } - break; - case B_RD_PRED: - { - - unsigned char pp[9]; - - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[3 * 16 + 2] = - predictor[2 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 3] = - predictor[2 * 16 + 2] = - predictor[1 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - - } - break; - case B_VR_PRED: - { - - unsigned char pp[9]; - - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) 
>> 2; - predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1; - predictor[3 * 16 + 2] = - predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1; - predictor[3 * 16 + 3] = - predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - predictor[2 * 16 + 3] = - predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1; - predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1; - - } - break; - case B_VL_PRED: - { - - unsigned char *pp = Above; - - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1; - predictor[1 * 16 + 1] = - predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 1] = - predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1; - predictor[3 * 16 + 1] = - predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[0 * 16 + 3] = - predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - case B_HD_PRED: - { - unsigned char pp[9]; - pp[0] = Left[3]; - pp[1] = Left[2]; - pp[2] = Left[1]; - pp[3] = Left[0]; - pp[4] = top_left; - pp[5] = Above[0]; - pp[6] = Above[1]; - pp[7] = Above[2]; - pp[8] = Above[3]; - - - predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[2 * 16 + 0] = - predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1; - predictor[2 * 16 + 1] = - predictor[3 * 16 + 3] = 
(pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; - predictor[2 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; - predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; - predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; - } - break; - - - case B_HU_PRED: - { - unsigned char *pp = Left; - predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; - predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; - predictor[0 * 16 + 2] = - predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1; - predictor[0 * 16 + 3] = - predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; - predictor[1 * 16 + 2] = - predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; - predictor[1 * 16 + 3] = - predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; - predictor[2 * 16 + 2] = - predictor[2 * 16 + 3] = - predictor[3 * 16 + 0] = - predictor[3 * 16 + 1] = - predictor[3 * 16 + 2] = - predictor[3 * 16 + 3] = pp[3]; - } - break; - - - } -} -// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and -// to the right prediction have filled in pixels to use. 
-void vp8_intra_prediction_down_copy(MACROBLOCKD *x) -{ - unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; - - unsigned int *src_ptr = (unsigned int *)above_right; - unsigned int *dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride); - unsigned int *dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride); - unsigned int *dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride); - - *dst_ptr0 = *src_ptr; - *dst_ptr1 = *src_ptr; - *dst_ptr2 = *src_ptr; -} - - - -/* -void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - int i; - - vp8_intra_prediction_down_copy(x); - - for(i=0;i<16;i++) - { - BLOCKD *b = &x->block[i]; - - vp8_predict_intra4x4(b, x->block[i].bmi.mode,x->block[i].predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } - - vp8_recon_intra_mbuv(x); - -} -*/ -void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) -{ - int i; - BLOCKD *b = &x->block[0]; - - vp8_intra_prediction_down_copy(x); - - { - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, 
recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - b += 1; - - vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); - } - - vp8_recon_intra_mbuv(rtcd, x); - -} diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c index d7ee1ddfa..4cc93d134 100644 --- a/vp8/common/arm/reconintra_arm.c +++ b/vp8/common/arm/reconintra_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,7 @@ void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x) unsigned char *y_buffer = x->dst.y_buffer; unsigned char *ypred_ptr = x->predictor; int y_stride = x->dst.y_stride; - int mode = x->mbmi.mode; + int mode = x->mode_info_context->mbmi.mode; int Up = x->up_available; int Left = x->left_available; @@ -51,7 +52,7 @@ void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x) unsigned char *y_buffer = x->dst.y_buffer; unsigned char *ypred_ptr = x->predictor; int y_stride = x->dst.y_stride; - int mode = x->mbmi.mode; + int mode = x->mode_info_context->mbmi.mode; int Up = x->up_available; int Left = x->left_available; diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h index 56aec55b9..6288538d0 100644 --- a/vp8/common/arm/subpixel_arm.h +++ b/vp8/common/arm/subpixel_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -21,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6); extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6); extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_subpix_sixtap16x16 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6 @@ -45,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6); #undef vp8_subpix_bilinear4x4 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon); @@ -56,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon); extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon); extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_subpix_sixtap16x16 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon @@ -80,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon); #undef vp8_subpix_bilinear4x4 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon #endif +#endif #endif diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c deleted file mode 100644 index ecc6929c0..000000000 --- a/vp8/common/arm/systemdependent.c +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "g_common.h" -#include "pragmas.h" -#include "subpixel.h" -#include "loopfilter.h" -#include "recon.h" -#include "idct.h" -#include "onyxc_int.h" - -void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x); - -void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x); -extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x); - -void vp8_machine_specific_config(VP8_COMMON *ctx) -{ -#if CONFIG_RUNTIME_CPU_DETECT - VP8_COMMON_RTCD *rtcd = &ctx->rtcd; - -#if HAVE_ARMV7 - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_neon; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_neon; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_neon; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_neon; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_neon; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_neon; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_neon; - - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_neon; - rtcd->idct.idct16 = vp8_short_idct4x4llm_neon; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_neon; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_neon; - rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_neon; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_neon; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_neon; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_neon; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon; - rtcd->loopfilter.simple_b_h = 
vp8_loop_filter_bhs_neon; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_neon; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_neon; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_neon; - rtcd->recon.recon = vp8_recon_b_neon; - rtcd->recon.recon2 = vp8_recon2b_neon; - rtcd->recon.recon4 = vp8_recon4b_neon; -#elif HAVE_ARMV6 - - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_armv6; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_armv6; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_armv6; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_armv6; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_armv6; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_armv6; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_armv6; - - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_v6; - rtcd->idct.idct16 = vp8_short_idct4x4llm_v6_dual; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_armv6; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_armv6; - rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_armv6; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_armv6; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_armv6; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_armv6; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6; - rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_armv6; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_v6; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_v6; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_v6; - rtcd->recon.recon = vp8_recon_b_armv6; - rtcd->recon.recon2 = vp8_recon2b_armv6; - rtcd->recon.recon4 = vp8_recon4b_armv6; -#else -//pure c - rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c; - rtcd->idct.idct16 = vp8_short_idct4x4llm_c; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_c; - rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; - 
rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; - - rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; - rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; - rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; - rtcd->recon.recon = vp8_recon_b_c; - rtcd->recon.recon2 = vp8_recon2b_c; - rtcd->recon.recon4 = vp8_recon4b_c; - - rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c; - rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c; - rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_c; - rtcd->subpix.sixtap4x4 = vp8_sixtap_predict_c; - rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c; - rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_c; - rtcd->subpix.bilinear8x4 = vp8_bilinear_predict8x4_c; - rtcd->subpix.bilinear4x4 = vp8_bilinear_predict4x4_c; - - rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c; - rtcd->loopfilter.normal_b_v = vp8_loop_filter_bv_c; - rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c; - rtcd->loopfilter.normal_b_h = vp8_loop_filter_bh_c; - rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c; - rtcd->loopfilter.simple_b_v = vp8_loop_filter_bvs_c; - rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c; - rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c; -#endif - - rtcd->postproc.down = vp8_mbpost_proc_down_c; - rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; - rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; - rtcd->postproc.addnoise = vp8_plane_add_noise_c; -#endif - -#if HAVE_ARMV7 - vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon; -#elif HAVE_ARMV6 - vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; -#else - vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; - vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; - -#endif - -} diff --git a/vp8/common/arm/vpx_asm_offsets.c 
b/vp8/common/arm/vpx_asm_offsets.c index 68634bf55..5baf8ccf5 100644 --- a/vp8/common/arm/vpx_asm_offsets.c +++ b/vp8/common/arm/vpx_asm_offsets.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -31,55 +32,50 @@ */ #if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER -DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); -DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); -DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); -DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); -DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); -DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); -DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); -DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); -DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); +DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width)); +DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height)); +DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride)); +DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width)); 
+DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height)); +DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride)); +DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer)); +DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer)); +DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer)); DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border)); #endif #if CONFIG_VP8_DECODER DEFINE(mb_diff, offsetof(MACROBLOCKD, diff)); DEFINE(mb_predictor, offsetof(MACROBLOCKD, predictor)); -DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride)); -DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer)); -DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer)); -DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer)); -DEFINE(mb_mbmi_mode, offsetof(MACROBLOCKD, mbmi.mode)); -DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available)); -DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available)); +DEFINE(mb_dst_y_stride, offsetof(MACROBLOCKD, dst.y_stride)); +DEFINE(mb_dst_y_buffer, offsetof(MACROBLOCKD, dst.y_buffer)); +DEFINE(mb_dst_u_buffer, offsetof(MACROBLOCKD, dst.u_buffer)); +DEFINE(mb_dst_v_buffer, offsetof(MACROBLOCKD, dst.v_buffer)); +DEFINE(mb_up_available, offsetof(MACROBLOCKD, up_available)); +DEFINE(mb_left_available, offsetof(MACROBLOCKD, left_available)); DEFINE(detok_scan, offsetof(DETOK, scan)); -DEFINE(detok_ptr_onyxblock2context_leftabove, offsetof(DETOK, ptr_onyxblock2context_leftabove)); -DEFINE(detok_onyx_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr)); -DEFINE(detok_teb_base_ptr, offsetof(DETOK, teb_base_ptr)); -DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr)); -DEFINE(detok_ptr_onyx_coef_bands_x, offsetof(DETOK, ptr_onyx_coef_bands_x)); +DEFINE(detok_ptr_block2leftabove, offsetof(DETOK, ptr_block2leftabove)); +DEFINE(detok_coef_tree_ptr, offsetof(DETOK, vp8_coef_tree_ptr)); +DEFINE(detok_teb_base_ptr, 
offsetof(DETOK, teb_base_ptr)); +DEFINE(detok_norm_ptr, offsetof(DETOK, norm_ptr)); +DEFINE(detok_ptr_coef_bands_x, offsetof(DETOK, ptr_coef_bands_x)); -DEFINE(DETOK_A, offsetof(DETOK, A)); -DEFINE(DETOK_L, offsetof(DETOK, L)); +DEFINE(detok_A, offsetof(DETOK, A)); +DEFINE(detok_L, offsetof(DETOK, L)); -DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr)); -DEFINE(detok_current_bc, offsetof(DETOK, current_bc)); -DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs)); +DEFINE(detok_qcoeff_start_ptr, offsetof(DETOK, qcoeff_start_ptr)); +DEFINE(detok_current_bc, offsetof(DETOK, current_bc)); +DEFINE(detok_coef_probs, offsetof(DETOK, coef_probs)); DEFINE(detok_eob, offsetof(DETOK, eob)); -DEFINE(bool_decoder_lowvalue, offsetof(BOOL_DECODER, lowvalue)); -DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); +DEFINE(bool_decoder_user_buffer_end, offsetof(BOOL_DECODER, user_buffer_end)); +DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer)); DEFINE(bool_decoder_value, offsetof(BOOL_DECODER, value)); DEFINE(bool_decoder_count, offsetof(BOOL_DECODER, count)); -DEFINE(bool_decoder_user_buffer, offsetof(BOOL_DECODER, user_buffer)); -DEFINE(bool_decoder_user_buffer_sz, offsetof(BOOL_DECODER, user_buffer_sz)); -DEFINE(bool_decoder_decode_buffer, offsetof(BOOL_DECODER, decode_buffer)); -DEFINE(bool_decoder_read_ptr, offsetof(BOOL_DECODER, read_ptr)); -DEFINE(bool_decoder_write_ptr, offsetof(BOOL_DECODER, write_ptr)); +DEFINE(bool_decoder_range, offsetof(BOOL_DECODER, range)); -DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); +DEFINE(tokenextrabits_min_val, offsetof(TOKENEXTRABITS, min_val)); DEFINE(tokenextrabits_length, offsetof(TOKENEXTRABITS, Length)); #endif diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h index 6a91ba1ae..6ac3f8b5a 100644 --- a/vp8/common/bigend.h +++ b/vp8/common/bigend.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c index 53f5e72d2..7f75a72c5 100644 --- a/vp8/common/blockd.c +++ b/vp8/common/blockd.c @@ -1,23 +1,24 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ #include "blockd.h" #include "vpx_mem/vpx_mem.h" -void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count) -{ - vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count); - vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count); -} - -const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0}; -const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0}; const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1}; -const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3}; + +const unsigned char vp8_block2left[25] = +{ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 +}; +const unsigned char vp8_block2above[25] = +{ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 +}; diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h index 7a7b06b36..333f02052 100644 --- a/vp8/common/blockd.h +++ b/vp8/common/blockd.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -23,7 +24,7 @@ void vpx_log(const char *format, ...); #define TRUE 1 #define FALSE 0 -//#define DCPRED 1 +/*#define DCPRED 1*/ #define DCPREDSIMTHRESH 0 #define DCPREDCNTTHRESH 3 @@ -38,7 +39,7 @@ void vpx_log(const char *format, ...); #define MAX_REF_LF_DELTAS 4 #define MAX_MODE_LF_DELTAS 4 -// Segment Feature Masks +/* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 @@ -48,19 +49,19 @@ typedef struct } POS; -typedef int ENTROPY_CONTEXT; - +typedef char ENTROPY_CONTEXT; typedef struct { - ENTROPY_CONTEXT l[4]; - ENTROPY_CONTEXT a[4]; -} TEMP_CONTEXT; + ENTROPY_CONTEXT y1[4]; + ENTROPY_CONTEXT u[2]; + ENTROPY_CONTEXT v[2]; + ENTROPY_CONTEXT y2; +} ENTROPY_CONTEXT_PLANES; -extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count); -extern const int vp8_block2left[25]; -extern const int vp8_block2above[25]; extern const int vp8_block2type[25]; -extern const int vp8_block2context[25]; + +extern const unsigned char vp8_block2left[25]; +extern const unsigned char vp8_block2above[25]; #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \ Dest = ((A)!=0) + ((B)!=0); @@ -74,11 +75,11 @@ typedef enum typedef enum { - DC_PRED, // average of above and left pixels - V_PRED, // vertical prediction - H_PRED, // horizontal prediction - TM_PRED, // Truemotion prediction - B_PRED, // block based prediction, each block has its own prediction mode + DC_PRED, /* average of above and left pixels */ + V_PRED, /* vertical prediction */ + H_PRED, /* horizontal prediction */ + TM_PRED, /* Truemotion prediction */ + B_PRED, /* block based prediction, each block has its own prediction mode */ NEARESTMV, NEARMV, @@ -89,16 +90,16 @@ typedef enum MB_MODE_COUNT } MB_PREDICTION_MODE; -// Macroblock level features +/* Macroblock level features */ typedef enum { - MB_LVL_ALT_Q = 0, // Use alternate Quantizer .... - MB_LVL_ALT_LF = 1, // Use alternate loop filter value... 
- MB_LVL_MAX = 2, // Number of MB level features supported + MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ + MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ + MB_LVL_MAX = 2 /* Number of MB level features supported */ } MB_LVL_FEATURES; -// Segment Feature Masks +/* Segment Feature Masks */ #define SEGMENT_ALTQ 0x01 #define SEGMENT_ALT_LF 0x02 @@ -109,11 +110,11 @@ typedef enum typedef enum { - B_DC_PRED, // average of above and left pixels + B_DC_PRED, /* average of above and left pixels */ B_TM_PRED, - B_VE_PRED, // vertical prediction - B_HE_PRED, // horizontal prediction + B_VE_PRED, /* vertical prediction */ + B_HE_PRED, /* horizontal prediction */ B_LD_PRED, B_RD_PRED, @@ -167,16 +168,16 @@ typedef struct int as_int; MV as_mv; } mv; - int partitioning; - int partition_count; - int mb_skip_coeff; //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens - int dc_diff; - unsigned char segment_id; // Which set of segmentation parameters should be used for this MB unsigned char segment_flag; - int force_no_skip; - B_MODE_INFO partition_bmi[16]; + unsigned char partitioning; + unsigned char mb_skip_coeff; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ + unsigned char dc_diff; + unsigned char need_to_clamp_mvs; + unsigned char segment_id; /* Which set of segmentation parameters should be used for this MB */ + + unsigned char force_no_skip; /* encoder only */ } MB_MODE_INFO; @@ -195,9 +196,9 @@ typedef struct short *diff; short *reference; - short(*dequant)[4]; + short *dequant; - // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries + /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */ unsigned char **base_pre; int pre; int pre_stride; @@ -214,49 +215,46 @@ typedef struct typedef struct { - DECLARE_ALIGNED(16, short, diff[400]); // from idct diff + DECLARE_ALIGNED(16, short, diff[400]); /* from idct diff */ DECLARE_ALIGNED(16, unsigned char, predictor[384]); - 
DECLARE_ALIGNED(16, short, reference[384]); +/* not used DECLARE_ALIGNED(16, short, reference[384]); */ DECLARE_ALIGNED(16, short, qcoeff[400]); DECLARE_ALIGNED(16, short, dqcoeff[400]); + DECLARE_ALIGNED(16, char, eobs[25]); - // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. + /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; - YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction + YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ YV12_BUFFER_CONFIG dst; MODE_INFO *mode_info_context; - MODE_INFO *mode_info; - int mode_info_stride; FRAME_TYPE frame_type; - MB_MODE_INFO mbmi; - int up_available; int left_available; - // Y,U,V,Y2 - ENTROPY_CONTEXT *above_context[4]; // row of context for each plane - ENTROPY_CONTEXT(*left_context)[4]; // (up to) 4 contexts "" + /* Y,U,V,Y2 */ + ENTROPY_CONTEXT_PLANES *above_context; + ENTROPY_CONTEXT_PLANES *left_context; - // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. + /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */ unsigned char segmentation_enabled; - // 0 (do not update) 1 (update) the macroblock segmentation map. + /* 0 (do not update) 1 (update) the macroblock segmentation map. */ unsigned char update_mb_segmentation_map; - // 0 (do not update) 1 (update) the macroblock segmentation feature data. + /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ unsigned char update_mb_segmentation_data; - // 0 (do not update) 1 (update) the macroblock segmentation feature data. + /* 0 (do not update) 1 (update) the macroblock segmentation feature data. 
*/ unsigned char mb_segement_abs_delta; unsigned char temporal_update; - // Per frame flags that define which MB level features (such as quantizer or loop filter level) - // are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO + /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */ + /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */ #if CONFIG_SEGMENTATION vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS + 3]; // Probability Tree used to code Segment number #else @@ -264,25 +262,22 @@ typedef struct #endif signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; // Segment parameters - // mode_based Loop filter adjustment + /* mode_based Loop filter adjustment */ unsigned char mode_ref_lf_delta_enabled; unsigned char mode_ref_lf_delta_update; - // Delta values have the range +/- MAX_LOOP_FILTER - //char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF - //char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT - signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = Intra, Last, GF, ARF - signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; // 0 = BPRED, ZERO_MV, MV, SPLIT + /* Delta values have the range +/- MAX_LOOP_FILTER */ + signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ + signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ + signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ + signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ - // Distance of MB away from frame edges + /* Distance of MB away from frame edges */ int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; - //char * gf_active_ptr; - signed char *gf_active_ptr; - unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; vp8_subpix_fn_t subpixel_predict; diff 
--git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h index 0659d4873..5658868a6 100644 --- a/vp8/common/boolcoder.h +++ b/vp8/common/boolcoder.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h index 7881b0a41..7a7db3847 100644 --- a/vp8/common/codec_common_interface.h +++ b/vp8/common/codec_common_interface.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef CODEC_COMMON_INTERFACE_H diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h index 99affd618..785e3ff70 100644 --- a/vp8/common/coefupdateprobs.h +++ b/vp8/common/coefupdateprobs.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/common.h b/vp8/common/common.h index 29f6d371b..9a93da991 100644 --- a/vp8/common/common.h +++ b/vp8/common/common.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h index deb5ed8e5..4e6248697 100644 --- a/vp8/common/common_types.h +++ b/vp8/common/common_types.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/context.c b/vp8/common/context.c index 17ee8c338..99e95d30f 100644 --- a/vp8/common/context.c +++ b/vp8/common/context.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c index e2d2d2c0f..8c03480fa 100644 --- a/vp8/common/debugmodes.c +++ b/vp8/common/debugmodes.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -20,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f int mb_index = 0; FILE *mvs = fopen("mvs.stt", "a"); - // print out the macroblock Y modes + /* print out the macroblock Y modes */ mb_index = 0; fprintf(mvs, "Mb Modes for Frame %d\n", frame); @@ -59,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f fprintf(mvs, "\n"); - // print out the macroblock UV modes + /* print out the macroblock UV modes */ mb_index = 0; fprintf(mvs, "UV Modes for Frame %d\n", frame); @@ -79,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f fprintf(mvs, "\n"); - // print out the block modes + /* print out the block modes */ mb_index = 0; fprintf(mvs, "Mbs for Frame %d\n", frame); { @@ -107,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f } fprintf(mvs, "\n"); - // print out the macroblock mvs + /* print out the macroblock mvs */ mb_index = 0; fprintf(mvs, "MVs for Frame %d\n", frame); @@ -127,7 +128,7 @@ void 
vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f fprintf(mvs, "\n"); - // print out the block modes + /* print out the block modes */ mb_index = 0; fprintf(mvs, "MVs for Frame %d\n", frame); { diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h index ccdf326e6..ca58d565a 100644 --- a/vp8/common/defaultcoefcounts.h +++ b/vp8/common/defaultcoefcounts.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -14,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C { { - // Block Type ( 0 ) + /* Block Type ( 0 ) */ { - // Coeff Band ( 0 ) + /* Coeff Band ( 0 ) */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, }, { - // Coeff Band ( 1 ) + /* Coeff Band ( 1 ) */ {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,}, {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,}, {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,}, }, { - // Coeff Band ( 2 ) + /* Coeff Band ( 2 ) */ {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,}, {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,}, {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,}, }, { - // Coeff Band ( 3 ) + /* Coeff Band ( 3 ) */ {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,}, {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,}, { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,}, }, { - // Coeff Band ( 4 ) + /* Coeff Band ( 4 ) */ {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,}, {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,}, { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,}, }, { - // Coeff Band ( 5 ) + /* Coeff Band ( 5 ) */ {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,}, {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,}, { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,}, }, { - // Coeff Band ( 6 ) + /* Coeff Band ( 6 ) */ {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,}, {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,}, { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,}, }, { - // Coeff Band ( 7 ) + /* Coeff Band ( 7 ) */ { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,}, }, }, { - // Block Type ( 1 ) + /* Block Type ( 1 ) */ { - // Coeff Band ( 0 ) + /* Coeff Band ( 0 ) */ {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,}, {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,}, {9337, 23725, 8487, 3954, 2107, 1836, 
1069, 399, 59, 0, 0, 18620,}, }, { - // Coeff Band ( 1 ) + /* Coeff Band ( 1 ) */ {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,}, {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,}, {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,}, }, { - // Coeff Band ( 2 ) + /* Coeff Band ( 2 ) */ {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,}, {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,}, {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,}, }, { - // Coeff Band ( 3 ) + /* Coeff Band ( 3 ) */ {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,}, {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,}, {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,}, }, { - // Coeff Band ( 4 ) + /* Coeff Band ( 4 ) */ {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,}, {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,}, { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,}, }, { - // Coeff Band ( 5 ) + /* Coeff Band ( 5 ) */ {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,}, {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,}, { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,}, }, { - // Coeff Band ( 6 ) + /* Coeff Band ( 6 ) */ {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,}, {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,}, { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,}, }, { - // Coeff Band ( 7 ) + /* Coeff Band ( 7 ) */ { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,}, { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,}, }, }, { - // Block Type ( 2 ) + /* Block Type ( 2 ) */ { - // Coeff Band ( 0 ) + /* Coeff Band ( 0 ) */ { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,}, {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,}, {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,}, }, { - // Coeff Band ( 1 ) + /* Coeff Band ( 1 ) */ {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,}, {1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,}, {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,}, }, { - // Coeff Band ( 2 ) + /* Coeff Band ( 2 ) 
*/ { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,}, { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,}, { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,}, }, { - // Coeff Band ( 3 ) + /* Coeff Band ( 3 ) */ { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,}, { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,}, }, { - // Coeff Band ( 4 ) + /* Coeff Band ( 4 ) */ { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,}, { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,}, }, { - // Coeff Band ( 5 ) + /* Coeff Band ( 5 ) */ { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,}, { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,}, }, { - // Coeff Band ( 6 ) + /* Coeff Band ( 6 ) */ { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,}, { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,}, }, { - // Coeff Band ( 7 ) + /* Coeff Band ( 7 ) */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, }, }, { - // Block Type ( 3 ) + /* Block Type ( 3 ) */ { - // Coeff Band ( 0 ) + /* Coeff Band ( 0 ) */ {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,}, {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,}, {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,}, }, { - // Coeff Band ( 1 ) + /* Coeff Band ( 1 ) */ {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,}, {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,}, {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,}, }, { - // Coeff Band ( 2 ) + /* Coeff Band ( 2 ) */ {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,}, {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,}, {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,}, }, { - // Coeff Band ( 3 ) + /* Coeff Band ( 3 ) */ {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,}, {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,}, {4343, 3974, 1360, 415, 132, 96, 
14, 1, 0, 0, 0, 1267,}, }, { - // Coeff Band ( 4 ) + /* Coeff Band ( 4 ) */ {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,}, {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,}, {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,}, }, { - // Coeff Band ( 5 ) + /* Coeff Band ( 5 ) */ {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,}, {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,}, {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,}, }, { - // Coeff Band ( 6 ) + /* Coeff Band ( 6 ) */ {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,}, {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,}, {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,}, }, { - // Coeff Band ( 7 ) + /* Coeff Band ( 7 ) */ { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,}, { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,}, diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h index 5e6fa0ca9..b923da6e0 100644 --- a/vp8/common/dma_desc.h +++ b/vp8/common/dma_desc.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h index f63a5cdc1..43daa65bc 100644 --- a/vp8/common/duck_io.h +++ b/vp8/common/duck_io.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c index e524c2acc..1438e7e0f 100644 --- a/vp8/common/entropy.c +++ b/vp8/common/entropy.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 1415832d5..0685cd0ae 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -16,18 +17,18 @@ /* Coefficient token alphabet */ -#define ZERO_TOKEN 0 //0 Extra Bits 0+0 -#define ONE_TOKEN 1 //1 Extra Bits 0+1 -#define TWO_TOKEN 2 //2 Extra Bits 0+1 -#define THREE_TOKEN 3 //3 Extra Bits 0+1 -#define FOUR_TOKEN 4 //4 Extra Bits 0+1 -#define DCT_VAL_CATEGORY1 5 //5-6 Extra Bits 1+1 -#define DCT_VAL_CATEGORY2 6 //7-10 Extra Bits 2+1 -#define DCT_VAL_CATEGORY3 7 //11-26 Extra Bits 4+1 -#define DCT_VAL_CATEGORY4 8 //11-26 Extra Bits 5+1 -#define DCT_VAL_CATEGORY5 9 //27-58 Extra Bits 5+1 -#define DCT_VAL_CATEGORY6 10 //59+ Extra Bits 11+1 -#define DCT_EOB_TOKEN 11 //EOB Extra Bits 0+0 +#define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ +#define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ +#define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ +#define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ +#define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ +#define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ +#define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ +#define DCT_VAL_CATEGORY3 7 /* 11-26 Extra Bits 4+1 */ +#define DCT_VAL_CATEGORY4 8 /* 11-26 Extra Bits 5+1 */ +#define DCT_VAL_CATEGORY5 9 /* 27-58 Extra Bits 5+1 */ +#define DCT_VAL_CATEGORY6 10 /* 59+ Extra Bits 11+1 */ +#define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ #define vp8_coef_tokens 12 #define MAX_ENTROPY_TOKENS vp8_coef_tokens @@ -82,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); coefficient band (and 
since zigzag positions 0, 1, and 2 are in distinct bands). */ -/*# define DC_TOKEN_CONTEXTS 3 // 00, 0!0, !0!0 */ +/*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ # define PREV_COEF_CONTEXTS 3 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]); diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c index 7dc1acde0..e9dc668b2 100644 --- a/vp8/common/entropymode.c +++ b/vp8/common/entropymode.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -263,8 +264,10 @@ void vp8_entropy_mode_init() vp8_tokens_from_tree(vp8_uv_mode_encodings, vp8_uv_mode_tree); vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree); - vp8_tokens_from_tree(VP8_MVREFENCODINGS, vp8_mv_ref_tree); - vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree); + vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array, + vp8_mv_ref_tree, NEARESTMV); + vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array, + vp8_sub_mv_ref_tree, LEFT4X4); vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree); } diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h index ff630a477..da6ae8ead 100644 --- a/vp8/common/entropymode.h +++ b/vp8/common/entropymode.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -53,10 +54,6 @@ extern struct vp8_token_struct vp8_mbsplit_encodings [VP8_NUMMBSPLITS]; extern struct vp8_token_struct vp8_mv_ref_encoding_array [VP8_MVREFS]; extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS]; -#define VP8_MVREFENCODINGS (vp8_mv_ref_encoding_array - NEARESTMV) -#define VP8_SUBMVREFENCODINGS (vp8_sub_mv_ref_encoding_array - LEFT4X4) - - extern const vp8_tree_index vp8_small_mvtree[]; extern struct vp8_token_struct vp8_small_mvencodings [8]; diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c index 2b00c17a9..e5df1f095 100644 --- a/vp8/common/entropymv.c +++ b/vp8/common/entropymv.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] = const MV_CONTEXT vp8_default_mv_context[2] = { {{ - // row - 162, // is short - 128, // sign - 225, 146, 172, 147, 214, 39, 156, // short tree - 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 // long bits + /* row */ + 162, /* is short */ + 128, /* sign */ + 225, 146, 172, 147, 214, 39, 156, /* short tree */ + 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ }}, {{ - // same for column - 164, // is short + /* same for column */ + 164, /* is short */ 128, 204, 170, 119, 235, 140, 230, 228, - 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 // long bits + 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ }} }; diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h index d940c599b..911507ddc 100644 --- a/vp8/common/entropymv.h +++ b/vp8/common/entropymv.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/extend.c b/vp8/common/extend.c index 74079527c..47207fa79 100644 --- a/vp8/common/extend.c +++ b/vp8/common/extend.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,14 +15,14 @@ static void extend_plane_borders ( - unsigned char *s, // source - int sp, // pitch - int h, // height - int w, // width - int et, // extend top border - int el, // extend left border - int eb, // extend bottom border - int er // extend right border + unsigned char *s, /* source */ + int sp, /* pitch */ + int h, /* height */ + int w, /* width */ + int et, /* extend top border */ + int el, /* extend left border */ + int eb, /* extend bottom border */ + int er /* extend right border */ ) { @@ -30,7 +31,7 @@ static void extend_plane_borders unsigned char *dest_ptr1, *dest_ptr2; int linesize; - // copy the left and right most columns out + /* copy the left and right most columns out */ src_ptr1 = s; src_ptr2 = s + w - 1; dest_ptr1 = s - el; @@ -38,7 +39,11 @@ static void extend_plane_borders for (i = 0; i < h - 0 + 1; i++) { - vpx_memset(dest_ptr1, src_ptr1[0], el); + /* Some linkers will complain if we call vpx_memset with el set to a + * constant 0. 
+ */ + if (el) + vpx_memset(dest_ptr1, src_ptr1[0], el); vpx_memset(dest_ptr2, src_ptr2[0], er); src_ptr1 += sp; src_ptr2 += sp; @@ -46,7 +51,7 @@ static void extend_plane_borders dest_ptr2 += sp; } - // Now copy the top and bottom source lines into each line of the respective borders + /* Now copy the top and bottom source lines into each line of the respective borders */ src_ptr1 = s - el; src_ptr2 = s + sp * (h - 1) - el; dest_ptr1 = s + sp * (-et) - el; @@ -72,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height) int er = 0xf & (16 - (width & 0xf)); int eb = 0xf & (16 - (height & 0xf)); - // check for non multiples of 16 + /* check for non multiples of 16 */ if (er != 0 || eb != 0) { extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er); - //adjust for uv + /* adjust for uv */ height = (height + 1) >> 1; width = (width + 1) >> 1; er = 0x7 & (8 - (width & 0x7)); @@ -91,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height) } } -// note the extension is only for the last row, for intra prediction purpose +/* note the extension is only for the last row, for intra prediction purpose */ void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) { int i; diff --git a/vp8/common/extend.h b/vp8/common/extend.h index 6809ae756..fd0a608e5 100644 --- a/vp8/common/extend.h +++ b/vp8/common/extend.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c index 38991cb28..399a847d5 100644 --- a/vp8/common/filter_c.c +++ b/vp8/common/filter_c.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -31,13 +32,13 @@ static const int bilinear_filters[8][2] = static const short sub_pel_filters[8][6] = { - { 0, 0, 128, 0, 0, 0 }, // note that 1/8 pel positions are just as per alpha -0.5 bicubic + { 0, 0, 128, 0, 0, 0 }, /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */ { 0, -6, 123, 12, -1, 0 }, - { 2, -11, 108, 36, -8, 1 }, // New 1/4 pel 6 tap filter + { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */ { 0, -9, 93, 50, -6, 0 }, - { 3, -16, 77, 77, -16, 3 }, // New 1/2 pel 6 tap filter + { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */ { 0, -6, 50, 93, -9, 0 }, - { 1, -8, 36, 108, -11, 2 }, // New 1/4 pel 6 tap filter + { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0 }, @@ -68,9 +69,9 @@ void vp8_filter_block2d_first_pass ((int)src_ptr[pixel_step] * vp8_filter[3]) + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); // Rounding + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - // Normalize back to 0-255 + /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; if (Temp < 0) @@ -82,7 +83,7 @@ void vp8_filter_block2d_first_pass src_ptr++; } - // Next row... + /* Next row... 
*/ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -107,16 +108,16 @@ void vp8_filter_block2d_second_pass { for (j = 0; j < output_width; j++) { - // Apply filter + /* Apply filter */ Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) + ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) + ((int)src_ptr[0] * vp8_filter[2]) + ((int)src_ptr[pixel_step] * vp8_filter[3]) + ((int)src_ptr[2*pixel_step] * vp8_filter[4]) + ((int)src_ptr[3*pixel_step] * vp8_filter[5]) + - (VP8_FILTER_WEIGHT >> 1); // Rounding + (VP8_FILTER_WEIGHT >> 1); /* Rounding */ - // Normalize back to 0-255 + /* Normalize back to 0-255 */ Temp = Temp >> VP8_FILTER_SHIFT; if (Temp < 0) @@ -128,7 +129,7 @@ void vp8_filter_block2d_second_pass src_ptr++; } - // Start next row + /* Start next row */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_pitch; } @@ -145,12 +146,12 @@ void vp8_filter_block2d const short *VFilter ) { - int FData[9*4]; // Temp data bufffer used in filtering + int FData[9*4]; /* Temp data bufffer used in filtering */ - // First filter 1-D horizontally... + /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter); - // then filter verticaly... + /* then filter verticaly... 
*/ vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter); } @@ -194,8 +195,8 @@ void vp8_sixtap_predict_c const short *HFilter; const short *VFilter; - HFilter = sub_pel_filters[xoffset]; // 6 tap - VFilter = sub_pel_filters[yoffset]; // 6 tap + HFilter = sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = sub_pel_filters[yoffset]; /* 6 tap */ vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter); } @@ -211,16 +212,16 @@ void vp8_sixtap_predict8x8_c { const short *HFilter; const short *VFilter; - int FData[13*16]; // Temp data bufffer used in filtering + int FData[13*16]; /* Temp data bufffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; // 6 tap - VFilter = sub_pel_filters[yoffset]; // 6 tap + HFilter = sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - // First filter 1-D horizontally... + /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter); - // then filter verticaly... + /* then filter verticaly... */ vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter); } @@ -237,16 +238,16 @@ void vp8_sixtap_predict8x4_c { const short *HFilter; const short *VFilter; - int FData[13*16]; // Temp data bufffer used in filtering + int FData[13*16]; /* Temp data bufffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; // 6 tap - VFilter = sub_pel_filters[yoffset]; // 6 tap + HFilter = sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - // First filter 1-D horizontally... + /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter); - // then filter verticaly... + /* then filter verticaly... 
*/ vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter); } @@ -263,16 +264,16 @@ void vp8_sixtap_predict16x16_c { const short *HFilter; const short *VFilter; - int FData[21*24]; // Temp data bufffer used in filtering + int FData[21*24]; /* Temp data bufffer used in filtering */ - HFilter = sub_pel_filters[xoffset]; // 6 tap - VFilter = sub_pel_filters[yoffset]; // 6 tap + HFilter = sub_pel_filters[xoffset]; /* 6 tap */ + VFilter = sub_pel_filters[yoffset]; /* 6 tap */ - // First filter 1-D horizontally... + /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter); - // then filter verticaly... + /* then filter verticaly... */ vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter); } @@ -323,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass { for (j = 0; j < output_width; j++) { - // Apply bilinear filter + /* Apply bilinear filter */ output_ptr[j] = (((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT; src_ptr++; } - // Next row... + /* Next row... */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } @@ -383,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass { for (j = 0; j < output_width; j++) { - // Apply filter + /* Apply filter */ Temp = ((int)src_ptr[0] * vp8_filter[0]) + ((int)src_ptr[pixel_step] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2); @@ -391,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass src_ptr++; } - // Next row... + /* Next row... */ src_ptr += src_pixels_per_line - output_width; output_ptr += output_pitch; } @@ -431,12 +432,12 @@ void vp8_filter_block2d_bil ) { - unsigned short FData[17*16]; // Temp data bufffer used in filtering + unsigned short FData[17*16]; /* Temp data bufffer used in filtering */ - // First filter 1-D horizontally... + /* First filter 1-D horizontally... 
*/ vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter); - // then 1-D vertically... + /* then 1-D vertically... */ vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter); } diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c index fcb1f202c..e63d4ef8d 100644 --- a/vp8/common/findnearmv.c +++ b/vp8/common/findnearmv.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -167,7 +168,7 @@ void vp8_find_near_mvs vp8_clamp_mv(nearest, xd); vp8_clamp_mv(nearby, xd); - vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy + vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/ } vp8_prob *vp8_mv_ref_probs( @@ -178,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs( p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1]; p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2]; p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3]; - //p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3]; + /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/ return p; } diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h index 2c02033e6..1a6c72bcd 100644 --- a/vp8/common/findnearmv.h +++ b/vp8/common/findnearmv.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp index 5f1faed2f..c5826285e 100644 --- a/vp8/common/fourcc.hpp +++ b/vp8/common/fourcc.hpp @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h index e68c53e1c..5f523980b 100644 --- a/vp8/common/g_common.h +++ b/vp8/common/g_common.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c index 0011ae0dc..b3eadaf27 100644 --- a/vp8/common/generic/systemdependent.c +++ b/vp8/common/generic/systemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -17,6 +18,7 @@ #include "onyxc_int.h" extern void vp8_arch_x86_common_init(VP8_COMMON *ctx); +extern void vp8_arch_arm_common_init(VP8_COMMON *ctx); void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x); extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x); @@ -31,16 +33,18 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->idct.idct1 = vp8_short_idct4x4llm_1_c; rtcd->idct.idct16 = vp8_short_idct4x4llm_c; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_c; + rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_c; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_c; rtcd->recon.copy16x16 = vp8_copy_mem16x16_c; rtcd->recon.copy8x8 = vp8_copy_mem8x8_c; rtcd->recon.copy8x4 = vp8_copy_mem8x4_c; - rtcd->recon.recon = vp8_recon_b_c; + rtcd->recon.recon = vp8_recon_b_c; rtcd->recon.recon2 = vp8_recon2b_c; - rtcd->recon.recon4 = vp8_recon4b_c; + rtcd->recon.recon4 = vp8_recon4b_c; + rtcd->recon.recon_mb = vp8_recon_mb_c; + rtcd->recon.recon_mby = vp8_recon_mby_c; rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_c; rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_c; @@ -60,15 +64,18 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c; rtcd->loopfilter.simple_b_h = vp8_loop_filter_bhs_c; -#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER - rtcd->postproc.down = vp8_mbpost_proc_down_c; - 
rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; - rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; - rtcd->postproc.addnoise = vp8_plane_add_noise_c; +#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR) + rtcd->postproc.down = vp8_mbpost_proc_down_c; + rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; + rtcd->postproc.downacross = vp8_post_proc_down_and_across_c; + rtcd->postproc.addnoise = vp8_plane_add_noise_c; + rtcd->postproc.blend_mb_inner = vp8_blend_mb_inner_c; + rtcd->postproc.blend_mb_outer = vp8_blend_mb_outer_c; + rtcd->postproc.blend_b = vp8_blend_b_c; #endif #endif - // Pure C: + /* Pure C: */ vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby; vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s; @@ -76,4 +83,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx) vp8_arch_x86_common_init(ctx); #endif +#if ARCH_ARM + vp8_arch_arm_common_init(ctx); +#endif + } diff --git a/vp8/common/header.h b/vp8/common/header.h index 8b2b0094a..3e98eeb3c 100644 --- a/vp8/common/header.h +++ b/vp8/common/header.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 47b5f0576..f5fd94dfd 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -17,8 +18,10 @@ #define prototype_idct(sym) \ void sym(short *input, short *output, int pitch) -#define prototype_idct_scalar(sym) \ - void sym(short input, short *output, int pitch) +#define prototype_idct_scalar_add(sym) \ + void sym(short input, \ + unsigned char *pred, unsigned char *output, \ + int pitch, int stride) #if ARCH_X86 || ARCH_X86_64 #include "x86/idct_x86.h" @@ -38,10 +41,10 @@ extern prototype_idct(vp8_idct_idct1); #endif extern prototype_idct(vp8_idct_idct16); -#ifndef vp8_idct_idct1_scalar -#define vp8_idct_idct1_scalar vp8_dc_only_idct_c +#ifndef vp8_idct_idct1_scalar_add +#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c #endif -extern prototype_idct_scalar(vp8_idct_idct1_scalar); +extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add); #ifndef vp8_idct_iwalsh1 @@ -55,14 +58,14 @@ extern prototype_second_order(vp8_idct_iwalsh1); extern prototype_second_order(vp8_idct_iwalsh16); typedef prototype_idct((*vp8_idct_fn_t)); -typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t)); +typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t)); typedef 
prototype_second_order((*vp8_second_order_fn_t)); typedef struct { - vp8_idct_fn_t idct1; - vp8_idct_fn_t idct16; - vp8_idct_scalar_fn_t idct1_scalar; + vp8_idct_fn_t idct1; + vp8_idct_fn_t idct16; + vp8_idct_scalar_add_fn_t idct1_scalar_add; vp8_second_order_fn_t iwalsh1; vp8_second_order_fn_t iwalsh16; diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index 57cf8584e..196062df6 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -103,23 +104,30 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch) } } - -void vp8_dc_only_idct_c(short input_dc, short *output, int pitch) +void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) { - int i; - int a1; - short *op = output; - int shortpitch = pitch >> 1; - a1 = ((input_dc + 4) >> 3); + int a1 = ((input_dc + 4) >> 3); + int r, c; - for (i = 0; i < 4; i++) + for (r = 0; r < 4; r++) { - op[0] = a1; - op[1] = a1; - op[2] = a1; - op[3] = a1; - op += shortpitch; + for (c = 0; c < 4; c++) + { + int a = a1 + pred_ptr[c] ; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dst_ptr[c] = (unsigned char) a ; + } + + dst_ptr += stride; + pred_ptr += pitch; } + } void vp8_short_inv_walsh4x4_c(short *input, short *output) diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index 1ff596ead..81a3f2d89 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -37,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD * { int i; - // do 2nd order transform on the dc block + /* do 2nd order transform on the dc block */ IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff); recon_dcblock(x); @@ -64,9 +65,10 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x { int i; - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != B_PRED && + x->mode_info_context->mbmi.mode != SPLITMV) { - // do 2nd order transform on the dc block + /* do 2nd order transform on the dc block */ IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff); recon_dcblock(x); diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h index 93a40f956..b3ffb7073 100644 --- a/vp8/common/invtrans.h +++ b/vp8/common/invtrans.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h index 08c525c5d..99df1164c 100644 --- a/vp8/common/littlend.h +++ b/vp8/common/littlend.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c index 79e617754..f9d082304 100644 --- a/vp8/common/loopfilter.c +++ b/vp8/common/loopfilter.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -22,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c); prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c); prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c); -// Horizontal MB filtering +/* Horizontal MB filtering */ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -46,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); } -// Vertical MB Filtering +/* Vertical MB Filtering */ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -70,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); } -// Horizontal B Filtering +/* Horizontal B Filtering */ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -98,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); } -// Vertical B Filtering +/* Vertical B Filtering */ void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -139,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm) const int yhedge_boost = 2; const int uvhedge_boost = 2; - // For each possible value for the loop filter fill out a "loop_filter_info" entry. + /* For each possible value for the loop filter fill out a "loop_filter_info" entry. 
*/ for (i = 0; i <= MAX_LOOP_FILTER; i++) { int filt_lvl = i; @@ -165,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm) HEVThresh = 0; } - // Set loop filter paramaeters that control sharpness. + /* Set loop filter paramaeters that control sharpness. */ block_inside_limit = filt_lvl >> (sharpness_lvl > 0); block_inside_limit = block_inside_limit >> (sharpness_lvl > 4); @@ -194,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm) } - // Set up the function pointers depending on the type of loop filtering selected + /* Set up the function pointers depending on the type of loop filtering selected */ if (lft == NORMAL_LOOPFILTER) { cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v); @@ -211,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm) } } -// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding -// each frame. Check last_frame_type to skip the function most of times. +/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding + * each frame. Check last_frame_type to skip the function most of times. + */ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type) { int HEVThresh; int i, j; - // For each possible value for the loop filter fill out a "loop_filter_info" entry. + /* For each possible value for the loop filter fill out a "loop_filter_info" entry. 
*/ for (i = 0; i <= MAX_LOOP_FILTER; i++) { int filt_lvl = i; @@ -246,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type) for (j = 0; j < 16; j++) { - //lfi[i].lim[j] = block_inside_limit; - //lfi[i].mbflim[j] = filt_lvl+yhedge_boost; + /*lfi[i].lim[j] = block_inside_limit; + lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/ lfi[i].mbthr[j] = HEVThresh; - //lfi[i].flim[j] = filt_lvl; + /*lfi[i].flim[j] = filt_lvl;*/ lfi[i].thr[j] = HEVThresh; - //lfi[i].uvlim[j] = block_inside_limit; - //lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost; + /*lfi[i].uvlim[j] = block_inside_limit; + lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/ lfi[i].uvmbthr[j] = HEVThresh; - //lfi[i].uvflim[j] = filt_lvl; + /*lfi[i].uvflim[j] = filt_lvl;*/ lfi[i].uvthr[j] = HEVThresh; } } @@ -267,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level) if (mbd->mode_ref_lf_delta_enabled) { - // Aplly delta for reference frame + /* Apply delta for reference frame */ *filter_level += mbd->ref_lf_deltas[mbmi->ref_frame]; - // Apply delta for mode + /* Apply delta for mode */ if (mbmi->ref_frame == INTRA_FRAME) { - // Only the split mode BPRED has a further special case + /* Only the split mode BPRED has a further special case */ if (mbmi->mode == B_PRED) *filter_level += mbd->mode_lf_deltas[0]; } else { - // Zero motion mode + /* Zero motion mode */ if (mbmi->mode == ZEROMV) *filter_level += mbd->mode_lf_deltas[1]; - // Split MB motion mode + /* Split MB motion mode */ else if (mbmi->mode == SPLITMV) *filter_level += mbd->mode_lf_deltas[3]; - // All other inter motion modes (Nearest, Near, New) + /* All other inter motion modes (Nearest, Near, New) */ else *filter_level += mbd->mode_lf_deltas[2]; } - // Range check + /* Range check */ if (*filter_level > MAX_LOOP_FILTER) *filter_level = MAX_LOOP_FILTER; else if (*filter_level < 0) @@ -310,7 +312,7 @@ void vp8_loop_filter_frame { YV12_BUFFER_CONFIG *post = cm->frame_to_show; loop_filter_info *lfi = 
cm->lf_info; - int frame_type = cm->frame_type; + FRAME_TYPE frame_type = cm->frame_type; int mb_row; int mb_col; @@ -323,21 +325,21 @@ void vp8_loop_filter_frame int i; unsigned char *y_ptr, *u_ptr, *v_ptr; - mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list + mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */ - // Note the baseline filter values for each segment + /* Note the baseline filter values for each segment */ if (alt_flt_enabled) { for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // Abs value + /* Abs value */ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - // Delta Value + /* Delta Value */ else { baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range + baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */ } } } @@ -347,18 +349,18 @@ void vp8_loop_filter_frame baseline_filter_level[i] = default_filt_lvl; } - // Initialize the loop filter for this frame. + /* Initialize the loop filter for this frame. 
*/ if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level)) vp8_init_loop_filter(cm); else if (frame_type != cm->last_frame_type) vp8_frame_init_loop_filter(lfi, frame_type); - // Set up the buffer pointers + /* Set up the buffer pointers */ y_ptr = post->y_buffer; u_ptr = post->u_buffer; v_ptr = post->v_buffer; - // vp8_filter each macro block + /* vp8_filter each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) @@ -367,9 +369,10 @@ void vp8_loop_filter_frame filter_level = baseline_filter_level[Segment]; - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to values that are in 1/8th pel units - // Apply any context driven MB level adjustment + /* Distance of Mb to the various image edges. + * These specified to 8th pel as they are always compared to values that are in 1/8th pel units + * Apply any context driven MB level adjustment + */ vp8_adjust_mb_lf_value(mbd, &filter_level); if (filter_level) @@ -380,7 +383,7 @@ void vp8_loop_filter_frame if (mbd->mode_info_context->mbmi.dc_diff > 0) cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); - // don't apply across umv border + /* don't apply across umv border */ if (mb_row > 0) cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); @@ -392,14 +395,14 @@ void vp8_loop_filter_frame u_ptr += 8; v_ptr += 8; - mbd->mode_info_context++; // step to next MB + mbd->mode_info_context++; /* step to next MB */ } y_ptr += post->y_stride * 16 - post->y_width; u_ptr += post->uv_stride * 8 - post->uv_width; v_ptr += post->uv_stride * 8 - post->uv_width; - mbd->mode_info_context++; // Skip border mb + mbd->mode_info_context++; /* Skip border mb */ } } @@ -423,26 +426,26 @@ void vp8_loop_filter_frame_yonly int baseline_filter_level[MAX_MB_SEGMENTS]; int filter_level; int 
alt_flt_enabled = mbd->segmentation_enabled; - int frame_type = cm->frame_type; + FRAME_TYPE frame_type = cm->frame_type; (void) sharpness_lvl; - //MODE_INFO * this_mb_mode_info = cm->mi; // Point at base of Mb MODE_INFO list - mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list + /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */ + mbd->mode_info_context = cm->mi; /* Point at base of Mb MODE_INFO list */ - // Note the baseline filter values for each segment + /* Note the baseline filter values for each segment */ if (alt_flt_enabled) { for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // Abs value + /* Abs value */ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - // Delta Value + /* Delta Value */ else { baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range + baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */ } } } @@ -452,16 +455,16 @@ void vp8_loop_filter_frame_yonly baseline_filter_level[i] = default_filt_lvl; } - // Initialize the loop filter for this frame. + /* Initialize the loop filter for this frame. 
*/ if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level)) vp8_init_loop_filter(cm); else if (frame_type != cm->last_frame_type) vp8_frame_init_loop_filter(lfi, frame_type); - // Set up the buffer pointers + /* Set up the buffer pointers */ y_ptr = post->y_buffer; - // vp8_filter each macro block + /* vp8_filter each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) @@ -469,7 +472,7 @@ void vp8_loop_filter_frame_yonly int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0; filter_level = baseline_filter_level[Segment]; - // Apply any context driven MB level adjustment + /* Apply any context driven MB level adjustment */ vp8_adjust_mb_lf_value(mbd, &filter_level); if (filter_level) @@ -480,7 +483,7 @@ void vp8_loop_filter_frame_yonly if (mbd->mode_info_context->mbmi.dc_diff > 0) cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); - // don't apply across umv border + /* don't apply across umv border */ if (mb_row > 0) cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0); @@ -489,12 +492,12 @@ void vp8_loop_filter_frame_yonly } y_ptr += 16; - mbd->mode_info_context ++; // step to next MB + mbd->mode_info_context ++; /* step to next MB */ } y_ptr += post->y_stride * 16 - post->y_width; - mbd->mode_info_context ++; // Skip border mb + mbd->mode_info_context ++; /* Skip border mb */ } } @@ -515,7 +518,7 @@ void vp8_loop_filter_partial_frame unsigned char *y_ptr; int mb_row; int mb_col; - //int mb_rows = post->y_height >> 4; + /*int mb_rows = post->y_height >> 4;*/ int mb_cols = post->y_width >> 4; int linestocopy; @@ -524,12 +527,12 @@ void vp8_loop_filter_partial_frame int baseline_filter_level[MAX_MB_SEGMENTS]; int filter_level; int alt_flt_enabled = mbd->segmentation_enabled; - int frame_type = cm->frame_type; + FRAME_TYPE frame_type = cm->frame_type; (void) sharpness_lvl; - //MODE_INFO * this_mb_mode_info = 
cm->mi + (post->y_height>>5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list - mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); // Point at base of Mb MODE_INFO list + /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */ + mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* Point at base of Mb MODE_INFO list */ linestocopy = (post->y_height >> (4 + Fraction)); @@ -538,19 +541,19 @@ void vp8_loop_filter_partial_frame linestocopy <<= 4; - // Note the baseline filter values for each segment + /* Note the baseline filter values for each segment */ if (alt_flt_enabled) { for (i = 0; i < MAX_MB_SEGMENTS; i++) { - // Abs value + /* Abs value */ if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - // Delta Value + /* Delta Value */ else { baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range + baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */ } } } @@ -560,16 +563,16 @@ void vp8_loop_filter_partial_frame baseline_filter_level[i] = default_filt_lvl; } - // Initialize the loop filter for this frame. + /* Initialize the loop filter for this frame. 
*/ if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level)) vp8_init_loop_filter(cm); else if (frame_type != cm->last_frame_type) vp8_frame_init_loop_filter(lfi, frame_type); - // Set up the buffer pointers + /* Set up the buffer pointers */ y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride; - // vp8_filter each macro block + /* vp8_filter each macro block */ for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++) { for (mb_col = 0; mb_col < mb_cols; mb_col++) @@ -592,10 +595,10 @@ void vp8_loop_filter_partial_frame } y_ptr += 16; - mbd->mode_info_context += 1; // step to next MB + mbd->mode_info_context += 1; /* step to next MB */ } y_ptr += post->y_stride * 16 - post->y_width; - mbd->mode_info_context += 1; // Skip border mb + mbd->mode_info_context += 1; /* Skip border mb */ } } diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h index c6ce508cc..e45683460 100644 --- a/vp8/common/loopfilter.h +++ b/vp8/common/loopfilter.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -21,10 +22,10 @@ typedef enum SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE; -// FRK -// Need to align this structure so when it is declared and -// passed it can be loaded into vector registers. 
-// FRK +/* FRK + * Need to align this structure so when it is declared and + * passed it can be loaded into vector registers. + */ typedef struct { DECLARE_ALIGNED(16, signed char, lim[16]); @@ -116,5 +117,14 @@ typedef struct #define LF_INVOKE(ctx,fn) vp8_lf_##fn #endif +typedef void loop_filter_uvfunction +( + unsigned char *u, /* source pointer */ + int p, /* pitch */ + const signed char *flimit, + const signed char *limit, + const signed char *thresh, + unsigned char *v +); #endif diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c index 7d16e4843..694052924 100644 --- a/vp8/common/loopfilter_filters.c +++ b/vp8/common/loopfilter_filters.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -12,12 +13,9 @@ #include "loopfilter.h" #include "onyxc_int.h" - -#define NEW_LOOPFILTER_MASK - typedef unsigned char uc; -__inline signed char vp8_signed_char_clamp(int t) +static __inline signed char vp8_signed_char_clamp(int t) { t = (t < -128 ? -128 : t); t = (t > 127 ? 
127 : t); @@ -25,8 +23,8 @@ __inline signed char vp8_signed_char_clamp(int t) } -// should we apply any filter at all ( 11111111 yes, 00000000 no) -__inline signed char vp8_filter_mask(signed char limit, signed char flimit, +/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ +static __inline signed char vp8_filter_mask(signed char limit, signed char flimit, uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3) { signed char mask = 0; @@ -36,17 +34,13 @@ __inline signed char vp8_filter_mask(signed char limit, signed char flimit, mask |= (abs(q1 - q0) > limit) * -1; mask |= (abs(q2 - q1) > limit) * -1; mask |= (abs(q3 - q2) > limit) * -1; -#ifndef NEW_LOOPFILTER_MASK - mask |= (abs(p0 - q0) > flimit) * -1; -#else mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit) * -1; -#endif mask = ~mask; return mask; } -// is there high variance internal edge ( 11111111 yes, 00000000 no) -__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1) +/* is there high variance internal edge ( 11111111 yes, 00000000 no) */ +static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1) { signed char hev = 0; hev |= (abs(p1 - p0) > thresh) * -1; @@ -54,7 +48,7 @@ __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1) return hev; } -__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1) +static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1) { signed char ps0, qs0; @@ -67,17 +61,18 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc qs0 = (signed char) * oq0 ^ 0x80; qs1 = (signed char) * oq1 ^ 0x80; - // add outer taps if we have high edge variance + /* add outer taps if we have high edge variance */ vp8_filter = vp8_signed_char_clamp(ps1 - qs1); vp8_filter &= hev; - // inner taps + /* inner taps */ vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - 
ps0)); vp8_filter &= mask; - // save bottom 3 bits so that we round one side +4 and the other +3 - // if it equals 4 we'll set to adjust by -1 to account for the fact - // we'd round 3 the other way + /* save bottom 3 bits so that we round one side +4 and the other +3 + * if it equals 4 we'll set to adjust by -1 to account for the fact + * we'd round 3 the other way + */ Filter1 = vp8_signed_char_clamp(vp8_filter + 4); Filter2 = vp8_signed_char_clamp(vp8_filter + 3); Filter1 >>= 3; @@ -88,7 +83,7 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *op0 = u ^ 0x80; vp8_filter = Filter1; - // outer tap adjustments + /* outer tap adjustments */ vp8_filter += 1; vp8_filter >>= 1; vp8_filter &= ~hev; @@ -102,19 +97,20 @@ __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc void vp8_loop_filter_horizontal_edge_c ( unsigned char *s, - int p, //pitch + int p, /* pitch */ const signed char *flimit, const signed char *limit, const signed char *thresh, int count ) { - int hev = 0; // high edge variance + int hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ do { mask = vp8_filter_mask(limit[i], flimit[i], @@ -140,12 +136,13 @@ void vp8_loop_filter_vertical_edge_c int count ) { - int hev = 0; // high edge variance + int hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. 
+ */ do { mask = vp8_filter_mask(limit[i], flimit[i], @@ -160,7 +157,7 @@ void vp8_loop_filter_vertical_edge_c while (++i < count * 8); } -__inline void vp8_mbfilter(signed char mask, signed char hev, +static __inline void vp8_mbfilter(signed char mask, signed char hev, uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) { signed char s, u; @@ -172,7 +169,7 @@ __inline void vp8_mbfilter(signed char mask, signed char hev, signed char qs1 = (signed char) * oq1 ^ 0x80; signed char qs2 = (signed char) * oq2 ^ 0x80; - // add outer taps if we have high edge variance + /* add outer taps if we have high edge variance */ vp8_filter = vp8_signed_char_clamp(ps1 - qs1); vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); vp8_filter &= mask; @@ -180,7 +177,7 @@ __inline void vp8_mbfilter(signed char mask, signed char hev, Filter2 = vp8_filter; Filter2 &= hev; - // save bottom 3 bits so that we round one side +4 and the other +3 + /* save bottom 3 bits so that we round one side +4 and the other +3 */ Filter1 = vp8_signed_char_clamp(Filter2 + 4); Filter2 = vp8_signed_char_clamp(Filter2 + 3); Filter1 >>= 3; @@ -189,25 +186,25 @@ __inline void vp8_mbfilter(signed char mask, signed char hev, ps0 = vp8_signed_char_clamp(ps0 + Filter2); - // only apply wider filter if not high edge variance + /* only apply wider filter if not high edge variance */ vp8_filter &= ~hev; Filter2 = vp8_filter; - // roughly 3/7th difference across boundary + /* roughly 3/7th difference across boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7); s = vp8_signed_char_clamp(qs0 - u); *oq0 = s ^ 0x80; s = vp8_signed_char_clamp(ps0 + u); *op0 = s ^ 0x80; - // roughly 2/7th difference across boundary + /* roughly 2/7th difference across boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7); s = vp8_signed_char_clamp(qs1 - u); *oq1 = s ^ 0x80; s = vp8_signed_char_clamp(ps1 + u); *op1 = s ^ 0x80; - // roughly 1/7th difference across boundary + /* roughly 1/7th difference across 
boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); s = vp8_signed_char_clamp(qs2 - u); *oq2 = s ^ 0x80; @@ -225,12 +222,13 @@ void vp8_mbloop_filter_horizontal_edge_c int count ) { - signed char hev = 0; // high edge variance + signed char hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; - // loop filter designed to work using chars so that we can make maximum use - // of 8 bit simd instructions. + /* loop filter designed to work using chars so that we can make maximum use + * of 8 bit simd instructions. + */ do { @@ -259,7 +257,7 @@ void vp8_mbloop_filter_vertical_edge_c int count ) { - signed char hev = 0; // high edge variance + signed char hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; @@ -279,21 +277,18 @@ void vp8_mbloop_filter_vertical_edge_c } -// should we apply any filter at all ( 11111111 yes, 00000000 no) -__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1) +/* should we apply any filter at all ( 11111111 yes, 00000000 no) */ +static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1) { -// Why does this cause problems for win32? -// error C2143: syntax error : missing ';' before 'type' -// (void) limit; -#ifndef NEW_LOOPFILTER_MASK - signed char mask = (abs(p0 - q0) <= flimit) * -1; -#else +/* Why does this cause problems for win32? 
+ * error C2143: syntax error : missing ';' before 'type' + * (void) limit; + */ signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= flimit * 2 + limit) * -1; -#endif return mask; } -__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) +static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) { signed char vp8_filter, Filter1, Filter2; signed char p1 = (signed char) * op1 ^ 0x80; @@ -306,7 +301,7 @@ __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0)); vp8_filter &= mask; - // save bottom 3 bits so that we round one side +4 and the other +3 + /* save bottom 3 bits so that we round one side +4 and the other +3 */ Filter1 = vp8_signed_char_clamp(vp8_filter + 4); Filter1 >>= 3; u = vp8_signed_char_clamp(q0 - Filter1); @@ -334,7 +329,7 @@ void vp8_loop_filter_simple_horizontal_edge_c do { - //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]); + /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/ mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]); vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p); ++s; @@ -358,7 +353,7 @@ void vp8_loop_filter_simple_vertical_edge_c do { - //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]); + /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/ mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]); vp8_simple_filter(mask, s - 2, s - 1, s, s + 1); s += p; diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h index 97bffc776..4b8ee5877 100644 --- a/vp8/common/mac_specs.h +++ b/vp8/common/mac_specs.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c index a7e0ce99a..af55e2fe0 100644 --- a/vp8/common/mbpitch.c +++ b/vp8/common/mbpitch.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -13,7 +14,7 @@ typedef enum { PRED = 0, - DEST = 1, + DEST = 1 } BLOCKSET; void vp8_setup_block @@ -61,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs) v = &x->pre.v_buffer; } - for (block = 0; block < 16; block++) // y blocks + for (block = 0; block < 16; block++) /* y blocks */ { vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride, (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs); } - for (block = 16; block < 20; block++) // U and V blocks + for (block = 16; block < 20; block++) /* U and V blocks */ { vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride, ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs); @@ -122,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x) void vp8_build_block_doffsets(MACROBLOCKD *x) { - // handle the destination pitch features + /* handle the destination pitch features */ vp8_setup_macroblock(x, DEST); vp8_setup_macroblock(x, PRED); } diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c index 9301a2567..86a74bc0f 100644 --- a/vp8/common/modecont.c +++ b/vp8/common/modecont.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -13,27 +14,27 @@ const int vp8_mode_contexts[6][4] = { { - // 0 + /* 0 */ 7, 1, 1, 143, }, { - // 1 + /* 1 */ 14, 18, 14, 107, }, { - // 2 + /* 2 */ 135, 64, 57, 68, }, { - // 3 + /* 3 */ 60, 56, 128, 65, }, { - // 4 + /* 4 */ 159, 134, 128, 34, }, { - // 5 + /* 5 */ 234, 188, 128, 28, }, }; diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h index 0c57651ed..24db88295 100644 --- a/vp8/common/modecont.h +++ b/vp8/common/modecont.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c index ceee74c70..a31a561c8 100644 --- a/vp8/common/modecontext.c +++ b/vp8/common/modecontext.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -13,133 +14,133 @@ const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] = { { - //Above Mode : 0 - { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, // left_mode 0 - { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, // left_mode 1 - { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, // left_mode 2 - { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, // left_mode 3 - { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, // left_mode 4 - { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, // left_mode 5 - { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, // left_mode 6 - { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, // left_mode 7 - { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, // left_mode 8 - { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, // left_mode 9 + /*Above Mode : 0*/ + { 43438, 2195, 470, 316, 615, 171, 217, 412, 124, 160, }, /* left_mode 0 */ + { 5722, 2751, 296, 291, 81, 68, 80, 101, 100, 170, }, /* left_mode 1 */ + { 1629, 201, 307, 25, 47, 16, 34, 72, 19, 28, }, /* left_mode 2 */ + { 332, 266, 36, 500, 20, 65, 23, 14, 154, 106, }, /* left_mode 3 */ + { 450, 97, 10, 24, 117, 10, 2, 12, 8, 71, }, /* left_mode 4 */ + { 384, 49, 29, 44, 12, 162, 51, 5, 87, 42, }, /* left_mode 5 */ + { 495, 53, 157, 27, 14, 57, 180, 17, 17, 34, }, /* left_mode 6 */ + { 695, 64, 62, 9, 27, 5, 3, 147, 10, 26, }, /* left_mode 7 */ + { 230, 54, 20, 124, 16, 125, 29, 12, 283, 37, }, /* left_mode 8 */ + { 260, 87, 21, 120, 32, 16, 33, 16, 33, 203, }, /* left_mode 9 */ }, { - //Above Mode : 1 - { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, // left_mode 0 - { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, // left_mode 1 - { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, // left_mode 2 - { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, // left_mode 
3 - { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, // left_mode 4 - { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, // left_mode 5 - { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, // left_mode 6 - { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, // left_mode 7 - { 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, // left_mode 8 - { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, // left_mode 9 + /*Above Mode : 1*/ + { 3934, 2573, 355, 137, 128, 87, 133, 117, 37, 27, }, /* left_mode 0 */ + { 1036, 1929, 278, 135, 27, 37, 48, 55, 41, 91, }, /* left_mode 1 */ + { 223, 256, 253, 15, 13, 9, 28, 64, 3, 3, }, /* left_mode 2 */ + { 120, 129, 17, 316, 15, 11, 9, 4, 53, 74, }, /* left_mode 3 */ + { 129, 58, 6, 11, 38, 2, 0, 5, 2, 67, }, /* left_mode 4 */ + { 53, 22, 11, 16, 8, 26, 14, 3, 19, 12, }, /* left_mode 5 */ + { 59, 26, 61, 11, 4, 9, 35, 13, 8, 8, }, /* left_mode 6 */ + { 101, 52, 40, 8, 5, 2, 8, 59, 2, 20, }, /* left_mode 7 */ + { 48, 34, 10, 52, 8, 15, 6, 6, 63, 20, }, /* left_mode 8 */ + { 96, 48, 22, 63, 11, 14, 5, 8, 9, 96, }, /* left_mode 9 */ }, { - //Above Mode : 2 - { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, // left_mode 0 - { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, // left_mode 1 - { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, // left_mode 2 - { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, // left_mode 3 - { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, // left_mode 4 - { 18, 11, 30, 9, 7, 20, 11, 5, 2, 6, }, // left_mode 5 - { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, // left_mode 6 - { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, // left_mode 7 - { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, // left_mode 8 - { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, // left_mode 9 + /*Above Mode : 2*/ + { 709, 461, 506, 36, 27, 33, 151, 98, 24, 6, }, /* left_mode 0 */ + { 201, 375, 442, 27, 13, 8, 46, 58, 6, 19, }, /* left_mode 1 */ + { 122, 140, 417, 4, 13, 3, 33, 59, 4, 2, }, /* left_mode 2 */ + { 36, 17, 22, 16, 6, 8, 12, 17, 9, 21, }, /* left_mode 3 */ + { 51, 15, 7, 1, 14, 0, 4, 5, 3, 22, }, /* left_mode 4 */ + { 18, 11, 30, 9, 7, 
20, 11, 5, 2, 6, }, /* left_mode 5 */ + { 38, 21, 103, 9, 4, 12, 79, 13, 2, 5, }, /* left_mode 6 */ + { 64, 17, 66, 2, 12, 4, 2, 65, 4, 5, }, /* left_mode 7 */ + { 14, 7, 7, 16, 3, 11, 4, 13, 15, 16, }, /* left_mode 8 */ + { 36, 8, 32, 9, 9, 4, 14, 7, 6, 24, }, /* left_mode 9 */ }, { - //Above Mode : 3 - { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, // left_mode 0 - { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, // left_mode 1 - { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, // left_mode 2 - { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, // left_mode 3 - { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, // left_mode 4 - { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, // left_mode 5 - { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, // left_mode 6 - { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, // left_mode 7 - { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, // left_mode 8 - { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, // left_mode 9 + /*Above Mode : 3*/ + { 1340, 173, 36, 119, 30, 10, 13, 10, 20, 26, }, /* left_mode 0 */ + { 156, 293, 26, 108, 5, 16, 2, 4, 23, 30, }, /* left_mode 1 */ + { 60, 34, 13, 7, 3, 3, 0, 8, 4, 5, }, /* left_mode 2 */ + { 72, 64, 1, 235, 3, 9, 2, 7, 28, 38, }, /* left_mode 3 */ + { 29, 14, 1, 3, 5, 0, 2, 2, 5, 13, }, /* left_mode 4 */ + { 22, 7, 4, 11, 2, 5, 1, 2, 6, 4, }, /* left_mode 5 */ + { 18, 14, 5, 6, 4, 3, 14, 0, 9, 2, }, /* left_mode 6 */ + { 41, 10, 7, 1, 2, 0, 0, 10, 2, 1, }, /* left_mode 7 */ + { 23, 19, 2, 33, 1, 5, 2, 0, 51, 8, }, /* left_mode 8 */ + { 33, 26, 7, 53, 3, 9, 3, 3, 9, 19, }, /* left_mode 9 */ }, { - //Above Mode : 4 - { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, // left_mode 0 - { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, // left_mode 1 - { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, // left_mode 2 - { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, // left_mode 3 - { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, // left_mode 4 - { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, // left_mode 5 - { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, // left_mode 6 - { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, // left_mode 7 - { 8, 2, 1, 4, 2, 5, 
1, 1, 2, 10, }, // left_mode 8 - { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, // left_mode 9 + /*Above Mode : 4*/ + { 410, 165, 43, 31, 66, 15, 30, 54, 8, 17, }, /* left_mode 0 */ + { 115, 64, 27, 18, 30, 7, 11, 15, 4, 19, }, /* left_mode 1 */ + { 31, 23, 25, 1, 7, 2, 2, 10, 0, 5, }, /* left_mode 2 */ + { 17, 4, 1, 6, 8, 2, 7, 5, 5, 21, }, /* left_mode 3 */ + { 120, 12, 1, 2, 83, 3, 0, 4, 1, 40, }, /* left_mode 4 */ + { 4, 3, 1, 2, 1, 2, 5, 0, 3, 6, }, /* left_mode 5 */ + { 10, 2, 13, 6, 6, 6, 8, 2, 4, 5, }, /* left_mode 6 */ + { 58, 10, 5, 1, 28, 1, 1, 33, 1, 9, }, /* left_mode 7 */ + { 8, 2, 1, 4, 2, 5, 1, 1, 2, 10, }, /* left_mode 8 */ + { 76, 7, 5, 7, 18, 2, 2, 0, 5, 45, }, /* left_mode 9 */ }, { - //Above Mode : 5 - { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, // left_mode 0 - { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, // left_mode 1 - { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, // left_mode 2 - { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, // left_mode 3 - { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, // left_mode 4 - { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, // left_mode 5 - { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, // left_mode 6 - { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, // left_mode 7 - { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, // left_mode 8 - { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, // left_mode 9 + /*Above Mode : 5*/ + { 444, 46, 47, 20, 14, 110, 60, 14, 60, 7, }, /* left_mode 0 */ + { 59, 57, 25, 18, 3, 17, 21, 6, 14, 6, }, /* left_mode 1 */ + { 24, 17, 20, 6, 4, 13, 7, 2, 3, 2, }, /* left_mode 2 */ + { 13, 11, 5, 14, 4, 9, 2, 4, 15, 7, }, /* left_mode 3 */ + { 8, 5, 2, 1, 4, 0, 1, 1, 2, 12, }, /* left_mode 4 */ + { 19, 5, 5, 7, 4, 40, 6, 3, 10, 4, }, /* left_mode 5 */ + { 16, 5, 9, 1, 1, 16, 26, 2, 10, 4, }, /* left_mode 6 */ + { 11, 4, 8, 1, 1, 4, 4, 5, 4, 1, }, /* left_mode 7 */ + { 15, 1, 3, 7, 3, 21, 7, 1, 34, 5, }, /* left_mode 8 */ + { 18, 5, 1, 3, 4, 3, 7, 1, 2, 9, }, /* left_mode 9 */ }, { - //Above Mode : 6 - { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, // left_mode 0 - { 79, 83, 42, 14, 
2, 12, 63, 2, 4, 14, }, // left_mode 1 - { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, // left_mode 2 - { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, // left_mode 3 - { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, // left_mode 4 - { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, // left_mode 5 - { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, // left_mode 6 - { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, // left_mode 7 - { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, // left_mode 8 - { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, // left_mode 9 + /*Above Mode : 6*/ + { 476, 149, 94, 13, 14, 77, 291, 27, 23, 3, }, /* left_mode 0 */ + { 79, 83, 42, 14, 2, 12, 63, 2, 4, 14, }, /* left_mode 1 */ + { 43, 36, 55, 1, 3, 8, 42, 11, 5, 1, }, /* left_mode 2 */ + { 9, 9, 6, 16, 1, 5, 6, 3, 11, 10, }, /* left_mode 3 */ + { 10, 3, 1, 3, 10, 1, 0, 1, 1, 4, }, /* left_mode 4 */ + { 14, 6, 15, 5, 1, 20, 25, 2, 5, 0, }, /* left_mode 5 */ + { 28, 7, 51, 1, 0, 8, 127, 6, 2, 5, }, /* left_mode 6 */ + { 13, 3, 3, 2, 3, 1, 2, 8, 1, 2, }, /* left_mode 7 */ + { 10, 3, 3, 3, 3, 8, 2, 2, 9, 3, }, /* left_mode 8 */ + { 13, 7, 11, 4, 0, 4, 6, 2, 5, 8, }, /* left_mode 9 */ }, { - //Above Mode : 7 - { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, // left_mode 0 - { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, // left_mode 1 - { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, // left_mode 2 - { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, // left_mode 3 - { 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, // left_mode 4 - { 7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, // left_mode 5 - { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, // left_mode 6 - { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, // left_mode 7 - { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, // left_mode 8 - { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, // left_mode 9 + /*Above Mode : 7*/ + { 376, 135, 119, 6, 32, 8, 31, 224, 9, 3, }, /* left_mode 0 */ + { 93, 60, 54, 6, 13, 7, 8, 92, 2, 12, }, /* left_mode 1 */ + { 74, 36, 84, 0, 3, 2, 9, 67, 2, 1, }, /* left_mode 2 */ + { 19, 4, 4, 8, 8, 2, 4, 7, 6, 16, }, /* left_mode 3 */ + { 51, 7, 4, 1, 77, 3, 0, 14, 1, 15, }, /* left_mode 4 */ + { 
7, 7, 5, 7, 4, 7, 4, 5, 0, 3, }, /* left_mode 5 */ + { 18, 2, 19, 2, 2, 4, 12, 11, 1, 2, }, /* left_mode 6 */ + { 129, 6, 27, 1, 21, 3, 0, 189, 0, 6, }, /* left_mode 7 */ + { 9, 1, 2, 8, 3, 7, 0, 5, 3, 3, }, /* left_mode 8 */ + { 20, 4, 5, 10, 4, 2, 7, 17, 3, 16, }, /* left_mode 9 */ }, { - //Above Mode : 8 - { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, // left_mode 0 - { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, // left_mode 1 - { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, // left_mode 2 - { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, // left_mode 3 - { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, // left_mode 4 - { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, // left_mode 5 - { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, // left_mode 6 - { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, // left_mode 7 - { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, // left_mode 8 - { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, // left_mode 9 + /*Above Mode : 8*/ + { 617, 68, 34, 79, 11, 27, 25, 14, 75, 13, }, /* left_mode 0 */ + { 51, 82, 21, 26, 6, 12, 13, 1, 26, 16, }, /* left_mode 1 */ + { 29, 9, 12, 11, 3, 7, 1, 10, 2, 2, }, /* left_mode 2 */ + { 17, 19, 11, 74, 4, 3, 2, 0, 58, 13, }, /* left_mode 3 */ + { 10, 1, 1, 3, 4, 1, 0, 2, 1, 8, }, /* left_mode 4 */ + { 14, 4, 5, 5, 1, 13, 2, 0, 27, 8, }, /* left_mode 5 */ + { 10, 3, 5, 4, 1, 7, 6, 4, 5, 1, }, /* left_mode 6 */ + { 10, 2, 6, 2, 1, 1, 1, 4, 2, 1, }, /* left_mode 7 */ + { 14, 8, 5, 23, 2, 12, 6, 2, 117, 5, }, /* left_mode 8 */ + { 9, 6, 2, 19, 1, 6, 3, 2, 9, 9, }, /* left_mode 9 */ }, { - //Above Mode : 9 - { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, // left_mode 0 - { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, // left_mode 1 - { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, // left_mode 2 - { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, // left_mode 3 - { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, // left_mode 4 - { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, // left_mode 5 - { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, // left_mode 6 - { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, // left_mode 7 - { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, // 
left_mode 8 - { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, // left_mode 9 + /*Above Mode : 9*/ + { 680, 73, 22, 38, 42, 5, 11, 9, 6, 28, }, /* left_mode 0 */ + { 113, 112, 21, 22, 10, 2, 8, 4, 6, 42, }, /* left_mode 1 */ + { 44, 20, 24, 6, 5, 4, 3, 3, 1, 2, }, /* left_mode 2 */ + { 40, 23, 7, 71, 5, 2, 4, 1, 7, 22, }, /* left_mode 3 */ + { 85, 9, 4, 4, 17, 2, 0, 3, 2, 23, }, /* left_mode 4 */ + { 13, 4, 2, 6, 1, 7, 0, 1, 7, 6, }, /* left_mode 5 */ + { 26, 6, 8, 3, 2, 3, 8, 1, 5, 4, }, /* left_mode 6 */ + { 54, 8, 9, 6, 7, 0, 1, 11, 1, 3, }, /* left_mode 7 */ + { 9, 10, 4, 13, 2, 5, 4, 2, 14, 8, }, /* left_mode 8 */ + { 92, 9, 5, 19, 15, 3, 3, 1, 6, 58, }, /* left_mode 9 */ }, }; diff --git a/vp8/common/mv.h b/vp8/common/mv.h index 3d8418108..73c91b9e7 100644 --- a/vp8/common/mv.h +++ b/vp8/common/mv.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index 428721996..3c199d1c2 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -203,7 +204,7 @@ extern "C" // and not just a copy of the pointer.. int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp); int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush); - int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags); + int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags); int vp8_update_reference(VP8_PTR comp, int ref_frame_flags); diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h index 94632dac9..f60b0f3f5 100644 --- a/vp8/common/onyxc_int.h +++ b/vp8/common/onyxc_int.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -20,9 +21,9 @@ #include "recon.h" #include "postproc.h" -//#ifdef PACKET_TESTING +/*#ifdef PACKET_TESTING*/ #include "header.h" -//#endif +/*#endif*/ /* Create/destroy static data structures. */ @@ -32,6 +33,7 @@ void vp8_initialize_common(void); #define MAXQ 127 #define QINDEX_RANGE (MAXQ + 1) +#define NUM_YV12_BUFFERS 4 typedef struct frame_contexts { @@ -41,7 +43,7 @@ typedef struct frame_contexts vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1]; vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1]; MV_CONTEXT mvc[2]; - MV_CONTEXT pre_mvc[2]; //not to caculate the mvcost for the frame if mvc doesn't change. + MV_CONTEXT pre_mvc[2]; /* not to caculate the mvcost for the frame if mvc doesn't change. 
*/ } FRAME_CONTEXT; typedef enum @@ -72,6 +74,7 @@ typedef struct VP8_COMMON_RTCD vp8_subpix_rtcd_vtable_t subpix; vp8_loopfilter_rtcd_vtable_t loopfilter; vp8_postproc_rtcd_vtable_t postproc; + int flags; #else int unused; #endif @@ -81,9 +84,9 @@ typedef struct VP8Common { struct vpx_internal_error_info error; - DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]); int Width; int Height; @@ -93,15 +96,16 @@ typedef struct VP8Common YUV_TYPE clr_type; CLAMP_TYPE clamp_type; - YV12_BUFFER_CONFIG last_frame; - YV12_BUFFER_CONFIG golden_frame; - YV12_BUFFER_CONFIG alt_ref_frame; - YV12_BUFFER_CONFIG new_frame; YV12_BUFFER_CONFIG *frame_to_show; + + YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS]; + int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; + int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; + YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG temp_scale_frame; - FRAME_TYPE last_frame_type; //Add to check if vp8_frame_init_loop_filter() can be skiped. + FRAME_TYPE last_frame_type; /* Add to check if vp8_frame_init_loop_filter() can be skipped. 
*/ FRAME_TYPE frame_type; int show_frame; @@ -112,7 +116,7 @@ typedef struct VP8Common int mb_cols; int mode_info_stride; - // prfile settings + /* profile settings */ int experimental; int mb_no_coeff_skip; int no_lpf; @@ -121,7 +125,7 @@ typedef struct VP8Common int full_pixel; int base_qindex; - int last_kf_gf_q; // Q used on the last GF or KF + int last_kf_gf_q; /* Q used on the last GF or KF */ int y1dc_delta_q; int y2dc_delta_q; @@ -131,8 +135,6 @@ typedef struct VP8Common unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; - unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0 - int gf_active_count; /* We allocate a MODE_INFO struct for each macroblock, together with an extra row on top and column on the left to simplify prediction. */ @@ -153,31 +155,31 @@ typedef struct VP8Common int last_sharpness_level; int sharpness_level; - int refresh_last_frame; // Two state 0 = NO, 1 = YES - int refresh_golden_frame; // Two state 0 = NO, 1 = YES - int refresh_alt_ref_frame; // Two state 0 = NO, 1 = YES + int refresh_last_frame; /* Two state 0 = NO, 1 = YES */ + int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */ + int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */ - int copy_buffer_to_gf; // 0 none, 1 Last to GF, 2 ARF to GF - int copy_buffer_to_arf; // 0 none, 1 Last to ARF, 2 GF to ARF + int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */ + int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */ - int refresh_entropy_probs; // Two state 0 = NO, 1 = YES + int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ - int ref_frame_sign_bias[MAX_REF_FRAMES]; // Two state 0, 1 + int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ - // Y,U,V,Y2 - ENTROPY_CONTEXT *above_context[4]; // row of context for each plane - ENTROPY_CONTEXT left_context[4][4]; // (up to) 4 contexts "" + /* Y,U,V,Y2 */ + ENTROPY_CONTEXT_PLANES *above_context; /* row 
of context for each plane */ + ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ - // keyframe block modes are predicted by their above, left neighbors + /* keyframe block modes are predicted by their above, left neighbors */ vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1]; vp8_prob kf_ymode_prob [VP8_YMODES-1]; /* keyframe "" */ vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1]; - FRAME_CONTEXT lfc; // last frame entropy - FRAME_CONTEXT fc; // this frame entropy + FRAME_CONTEXT lfc; /* last frame entropy */ + FRAME_CONTEXT fc; /* this frame entropy */ unsigned int current_video_frame; @@ -201,6 +203,7 @@ typedef struct VP8Common void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level); void vp8_init_loop_filter(VP8_COMMON *cm); +void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type); extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val); #endif diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h index 644c0ec77..e53bc3138 100644 --- a/vp8/common/onyxd.h +++ b/vp8/common/onyxd.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -50,7 +51,7 @@ extern "C" int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst); int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp); - int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags); + int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags); int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h index 32a55ee6c..115134a53 100644 --- a/vp8/common/partialgfupdate.h +++ b/vp8/common/partialgfupdate.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c index 0979185d6..15b1c2c89 100644 --- a/vp8/common/postproc.c +++ b/vp8/common/postproc.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -18,7 +19,54 @@ #include #include #include -// global constants + +#define RGB_TO_YUV(t) \ + ( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16), \ + (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \ + ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128) + +/* global constants */ +#if CONFIG_POSTPROC_VISUALIZER +static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] = +{ + { RGB_TO_YUV(0x98FB98) }, /* PaleGreen */ + { RGB_TO_YUV(0x00FF00) }, /* Green */ + { RGB_TO_YUV(0xADFF2F) }, /* GreenYellow */ + { RGB_TO_YUV(0x228B22) }, /* ForestGreen */ + { RGB_TO_YUV(0x006400) }, /* DarkGreen */ + { RGB_TO_YUV(0x98F5FF) }, /* Cadet Blue */ + { RGB_TO_YUV(0x6CA6CD) }, /* Sky Blue */ + { RGB_TO_YUV(0x00008B) }, /* Dark blue */ + { RGB_TO_YUV(0x551A8B) }, /* Purple */ + { RGB_TO_YUV(0xFF0000) } /* Red */ +}; + +static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] = +{ + { RGB_TO_YUV(0x6633ff) }, /* Purple */ + { RGB_TO_YUV(0xcc33ff) }, /* Magenta */ + { RGB_TO_YUV(0xff33cc) }, /* Pink */ + { RGB_TO_YUV(0xff3366) }, /* Coral */ + { RGB_TO_YUV(0x3366ff) }, /* Blue */ + { RGB_TO_YUV(0xed00f5) }, /* Dark Blue */ + { RGB_TO_YUV(0x2e00b8) }, /* Dark Purple */ + { RGB_TO_YUV(0xff6633) }, /* Orange */ + { RGB_TO_YUV(0x33ccff) }, /* Light Blue */ + { 
RGB_TO_YUV(0x8ab800) }, /* Green */ + { RGB_TO_YUV(0xffcc33) }, /* Light Orange */ + { RGB_TO_YUV(0x33ffcc) }, /* Aqua */ + { RGB_TO_YUV(0x66ff33) }, /* Light Green */ + { RGB_TO_YUV(0xccff33) }, /* Yellow */ +}; + +static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] = +{ + { RGB_TO_YUV(0x00ff00) }, /* Blue */ + { RGB_TO_YUV(0x0000ff) }, /* Green */ + { RGB_TO_YUV(0xffff00) }, /* Yellow */ + { RGB_TO_YUV(0xff0000) }, /* Red */ +}; +#endif static const short kernel5[] = { @@ -75,7 +123,7 @@ const short vp8_rv[] = extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch); - +extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); /*********************************************************************************************************** */ void vp8_post_proc_down_and_across_c @@ -100,7 +148,7 @@ void vp8_post_proc_down_and_across_c for (row = 0; row < rows; row++) { - // post_proc_down for one row + /* post_proc_down for one row */ p_src = src_ptr; p_dst = dst_ptr; @@ -123,7 +171,7 @@ void vp8_post_proc_down_and_across_c p_dst[col] = v; } - // now post_proc_across + /* now post_proc_across */ p_src = dst_ptr; p_dst = dst_ptr; @@ -152,12 +200,12 @@ void vp8_post_proc_down_and_across_c p_dst[col-2] = d[(col-2)&7]; } - //handle the last two pixels + /* handle the last two pixels */ p_dst[col-2] = d[(col-2)&7]; p_dst[col-1] = d[(col-1)&7]; - //next row + /* next row */ src_ptr += pitch; dst_ptr += pitch; } @@ -329,13 +377,6 @@ void vp8_de_noise(YV12_BUFFER_CONFIG *source, } - -//Notes: It is better to change CHAR to unsigned or signed to -//avoid error on ARM platform. 
-char vp8_an[8][64][3072]; -int vp8_cd[8][64]; - - double vp8_gaussian(double sigma, double mu, double x) { return 1 / (sigma * sqrt(2.0 * 3.14159265)) * @@ -357,9 +398,9 @@ static void fillrd(struct postproc_state *state, int q, int a) sigma = ai + .5 + .6 * (63 - qi) / 63.0; - // set up a lookup table of 256 entries that matches - // a gaussian distribution with sigma determined by q. - // + /* set up a lookup table of 256 entries that matches + * a gaussian distribution with sigma determined by q. + */ { double i; int next, j; @@ -450,16 +491,200 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise, } } +/* Blend the macro block with a solid colored square. Leave the + * edges unblended to give distinction to macro blocks in areas + * filled with the same color block. + */ +void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v, + int y1, int u1, int v1, int alpha, int stride) +{ + int i, j; + int y1_const = y1*((1<<16)-alpha); + int u1_const = u1*((1<<16)-alpha); + int v1_const = v1*((1<<16)-alpha); + + y += 2*stride + 2; + for (i = 0; i < 12; i++) + { + for (j = 0; j < 12; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + stride >>= 1; + + u += stride + 1; + v += stride + 1; + + for (i = 0; i < 6; i++) + { + for (j = 0; j < 6; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } + u += stride; + v += stride; + } +} + +/* Blend only the edge of the macro block. Leave center + * unblended to allow for other visualizations to be layered. 
+ */ +void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v, + int y1, int u1, int v1, int alpha, int stride) +{ + int i, j; + int y1_const = y1*((1<<16)-alpha); + int u1_const = u1*((1<<16)-alpha); + int v1_const = v1*((1<<16)-alpha); + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 16; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + for (i = 0; i < 12; i++) + { + y[0] = (y[0]*alpha + y1_const)>>16; + y[1] = (y[1]*alpha + y1_const)>>16; + y[14] = (y[14]*alpha + y1_const)>>16; + y[15] = (y[15]*alpha + y1_const)>>16; + y += stride; + } + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 16; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + stride >>= 1; + + for (j = 0; j < 8; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } + u += stride; + v += stride; + + for (i = 0; i < 6; i++) + { + u[0] = (u[0]*alpha + u1_const)>>16; + v[0] = (v[0]*alpha + v1_const)>>16; + + u[7] = (u[7]*alpha + u1_const)>>16; + v[7] = (v[7]*alpha + v1_const)>>16; + + u += stride; + v += stride; + } + + for (j = 0; j < 8; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } +} + +void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v, + int y1, int u1, int v1, int alpha, int stride) +{ + int i, j; + int y1_const = y1*((1<<16)-alpha); + int u1_const = u1*((1<<16)-alpha); + int v1_const = v1*((1<<16)-alpha); + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + y[j] = (y[j]*alpha + y1_const)>>16; + } + y += stride; + } + + stride >>= 1; + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + u[j] = (u[j]*alpha + u1_const)>>16; + v[j] = (v[j]*alpha + v1_const)>>16; + } + u += stride; + v += stride; + } +} + +static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height) +{ + int dx; + int dy; + + if (*x1 > width) + { + dx = *x1 - x0; + dy = *y1 - y0; + + *x1 = width; + if (dx) + *y1 = 
((width-x0)*dy)/dx + y0; + } + if (*x1 < 0) + { + dx = *x1 - x0; + dy = *y1 - y0; + + *x1 = 0; + if (dx) + *y1 = ((0-x0)*dy)/dx + y0; + } + if (*y1 > height) + { + dx = *x1 - x0; + dy = *y1 - y0; + + *y1 = height; + if (dy) + *x1 = ((height-y0)*dx)/dy + x0; + } + if (*y1 < 0) + { + dx = *x1 - x0; + dy = *y1 - y0; + + *y1 = 0; + if (dy) + *x1 = ((0-y0)*dx)/dy + x0; + } +} + + #if CONFIG_RUNTIME_CPU_DETECT #define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc) #else #define RTCD_VTABLE(oci) NULL #endif -int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags) +int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { char message[512]; int q = oci->filter_level * 10 / 6; + int flags = ppflags->post_proc_flag; + int deblock_level = ppflags->deblocking_level; + int noise_level = ppflags->noise_level; if (!oci->frame_to_show) return -1; @@ -471,7 +696,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l { *dest = *oci->frame_to_show; - // handle problem with extending borders + /* handle problem with extending borders */ dest->y_width = oci->Width; dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; @@ -516,7 +741,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l oci->post_proc_buffer.y_stride); } - if (flags & VP8D_DEBUG_LEVEL1) +#if CONFIG_POSTPROC_VISUALIZER + if (flags & VP8D_DEBUG_TXT_FRAME_INFO) { sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d", (oci->frame_type == KEY_FRAME), @@ -527,7 +753,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l oci->mb_cols, oci->mb_rows); vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride); } - else if (flags & VP8D_DEBUG_LEVEL2) + + if (flags & VP8D_DEBUG_TXT_MBLK_MODES) { int i, j; unsigned char *y_ptr; @@ -539,7 +766,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int 
deblock_l y_ptr = post->y_buffer + 4 * post->y_stride + 4; - // vp8_filter each macro block + /* vp8_filter each macro block */ for (i = 0; i < mb_rows; i++) { for (j = 0; j < mb_cols; j++) @@ -553,12 +780,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l y_ptr += 16; } - mb_index ++; //border + mb_index ++; /* border */ y_ptr += post->y_stride * 16 - post->y_width; } } - else if (flags & VP8D_DEBUG_LEVEL3) + + if (flags & VP8D_DEBUG_TXT_DC_DIFF) { int i, j; unsigned char *y_ptr; @@ -570,7 +798,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l y_ptr = post->y_buffer + 4 * post->y_stride + 4; - // vp8_filter each macro block + /* vp8_filter each macro block */ for (i = 0; i < mb_rows; i++) { for (j = 0; j < mb_cols; j++) @@ -587,53 +815,290 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l y_ptr += 16; } - mb_index ++; //border + mb_index ++; /* border */ y_ptr += post->y_stride * 16 - post->y_width; } } - else if (flags & VP8D_DEBUG_LEVEL4) + + if (flags & VP8D_DEBUG_TXT_RATE_INFO) { sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate); vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride); -#if 0 - int i, j; - unsigned char *y_ptr; - YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; - int mb_rows = post->y_height >> 4; - int mb_cols = post->y_width >> 4; - int mb_index = 0; - MODE_INFO *mi = oci->mi; - - y_ptr = post->y_buffer + 4 * post->y_stride + 4; - - // vp8_filter each macro block - for (i = 0; i < mb_rows; i++) - { - for (j = 0; j < mb_cols; j++) - { - char zz[4]; - - sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0'); - vp8_blit_text(zz, y_ptr, post->y_stride); - mb_index ++; - y_ptr += 16; - } - - mb_index ++; //border - y_ptr += post->y_stride * 16 - post->y_width; - - } - -#endif - } + /* Draw motion vectors */ + if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag) + { + 
YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; + int width = post->y_width; + int height = post->y_height; + int mb_cols = width >> 4; + unsigned char *y_buffer = oci->post_proc_buffer.y_buffer; + int y_stride = oci->post_proc_buffer.y_stride; + MODE_INFO *mi = oci->mi; + int x0, y0; + for (y0 = 0; y0 < height; y0 += 16) + { + for (x0 = 0; x0 < width; x0 += 16) + { + int x1, y1; + + if (!(ppflags->display_mv_flag & (1<mbmi.mode))) + { + mi++; + continue; + } + + if (mi->mbmi.mode == SPLITMV) + { + switch (mi->mbmi.partitioning) + { + case 0 : /* mv_top_bottom */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 8 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+8, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+8, x1, y0+4, y1, y_buffer, y_stride); + + bmi = &mi->bmi[8]; + + x1 = x0 + 8 + (mv->col >> 3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+8, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+8, x1, y0+12, y1, y_buffer, y_stride); + + break; + } + case 1 : /* mv_left_right */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 + 8 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+8, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+8, y1, y_buffer, y_stride); + + bmi = &mi->bmi[2]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 + 8 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+8, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+8, y1, y_buffer, y_stride); + + break; + } + case 2 : /* mv_quarters */ + { + B_MODE_INFO *bmi = &mi->bmi[0]; + MV *mv = &bmi->mv.as_mv; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+4, y1, y_buffer, y_stride); + + bmi = &mi->bmi[2]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 + 4 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+4, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+4, 
y1, y_buffer, y_stride); + + bmi = &mi->bmi[8]; + + x1 = x0 + 4 + (mv->col >> 3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+4, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+4, x1, y0+12, y1, y_buffer, y_stride); + + bmi = &mi->bmi[10]; + + x1 = x0 +12 + (mv->col >> 3); + y1 = y0 +12 + (mv->row >> 3); + + constrain_line (x0+12, &x1, y0+12, &y1, width, height); + vp8_blit_line (x0+12, x1, y0+12, y1, y_buffer, y_stride); + break; + } + default : + { + B_MODE_INFO *bmi = mi->bmi; + int bx0, by0; + + for (by0 = y0; by0 < (y0+16); by0 += 4) + { + for (bx0 = x0; bx0 < (x0+16); bx0 += 4) + { + MV *mv = &bmi->mv.as_mv; + + x1 = bx0 + 2 + (mv->col >> 3); + y1 = by0 + 2 + (mv->row >> 3); + + constrain_line (bx0+2, &x1, by0+2, &y1, width, height); + vp8_blit_line (bx0+2, x1, by0+2, y1, y_buffer, y_stride); + + bmi++; + } + } + } + } + } + else if (mi->mbmi.mode >= NEARESTMV) + { + MV *mv = &mi->mbmi.mv.as_mv; + const int lx0 = x0 + 8; + const int ly0 = y0 + 8; + + x1 = lx0 + (mv->col >> 3); + y1 = ly0 + (mv->row >> 3); + + if (x1 != lx0 && y1 != ly0) + { + constrain_line (lx0, &x1, ly0-1, &y1, width, height); + vp8_blit_line (lx0, x1, ly0-1, y1, y_buffer, y_stride); + + constrain_line (lx0, &x1, ly0+1, &y1, width, height); + vp8_blit_line (lx0, x1, ly0+1, y1, y_buffer, y_stride); + } + else + vp8_blit_line (lx0, x1, ly0, y1, y_buffer, y_stride); + } + + mi++; + } + mi++; + } + } + + /* Color in block modes */ + if ((flags & VP8D_DEBUG_CLR_BLK_MODES) + && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag)) + { + int y, x; + YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; + int width = post->y_width; + int height = post->y_height; + unsigned char *y_ptr = oci->post_proc_buffer.y_buffer; + unsigned char *u_ptr = oci->post_proc_buffer.u_buffer; + unsigned char *v_ptr = oci->post_proc_buffer.v_buffer; + int y_stride = oci->post_proc_buffer.y_stride; + MODE_INFO *mi = oci->mi; + + for (y = 0; y < height; y += 16) + { + for (x = 0; x < width; x 
+= 16) + { + int Y = 0, U = 0, V = 0; + + if (mi->mbmi.mode == B_PRED && + ((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag)) + { + int by, bx; + unsigned char *yl, *ul, *vl; + B_MODE_INFO *bmi = mi->bmi; + + yl = y_ptr + x; + ul = u_ptr + (x>>1); + vl = v_ptr + (x>>1); + + for (by = 0; by < 16; by += 4) + { + for (bx = 0; bx < 16; bx += 4) + { + if ((ppflags->display_b_modes_flag & (1<mbmi.mode)) + || (ppflags->display_mb_modes_flag & B_PRED)) + { + Y = B_PREDICTION_MODE_colors[bmi->mode][0]; + U = B_PREDICTION_MODE_colors[bmi->mode][1]; + V = B_PREDICTION_MODE_colors[bmi->mode][2]; + + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b) + (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride); + } + bmi++; + } + + yl += y_stride*4; + ul += y_stride*1; + vl += y_stride*1; + } + } + else if (ppflags->display_mb_modes_flag & (1<mbmi.mode)) + { + Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0]; + U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1]; + V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2]; + + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner) + (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride); + } + + mi++; + } + y_ptr += y_stride*16; + u_ptr += y_stride*4; + v_ptr += y_stride*4; + + mi++; + } + } + + /* Color in frame reference blocks */ + if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag) + { + int y, x; + YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer; + int width = post->y_width; + int height = post->y_height; + unsigned char *y_ptr = oci->post_proc_buffer.y_buffer; + unsigned char *u_ptr = oci->post_proc_buffer.u_buffer; + unsigned char *v_ptr = oci->post_proc_buffer.v_buffer; + int y_stride = oci->post_proc_buffer.y_stride; + MODE_INFO *mi = oci->mi; + + for (y = 0; y < height; y += 16) + { + for (x = 0; x < width; x +=16) + { + int Y = 0, U = 0, V = 0; + + if (ppflags->display_ref_frame_flag & (1<mbmi.ref_frame)) + { + Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0]; + U = 
MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1]; + V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2]; + + POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer) + (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride); + } + + mi++; + } + y_ptr += y_stride*16; + u_ptr += y_stride*4; + v_ptr += y_stride*4; + + mi++; + } + } +#endif *dest = oci->post_proc_buffer; - // handle problem with extending borders + /* handle problem with extending borders */ dest->y_width = oci->Width; dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h index cd99056b0..c641b9ca5 100644 --- a/vp8/common/postproc.h +++ b/vp8/common/postproc.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -23,6 +24,18 @@ char whiteclamp[16], char bothclamp[16],\ unsigned int w, unsigned int h, int pitch) +#define prototype_postproc_blend_mb_inner(sym)\ + void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ + int y1, int u1, int v1, int alpha, int stride) + +#define prototype_postproc_blend_mb_outer(sym)\ + void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ + int y1, int u1, int v1, int alpha, int stride) + +#define prototype_postproc_blend_b(sym)\ + void sym (unsigned char *y, unsigned char *u, unsigned char *v,\ + int y1, int u1, int v1, int alpha, int stride) + #if ARCH_X86 || ARCH_X86_64 #include "x86/postproc_x86.h" #endif @@ -47,16 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross); #endif extern prototype_postproc_addnoise(vp8_postproc_addnoise); +#ifndef vp8_postproc_blend_mb_inner +#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c +#endif +extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner); + +#ifndef vp8_postproc_blend_mb_outer +#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c +#endif +extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer); + +#ifndef vp8_postproc_blend_b +#define vp8_postproc_blend_b vp8_blend_b_c +#endif +extern prototype_postproc_blend_b(vp8_postproc_blend_b); typedef prototype_postproc((*vp8_postproc_fn_t)); typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t)); typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t)); +typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t)); +typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t)); +typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t)); typedef struct { - vp8_postproc_inplace_fn_t down; - vp8_postproc_inplace_fn_t across; - vp8_postproc_fn_t downacross; - vp8_postproc_addnoise_fn_t addnoise; + vp8_postproc_inplace_fn_t down; + vp8_postproc_inplace_fn_t across; + vp8_postproc_fn_t downacross; + vp8_postproc_addnoise_fn_t 
addnoise; + vp8_postproc_blend_mb_inner_fn_t blend_mb_inner; + vp8_postproc_blend_mb_outer_fn_t blend_mb_outer; + vp8_postproc_blend_b_fn_t blend_b; } vp8_postproc_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT @@ -78,7 +111,7 @@ struct postproc_state #include "onyxc_int.h" #include "ppflags.h" int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, - int deblock_level, int noise_level, int flags); + vp8_ppflags_t *flags); void vp8_de_noise(YV12_BUFFER_CONFIG *source, diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm index e87eb2112..a4ce91583 100644 --- a/vp8/common/ppc/copy_altivec.asm +++ b/vp8/common/ppc/copy_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm index 2a3550773..4da2e94f9 100644 --- a/vp8/common/ppc/filter_altivec.asm +++ b/vp8/common/ppc/filter_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm index 27e02a87f..fd8aa665f 100644 --- a/vp8/common/ppc/filter_bilinear_altivec.asm +++ b/vp8/common/ppc/filter_bilinear_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm index e88af8d7d..117d9cfc8 100644 --- a/vp8/common/ppc/idctllm_altivec.asm +++ b/vp8/common/ppc/idctllm_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c index 586eed477..bad3cf3bd 100644 --- a/vp8/common/ppc/loopfilter_altivec.c +++ b/vp8/common/ppc/loopfilter_altivec.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm index 78a5cf9b3..61df4e976 100644 --- a/vp8/common/ppc/loopfilter_filters_altivec.asm +++ b/vp8/common/ppc/loopfilter_filters_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm index 227ef2a94..f81d86f74 100644 --- a/vp8/common/ppc/platform_altivec.asm +++ b/vp8/common/ppc/platform_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm index f478b954c..dd39e05a8 100644 --- a/vp8/common/ppc/recon_altivec.asm +++ b/vp8/common/ppc/recon_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c index 284731085..1f5d79068 100644 --- a/vp8/common/ppc/systemdependent.c +++ b/vp8/common/ppc/systemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h index c66397682..65b0cab6a 100644 --- a/vp8/common/ppflags.h +++ b/vp8/common/ppflags.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -12,14 +13,28 @@ #define __INC_PPFLAGS_H enum { - VP8D_NOFILTERING = 0, - VP8D_DEBLOCK = 1, - VP8D_DEMACROBLOCK = 2, - VP8D_ADDNOISE = 4, - VP8D_DEBUG_LEVEL1 = 8, - VP8D_DEBUG_LEVEL2 = 16, - VP8D_DEBUG_LEVEL3 = 32, - VP8D_DEBUG_LEVEL4 = 64, + VP8D_NOFILTERING = 0, + VP8D_DEBLOCK = 1<<0, + VP8D_DEMACROBLOCK = 1<<1, + VP8D_ADDNOISE = 1<<2, + VP8D_DEBUG_TXT_FRAME_INFO = 1<<3, + VP8D_DEBUG_TXT_MBLK_MODES = 1<<4, + VP8D_DEBUG_TXT_DC_DIFF = 1<<5, + VP8D_DEBUG_TXT_RATE_INFO = 1<<6, + VP8D_DEBUG_DRAW_MV = 1<<7, + VP8D_DEBUG_CLR_BLK_MODES = 1<<8, + VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9 }; +typedef struct +{ + int post_proc_flag; + int deblocking_level; + int noise_level; + int display_ref_frame_flag; + int display_mb_modes_flag; + int display_b_modes_flag; + int display_mv_flag; +} vp8_ppflags_t; + #endif diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h index 25a4b776f..99fee5ae2 100644 --- a/vp8/common/pragmas.h +++ b/vp8/common/pragmas.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c index df4c96e4a..f315f50e0 100644 --- a/vp8/common/predictdc.c +++ b/vp8/common/predictdc.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h index b8871e452..fa8596822 100644 --- a/vp8/common/predictdc.h +++ b/vp8/common/predictdc.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h index 00ec9a8d7..0b142bda7 100644 --- a/vp8/common/preproc.h +++ b/vp8/common/preproc.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h index 986c45b10..7d554b509 100644 --- a/vp8/common/preprocif.h +++ b/vp8/common/preprocif.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h index 1171ede43..c9659902b 100644 --- a/vp8/common/proposed.h +++ b/vp8/common/proposed.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c index 09fe31fe5..e9833fe33 100644 --- a/vp8/common/quant_common.c +++ b/vp8/common/quant_common.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h index 0c92ce8b9..cb64d8eb8 100644 --- a/vp8/common/quant_common.h +++ b/vp8/common/quant_common.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/recon.c b/vp8/common/recon.c index d1268ea22..d72d6e410 100644 --- a/vp8/common/recon.c +++ b/vp8/common/recon.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -105,8 +106,24 @@ void vp8_recon2b_c } } -void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { +#if ARCH_ARM + BLOCKD *b = &x->block[0]; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + + /*b = &x->block[4];*/ + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + + /*b = &x->block[8];*/ + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + + /*b = &x->block[12];*/ + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); +#else int i; for (i = 0; i < 16; i += 4) @@ -115,10 +132,36 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } +#endif } -void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) +void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { +#if ARCH_ARM + BLOCKD *b = &x->block[0]; + + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 4; + RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 4; + + /*b = &x->block[16];*/ + + RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b++; + b++; + RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b++; + b++; + RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b++; + b++; + RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + 
b->dst, b->dst_stride); +#else int i; for (i = 0; i < 16; i += 4) @@ -134,4 +177,5 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } +#endif } diff --git a/vp8/common/recon.h b/vp8/common/recon.h index f65a90f7e..1e6e343fc 100644 --- a/vp8/common/recon.h +++ b/vp8/common/recon.h @@ -1,21 +1,29 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef __INC_RECON_H #define __INC_RECON_H +#include "blockd.h" + #define prototype_copy_block(sym) \ void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch) #define prototype_recon_block(sym) \ - void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch); + void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch) + +#define prototype_recon_macroblock(sym) \ + void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x) + +struct vp8_recon_rtcd_vtable; #if ARCH_X86 || ARCH_X86_64 #include "x86/recon_x86.h" @@ -55,9 +63,20 @@ extern prototype_recon_block(vp8_recon_recon2); #endif extern prototype_recon_block(vp8_recon_recon4); +#ifndef vp8_recon_recon_mb +#define vp8_recon_recon_mb vp8_recon_mb_c +#endif +extern prototype_recon_macroblock(vp8_recon_recon_mb); + +#ifndef vp8_recon_recon_mby +#define vp8_recon_recon_mby vp8_recon_mby_c +#endif +extern prototype_recon_macroblock(vp8_recon_recon_mby); + typedef prototype_copy_block((*vp8_copy_block_fn_t)); typedef prototype_recon_block((*vp8_recon_fn_t)); -typedef struct +typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t)); +typedef struct vp8_recon_rtcd_vtable { vp8_copy_block_fn_t copy16x16; vp8_copy_block_fn_t copy8x8; @@ -65,6 +84,8 @@ typedef struct vp8_recon_fn_t recon; vp8_recon_fn_t recon2; vp8_recon_fn_t recon4; + vp8_recon_mb_fn_t recon_mb; + vp8_recon_mb_fn_t recon_mby; } vp8_recon_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT @@ -73,9 +94,6 @@ typedef struct #define RECON_INVOKE(ctx,fn) vp8_recon_##fn #endif -#include "blockd.h" -void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); -void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x); #endif diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c index c48886deb..74871c0e8 100644 
--- a/vp8/common/reconinter.c +++ b/vp8/common/reconinter.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -17,9 +18,10 @@ #include "onyxc_int.h" #endif -// use this define on systems where unaligned int reads and writes are -// not allowed, i.e. ARM architectures -//#define MUST_BE_ALIGNED +/* use this define on systems where unaligned int reads and writes are + * not allowed, i.e. 
ARM architectures + */ +/*#define MUST_BE_ALIGNED*/ static const int bbb[4] = {0, 2, 8, 10}; @@ -209,7 +211,8 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) { int i; - if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && + x->mode_info_context->mbmi.mode != SPLITMV) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = &x->predictor[256]; @@ -253,16 +256,18 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x) } } - +/*encoder only*/ void vp8_build_inter_predictors_mby(MACROBLOCKD *x) { - if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV) + + if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && + x->mode_info_context->mbmi.mode != SPLITMV) { unsigned char *ptr_base; unsigned char *ptr; unsigned char *pred_ptr = x->predictor; - int mv_row = x->mbmi.mv.as_mv.row; - int mv_col = x->mbmi.mv.as_mv.col; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int pre_stride = x->block[0].pre_stride; ptr_base = x->pre.y_buffer; @@ -281,7 +286,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x) { int i; - if (x->mbmi.partitioning < 3) + if (x->mode_info_context->mbmi.partitioning < 3) { for (i = 0; i < 4; i++) { @@ -312,7 +317,9 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x) void vp8_build_inter_predictors_mb(MACROBLOCKD *x) { - if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV) + + if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME && + x->mode_info_context->mbmi.mode != SPLITMV) { int offset; unsigned char *ptr_base; @@ -322,8 +329,8 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x) unsigned char *upred_ptr = &x->predictor[256]; unsigned char *vpred_ptr = &x->predictor[320]; - int mv_row = x->mbmi.mv.as_mv.row; - int mv_col = x->mbmi.mv.as_mv.col; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int pre_stride = 
x->block[0].pre_stride; ptr_base = x->pre.y_buffer; @@ -360,7 +367,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x) { int i; - if (x->mbmi.partitioning < 3) + if (x->mode_info_context->mbmi.partitioning < 3) { for (i = 0; i < 4; i++) { @@ -409,7 +416,7 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel) { int i, j; - if (x->mbmi.mode == SPLITMV) + if (x->mode_info_context->mbmi.mode == SPLITMV) { for (i = 0; i < 2; i++) { @@ -454,8 +461,8 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel) } else { - int mvrow = x->mbmi.mv.as_mv.row; - int mvcol = x->mbmi.mv.as_mv.col; + int mvrow = x->mode_info_context->mbmi.mv.as_mv.row; + int mvcol = x->mode_info_context->mbmi.mv.as_mv.col; if (mvrow < 0) mvrow -= 1; @@ -485,15 +492,16 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel) } -// The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this -// situation, we can write the result directly to dst buffer instead of writing it to predictor -// buffer and then copying it to dst buffer. +/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this + * situation, we can write the result directly to dst buffer instead of writing it to predictor + * buffer and then copying it to dst buffer. 
+ */ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf) { int r; unsigned char *ptr_base; unsigned char *ptr; - //unsigned char *pred_ptr = d->predictor; + /*unsigned char *pred_ptr = d->predictor;*/ int dst_stride = d->dst_stride; int pre_stride = d->pre_stride; @@ -529,37 +537,37 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) { - //unsigned char *pred_ptr = x->block[0].predictor; - //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst; + /*unsigned char *pred_ptr = x->block[0].predictor; + unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/ unsigned char *pred_ptr = x->predictor; unsigned char *dst_ptr = x->dst.y_buffer; - if (x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { int offset; unsigned char *ptr_base; unsigned char *ptr; unsigned char *uptr, *vptr; - //unsigned char *pred_ptr = x->predictor; - //unsigned char *upred_ptr = &x->predictor[256]; - //unsigned char *vpred_ptr = &x->predictor[320]; + /*unsigned char *pred_ptr = x->predictor; + unsigned char *upred_ptr = &x->predictor[256]; + unsigned char *vpred_ptr = &x->predictor[320];*/ unsigned char *udst_ptr = x->dst.u_buffer; unsigned char *vdst_ptr = x->dst.v_buffer; - int mv_row = x->mbmi.mv.as_mv.row; - int mv_col = x->mbmi.mv.as_mv.col; - int pre_stride = x->dst.y_stride; //x->block[0].pre_stride; + int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; + int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; + int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/ ptr_base = x->pre.y_buffer; ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); if ((mv_row | mv_col) & 7) { - x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride); + x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); 
/*x->block[0].dst_stride);*/ } else { - RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride); + RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ } mv_row = x->block[16].bmi.mv.as_mv.row; @@ -582,16 +590,17 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) } else { - //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later, - //if sth is wrong, go back to what it is in build_inter_predictors_mb. + /* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later, + * if sth is wrong, go back to what it is in build_inter_predictors_mb. + */ int i; - if (x->mbmi.partitioning < 3) + if (x->mode_info_context->mbmi.partitioning < 3) { for (i = 0; i < 4; i++) { BLOCKD *d = &x->block[bbb[i]]; - //vp8_build_inter_predictors4b(x, d, 16); + /*vp8_build_inter_predictors4b(x, d, 16);*/ { unsigned char *ptr_base; @@ -603,11 +612,11 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) { - x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride); + x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ } else { - RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride); + RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/ } } } @@ -621,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { - //vp8_build_inter_predictors2b(x, d0, 16); + /*vp8_build_inter_predictors2b(x, d0, 16);*/ unsigned char *ptr_base; unsigned char *ptr; unsigned char 
*pred_ptr = d0->predictor; @@ -653,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { - //vp8_build_inter_predictors2b(x, d0, 8); + /*vp8_build_inter_predictors2b(x, d0, 8);*/ unsigned char *ptr_base; unsigned char *ptr; unsigned char *pred_ptr = d0->predictor; @@ -663,11 +672,15 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x) if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7) { - x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); + x->subpixel_predict8x4(ptr, d0->pre_stride, + d0->bmi.mv.as_mv.col & 7, + d0->bmi.mv.as_mv.row & 7, + dst_ptr, x->dst.uv_stride); } else { - RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride); + RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, + d0->pre_stride, dst_ptr, x->dst.uv_stride); } } else diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h index b2d1ae97a..7c1dee431 100644 --- a/vp8/common/reconinter.h +++ b/vp8/common/reconinter.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c index e33bce348..9cf5f6a88 100644 --- a/vp8/common/reconintra.c +++ b/vp8/common/reconintra.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -13,9 +14,9 @@ #include "reconintra.h" #include "vpx_mem/vpx_mem.h" -// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and -// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x). - +/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and + * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x). 
+ */ void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) { int i; @@ -41,8 +42,8 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x) yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1]; } - // for Y - switch (x->mbmi.mode) + /* for Y */ + switch (x->mode_info_context->mbmi.mode) { case DC_PRED: { @@ -155,15 +156,15 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) int r, c, i; int y_stride = x->dst.y_stride; - ypred_ptr = x->dst.y_buffer; //x->predictor; + ypred_ptr = x->dst.y_buffer; /*x->predictor;*/ for (i = 0; i < 16; i++) { yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1]; } - // for Y - switch (x->mbmi.mode) + /* for Y */ + switch (x->mode_info_context->mbmi.mode) { case DC_PRED: { @@ -203,11 +204,11 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) expected_dc = 128; } - //vpx_memset(ypred_ptr, expected_dc, 256); + /*vpx_memset(ypred_ptr, expected_dc, 256);*/ for (r = 0; r < 16; r++) { vpx_memset(ypred_ptr, expected_dc, 16); - ypred_ptr += y_stride; //16; + ypred_ptr += y_stride; /*16;*/ } } break; @@ -221,7 +222,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; - ypred_ptr += y_stride; //16; + ypred_ptr += y_stride; /*16;*/ } } break; @@ -232,7 +233,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) { vpx_memset(ypred_ptr, yleft_col[r], 16); - ypred_ptr += y_stride; //16; + ypred_ptr += y_stride; /*16;*/ } } @@ -255,7 +256,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) ypred_ptr[c] = pred; } - ypred_ptr += y_stride; //16; + ypred_ptr += y_stride; /*16;*/ } } @@ -289,7 +290,7 @@ void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x) vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1]; } - switch (x->mbmi.uv_mode) + switch (x->mode_info_context->mbmi.uv_mode) { case DC_PRED: { @@ -417,8 +418,8 @@ void 
vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride; unsigned char vleft_col[20]; unsigned char vtop_left = vabove_row[-1]; - unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256]; - unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320]; + unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/ + unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/ int uv_stride = x->dst.uv_stride; int i, j; @@ -429,7 +430,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1]; } - switch (x->mbmi.uv_mode) + switch (x->mode_info_context->mbmi.uv_mode) { case DC_PRED: { @@ -471,14 +472,14 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) } - //vpx_memset(upred_ptr,expected_udc,64); - //vpx_memset(vpred_ptr,expected_vdc,64); + /*vpx_memset(upred_ptr,expected_udc,64);*/ + /*vpx_memset(vpred_ptr,expected_vdc,64);*/ for (i = 0; i < 8; i++) { vpx_memset(upred_ptr, expected_udc, 8); vpx_memset(vpred_ptr, expected_vdc, 8); - upred_ptr += uv_stride; //8; - vpred_ptr += uv_stride; //8; + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ } } break; @@ -490,8 +491,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) { vpx_memcpy(upred_ptr, uabove_row, 8); vpx_memcpy(vpred_ptr, vabove_row, 8); - upred_ptr += uv_stride; //8; - vpred_ptr += uv_stride; //8; + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ } } @@ -504,8 +505,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) { vpx_memset(upred_ptr, uleft_col[i], 8); vpx_memset(vpred_ptr, vleft_col[i], 8); - upred_ptr += uv_stride; //8; - vpred_ptr += uv_stride; //8; + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ } } @@ -537,8 +538,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x) vpred_ptr[j] = predv; } - upred_ptr += uv_stride; //8; - vpred_ptr += uv_stride; //8; + upred_ptr += uv_stride; /*8;*/ + 
vpred_ptr += uv_stride; /*8;*/ } } diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h index d63aa15cb..988b43a77 100644 --- a/vp8/common/reconintra.h +++ b/vp8/common/reconintra.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c index d92d5c96a..db44fa190 100644 --- a/vp8/common/reconintra4x4.c +++ b/vp8/common/reconintra4x4.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -55,7 +56,7 @@ void vp8_predict_intra4x4(BLOCKD *x, break; case B_TM_PRED: { - // prediction similar to true_motion prediction + /* prediction similar to true_motion prediction */ for (r = 0; r < 4; r++) { for (c = 0; c < 4; c++) @@ -294,8 +295,9 @@ void vp8_predict_intra4x4(BLOCKD *x, } } -// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and -// to the right prediction have filled in pixels to use. +/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and + * to the right prediction have filled in pixels to use. + */ void vp8_intra_prediction_down_copy(MACROBLOCKD *x) { unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; @@ -317,6 +319,74 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) vp8_intra_prediction_down_copy(x); +#if ARCH_ARM + { + BLOCKD *b = &x->block[0]; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, 
b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b += 1; + + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); + RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); + } +#else for (i = 0; i < 16; i++) { BLOCKD *b = &x->block[i]; @@ -324,6 +394,7 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) vp8_predict_intra4x4(b, x->block[i].bmi.mode, x->block[i].predictor); RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); } +#endif vp8_recon_intra_mbuv(rtcd, x); diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h index 788c8c40a..6ac2b7137 100644 --- a/vp8/common/reconintra4x4.h +++ 
b/vp8/common/reconintra4x4.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h deleted file mode 100644 index bb93533a3..000000000 --- a/vp8/common/segmentation_common.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "string.h" -#include "blockd.h" -#include "onyxc_int.h" - -extern void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd); diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c index dcaafe6c6..7976e252b 100644 --- a/vp8/common/setupintrarecon.c +++ b/vp8/common/setupintrarecon.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -15,22 +16,16 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf) { int i; - // set up frame new frame for intra coded blocks - vpx_memset(ybf->y_buffer - 1 - 2 * ybf->y_stride, 127, ybf->y_width + 5); + /* set up frame new frame for intra coded blocks */ vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); - for (i = 0; i < ybf->y_height; i++) ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129; - vpx_memset(ybf->u_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5); vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; - vpx_memset(ybf->v_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5); vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); - for (i = 0; i < ybf->uv_height; i++) ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129; diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h index 6ec79b29c..5264fd04b 100644 --- a/vp8/common/setupintrarecon.h +++ b/vp8/common/setupintrarecon.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h index fbd5f4daf..acdeec3bc 100644 --- a/vp8/common/subpixel.h +++ b/vp8/common/subpixel.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c index afe6a885e..73656b3d7 100644 --- a/vp8/common/swapyv12buffer.c +++ b/vp8/common/swapyv12buffer.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h index caf9499d9..a6473ed92 100644 --- a/vp8/common/swapyv12buffer.h +++ b/vp8/common/swapyv12buffer.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h index 1829b649c..db996987a 100644 --- a/vp8/common/systemdependent.h +++ b/vp8/common/systemdependent.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c index a45937b12..1756100a7 100644 --- a/vp8/common/textblit.c +++ b/vp8/common/textblit.c @@ -1,13 +1,14 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ - +#include <stdlib.h> void vp8_blit_text(const char *msg, unsigned char *address, const int pitch) @@ -50,3 +51,80 @@ void vp8_blit_text(const char *msg, unsigned char *address, const int pitch) colpos++; } } + +static void plot (const int x, const int y, unsigned char *image, const int pitch) +{ + image [x+y*pitch] ^= 255; +} + +/* Bresenham line algorithm */ +void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch) +{ + int steep = abs(y1 - y0) > abs(x1 - x0); + int deltax, deltay; + int error, ystep, y, x; + + if (steep) + { + int t; + t = x0; + x0 = y0; + y0 = t; + + t = x1; + x1 = y1; + y1 = t; + } + + if (x0 > x1) + { + int t; + t = x0; + x0 = x1; + x1 = t; + + t = y0; + y0 = y1; + y1 = t; + } + + deltax = x1 - x0; + deltay = abs(y1 - y0); + error = deltax / 2; + + y = y0; + + if (y0 < y1) + ystep = 1; + else + ystep = -1; + + if (steep) + { + for (x = x0; x <= x1; x++) + { + plot(y,x, image, pitch); + + error = error - deltay; + if (error < 0) + { + y = y + ystep; + error = error + deltax; + } + } + } + else + { + for (x = x0; x <= x1; x++) + { + plot(x,y, image, pitch); + + error = error - deltay; + if (error < 0) + { + y = y + ystep; + error = error + deltax; + } + } + } +} diff --git a/vp8/common/threading.h b/vp8/common/threading.h index a02cb244b..1929f7c4f 100644 --- a/vp8/common/threading.h +++ b/vp8/common/threading.h @@ -1,17 +1,18 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree.
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #ifndef _PTHREAD_EMULATION #define _PTHREAD_EMULATION -#define VPXINFINITE 10000 //10second. +#define VPXINFINITE 10000 /* 10second. */ /* Thread management macros */ #ifdef _WIN32 @@ -71,10 +72,11 @@ #define sem_wait(sem) (semaphore_wait(*sem) ) #define sem_post(sem) semaphore_signal(*sem) #define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem) -#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} +#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ #else #include <unistd.h> -#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} +#include <sched.h> +#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ #endif /* Not Windows. Assume pthreads */ diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c index 4ad018d49..d80c64bdf 100644 --- a/vp8/common/treecoder.c +++ b/vp8/common/treecoder.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
*/ @@ -46,6 +47,12 @@ void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) tree2tok(p, t, 0, 0, 0); } +void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t, + int offset) +{ + tree2tok(p - offset, t, 0, 0, 0); +} + static void branch_counts( int n, /* n = size of alphabet */ vp8_token tok [ /* n */ ], diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h index 0356d2b02..ebf51c5ed 100644 --- a/vp8/common/treecoder.h +++ b/vp8/common/treecoder.h @@ -1,17 +1,18 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #ifndef __INC_TREECODER_H #define __INC_TREECODER_H -typedef unsigned char vp8bc_index_t; // probability index +typedef unsigned char vp8bc_index_t; /* probability index */ typedef unsigned char vp8_prob; @@ -53,6 +54,8 @@ typedef const struct vp8_token_struct /* Construct encoding array from tree. 
*/ void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree); +void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree, + int offset); /* Convert array of token occurrence counts into a table of probabilities diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h index addd26469..22b531a76 100644 --- a/vp8/common/type_aliases.h +++ b/vp8/common/type_aliases.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -63,32 +64,32 @@ typedef signed char INT8; #endif #ifndef TYPE_INT16 -//#define TYPE_INT16 +/*#define TYPE_INT16*/ typedef signed short INT16; #endif #ifndef TYPE_INT32 -//#define TYPE_INT32 +/*#define TYPE_INT32*/ typedef signed int INT32; #endif #ifndef TYPE_UINT8 -//#define TYPE_UINT8 +/*#define TYPE_UINT8*/ typedef unsigned char UINT8; #endif #ifndef TYPE_UINT32 -//#define TYPE_UINT32 +/*#define TYPE_UINT32*/ typedef unsigned int UINT32; #endif #ifndef TYPE_UINT16 -//#define TYPE_UINT16 +/*#define TYPE_UINT16*/ typedef unsigned short UINT16; #endif #ifndef TYPE_BOOL -//#define TYPE_BOOL +/*#define TYPE_BOOL*/ typedef int BOOL; #endif @@ -100,7 +101,7 @@ typedef __int64 INT64; #ifndef TYPE_INT64 #ifdef _TMS320C6X -//for now we only have 40bits +/* for now we only have 40bits */ typedef long INT64; #else typedef long long INT64; diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp index e352e7a19..44869ecc7 100644 --- a/vp8/common/vfwsetting.hpp +++ b/vp8/common/vfwsetting.hpp @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h index 40608c6dd..a2fce65dc 100644 --- a/vp8/common/vpx_ref_build_prefix.h +++ b/vp8/common/vpx_ref_build_prefix.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h index d03e0bd02..a95d90574 100644 --- a/vp8/common/vpxblit.h +++ b/vp8/common/vpxblit.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h index a8e28f59a..4ee617f6c 100644 --- a/vp8/common/vpxblit_c64.h +++ b/vp8/common/vpxblit_c64.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h index e4c9f3ef3..b70f29673 100644 --- a/vp8/common/vpxerrors.h +++ b/vp8/common/vpxerrors.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx index 06faca69c..faddf1f42 100644 --- a/vp8/common/x86/boolcoder.cxx +++ b/vp8/common/x86/boolcoder.cxx @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h index 5dfb212e1..f6e568cdc 100644 --- a/vp8/common/x86/idct_x86.h +++ b/vp8/common/x86/idct_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -21,7 +22,7 @@ #if HAVE_MMX extern prototype_idct(vp8_short_idct4x4llm_1_mmx); extern prototype_idct(vp8_short_idct4x4llm_mmx); -extern prototype_idct_scalar(vp8_dc_only_idct_mmx); +extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx); extern prototype_second_order(vp8_short_inv_walsh4x4_mmx); extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx); @@ -33,8 +34,8 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx); #undef vp8_idct_idct16 #define vp8_idct_idct16 vp8_short_idct4x4llm_mmx -#undef vp8_idct_idct1_scalar -#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx +#undef vp8_idct_idct1_scalar_add +#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx #undef vp8_idct_iwalsh16 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm index 2751c6934..43735bc4b 100644 --- a/vp8/common/x86/idctllm_mmx.asm +++ b/vp8/common/x86/idctllm_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -57,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -69,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -112,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL] ; + pmulhw mm5, [GLOBAL(x_s1sqr2)] ; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL] ; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)] ; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -124,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -195,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx): mov rax, arg(0) ;input movd mm0, [rax] - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] mov rdx, arg(1) ;output psraw mm0, 3 @@ -219,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx): pop rbp ret -;void dc_only_idct_mmx(short input_dc, short *output, int pitch) -global sym(vp8_dc_only_idct_mmx) -sym(vp8_dc_only_idct_mmx): +;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride) +global sym(vp8_dc_only_idct_add_mmx) +sym(vp8_dc_only_idct_add_mmx): push rbp mov 
rbp, rsp - SHADOW_ARGS_TO_STACK 3 + SHADOW_ARGS_TO_STACK 5 GET_GOT rbx + push rsi + push rdi ; end prolog - movd mm0, arg(0) ;input_dc + mov rsi, arg(1) ;s -- prediction + mov rdi, arg(2) ;d -- destination + movsxd rax, dword ptr arg(4) ;stride + movsxd rdx, dword ptr arg(3) ;pitch + pxor mm0, mm0 - paddw mm0, [fours GLOBAL] - mov rdx, arg(1) ;output + movd mm5, arg(0) ;input_dc - psraw mm0, 3 - movsxd rax, dword ptr arg(2) ;pitch + paddw mm5, [GLOBAL(fours)] - punpcklwd mm0, mm0 - punpckldq mm0, mm0 + psraw mm5, 3 - movq [rdx], mm0 - movq [rdx+rax], mm0 + punpcklwd mm5, mm5 + punpckldq mm5, mm5 - movq [rdx+rax*2], mm0 - add rdx, rax + movd mm1, [rsi] + punpcklbw mm1, mm0 + paddsw mm1, mm5 + packuswb mm1, mm0 ; pack and unpack to saturate + movd [rdi], mm1 - movq [rdx+rax*2], mm0 + movd mm2, [rsi+rdx] + punpcklbw mm2, mm0 + paddsw mm2, mm5 + packuswb mm2, mm0 ; pack and unpack to saturate + movd [rdi+rax], mm2 + + movd mm3, [rsi+2*rdx] + punpcklbw mm3, mm0 + paddsw mm3, mm5 + packuswb mm3, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm3 + + add rdi, rax + add rsi, rdx + movd mm4, [rsi+2*rdx] + punpcklbw mm4, mm0 + paddsw mm4, mm5 + packuswb mm4, mm0 ; pack and unpack to saturate + movd [rdi+2*rax], mm4 ; begin epilog + pop rdi + pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm new file mode 100644 index 000000000..edee1578e --- /dev/null +++ b/vp8/common/x86/idctllm_sse2.asm @@ -0,0 +1,708 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + +;void idct_dequant_0_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *pre - 2 +; unsigned char *dst - 3 +; int dst_stride - 4 +; int blk_stride - 5 +; ) + +global sym(idct_dequant_0_2x_sse2) +sym(idct_dequant_0_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + ; end prolog + + mov rdx, arg(1) ; dequant + mov rax, arg(0) ; qcoeff + + ; Zero out xmm7, for use unpacking + pxor xmm7, xmm7 + + movd xmm4, [rax] + movd xmm5, [rdx] + + pinsrw xmm4, [rax+32], 4 + pinsrw xmm5, [rdx], 4 + + pmullw xmm4, xmm5 + + ; clear coeffs + movd [rax], xmm7 + movd [rax+32], xmm7 +;pshufb + pshuflw xmm4, xmm4, 00000000b + pshufhw xmm4, xmm4, 00000000b + + mov rax, arg(2) ; pre + paddw xmm4, [GLOBAL(fours)] + + movsxd rcx, dword ptr arg(5) ; blk_stride + psraw xmm4, 3 + + movq xmm0, [rax] + movq xmm1, [rax+rcx] + movq xmm2, [rax+2*rcx] + lea rcx, [3*rcx] + movq xmm3, [rax+rcx] + + punpcklbw xmm0, xmm7 + punpcklbw xmm1, xmm7 + punpcklbw xmm2, xmm7 + punpcklbw xmm3, xmm7 + + mov rax, arg(3) ; dst + movsxd rdx, dword ptr arg(4) ; dst_stride + + ; Add to predict buffer + paddw xmm0, xmm4 + paddw xmm1, xmm4 + paddw xmm2, xmm4 + paddw xmm3, xmm4 + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; store blocks back out + movq [rax], xmm0 + movq [rax + rdx], xmm1 + + lea rax, [rax + 2*rdx] + + movq [rax], xmm2 + movq [rax + rdx], xmm3 + + ; begin epilog + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +global sym(idct_dequant_full_2x_sse2) +sym(idct_dequant_full_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + mov rsi, arg(2) ; pre + mov rdi, arg(3) ; dst + movsxd rcx, dword ptr arg(5) ; blk_stride + + ; Zero out xmm7, for use 
unpacking + pxor xmm7, xmm7 + + mov rdx, arg(1) ; dequant + + ; note the transpose of xmm1 and xmm2, necessary for shuffle + ; to spit out sensible data + movdqa xmm0, [rax] + movdqa xmm2, [rax+16] + movdqa xmm1, [rax+32] + movdqa xmm3, [rax+48] + + ; Clear out coeffs + movdqa [rax], xmm7 + movdqa [rax+16], xmm7 + movdqa [rax+32], xmm7 + movdqa [rax+48], xmm7 + + ; dequantize qcoeff buffer + pmullw xmm0, [rdx] + pmullw xmm2, [rdx+16] + pmullw xmm1, [rdx] + pmullw xmm3, [rdx+16] + + ; repack so block 0 row x and block 1 row x are together + movdqa xmm4, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm4, xmm1 + + pshufd xmm0, xmm0, 11011000b + pshufd xmm1, xmm4, 11011000b + + movdqa xmm4, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm4, xmm3 + + pshufd xmm2, xmm2, 11011000b + pshufd xmm3, xmm4, 11011000b + + ; first pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 ; + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + + ; transpose for the second pass + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003
014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + ; second pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + paddw xmm0, [GLOBAL(fours)] + + paddw xmm2, [GLOBAL(fours)] + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + psraw xmm2, 3 + + psraw xmm0, 3 + psraw xmm4, 3 + + psraw xmm6, 3 + + ; transpose to save + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, 
xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + pxor xmm7, xmm7 + + ; Load up predict blocks + movq xmm4, [rsi] + movq xmm5, [rsi+rcx] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm0, xmm4 + paddw xmm1, xmm5 + + movq xmm4, [rsi+2*rcx] + lea rcx, [3*rcx] + movq xmm5, [rsi+rcx] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm2, xmm4 + paddw xmm3, xmm5 + +.finish: + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; Load destination stride before writing out, + ; doesn't need to persist + movsxd rdx, dword ptr arg(4) ; dst_stride + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm2 + movq [rdi + rdx], xmm3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void idct_dequant_dc_0_2x_sse2 +; ( +; short *qcoeff - 0 +; short *dequant - 1 +; unsigned char *pre - 2 +; unsigned char *dst - 3 +; int dst_stride - 4 +; short *dc - 5 +; ) +global sym(idct_dequant_dc_0_2x_sse2) +sym(idct_dequant_dc_0_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + mov rsi, arg(2) ; pre + mov rdi, arg(3) ; dst + mov rdx, arg(5) ; dc 
+ + ; Zero out xmm7, for use unpacking + pxor xmm7, xmm7 + + ; load up 2 dc words here == 2*16 = doubleword + movd xmm4, [rdx] + + ; Load up predict blocks + movq xmm0, [rsi] + movq xmm1, [rsi+16] + movq xmm2, [rsi+32] + movq xmm3, [rsi+48] + + ; Duplicate and expand dc across + punpcklwd xmm4, xmm4 + punpckldq xmm4, xmm4 + + ; Rounding to dequant and downshift + paddw xmm4, [GLOBAL(fours)] + psraw xmm4, 3 + + ; Predict buffer needs to be expanded from bytes to words + punpcklbw xmm0, xmm7 + punpcklbw xmm1, xmm7 + punpcklbw xmm2, xmm7 + punpcklbw xmm3, xmm7 + + ; Add to predict buffer + paddw xmm0, xmm4 + paddw xmm1, xmm4 + paddw xmm2, xmm4 + paddw xmm3, xmm4 + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; Load destination stride before writing out, + ; doesn't need to persist + movsxd rdx, dword ptr arg(4) ; dst_stride + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm2 + movq [rdi + rdx], xmm3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +global sym(idct_dequant_dc_full_2x_sse2) +sym(idct_dequant_dc_full_2x_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ; special case when 2 blocks have 0 or 1 coeffs + ; dc is set as first coeff, so no need to load qcoeff + mov rax, arg(0) ; qcoeff + mov rsi, arg(2) ; pre + mov rdi, arg(3) ; dst + + ; Zero out xmm7, for use unpacking + pxor xmm7, xmm7 + + mov rdx, arg(1) ; dequant + + ; note the transpose of xmm1 and xmm2, necessary for shuffle + ; to spit out sensible data + movdqa xmm0, [rax] + movdqa xmm2, [rax+16] + movdqa xmm1, [rax+32] + movdqa xmm3, [rax+48] + + ; Clear out coeffs + movdqa [rax], xmm7 + movdqa [rax+16], xmm7 + movdqa [rax+32], xmm7 + movdqa [rax+48], xmm7 + + ; dequantize qcoeff buffer + pmullw xmm0, [rdx] + pmullw xmm2, [rdx+16] + pmullw xmm1, [rdx] + pmullw
xmm3, [rdx+16] + + ; DC component + mov rdx, arg(5) + + ; repack so block 0 row x and block 1 row x are together + movdqa xmm4, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm4, xmm1 + + pshufd xmm0, xmm0, 11011000b + pshufd xmm1, xmm4, 11011000b + + movdqa xmm4, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm4, xmm3 + + pshufd xmm2, xmm2, 11011000b + pshufd xmm3, xmm4, 11011000b + + ; insert DC component + pinsrw xmm0, [rdx], 0 + pinsrw xmm0, [rdx+2], 4 + + ; first pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 ; + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + + ; transpose for the second pass + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 
001 + + movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + ; second pass + psubw xmm0, xmm2 ; b1 = 0-2 + paddw xmm2, xmm2 + + movdqa xmm5, xmm1 + paddw xmm2, xmm0 ; a1 = 0+2 + + pmulhw xmm5, [GLOBAL(x_s1sqr2)] + paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) + + movdqa xmm7, xmm3 + pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] + + paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) + psubw xmm7, xmm5 ; c1 + + movdqa xmm5, xmm1 + movdqa xmm4, xmm3 + + pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] + paddw xmm5, xmm1 + + pmulhw xmm3, [GLOBAL(x_s1sqr2)] + paddw xmm3, xmm4 + + paddw xmm3, xmm5 ; d1 + paddw xmm0, [GLOBAL(fours)] + + paddw xmm2, [GLOBAL(fours)] + movdqa xmm6, xmm2 ; a1 + + movdqa xmm4, xmm0 ; b1 + paddw xmm2, xmm3 ;0 + + paddw xmm4, xmm7 ;1 + psubw xmm0, xmm7 ;2 + + psubw xmm6, xmm3 ;3 + psraw xmm2, 3 + + psraw xmm0, 3 + psraw xmm4, 3 + + psraw xmm6, 3 + + ; transpose to save + movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 + punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 + punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 + + movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 + punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 + punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 + + + movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 + punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 + punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 + + movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 + punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 + punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 + + + movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 + punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 + punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 + + movdqa xmm7, xmm1 ; 015 
011 007 003 014 010 006 002 + punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 + punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 + + pshufd xmm0, xmm2, 11011000b + pshufd xmm2, xmm1, 11011000b + + pshufd xmm1, xmm5, 11011000b + pshufd xmm3, xmm7, 11011000b + + pxor xmm7, xmm7 + + ; Load up predict blocks + movq xmm4, [rsi] + movq xmm5, [rsi+16] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm0, xmm4 + paddw xmm1, xmm5 + + movq xmm4, [rsi+32] + movq xmm5, [rsi+48] + + punpcklbw xmm4, xmm7 + punpcklbw xmm5, xmm7 + + paddw xmm2, xmm4 + paddw xmm3, xmm5 + +.finish: + + ; pack up before storing + packuswb xmm0, xmm7 + packuswb xmm1, xmm7 + packuswb xmm2, xmm7 + packuswb xmm3, xmm7 + + ; Load destination stride before writing out, + ; doesn't need to persist + movsxd rdx, dword ptr arg(4) ; dst_stride + + ; store blocks back out + movq [rdi], xmm0 + movq [rdi + rdx], xmm1 + + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm2 + movq [rdi + rdx], xmm3 + + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +fours: + times 8 dw 0x0004 +align 16 +x_s1sqr2: + times 8 dw 0x8A8C +align 16 +x_c1sqr2less1: + times 8 dw 0x4E7B diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm index 562e5908f..10b5274dc 100644 --- a/vp8/common/x86/iwalsh_mmx.asm +++ b/vp8/common/x86/iwalsh_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. 
An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -68,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx): movq mm2, [rsi + 16] ;ip[8] movq mm3, [rsi + 24] ;ip[12] - movd mm7, rax + movq mm7, rax movq mm4, mm0 punpcklwd mm7, mm7 ;0003000300030003h diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm index 96943dfb8..83c97df7d 100644 --- a/vp8/common/x86/iwalsh_sse2.asm +++ b/vp8/common/x86/iwalsh_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -16,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 + SAVE_XMM push rsi push rdi ; end prolog @@ -100,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2): ; begin epilog pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm index 6e4d2b651..c6c215c3c 100644 --- a/vp8/common/x86/loopfilter_mmx.asm +++ b/vp8/common/x86/loopfilter_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -110,7 +111,7 @@ next8_h: psubusb mm3, mm2 ; q1-=p1 psubusb mm2, mm4 ; p1-=q1 por mm2, mm3 ; abs(p1-q1) - pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm2, 1 ; abs(p1-q1)/2 movq mm6, mm5 ; p0 @@ -149,12 +150,12 @@ next8_h: ; start work on filters movq mm2, [rsi+2*rax] ; p1 movq mm7, [rdi] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; 1 * (q0 - p0) + hvm(p1 - q1) @@ -162,8 +163,8 @@ next8_h: paddsb mm2, mm0 ; 3 * (q0 - p0) + hvm(p1 - q1) pand mm1, mm2 ; mask filter values we don't care about movq mm2, mm1 - paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + 
hvm(p1 - q1) + 3 pxor mm0, mm0 ; pxor mm5, mm5 @@ -184,29 +185,29 @@ next8_h: movq mm5, mm0 ; save results packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [ones GLOBAL] - paddsw mm1, [ones GLOBAL] + paddsw mm5, [GLOBAL(ones)] + paddsw mm1, [GLOBAL(ones)] psraw mm5, 1 ; partial shifted one more time for 2nd tap psraw mm1, 1 ; partial shifted one more time for 2nd tap packsswb mm5, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 pandn mm4, mm5 ; high edge variance additive paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+rax], mm6 ; write back movq mm6, [rsi+2*rax] ; p1 - pxor mm6, [t80 GLOBAL] ; reoffset + pxor mm6, [GLOBAL(t80)] ; reoffset paddsb mm6, mm4 ; p1+= p1 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+2*rax], mm6 ; write back psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset movq [rsi], mm3 ; write back psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [t80 GLOBAL] ; unoffset + pxor mm7, [GLOBAL(t80)] ; unoffset movq [rdi], mm7 ; write back add rsi,8 @@ -402,7 +403,7 @@ next8_v: psubusb mm5, mm1 ; q1-=p1 psubusb mm1, mm2 ; p1-=q1 por mm5, mm1 ; abs(p1-q1) - pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 mov rdx, arg(2) ;flimit ; @@ -454,14 +455,14 @@ next8_v: movq mm6, [rdx+8] ; p0 movq mm0, [rdx+16] ; q0 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 pand mm2, mm4 ; high var mask (hvm)(p1 - q1) - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert 
to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 @@ -473,9 +474,9 @@ next8_v: pand mm1, mm2 ; mask filter values we don't care about movq mm2, mm1 - paddsb mm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb mm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb mm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + paddsb mm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 pxor mm0, mm0 ; pxor mm5, mm5 @@ -502,9 +503,9 @@ next8_v: movq mm5, mm0 ; save results packsswb mm0, mm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw mm5, [ones GLOBAL] + paddsw mm5, [GLOBAL(ones)] - paddsw mm1, [ones GLOBAL] + paddsw mm1, [GLOBAL(ones)] psraw mm5, 1 ; partial shifted one more time for 2nd tap psraw mm1, 1 ; partial shifted one more time for 2nd tap @@ -513,22 +514,22 @@ next8_v: pandn mm4, mm5 ; high edge variance additive paddsb mm6, mm2 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset ; mm6=p0 ; movq mm1, [rdx] ; p1 - pxor mm1, [t80 GLOBAL] ; reoffset + pxor mm1, [GLOBAL(t80)] ; reoffset paddsb mm1, mm4 ; p1+= p1 add - pxor mm1, [t80 GLOBAL] ; unoffset + pxor mm1, [GLOBAL(t80)] ; unoffset ; mm6 = p0 mm1 = p1 psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset ; mm3 = q0 psubsb mm7, mm4 ; q1-= q1 add - pxor mm7, [t80 GLOBAL] ; unoffset + pxor mm7, [GLOBAL(t80)] ; unoffset ; mm7 = q1 ; tranpose and write back @@ -707,7 +708,7 @@ next8_mbh: psubusb mm3, mm2 ; q1-=p1 psubusb mm2, mm4 ; p1-=q1 por mm2, mm3 ; abs(p1-q1) - pand mm2, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm2, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm2, 1 ; abs(p1-q1)/2 movq mm6, mm5 ; p0 @@ -752,12 +753,12 @@ next8_mbh: ; start work on filters movq mm2, [rsi+2*rax] ; p1 movq mm7, [rdi] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed 
values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; 1 * (q0 - p0) + (p1 - q1) @@ -771,7 +772,7 @@ next8_mbh: pand mm2, mm4; ; Filter2 = vp8_filter & hev movq mm5, mm2 ; - paddsb mm5, [t3 GLOBAL]; + paddsb mm5, [GLOBAL(t3)]; pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -784,7 +785,7 @@ next8_mbh: movq mm5, mm0 ; Filter2 - paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -817,10 +818,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s27 GLOBAL] - pmulhw mm2, [s27 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -828,8 +829,8 @@ next8_mbh: psubsb mm3, mm1 paddsb mm6, mm1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] movq [rsi+rax], mm6 movq [rsi], mm3 @@ -843,10 +844,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s18 GLOBAL] - pmulhw mm2, [s18 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -854,14 +855,14 @@ next8_mbh: movq mm3, [rdi] movq mm6, [rsi+rax*2] ; p1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] paddsb mm6, mm1 psubsb 
mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdi], mm3 movq [rsi+rax*2], mm6 @@ -875,10 +876,10 @@ next8_mbh: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s9 GLOBAL] - pmulhw mm2, [s9 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -888,14 +889,14 @@ next8_mbh: neg rax movq mm3, [rdi+rax ] - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdi+rax ], mm3 neg rax movq [rdi+rax*4], mm6 @@ -1104,7 +1105,7 @@ next8_mbv: psubusb mm5, mm1 ; q1-=p1 psubusb mm1, mm2 ; p1-=q1 por mm5, mm1 ; abs(p1-q1) - pand mm5, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm5, 1 ; abs(p1-q1)/2 mov rdx, arg(2) ;flimit ; @@ -1154,14 +1155,14 @@ next8_mbv: ; start work on filters movq mm2, [rdx+16] ; p1 movq mm7, [rdx+40] ; q1 - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 movq mm6, [rdx+24] ; p0 movq mm0, [rdx+32] ; q0 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 @@ -1175,7 +1176,7 @@ next8_mbv: pand mm2, mm4; ; Filter2 = vp8_filter & hev movq mm5, mm2 ; - paddsb mm5, [t3 GLOBAL]; + paddsb mm5, 
[GLOBAL(t3)]; pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -1188,7 +1189,7 @@ next8_mbv: movq mm5, mm0 ; Filter2 - paddsb mm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) + paddsb mm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) pxor mm0, mm0 ; 0 pxor mm7, mm7 ; 0 @@ -1221,10 +1222,10 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s27 GLOBAL] - pmulhw mm2, [s27 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s27)] + pmulhw mm2, [GLOBAL(s27)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -1232,8 +1233,8 @@ next8_mbv: psubsb mm3, mm1 paddsb mm6, mm1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] movq [rdx+24], mm6 movq [rdx+32], mm3 @@ -1247,24 +1248,24 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s18 GLOBAL] - pmulhw mm2, [s18 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s18)] + pmulhw mm2, [GLOBAL(s18)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 movq mm3, [rdx + 40] movq mm6, [rdx + 16] ; p1 - pxor mm3, [t80 GLOBAL] - pxor mm6, [t80 GLOBAL] + pxor mm3, [GLOBAL(t80)] + pxor mm6, [GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, [GLOBAL(t80)] movq [rdx + 40], mm3 movq [rdx + 16], mm6 @@ -1278,10 +1279,10 @@ next8_mbv: pxor mm2, mm2 punpcklbw mm1, mm4 punpckhbw mm2, mm4 - pmulhw mm1, [s9 GLOBAL] - pmulhw mm2, [s9 GLOBAL] - paddw mm1, [s63 GLOBAL] - paddw mm2, [s63 GLOBAL] + pmulhw mm1, [GLOBAL(s9)] + pmulhw mm2, [GLOBAL(s9)] + paddw mm1, [GLOBAL(s63)] + paddw mm2, [GLOBAL(s63)] psraw mm1, 7 psraw mm2, 7 packsswb mm1, mm2 @@ -1289,14 +1290,14 @@ next8_mbv: movq mm6, [rdx+ 8] movq mm3, [rdx+48] - pxor mm6, [t80 GLOBAL] - pxor mm3, [t80 GLOBAL] + pxor mm6, [GLOBAL(t80)] + pxor mm3, 
[GLOBAL(t80)] paddsb mm6, mm1 psubsb mm3, mm1 - pxor mm6, [t80 GLOBAL] ; mm6 = 71 61 51 41 31 21 11 01 - pxor mm3, [t80 GLOBAL] ; mm3 = 76 66 56 46 36 26 15 06 + pxor mm6, [GLOBAL(t80)] ; mm6 = 71 61 51 41 31 21 11 01 + pxor mm3, [GLOBAL(t80)] ; mm3 = 76 66 56 46 36 26 15 06 ; tranpose and write back movq mm0, [rdx] ; mm0 = 70 60 50 40 30 20 10 00 @@ -1431,7 +1432,7 @@ nexts8_h: psubusb mm0, mm1 ; q1-=p1 psubusb mm1, mm4 ; p1-=q1 por mm1, mm0 ; abs(p1-q1) - pand mm1, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm1, 1 ; abs(p1-q1)/2 movq mm5, [rsi+rax] ; p0 @@ -1449,12 +1450,12 @@ nexts8_h: pcmpeqb mm5, mm3 ; start work on filters - pxor mm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm2, mm7 ; p1 - q1 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values - pxor mm0, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm0 ; q0 psubsb mm0, mm6 ; q0 - p0 paddsb mm2, mm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1463,7 +1464,7 @@ nexts8_h: pand mm5, mm2 ; mask filter values we don't care about ; do + 4 side - paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1476,12 +1477,12 @@ nexts8_h: por mm0, mm1 ; put the two together to get result psubsb mm3, mm0 ; q0-= q0 add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset movq [rsi], mm3 ; write back ; now do +3 side - psubsb mm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 
@@ -1493,7 +1494,7 @@ nexts8_h: paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq [rsi+rax], mm6 ; write back add rsi,8 @@ -1588,7 +1589,7 @@ nexts8_v: psubusb mm7, mm6 ; q1-=p1 psubusb mm6, mm3 ; p1-=q1 por mm6, mm7 ; abs(p1-q1) - pand mm6, [tfe GLOBAL] ; set lsb of each byte to zero + pand mm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw mm6, 1 ; abs(p1-q1)/2 movq mm5, mm1 ; p0 @@ -1616,16 +1617,16 @@ nexts8_v: movq t0, mm0 movq t1, mm3 - pxor mm0, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor mm3, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor mm0, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor mm3, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb mm0, mm3 ; p1 - q1 movq mm6, mm1 ; p0 movq mm7, mm2 ; q0 - pxor mm6, [t80 GLOBAL] ; offset to convert to signed values + pxor mm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor mm7, [t80 GLOBAL] ; offset to convert to signed values + pxor mm7, [GLOBAL(t80)] ; offset to convert to signed values movq mm3, mm7 ; offseted ; q0 psubsb mm7, mm6 ; q0 - p0 @@ -1636,7 +1637,7 @@ nexts8_v: pand mm5, mm0 ; mask filter values we don't care about - paddsb mm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb mm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1650,10 +1651,10 @@ nexts8_v: por mm0, mm7 ; put the two together to get result psubsb mm3, mm0 ; q0-= q0sz add - pxor mm3, [t80 GLOBAL] ; unoffset + pxor mm3, [GLOBAL(t80)] ; unoffset ; now do +3 side - psubsb mm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb mm5, [GLOBAL(t1s)] ; +3 instead of +4 movq mm0, mm5 ; get a copy of filters psllw mm0, 8 ; shift left 8 @@ -1665,7 +1666,7 @@ nexts8_v: por mm0, mm5 ; put the two together to get result paddsb mm6, mm0 ; p0+= p0 add - pxor mm6, [t80 GLOBAL] ; unoffset + pxor mm6, [GLOBAL(t80)] ; unoffset movq mm0, t0 diff --git 
a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm index 5275dfa3b..849133dc4 100644 --- a/vp8/common/x86/loopfilter_sse2.asm +++ b/vp8/common/x86/loopfilter_sse2.asm @@ -1,15 +1,278 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" +; Use of pmaxub instead of psubusb to compute filter mask was seen +; in ffvp8 + +%macro LFH_FILTER_AND_HEV_MASK 1 +%if %1 + movdqa xmm2, [rdi+2*rax] ; q3 + movdqa xmm1, [rsi+2*rax] ; q2 + movdqa xmm4, [rsi+rax] ; q1 + movdqa xmm5, [rsi] ; q0 + neg rax ; negate pitch to deal with above border +%else + movlps xmm2, [rsi + rcx*2] ; q3 + movlps xmm1, [rsi + rcx] ; q2 + movlps xmm4, [rsi] ; q1 + movlps xmm5, [rsi + rax] ; q0 + + movhps xmm2, [rdi + rcx*2] + movhps xmm1, [rdi + rcx] + movhps xmm4, [rdi] + movhps xmm5, [rdi + rax] + + lea rsi, [rsi + rax*4] + lea rdi, [rdi + rax*4] + + movdqa XMMWORD PTR [rsp], xmm1 ; store q2 + movdqa XMMWORD PTR [rsp + 16], xmm4 ; store q1 +%endif + + movdqa xmm6, xmm1 ; q2 + movdqa xmm3, xmm4 ; q1 + + psubusb xmm1, xmm2 ; q2-=q3 + psubusb xmm2, xmm6 ; q3-=q2 + + psubusb xmm4, xmm6 ; q1-=q2 + psubusb xmm6, xmm3 ; q2-=q1 + + por xmm4, xmm6 ; abs(q2-q1) + por xmm1, xmm2 ; abs(q3-q2) + + movdqa xmm0, xmm5 ; q0 + pmaxub xmm1, xmm4 + + psubusb xmm5, xmm3 ; q0-=q1 + psubusb xmm3, xmm0 
; q1-=q0 + + por xmm5, xmm3 ; abs(q0-q1) + movdqa t0, xmm5 ; save to t0 + + pmaxub xmm1, xmm5 + +%if %1 + movdqa xmm2, [rsi+4*rax] ; p3 + movdqa xmm4, [rdi+4*rax] ; p2 + movdqa xmm6, [rsi+2*rax] ; p1 +%else + movlps xmm2, [rsi + rax] ; p3 + movlps xmm4, [rsi] ; p2 + movlps xmm6, [rsi + rcx] ; p1 + + movhps xmm2, [rdi + rax] + movhps xmm4, [rdi] + movhps xmm6, [rdi + rcx] + + movdqa XMMWORD PTR [rsp + 32], xmm4 ; store p2 + movdqa XMMWORD PTR [rsp + 48], xmm6 ; store p1 +%endif + + movdqa xmm5, xmm4 ; p2 + movdqa xmm3, xmm6 ; p1 + + psubusb xmm4, xmm2 ; p2-=p3 + psubusb xmm2, xmm5 ; p3-=p2 + + psubusb xmm3, xmm5 ; p1-=p2 + pmaxub xmm1, xmm4 ; abs(p3 - p2) + + psubusb xmm5, xmm6 ; p2-=p1 + pmaxub xmm1, xmm2 ; abs(p3 - p2) + + pmaxub xmm1, xmm5 ; abs(p2 - p1) + movdqa xmm2, xmm6 ; p1 + + pmaxub xmm1, xmm3 ; abs(p2 - p1) +%if %1 + movdqa xmm4, [rsi+rax] ; p0 + movdqa xmm3, [rdi] ; q1 +%else + movlps xmm4, [rsi + rcx*2] ; p0 + movhps xmm4, [rdi + rcx*2] + movdqa xmm3, q1 ; q1 +%endif + + movdqa xmm5, xmm4 ; p0 + psubusb xmm4, xmm6 ; p0-=p1 + + psubusb xmm6, xmm5 ; p1-=p0 + + por xmm6, xmm4 ; abs(p1 - p0) + mov rdx, arg(2) ; get flimit + + movdqa t1, xmm6 ; save to t1 + + movdqa xmm4, xmm3 ; q1 + pmaxub xmm1, xmm6 + + psubusb xmm3, xmm2 ; q1-=p1 + psubusb xmm2, xmm4 ; p1-=q1 + + psubusb xmm1, xmm7 + por xmm2, xmm3 ; abs(p1-q1) + + movdqa xmm4, XMMWORD PTR [rdx] ; flimit + + movdqa xmm3, xmm0 ; q0 + pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero + + mov rdx, arg(4) ; hev get thresh + + movdqa xmm6, xmm5 ; p0 + psrlw xmm2, 1 ; abs(p1-q1)/2 + + psubusb xmm5, xmm3 ; p0-=q0 + paddb xmm4, xmm4 ; flimit*2 (less than 255) + + psubusb xmm3, xmm6 ; q0-=p0 + por xmm5, xmm3 ; abs(p0 - q0) + + paddusb xmm5, xmm5 ; abs(p0-q0)*2 + paddb xmm7, xmm4 ; flimit * 2 + limit (less than 255) + + movdqa xmm4, t0 ; hev get abs (q1 - q0) + + movdqa xmm3, t1 ; get abs (p1 - p0) + + paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + + movdqa xmm2, XMMWORD PTR [rdx] ; hev + + psubusb 
xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + psubusb xmm4, xmm2 ; hev + + psubusb xmm3, xmm2 ; hev + por xmm1, xmm5 + + pxor xmm7, xmm7 + paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + pcmpeqb xmm4, xmm5 ; hev + pcmpeqb xmm3, xmm3 ; hev + + pcmpeqb xmm1, xmm7 ; mask xmm1 + pxor xmm4, xmm3 ; hev +%endmacro + +%macro B_FILTER 1 +%if %1 == 0 + movdqa xmm2, p1 ; p1 + movdqa xmm7, q1 ; q1 +%elif %1 == 1 + movdqa xmm2, [rsi+2*rax] ; p1 + movdqa xmm7, [rdi] ; q1 +%elif %1 == 2 + lea rdx, srct + + movdqa xmm2, [rdx] ; p1 + movdqa xmm7, [rdx+48] ; q1 + movdqa xmm6, [rdx+16] ; p0 + movdqa xmm0, [rdx+32] ; q0 +%endif + + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + + psubsb xmm2, xmm7 ; p1 - q1 + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values + + pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values + + movdqa xmm3, xmm0 ; q0 + psubsb xmm0, xmm6 ; q0 - p0 + + paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) + + paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) + + paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) + + pand xmm1, xmm2 ; mask filter values we don't care about + + movdqa xmm2, xmm1 + + paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 + paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 + + punpckhbw xmm5, xmm2 ; axbxcxdx + punpcklbw xmm2, xmm2 ; exfxgxhx + + punpcklbw xmm0, xmm1 ; exfxgxhx + psraw xmm5, 11 ; sign extended shift right by 3 + + punpckhbw xmm1, xmm1 ; axbxcxdx + psraw xmm2, 11 ; sign extended shift right by 3 + + packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; + psraw xmm0, 11 ; sign extended shift right by 3 + + psraw xmm1, 11 ; sign extended shift right by 3 + movdqa xmm5, xmm0 ; save results + + packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 + paddsw xmm5, [GLOBAL(ones)] + + paddsw xmm1, 
[GLOBAL(ones)] + psraw xmm5, 1 ; partial shifted one more time for 2nd tap + + psraw xmm1, 1 ; partial shifted one more time for 2nd tap + + paddsb xmm6, xmm2 ; p0+= p0 add + packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 + +%if %1 == 0 + movdqa xmm1, p1 ; p1 +%elif %1 == 1 + movdqa xmm1, [rsi+2*rax] ; p1 +%elif %1 == 2 + movdqa xmm1, [rdx] ; p1 +%endif + pandn xmm4, xmm5 ; high edge variance additive + pxor xmm6, [GLOBAL(t80)] ; unoffset + + pxor xmm1, [GLOBAL(t80)] ; reoffset + psubsb xmm3, xmm0 ; q0-= q0 add + + paddsb xmm1, xmm4 ; p1+= p1 add + pxor xmm3, [GLOBAL(t80)] ; unoffset + + pxor xmm1, [GLOBAL(t80)] ; unoffset + psubsb xmm7, xmm4 ; q1-= q1 add + + pxor xmm7, [GLOBAL(t80)] ; unoffset +%if %1 == 0 + lea rsi, [rsi + rcx*2] + lea rdi, [rdi + rcx*2] + movq MMWORD PTR [rsi], xmm6 ; p0 + movhps MMWORD PTR [rdi], xmm6 + movq MMWORD PTR [rsi + rax], xmm1 ; p1 + movhps MMWORD PTR [rdi + rax], xmm1 + movq MMWORD PTR [rsi + rcx], xmm3 ; q0 + movhps MMWORD PTR [rdi + rcx], xmm3 + movq MMWORD PTR [rsi + rcx*2],xmm7 ; q1 + movhps MMWORD PTR [rdi + rcx*2],xmm7 +%elif %1 == 1 + movdqa [rsi+rax], xmm6 ; write back + movdqa [rsi+2*rax], xmm1 ; write back + movdqa [rsi], xmm3 ; write back + movdqa [rdi], xmm7 ; write back +%endif + +%endmacro + ;void vp8_loop_filter_horizontal_edge_sse2 ;( @@ -25,185 +288,29 @@ sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes + sub rsp, 32 ; reserve 32 bytes %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
+ mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step - mov rdx, arg(3) ;limit - movdqa xmm7, XMMWORD PTR [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax + mov rdx, arg(3) ;limit + movdqa xmm7, XMMWORD PTR [rdx] - ; calculate breakout conditions - movdqu xmm2, [rdi+2*rax] ; q3 - movdqu xmm1, [rsi+2*rax] ; q2 - movdqa xmm6, xmm1 ; q2 - psubusb xmm1, xmm2 ; q2-=q3 - psubusb xmm2, xmm6 ; q3-=q2 - por xmm1, xmm2 ; abs(q3-q2) - psubusb xmm1, xmm7 ; + lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - - movdqu xmm4, [rsi+rax] ; q1 - movdqa xmm3, xmm4 ; q1 - psubusb xmm4, xmm6 ; q1-=q2 - psubusb xmm6, xmm3 ; q2-=q1 - por xmm4, xmm6 ; abs(q2-q1) - - psubusb xmm4, xmm7 - por xmm1, xmm4 - - movdqu xmm4, [rsi] ; q0 - movdqa xmm0, xmm4 ; q0 - psubusb xmm4, xmm3 ; q0-=q1 - psubusb xmm3, xmm0 ; q1-=q0 - por xmm4, xmm3 ; abs(q0-q1) - movdqa t0, xmm4 ; save to t0 - psubusb xmm4, xmm7 - por xmm1, xmm4 - - neg rax ; negate pitch to deal with above border - movdqu xmm2, [rsi+4*rax] ; p3 - movdqu xmm4, [rdi+4*rax] ; p2 - movdqa xmm5, xmm4 ; p2 - psubusb xmm4, xmm2 ; p2-=p3 - psubusb xmm2, xmm5 ; p3-=p2 - por xmm4, xmm2 ; abs(p3 - p2) - psubusb xmm4, xmm7 - por xmm1, xmm4 - - - movdqu xmm4, [rsi+2*rax] ; p1 - movdqa xmm3, xmm4 ; p1 - psubusb xmm4, xmm5 ; p1-=p2 - psubusb xmm5, xmm3 ; p2-=p1 - por xmm4, xmm5 ; abs(p2 - p1) - psubusb xmm4, xmm7 - por xmm1, xmm4 - - movdqa xmm2, xmm3 ; p1 - - movdqu xmm4, [rsi+rax] ; p0 - movdqa xmm5, xmm4 ; p0 - psubusb xmm4, xmm3 ; p0-=p1 - psubusb xmm3, xmm5 ; p1-=p0 - por xmm4, xmm3 ; abs(p1 - p0) - movdqa t1, xmm4 ; save to t1 - psubusb xmm4, xmm7 - por xmm1, xmm4 - - movdqu xmm3, [rdi] ; q1 - movdqa xmm4, xmm3 ; q1 - psubusb xmm3, xmm2 ; q1-=p1 - psubusb xmm2, xmm4 ; p1-=q1 - por xmm2, xmm3 ; abs(p1-q1) - pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero - psrlw xmm2, 1 ; abs(p1-q1)/2 - - movdqa xmm6, xmm5 ; p0 - movdqu xmm3, [rsi] ; q0 - psubusb xmm5, xmm3 ; p0-=q0 - 
psubusb xmm3, xmm6 ; q0-=p0 - por xmm5, xmm3 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;flimit ; get flimit - movdqa xmm2, [rdx] ; - - paddb xmm2, xmm2 ; flimit*2 (less than 255) - paddb xmm7, xmm2 ; flimit * 2 + limit (less than 255) - - psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit - por xmm1, xmm5 - pxor xmm5, xmm5 - pcmpeqb xmm1, xmm5 ; mask mm1 - - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movdqa xmm7, [rdx] ; - movdqa xmm4, t0 ; get abs (q1 - q0) - psubusb xmm4, xmm7 - movdqa xmm3, t1 ; get abs (p1 - p0) - psubusb xmm3, xmm7 - paddb xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb xmm4, xmm5 - pcmpeqb xmm5, xmm5 - pxor xmm4, xmm5 - - - ; start work on filters - movdqu xmm2, [rsi+2*rax] ; p1 - movdqu xmm7, [rdi] ; q1 - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values - psubsb xmm2, xmm7 ; p1 - q1 - pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - movdqa xmm2, xmm1 - paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - - pxor xmm0, xmm0 ; - pxor xmm5, xmm5 - punpcklbw xmm0, xmm2 ; - punpckhbw xmm5, xmm2 ; - psraw xmm0, 11 ; - psraw xmm5, 11 - packsswb xmm0, xmm5 - movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor xmm0, xmm0 ; 0 - movdqa xmm5, xmm1 ; abcdefgh - punpcklbw xmm0, xmm1 ; e0f0g0h0 - psraw xmm0, 11 ; sign extended shift right by 3 - pxor 
xmm1, xmm1 ; 0 - punpckhbw xmm1, xmm5 ; a0b0c0d0 - psraw xmm1, 11 ; sign extended shift right by 3 - movdqa xmm5, xmm0 ; save results - - packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw xmm5, [ones GLOBAL] - paddsw xmm1, [ones GLOBAL] - psraw xmm5, 1 ; partial shifted one more time for 2nd tap - psraw xmm1, 1 ; partial shifted one more time for 2nd tap - packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - pandn xmm4, xmm5 ; high edge variance additive - - paddsb xmm6, xmm2 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset - movdqu [rsi+rax], xmm6 ; write back - - movdqu xmm6, [rsi+2*rax] ; p1 - pxor xmm6, [t80 GLOBAL] ; reoffset - paddsb xmm6, xmm4 ; p1+= p1 add - pxor xmm6, [t80 GLOBAL] ; unoffset - movdqu [rsi+2*rax], xmm6 ; write back - - psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [t80 GLOBAL] ; unoffset - movdqu [rsi], xmm3 ; write back - - psubsb xmm7, xmm4 ; q1-= q1 add - pxor xmm7, [t80 GLOBAL] ; unoffset - movdqu [rdi], xmm7 ; write back + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 1 + ; filter and write back the result + B_FILTER 1 add rsp, 32 pop rsp @@ -211,12 +318,13 @@ sym(vp8_loop_filter_horizontal_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret -;void vp8_loop_filter_vertical_edge_sse2 +;void vp8_loop_filter_horizontal_edge_uv_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, @@ -225,425 +333,42 @@ sym(vp8_loop_filter_horizontal_edge_sse2): ; const char *thresh, ; int count ;) -global sym(vp8_loop_filter_vertical_edge_sse2) -sym(vp8_loop_filter_vertical_edge_sse2): +global sym(vp8_loop_filter_horizontal_edge_uv_sse2) +sym(vp8_loop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax - sub rsp, 96 ; reserve 96 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - %define 
srct [rsp + 32] ;__declspec(align(16)) char srct[64]; - - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4 - 4] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - - add rdi, rax - lea rcx, [rdi + rax *8] - - ;transpose - movq xmm7, QWORD PTR [rsi+2*rax] ; 67 66 65 64 63 62 61 60 - movq xmm6, QWORD PTR [rdi+2*rax] ; 77 76 75 74 73 72 71 70 - - punpcklbw xmm7, xmm6 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 - movq xmm5, QWORD PTR [rsi] ; 47 46 45 44 43 42 41 40 - - movq xmm4, QWORD PTR [rsi+rax] ; 57 56 55 54 53 52 51 50 - punpcklbw xmm5, xmm4 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 - - movdqa xmm3, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 - punpckhwd xmm5, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 - - lea rsi, [rsi+ rax*8] - - punpcklwd xmm3, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - movq xmm6, QWORD PTR [rsi + 2*rax] ; e7 e6 e5 e4 e3 e2 e1 e0 - - movq xmm7, QWORD PTR [rcx + 2*rax] ; f7 f6 f5 f4 f3 f2 f1 f0 - punpcklbw xmm6, xmm7 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 - - movq xmm4, QWORD PTR [rsi] ; c7 c6 c5 c4 c3 c2 c1 c0 - movq xmm7, QWORD PTR [rsi + rax] ; d7 d6 d5 d4 d3 d2 d1 d0 - - punpcklbw xmm4, xmm7 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0 - movdqa xmm7, xmm4 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0 - - punpckhwd xmm7, xmm6 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 - punpcklwd xmm4, xmm6 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - - ; xmm3 xmm4, xmm5 xmm7 in use - neg rax - - lea rsi, [rsi+rax*8] - movq xmm6, QWORD PTR [rsi+rax*2] ; 27 26 25 24 23 22 21 20 - - movq xmm1, QWORD PTR [rsi+rax ] ; 37 36 35 34 33 32 31 30 - punpcklbw xmm6, xmm1 ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20 - - movq xmm2, QWORD PTR [rsi+rax*4] ; 07 06 05 04 03 02 01 00 - movq xmm1, QWORD PTR [rdi+rax*4] ; 17 16 15 14 13 12 11 10 - - punpcklbw xmm2, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 
11 01 10 00 - movdqa xmm0, xmm2 - - punpckhwd xmm2, xmm6 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - punpcklwd xmm0, xmm6 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - - movdqa xmm6, xmm2 - punpckldq xmm2, xmm5 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - - punpckhdq xmm6, xmm5 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - ;xmm0 xmm2 xmm3 xmm4, xmm6, xmm7 - - movdqa xmm5, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhdq xmm5, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - punpckldq xmm0, xmm3 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - lea rsi, [rcx+rax] - ; xmm1, xmm3 free - movq xmm1, QWORD PTR [rsi+rax*2] ; a7 a6 a5 a4 a3 a2 a1 a0 - movq xmm3, QWORD PTR [rsi+rax] ; b7 b6 b5 b4 b3 b2 b1 b0 - - punpcklbw xmm1, xmm3 ; - lea rdx, srct ; - - movdqa [rdx+16], xmm1 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 - movq xmm3, QWORD PTR [rsi+rax*4] ; 87 86 85 84 83 82 81 80 - - movq xmm1, QWORD PTR [rcx+rax*4] - punpcklbw xmm3, xmm1 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - movdqa [rdx], xmm3 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - punpckhwd xmm3, [rdx+16] ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - movdqa xmm1, xmm3 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - punpckhdq xmm1, xmm7 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 - punpckldq xmm3, xmm7 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 - - movdqa xmm7, xmm2 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - punpcklqdq xmm7, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - - punpckhqdq xmm2, xmm3 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - movdqa [rdx+32], xmm7 ; save 4s - - movdqa [rdx+48], xmm2 ; save 5s - movdqa xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - - punpckhqdq xmm7, xmm1 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 = q3 - punpcklqdq xmm6, xmm1 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 = q2 - - ; free 1, 3 xmm7-7s xmm6-6s, xmm2-5s - movq 
xmm1, QWORD PTR [rdx] ; 93 83 92 82 91 81 90 80 - movq xmm3, QWORD PTR [rdx+16] ; b3 a3 b2 a2 b1 a1 b0 a0 - - punpcklwd xmm1, xmm3 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - movdqa xmm3, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - - punpckhdq xmm3, xmm4 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - punpckldq xmm1, xmm4 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - - movdqa xmm4, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - punpcklqdq xmm5, xmm3 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - punpckhqdq xmm4, xmm3 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - movdqa [rdx], xmm5 ; save 2s - - movdqa [rdx+16], xmm4 ; save 3s - - movdqa xmm3, xmm6 ; - psubusb xmm3, xmm7 ; q3 - q2 - - psubusb xmm7, xmm6 ; q2 - q3 - por xmm7, xmm3 ; abs(q3-q2) - - movdqa xmm3, xmm2 ; q1 - psubusb xmm3, xmm6 ; q1 - q2 - - psubusb xmm6, xmm2 ; q2 - q1 - por xmm6, xmm3 ; abs(q2-q1) - - - movdqa xmm3, xmm0 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - punpcklqdq xmm0, xmm1 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - - punpckhqdq xmm3, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - movdqa xmm1, xmm3 - - psubusb xmm3, xmm0 ; p2-p3 - psubusb xmm0, xmm1 ; p3-p2 - - por xmm0, xmm3 ; abs(p3-p2) - movdqa xmm3, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - psubusb xmm3, xmm1 ; p1-p2 - psubusb xmm1, xmm5 ; p2-p1 - - por xmm1, xmm3 ; abs(p1-p2) - mov rdx, arg(3) ;limit - - movdqa xmm3, [rdx] ; limit - - psubusb xmm7, xmm3 - psubusb xmm0, xmm3 - - psubusb xmm1, xmm3 - psubusb xmm6, xmm3 - - por xmm7, xmm6 - por xmm0, xmm1 - - por xmm0, xmm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movdqa xmm1, xmm5 ; p1 - - movdqa xmm7, xmm4 ; xmm4 xmm7 = p0 - - psubusb xmm7, xmm5 ; p0 - p1 - psubusb xmm5, xmm4 ; p1 - p0 - - por xmm5, xmm7 ; abs(p1-p0) - movdqa t0, xmm5 ; save abs(p1-p0) - - lea rdx, srct - psubusb xmm5, xmm3 - - por xmm0, xmm5 ; xmm0=mask - movdqa xmm5, 
[rdx+32] ; xmm5=q0 - - movdqa xmm7, [rdx+48] ; xmm7=q1 - movdqa xmm6, xmm5 ; mm6=q0 - - movdqa xmm2, xmm7 ; q1 - - psubusb xmm5, xmm7 ; q0-q1 - psubusb xmm7, xmm6 ; q1-q0 - - por xmm7, xmm5 ; abs(q1-q0) - movdqa t1, xmm7 ; save abs(q1-q0) - - psubusb xmm7, xmm3 - por xmm0, xmm7 ; mask - - movdqa xmm5, xmm2 ; q1 - psubusb xmm5, xmm1 ; q1-=p1 - psubusb xmm1, xmm2 ; p1-=q1 - por xmm5, xmm1 ; abs(p1-q1) - pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero - psrlw xmm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;flimit ; - movdqa xmm2, [rdx] ;flimit xmm2 - - movdqa xmm1, xmm4 ; xmm1=xmm4=p0 - - movdqa xmm7, xmm6 ; xmm7=xmm6=q0 - psubusb xmm1, xmm7 ; p0-q0 - - psubusb xmm7, xmm4 ; q0-p0 - por xmm1, xmm7 ; abs(q0-p0) - paddusb xmm1, xmm1 ; abs(q0-p0)*2 - paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - paddb xmm2, xmm2 ; flimit*2 (less than 255) - paddb xmm3, xmm2 ; flimit * 2 + limit (less than 255) - - psubusb xmm1, xmm3 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit - - por xmm1, xmm0; ; mask - - pxor xmm0, xmm0 - pcmpeqb xmm1, xmm0 - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movdqa xmm7, [rdx] - - ; - movdqa xmm4, t0 ; get abs (q1 - q0) - psubusb xmm4, xmm7 - - movdqa xmm3, t1 ; get abs (p1 - p0) - psubusb xmm3, xmm7 - - por xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb xmm4, xmm0 - - pcmpeqb xmm0, xmm0 - pxor xmm4, xmm0 - - ; start work on filters - lea rdx, srct - - movdqa xmm2, [rdx] ; p1 - movdqa xmm7, [rdx+48] ; q1 - - movdqa xmm6, [rdx+16] ; p0 - movdqa xmm0, [rdx+32] ; q0 - - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) - - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values - - movdqa xmm3, xmm0 ; q0 - psubsb xmm0, xmm6 ; q0 - p0 - - paddsb xmm2, xmm0 ; 1 * 
(q0 - p0) + hvm(p1 - q1) - paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) - - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) - pand xmm1, xmm2 ; mask filter values we don't care about - - movdqa xmm2, xmm1 - paddsb xmm1, [t4 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 - - paddsb xmm2, [t3 GLOBAL] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 - pxor xmm0, xmm0 ; - - pxor xmm5, xmm5 - punpcklbw xmm0, xmm2 ; - - punpckhbw xmm5, xmm2 ; - psraw xmm0, 11 ; - - psraw xmm5, 11 - packsswb xmm0, xmm5 - - movdqa xmm2, xmm0 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; - - pxor xmm0, xmm0 ; 0 - movdqa xmm5, xmm1 ; abcdefgh - - punpcklbw xmm0, xmm1 ; e0f0g0h0 - psraw xmm0, 11 ; sign extended shift right by 3 - - pxor xmm1, xmm1 ; 0 - punpckhbw xmm1, xmm5 ; a0b0c0d0 - - psraw xmm1, 11 ; sign extended shift right by 3 - movdqa xmm5, xmm0 ; save results - - packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 - paddsw xmm5, [ones GLOBAL] - - paddsw xmm1, [ones GLOBAL] - psraw xmm5, 1 ; partial shifted one more time for 2nd tap - - psraw xmm1, 1 ; partial shifted one more time for 2nd tap - packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 - - pandn xmm4, xmm5 ; high edge variance additive - - paddsb xmm6, xmm2 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset - - ; mm6=p0 ; - movdqa xmm1, [rdx] ; p1 - pxor xmm1, [t80 GLOBAL] ; reoffset - - paddsb xmm1, xmm4 ; p1+= p1 add - pxor xmm1, [t80 GLOBAL] ; unoffset - ; mm6 = p0 mm1 = p1 - - psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [t80 GLOBAL] ; unoffset - - ; mm3 = q0 - psubsb xmm7, xmm4 ; q1-= q1 add - pxor xmm7, [t80 GLOBAL] ; unoffset - ; mm7 = q1 - - ; tranpose and write back - ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 
33 32 23 22 13 12 03 02 - - movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - - movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - - punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - punpcklwd xmm1, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 - punpckhwd xmm5, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - - ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 - ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 - ; xmm5 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 - lea rsi, [rsi+rax*8] - - movd [rsi+rax*4+2], xmm2 - psrldq xmm2, 4 - - movd [rdi+rax*4+2], xmm2 - psrldq xmm2, 4 - - movd [rsi+rax*2+2], xmm2 - psrldq xmm2, 4 - - movd [rdi+rax*2+2], xmm2 - movd [rsi+2], xmm6 - - psrldq xmm6, 4 - movd [rdi+2], xmm6 - - psrldq xmm6, 4 - neg rax - - movd [rdi+rax+2], xmm6 - psrldq xmm6, 4 - - movd [rdi+rax*2+2], xmm6 - lea rsi, [rsi+rax*8] - - neg rax - ;;;;;;;;;;;;;;;;;;;;/ - movd [rsi+rax*4+2], xmm1 - psrldq xmm1, 4 - - movd [rcx+rax*4+2], xmm1 - psrldq xmm1, 4 - - movd [rsi+rax*2+2], xmm1 - psrldq xmm1, 4 - - movd [rcx+rax*2+2], xmm1 - psrldq xmm1, 4 - - movd [rsi+2], xmm5 - psrldq xmm5, 4 - - movd [rcx+2], xmm5 - psrldq xmm5, 4 - - neg rax - movd [rcx+rax+2], xmm5 - - psrldq xmm5, 4 - movd [rcx+rax*2+2], xmm5 + sub rsp, 96 ; reserve 96 bytes + %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16]; + %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16]; + %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16]; + %define p1 [rsp + 48] 
;__declspec(align(16)) char p1[16]; + %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16]; + + mov rsi, arg(0) ; u + mov rdi, arg(5) ; v + movsxd rax, dword ptr arg(1) ; src_pixel_step + mov rcx, rax + neg rax ; negate pitch to deal with above border + + mov rdx, arg(3) ;limit + movdqa xmm7, XMMWORD PTR [rdx] + + lea rsi, [rsi + rcx] + lea rdi, [rdi + rcx] + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 0 + ; filter and write back the result + B_FILTER 0 add rsp, 96 pop rsp @@ -651,11 +376,200 @@ sym(vp8_loop_filter_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret +%macro MB_FILTER_AND_WRITEBACK 1 +%if %1 == 0 + movdqa xmm2, p1 ; p1 + movdqa xmm7, q1 ; q1 +%elif %1 == 1 + movdqa xmm2, [rsi+2*rax] ; p1 + movdqa xmm7, [rdi] ; q1 + + mov rcx, rax + neg rcx +%elif %1 == 2 + lea rdx, srct + + movdqa xmm2, [rdx+32] ; p1 + movdqa xmm7, [rdx+80] ; q1 + movdqa xmm6, [rdx+48] ; p0 + movdqa xmm0, [rdx+64] ; q0 +%endif + + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values + + psubsb xmm2, xmm7 ; p1 - q1 + movdqa xmm3, xmm0 ; q0 + + psubsb xmm0, xmm6 ; q0 - p0 + + paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) + + paddsb xmm2, xmm0 ; 2 * (q0 - p0) + (p1 - q1) + + paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) + + pand xmm1, xmm2 ; mask filter values we don't care about + + movdqa xmm2, xmm1 ; vp8_filter + + pand xmm2, xmm4 ; Filter2 = vp8_filter & hev + pxor xmm0, xmm0 + + pandn xmm4, xmm1 ; vp8_filter&=~hev + pxor xmm1, xmm1 + + punpcklbw xmm0, xmm4 ; Filter 2 (hi) + movdqa xmm5, xmm2 + + punpckhbw xmm1, xmm4 ; Filter 2 (lo) + paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) + + pmulhw xmm1, [GLOBAL(s9)] ; Filter 2 (lo) * 9 + + 
pmulhw xmm0, [GLOBAL(s9)] ; Filter 2 (hi) * 9 + + punpckhbw xmm7, xmm5 ; axbxcxdx + paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) + + punpcklbw xmm5, xmm5 ; exfxgxhx + psraw xmm7, 11 ; sign extended shift right by 3 + + psraw xmm5, 11 ; sign extended shift right by 3 + punpckhbw xmm4, xmm2 ; axbxcxdx + + punpcklbw xmm2, xmm2 ; exfxgxhx + psraw xmm4, 11 ; sign extended shift right by 3 + + packsswb xmm5, xmm7 ; Filter2 >>=3; + psraw xmm2, 11 ; sign extended shift right by 3 + + packsswb xmm2, xmm4 ; Filter1 >>=3; + movdqa xmm7, xmm1 + + paddsb xmm6, xmm5 ; ps0 =ps0 + Filter2 + movdqa xmm4, xmm1 + + psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 + movdqa xmm5, xmm0 + + movdqa xmm2, xmm5 + paddw xmm0, [GLOBAL(s63)] ; Filter 2 (hi) * 9 + 63 + + paddw xmm1, [GLOBAL(s63)] ; Filter 2 (lo) * 9 + 63 + paddw xmm5, xmm5 ; Filter 2 (hi) * 18 + + paddw xmm7, xmm7 ; Filter 2 (lo) * 18 + paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 + + paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 + paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 + + paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 + psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 + + psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 + psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 + + packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) + psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 + + psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 + packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) + + psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 + + packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) + + psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) + paddsb xmm6, xmm5 ; sp = vp8_signed_char_clamp(ps0 + u3) + +%if %1 == 0 + movdqa xmm5, q2 ; q2 + movdqa xmm1, q1 ; q1 + movdqa xmm4, p1 ; p1 + movdqa xmm7, p2 ; p2 + +%elif %1 == 1 + movdqa xmm5, XMMWORD PTR [rdi+rcx] ; q2 + movdqa xmm1, XMMWORD PTR [rdi] ; q1 + movdqa xmm4, XMMWORD PTR [rsi+rax*2] ; p1 + movdqa xmm7, XMMWORD PTR 
[rdi+rax*4] ; p2 +%elif %1 == 2 + movdqa xmm5, XMMWORD PTR [rdx+96] ; q2 + movdqa xmm1, XMMWORD PTR [rdx+80] ; q1 + movdqa xmm4, XMMWORD PTR [rdx+32] ; p1 + movdqa xmm7, XMMWORD PTR [rdx+16] ; p2 +%endif + + pxor xmm3, [GLOBAL(t80)] ; *oq0 = sq^0x80 + pxor xmm6, [GLOBAL(t80)] ; *op0 = sp^0x80 + + pxor xmm1, [GLOBAL(t80)] + pxor xmm4, [GLOBAL(t80)] + + psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) + paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 + u2) + + pxor xmm1, [GLOBAL(t80)] ; *oq1 = sq^0x80; + pxor xmm4, [GLOBAL(t80)] ; *op1 = sp^0x80; + + pxor xmm7, [GLOBAL(t80)] + pxor xmm5, [GLOBAL(t80)] + + paddsb xmm7, xmm0 ; sp = vp8_signed_char_clamp(ps2 + u) + psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) + + pxor xmm7, [GLOBAL(t80)] ; *op2 = sp^0x80; + pxor xmm5, [GLOBAL(t80)] ; *oq2 = sq^0x80; + +%if %1 == 0 + lea rsi, [rsi+rcx*2] + lea rdi, [rdi+rcx*2] + + movq MMWORD PTR [rsi], xmm6 ; p0 + movhps MMWORD PTR [rdi], xmm6 + movq MMWORD PTR [rsi + rcx], xmm3 ; q0 + movhps MMWORD PTR [rdi + rcx], xmm3 + + movq MMWORD PTR [rsi+rcx*2], xmm1 ; q1 + movhps MMWORD PTR [rdi+rcx*2], xmm1 + + movq MMWORD PTR [rsi + rax], xmm4 ; p1 + movhps MMWORD PTR [rdi + rax], xmm4 + + movq MMWORD PTR [rsi+rax*2], xmm7 ; p2 + movhps MMWORD PTR [rdi+rax*2], xmm7 + + lea rsi, [rsi + rcx] + lea rdi, [rdi + rcx] + movq MMWORD PTR [rsi+rcx*2], xmm5 ; q2 + movhps MMWORD PTR [rdi+rcx*2], xmm5 +%elif %1 == 1 + movdqa XMMWORD PTR [rdi+rcx], xmm5 ; q2 + movdqa XMMWORD PTR [rdi], xmm1 ; q1 + movdqa XMMWORD PTR [rsi], xmm3 ; q0 + movdqa XMMWORD PTR [rsi+rax ],xmm6 ; p0 + movdqa XMMWORD PTR [rsi+rax*2],xmm4 ; p1 + movdqa XMMWORD PTR [rdi+rax*4],xmm7 ; p2 +%elif %1 == 2 + movdqa XMMWORD PTR [rdx+80], xmm1 ; q1 + movdqa XMMWORD PTR [rdx+64], xmm3 ; q0 + movdqa XMMWORD PTR [rdx+48], xmm6 ; p0 + movdqa XMMWORD PTR [rdx+32], xmm4 ; p1 +%endif + +%endmacro + + ;void vp8_mbloop_filter_horizontal_edge_sse2 ;( ; unsigned char *src_ptr, @@ -670,330 +584,29 @@ 
sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax - sub rsp, 32 ; reserve 32 bytes - %define t0 [rsp + 0] ;__declspec(align(16)) char t0[8]; - %define t1 [rsp + 16] ;__declspec(align(16)) char t1[8]; + sub rsp, 32 ; reserve 32 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixel_step - mov rdx, arg(3) ;limit + mov rdx, arg(3) ;limit movdqa xmm7, XMMWORD PTR [rdx] - mov rdi, rsi ; rdi points to row +1 for indirect addressing - add rdi, rax + lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing - ; calculate breakout conditions - movdqa xmm2, XMMWORD PTR [rdi+2*rax] ; q3 - movdqa xmm1, XMMWORD PTR [rsi+2*rax] ; q2 - - movdqa xmm6, xmm1 ; q2 - psubusb xmm1, xmm2 ; q2-=q3 - - - psubusb xmm2, xmm6 ; q3-=q2 - por xmm1, xmm2 ; abs(q3-q2) - - psubusb xmm1, xmm7 - - ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit - movdqa xmm4, XMMWORD PTR [rsi+rax] ; q1 - movdqa xmm3, xmm4 ; q1 - - psubusb xmm4, xmm6 ; q1-=q2 - psubusb xmm6, xmm3 ; q2-=q1 - - por xmm4, xmm6 ; abs(q2-q1) - psubusb xmm4, xmm7 - - por xmm1, xmm4 - ; mm1 = mask, mm3=q1, mm7 = limit - - movdqa xmm4, XMMWORD PTR [rsi] ; q0 - movdqa xmm0, xmm4 ; q0 - - psubusb xmm4, xmm3 ; q0-=q1 - psubusb xmm3, xmm0 ; q1-=q0 - - por xmm4, xmm3 ; abs(q0-q1) - movdqa t0, xmm4 ; save to t0 - - psubusb xmm4, xmm7 - por xmm1, xmm4 - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - neg rax ; negate pitch to deal with above border - - movdqa xmm2, XMMWORD PTR [rsi+4*rax] ; p3 - movdqa xmm4, XMMWORD PTR [rdi+4*rax] ; p2 - - movdqa xmm5, xmm4 ; p2 - psubusb xmm4, xmm2 ; p2-=p3 - - psubusb xmm2, xmm5 ; p3-=p2 - por xmm4, xmm2 ; abs(p3 - p2) - - psubusb xmm4, xmm7 - por xmm1, xmm4 - - 
; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - movdqa xmm4, XMMWORD PTR [rsi+2*rax] ; p1 - movdqa xmm3, xmm4 ; p1 - - psubusb xmm4, xmm5 ; p1-=p2 - psubusb xmm5, xmm3 ; p2-=p1 - - por xmm4, xmm5 ; abs(p2 - p1) - psubusb xmm4, xmm7 - - por xmm1, xmm4 - - movdqa xmm2, xmm3 ; p1 - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) - movdqa xmm4, XMMWORD PTR [rsi+rax] ; p0 - movdqa xmm5, xmm4 ; p0 - - psubusb xmm4, xmm3 ; p0-=p1 - psubusb xmm3, xmm5 ; p1-=p0 - - por xmm4, xmm3 ; abs(p1 - p0) - movdqa t1, xmm4 ; save to t1 - - psubusb xmm4, xmm7 - por xmm1, xmm4 - - ; mm1 = mask, mm0=q0, mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm5 = p0 - movdqa xmm3, XMMWORD PTR [rdi] ; q1 - movdqa xmm4, xmm3 ; q1 - psubusb xmm3, xmm2 ; q1-=p1 - psubusb xmm2, xmm4 ; p1-=q1 - por xmm2, xmm3 ; abs(p1-q1) - pand xmm2, [tfe GLOBAL] ; set lsb of each byte to zero - psrlw xmm2, 1 ; abs(p1-q1)/2 - - movdqa xmm6, xmm5 ; p0 - movdqa xmm3, xmm0 ; q0 - - psubusb xmm5, xmm3 ; p0-=q0 - psubusb xmm3, xmm6 ; q0-=p0 - - por xmm5, xmm3 ; abs(p0 - q0) - paddusb xmm5, xmm5 ; abs(p0-q0)*2 - paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 - - mov rdx, arg(2) ;flimit ; get flimit - movdqa xmm2, XMMWORD PTR [rdx] ; - paddb xmm2, xmm2 ; flimit*2 (less than 255) - paddb xmm7, xmm2 ; flimit * 2 + limit (less than 255) - - psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit - por xmm1, xmm5 - pxor xmm5, xmm5 - pcmpeqb xmm1, xmm5 ; mask mm1 - ; mm1 = mask, mm0=q0, mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movdqa xmm7, XMMWORD PTR [rdx] ; - - movdqa xmm4, t0 ; get abs (q1 - q0) - psubusb xmm4, xmm7 - - movdqa xmm3, t1 ; get abs (p1 - p0) - psubusb xmm3, xmm7 - - paddb xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb xmm4, xmm5 - - pcmpeqb xmm5, xmm5 - pxor xmm4, xmm5 - ; mm1 = mask, mm0=q0, mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0) - ; mm6 = p0, mm4=hev - 
; start work on filters - movdqa xmm2, XMMWORD PTR [rsi+2*rax] ; p1 - movdqa xmm7, XMMWORD PTR [rdi] ; q1 - - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) - - paddsb xmm2, xmm0 ; 2 * (q0 - p0) - paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) - - pand xmm1, xmm2 ; mask filter values we don't care about - ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0 - movdqa xmm2, xmm1 ; vp8_filter - pand xmm2, xmm4; ; Filter2 = vp8_filter & hev - - - movdqa xmm5, xmm2 ; - paddsb xmm5, [t3 GLOBAL]; - - pxor xmm0, xmm0 ; 0 - pxor xmm7, xmm7 ; 0 - - punpcklbw xmm0, xmm5 ; e0f0g0h0 - psraw xmm0, 11 ; sign extended shift right by 3 - - punpckhbw xmm7, xmm5 ; a0b0c0d0 - psraw xmm7, 11 ; sign extended shift right by 3 - - packsswb xmm0, xmm7 ; Filter2 >>=3; - movdqa xmm5, xmm0 ; Filter2 - - paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) - pxor xmm0, xmm0 ; 0 - - pxor xmm7, xmm7 ; 0 - punpcklbw xmm0, xmm2 ; e0f0g0h0 - - psraw xmm0, 11 ; sign extended shift right by 3 - punpckhbw xmm7, xmm2 ; a0b0c0d0 - - psraw xmm7, 11 ; sign extended shift right by 3 - packsswb xmm0, xmm7 ; Filter2 >>=3; - - ; mm0= filter2 mm1 = vp8_filter, mm3 =qs0 mm5=s mm4 =hev mm6=ps0 - psubsb xmm3, xmm0 ; qs0 =qs0 - filter1 - paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 - - ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn xmm4, xmm1 ; vp8_filter&=~hev - - - ; mm3=qs0, mm4=filter2, mm6=ps0 - - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor xmm0, xmm0 - pxor xmm1, xmm1 - - pxor xmm2, 
xmm2 - punpcklbw xmm1, xmm4 - - punpckhbw xmm2, xmm4 - pmulhw xmm1, [s27 GLOBAL] - - pmulhw xmm2, [s27 GLOBAL] - paddw xmm1, [s63 GLOBAL] - - paddw xmm2, [s63 GLOBAL] - psraw xmm1, 7 - - psraw xmm2, 7 - packsswb xmm1, xmm2 - - psubsb xmm3, xmm1 - paddsb xmm6, xmm1 - - pxor xmm3, [t80 GLOBAL] - pxor xmm6, [t80 GLOBAL] - - movdqa XMMWORD PTR [rsi+rax], xmm6 - movdqa XMMWORD PTR [rsi], xmm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor xmm1, xmm1 - pxor xmm2, xmm2 - - punpcklbw xmm1, xmm4 - punpckhbw xmm2, xmm4 - - pmulhw xmm1, [s18 GLOBAL] - pmulhw xmm2, [s18 GLOBAL] - - paddw xmm1, [s63 GLOBAL] - paddw xmm2, [s63 GLOBAL] - - psraw xmm1, 7 - psraw xmm2, 7 - - packsswb xmm1, xmm2 - - movdqa xmm3, XMMWORD PTR [rdi] - movdqa xmm6, XMMWORD PTR [rsi+rax*2] ; p1 - - pxor xmm3, [t80 GLOBAL] - pxor xmm6, [t80 GLOBAL] - - paddsb xmm6, xmm1 - psubsb xmm3, xmm1 - - pxor xmm6, [t80 GLOBAL] - pxor xmm3, [t80 GLOBAL] - - movdqa XMMWORD PTR [rdi], xmm3 - movdqa XMMWORD PTR [rsi+rax*2],xmm6 - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 = s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor xmm1, xmm1 - pxor xmm2, xmm2 - - punpcklbw xmm1, xmm4 - punpckhbw xmm2, xmm4 - - pmulhw xmm1, [s9 GLOBAL] - pmulhw xmm2, [s9 GLOBAL] - - paddw xmm1, [s63 GLOBAL] - paddw xmm2, [s63 GLOBAL] - - psraw xmm1, 7 - psraw xmm2, 7 - - packsswb xmm1, xmm2 - - - movdqa xmm6, XMMWORD PTR [rdi+rax*4] - neg rax - - movdqa xmm3, XMMWORD PTR [rdi+rax ] - - pxor xmm6, [t80 GLOBAL] - pxor xmm3, [t80 GLOBAL] - - paddsb xmm6, xmm1 - psubsb xmm3, xmm1 - - pxor xmm6, [t80 GLOBAL] - pxor xmm3, [t80 GLOBAL] - - movdqa XMMWORD PTR [rdi+rax ], xmm3 - neg rax - - movdqa XMMWORD PTR [rdi+rax*4], xmm6 + ; calculate breakout 
conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 1 + ; filter and write back the results + MB_FILTER_AND_WRITEBACK 1 add rsp, 32 pop rsp @@ -1001,11 +614,627 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret +;void vp8_mbloop_filter_horizontal_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *flimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) +sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 96 ; reserve 96 bytes + %define q2 [rsp + 0] ;__declspec(align(16)) char q2[16]; + %define q1 [rsp + 16] ;__declspec(align(16)) char q1[16]; + %define p2 [rsp + 32] ;__declspec(align(16)) char p2[16]; + %define p1 [rsp + 48] ;__declspec(align(16)) char p1[16]; + %define t0 [rsp + 64] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 80] ;__declspec(align(16)) char t1[16]; + + mov rsi, arg(0) ; u + mov rdi, arg(5) ; v + movsxd rax, dword ptr arg(1) ; src_pixel_step + mov rcx, rax + neg rax ; negate pitch to deal with above border + + mov rdx, arg(3) ;limit + movdqa xmm7, XMMWORD PTR [rdx] + + lea rsi, [rsi + rcx] + lea rdi, [rdi + rcx] + + ; calculate breakout conditions and high edge variance + LFH_FILTER_AND_HEV_MASK 0 + ; filter and write back the results + MB_FILTER_AND_WRITEBACK 0 + + add rsp, 96 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +%macro TRANSPOSE_16X8 2 + movq xmm4, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 + movq xmm1, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 + movq xmm0, QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 + movq xmm7, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 
32 31 30 + movq xmm5, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40 + movq xmm2, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50 + + punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 + + movq xmm1, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 + + movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 + punpcklbw xmm0, xmm7 ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20 + + movq xmm7, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 + + punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 +%if %1 + lea rsi, [rsi+rax*8] +%else + mov rsi, arg(5) ; v_ptr +%endif + + movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 + punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 + + punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 + + punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 +%if %1 + lea rdi, [rdi+rax*8] +%else + lea rsi, [rsi - 4] +%endif + + punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 +%if %1 + lea rdx, srct +%else + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing +%endif + + movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 + punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 + + movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 + punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + + punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 + + punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 + + punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 + + movdqa t0, xmm2 ; save to free XMM2 + movq xmm2, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80 + movq xmm6, QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 + movq xmm0, QWORD
PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 + movq xmm5, QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 + movq xmm1, QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 + + punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 + + movq xmm6, QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 + + punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 + + movq xmm5, QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 + + punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0 + + movq xmm6, QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 + + punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 + + movdqa xmm6, xmm1 ; + punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 + + punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 + movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 + + punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 + + punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 + + movdqa xmm0, xmm5 + punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 + + punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 + movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 + + punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 + + punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 + movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 + + punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 + + punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 +%if %2 + movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + + punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3
93 83 73 63 53 43 33 23 13 03 + + movdqa [rdx], xmm2 ; save 2 + + movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 + punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + + movdqa [rdx+16], xmm3 ; save 3 + + punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 + + movdqa [rdx+32], xmm4 ; save 4 + movdqa [rdx+48], xmm5 ; save 5 + movdqa xmm1, t0 ; get + + movdqa xmm2, xmm1 ; + punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 + + punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 +%else + movdqa [rdx+112], xmm7 ; save 7 + + movdqa [rdx+96], xmm6 ; save 6 + + movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 + punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + + punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + + movdqa [rdx+32], xmm2 ; save 2 + + movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 + punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + + movdqa [rdx+48], xmm3 ; save 3 + + punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 + + movdqa [rdx+64], xmm4 ; save 4 + movdqa [rdx+80], xmm5 ; save 5 + movdqa xmm1, t0 ; get + + movdqa xmm2, xmm1 + punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 + + punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + + movdqa [rdx+16], xmm1 + + movdqa [rdx], xmm2 +%endif +%endmacro + +%macro LFV_FILTER_MASK_HEV_MASK 1 + movdqa xmm0, xmm6 ; q2 + psubusb xmm0, xmm7 ; q2-q3 + + psubusb xmm7, xmm6 ; q3-q2 + movdqa xmm4, xmm5 ; q1 + + por xmm7, xmm0 ; abs (q3-q2) + psubusb xmm4, xmm6 ; q1-q2 + + movdqa xmm0, xmm1 + psubusb xmm6, xmm5 ; q2-q1 + + por xmm6, xmm4 ; abs (q2-q1) + psubusb xmm0, xmm2 ; p2 - p3; + + psubusb xmm2, xmm1 ; p3 - p2; + por xmm0, xmm2 ; abs(p2-p3) +%if %1 + movdqa xmm2, [rdx] ; p1 +%else + movdqa xmm2, [rdx+32] ; p1 +%endif + movdqa xmm5, xmm2 ; p1 
+ pmaxub xmm0, xmm7 + + psubusb xmm5, xmm1 ; p1-p2 + psubusb xmm1, xmm2 ; p2-p1 + + movdqa xmm7, xmm3 ; p0 + psubusb xmm7, xmm2 ; p0-p1 + + por xmm1, xmm5 ; abs(p2-p1) + pmaxub xmm0, xmm6 + + pmaxub xmm0, xmm1 + movdqa xmm1, xmm2 ; p1 + + psubusb xmm2, xmm3 ; p1-p0 + lea rdx, srct + + por xmm2, xmm7 ; abs(p1-p0) + + movdqa t0, xmm2 ; save abs(p1-p0) + + pmaxub xmm0, xmm2 + +%if %1 + movdqa xmm5, [rdx+32] ; q0 + movdqa xmm7, [rdx+48] ; q1 +%else + movdqa xmm5, [rdx+64] ; q0 + movdqa xmm7, [rdx+80] ; q1 +%endif + mov rdx, arg(3) ; limit + + movdqa xmm6, xmm5 ; q0 + movdqa xmm2, xmm7 ; q1 + + psubusb xmm5, xmm7 ; q0-q1 + psubusb xmm7, xmm6 ; q1-q0 + + por xmm7, xmm5 ; abs(q1-q0) + + movdqa t1, xmm7 ; save abs(q1-q0) + + movdqa xmm4, XMMWORD PTR [rdx]; limit + + pmaxub xmm0, xmm7 + mov rdx, arg(2) ; flimit + + psubusb xmm0, xmm4 + movdqa xmm5, xmm2 ; q1 + + psubusb xmm5, xmm1 ; q1-=p1 + psubusb xmm1, xmm2 ; p1-=q1 + + por xmm5, xmm1 ; abs(p1-q1) + movdqa xmm1, xmm3 ; p0 + + pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero + psubusb xmm1, xmm6 ; p0-q0 + + psrlw xmm5, 1 ; abs(p1-q1)/2 + psubusb xmm6, xmm3 ; q0-p0 + + movdqa xmm2, XMMWORD PTR [rdx]; flimit + + mov rdx, arg(4) ; get thresh + + por xmm1, xmm6 ; abs(q0-p0) + paddb xmm2, xmm2 ; flimit*2 (less than 255) + + movdqa xmm6, t0 ; get abs (p1 - p0) + + paddusb xmm1, xmm1 ; abs(q0-p0)*2 + + movdqa xmm3, t1 ; get abs (q1 - q0) + + movdqa xmm7, XMMWORD PTR [rdx] + + paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 + psubusb xmm6, xmm7 ; abs(p1 - p0) > thresh + + paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255) + psubusb xmm3, xmm7 ; abs(q1 - q0)> thresh + + psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit + por xmm6, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh + + por xmm1, xmm0 ; mask + pcmpeqb xmm6, xmm0 + + pxor xmm0, xmm0 + pcmpeqb xmm4, xmm4 + + pcmpeqb xmm1, xmm0 + pxor xmm4, xmm6 +%endmacro + +%macro BV_TRANSPOSE 0 + ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42
32 22 12 02 + ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 + ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 + movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + + movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 + + punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 + + movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 + + punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 + movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 + + punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 + ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 + ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 + ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 + ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 +%endmacro + +%macro BV_WRITEBACK 2 + movd [rsi+2], %1 + psrldq %1, 4 + + movd [rdi+2], %1 + psrldq %1, 4 + + movd [rsi+2*rax+2], %1 + psrldq %1, 4 + + movd [rdi+2*rax+2], %1 + + movd [rsi+4*rax+2], %2 + psrldq %2, 4 + + movd [rdi+4*rax+2], %2 + psrldq %2, 4 + + movd [rsi+2*rcx+2], %2 + psrldq %2, 4 + + movd [rdi+2*rcx+2], %2 +%endmacro + + +;void vp8_loop_filter_vertical_edge_sse2 +;( +; unsigned char *src_ptr, +; int src_pixel_step, +; const char *flimit, +; const char *limit, +; const char *thresh, +; int count +;) +global sym(vp8_loop_filter_vertical_edge_sse2) +sym(vp8_loop_filter_vertical_edge_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push 
rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 96 ; reserve 96 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; + %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; + + mov rsi, arg(0) ; src_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax*2+rax] + + ;transpose 16x8 to 8x16, and store the 8-line result on stack. + TRANSPOSE_16X8 1, 1 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK 1 + + ; start work on filters + B_FILTER 2 + + ; tranpose and write back - only work on q1, q0, p0, p1 + BV_TRANSPOSE + ; store 16-line result + + lea rdx, [rax] + neg rdx + + BV_WRITEBACK xmm1, xmm5 + + lea rsi, [rsi+rdx*8] + lea rdi, [rdi+rdx*8] + BV_WRITEBACK xmm2, xmm6 + + add rsp, 96 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_loop_filter_vertical_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *flimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_loop_filter_vertical_edge_uv_sse2) +sym(vp8_loop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 96 ; reserve 96 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; + %define srct [rsp + 32] ;__declspec(align(16)) char srct[64]; + + mov rsi, arg(0) ; u_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax+2*rax] + + lea rdx, srct + + ;transpose 16x8 to 8x16, and store the 8-line result on stack. 
+ TRANSPOSE_16X8 0, 1 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK 1 + + ; start work on filters + B_FILTER 2 + + ; tranpose and write back - only work on q1, q0, p0, p1 + BV_TRANSPOSE + + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + + ; store 16-line result + BV_WRITEBACK xmm1, xmm5 + + mov rsi, arg(0) ; u_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + BV_WRITEBACK xmm2, xmm6 + + add rsp, 96 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +%macro MBV_TRANSPOSE 0 + movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + + punpcklbw xmm0, xmm7 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 + punpckhbw xmm1, xmm7 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + + movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + + punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 + punpckhbw xmm6, [rdx+48] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 + + movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 + punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 + + punpckhwd xmm3, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 + movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + + punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 + punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 + + movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 + + movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 + punpcklbw xmm6, [rdx+112] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 + + movdqa xmm7, xmm2 ; 75 74 
65 64 55 54 45 44 35 34 25 24 15 14 05 04 + punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 + + punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 + movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 + + punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 + punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 +%endmacro + +%macro MBV_WRITEBACK_1 0 + movq QWORD PTR [rsi], xmm0 + movhps MMWORD PTR [rdi], xmm0 + + movq QWORD PTR [rsi+2*rax], xmm6 + movhps MMWORD PTR [rdi+2*rax], xmm6 + + movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 + punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 + + punpckhdq xmm3, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 + + movq QWORD PTR [rsi+4*rax], xmm0 + movhps MMWORD PTR [rdi+4*rax], xmm0 + + movq QWORD PTR [rsi+2*rcx], xmm3 + movhps MMWORD PTR [rdi+2*rcx], xmm3 + + movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 + punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 + + punpckhbw xmm5, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 + movdqa xmm0, xmm2 + + punpcklwd xmm0, xmm5 ; b7 b6 b5 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 + punpckhwd xmm2, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 + + movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 + punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 84 83 82 81 80 + + punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 +%endmacro + +%macro MBV_WRITEBACK_2 0 + movq QWORD PTR [rsi], xmm1 + movhps MMWORD PTR [rdi], xmm1 + + movq QWORD PTR [rsi+2*rax], xmm5 + movhps MMWORD PTR [rdi+2*rax], xmm5 + + movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 + punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 + punpckhdq xmm4, xmm2 ; f7 f6 f5 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 + + movq QWORD PTR [rsi+4*rax],
xmm1 + movhps MMWORD PTR [rdi+4*rax], xmm1 + + movq QWORD PTR [rsi+2*rcx], xmm4 + movhps MMWORD PTR [rdi+2*rcx], xmm4 +%endmacro + + ;void vp8_mbloop_filter_vertical_edge_sse2 ;( ; unsigned char *src_ptr, @@ -1020,6 +1249,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -1031,531 +1261,36 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; %define srct [rsp + 32] ;__declspec(align(16)) char srct[128]; + mov rsi, arg(0) ; src_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step - mov rsi, arg(0) ;src_ptr - movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? - - lea rsi, [rsi + rax*4 - 4] - lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing - - mov rcx, rax - neg rcx + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax*2+rax] ; Transpose - movq xmm0, QWORD PTR [rdi+rax*2] ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 - movq xmm7, QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 + TRANSPOSE_16X8 1, 0 - punpcklbw xmm7, xmm0 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 - movq xmm0, QWORD PTR [rsi+rax] ; + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK 0 - movq xmm5, QWORD PTR [rsi] ; - punpcklbw xmm5, xmm0 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 + neg rax + ; start work on filters + MB_FILTER_AND_WRITEBACK 2 - movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 - punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 - - punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 - movq xmm7, QWORD PTR [rsi + rcx] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30 - - movq xmm0, QWORD PTR [rsi + rcx*2] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 - punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20 - - movq xmm4, 
QWORD PTR [rsi + rcx*4] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 - movq xmm7, QWORD PTR [rdi + rcx*4] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 - - punpcklbw xmm4, xmm7 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 - - punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - - movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 - movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 - - punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - - punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 - - movdqa t0, xmm2 ; save to free XMM2 - ;movdqa t1, xmm3 - - ; XMM3 XMM4 XMM7 in use lea rsi, [rsi+rax*8] lea rdi, [rdi+rax*8] - movq xmm6, QWORD PTR [rdi+rax*2] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 - movq xmm5, QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 - - punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 - movq xmm6, QWORD PTR [rsi+rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 - - movq xmm1, QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 - punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0 - - movdqa xmm6, xmm1 ; - punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 - - punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 - movq xmm5, QWORD PTR [rsi+rcx] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 - - movq xmm0, QWORD PTR [rsi+rcx*2] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 - punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 - - movq xmm2, QWORD PTR [rsi+rcx*4] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 
81 80 - movq xmm5, QWORD PTR [rdi+rcx*4] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 - - punpcklbw xmm2, xmm5 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 - - punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 - punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - movdqa xmm0, xmm5 - punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 - - - punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 - movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 - - punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 - punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 - - movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 - punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - - - lea rdx, srct - punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 - - movdqa [rdx+112], xmm7 ; save 7 - movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 - - movdqa [rdx+96], xmm6 ; save 6 - punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - - punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 - movdqa [rdx+32], xmm2 ; save 2 - - movdqa xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 - punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - - movdqa [rdx+48], xmm3 ; save 3 - punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 - - movdqa [rdx+64], xmm4 ; save 4 - movdqa [rdx+80], xmm5 ; save 5 - - movdqa xmm1, t0 ; get - movdqa xmm2, xmm1 ; - - punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - - movdqa [rdx+16], xmm1 - movdqa [rdx], xmm2 - - movdqa xmm0, xmm6 ; q2 - psubusb xmm0, xmm7 ; q2-q3 - - psubusb xmm7, xmm6 ; q3-q2 - por 
xmm7, xmm0 ; abs (q3-q2) - - movdqa xmm1, xmm5 ; q1 - psubusb xmm1, xmm6 ; q1-q2 - - psubusb xmm6, xmm5 ; q2-q1 - por xmm6, xmm1 ; abs (q2-q1) - - ;/* - ;movdqa xmm0, xmm4 ; q0 - ;psubusb xmm0 xmm5 ; q0-q1 - ; - ;pusbusb xmm5, xmm4 ; q1-q0 - ;por xmm5, xmm0 ; abs (q1-q0) - ;*/ - - movdqa xmm1, [rdx+16] ; p2 - movdqa xmm0, xmm1 - - psubusb xmm0, xmm2 ; p2 - p3; - psubusb xmm2, xmm1 ; p3 - p2; - - por xmm0, xmm2 ; abs(p2-p3) - - movdqa xmm2, [rdx+32] ; p1 - movdqa xmm5, xmm2 ; p1 - - psubusb xmm5, xmm1 ; p1-p2 - psubusb xmm1, xmm2 ; p2-p1 - - por xmm1, xmm5 ; abs(p2-p1) - mov rdx, arg(3) ;limit - - movdqa xmm4, [rdx] ; limit - psubusb xmm7, xmm4 ; - - - psubusb xmm0, xmm4 ; abs(p3-p2) > limit - psubusb xmm1, xmm4 ; abs(p2-p1) > limit - - psubusb xmm6, xmm4 ; abs(q2-q1) > limit - por xmm7, xmm6 ; or - - por xmm0, xmm1 ; - por xmm0, xmm7 ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit - - movdqa xmm1, xmm2 ; p1 - - movdqa xmm7, xmm3 ; p0 - psubusb xmm7, xmm2 ; p0-p1 - - psubusb xmm2, xmm3 ; p1-p0 - por xmm2, xmm7 ; abs(p1-p0) - - movdqa t0, xmm2 ; save abs(p1-p0) - lea rdx, srct - - psubusb xmm2, xmm4 ; abs(p1-p0)>limit - por xmm0, xmm2 ; mask - - movdqa xmm5, [rdx+64] ; q0 - movdqa xmm7, [rdx+80] ; q1 - - movdqa xmm6, xmm5 ; q0 - movdqa xmm2, xmm7 ; q1 - psubusb xmm5, xmm7 ; q0-q1 - - psubusb xmm7, xmm6 ; q1-q0 - por xmm7, xmm5 ; abs(q1-q0) - - movdqa t1, xmm7 ; save abs(q1-q0) - psubusb xmm7, xmm4 ; abs(q1-q0)> limit - - por xmm0, xmm7 ; mask - - movdqa xmm5, xmm2 ; q1 - psubusb xmm5, xmm1 ; q1-=p1 - psubusb xmm1, xmm2 ; p1-=q1 - por xmm5, xmm1 ; abs(p1-q1) - pand xmm5, [tfe GLOBAL] ; set lsb of each byte to zero - psrlw xmm5, 1 ; abs(p1-q1)/2 - - mov rdx, arg(2) ;flimit ; - movdqa xmm2, [rdx] ; flimit - - movdqa xmm1, xmm3 ; p0 - movdqa xmm7, xmm6 ; q0 - psubusb xmm1, xmm7 ; p0-q0 - psubusb xmm7, xmm3 ; q0-p0 - por xmm1, xmm7 ; abs(q0-p0) - paddusb xmm1, xmm1 ; abs(q0-p0)*2 - paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 
- - paddb xmm2, xmm2 ; flimit*2 (less than 255) - paddb xmm4, xmm2 ; flimit * 2 + limit (less than 255) - - psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > flimit * 2 + limit - por xmm1, xmm0; ; mask - pxor xmm0, xmm0 - pcmpeqb xmm1, xmm0 - - ; calculate high edge variance - mov rdx, arg(4) ;thresh ; get thresh - movdqa xmm7, [rdx] - - movdqa xmm4, t0 ; get abs (q1 - q0) - psubusb xmm4, xmm7 ; abs(q1 - q0) > thresh - - movdqa xmm3, t1 ; get abs (p1 - p0) - psubusb xmm3, xmm7 ; abs(p1 - p0)> thresh - - por xmm4, xmm3 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh - pcmpeqb xmm4, xmm0 - - pcmpeqb xmm0, xmm0 - pxor xmm4, xmm0 - - - ; start work on filters - lea rdx, srct - - ; start work on filters - movdqa xmm2, [rdx+32] ; p1 - movdqa xmm7, [rdx+80] ; q1 - - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values - - psubsb xmm2, xmm7 ; p1 - q1 - movdqa xmm6, [rdx+48] ; p0 - - movdqa xmm0, [rdx+64] ; q0 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values - movdqa xmm3, xmm0 ; q0 - - psubsb xmm0, xmm6 ; q0 - p0 - paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) - - paddsb xmm2, xmm0 ; 2 * (q0 - p0) - paddsb xmm2, xmm0 ; 3 * (q0 - p0)+ (p1 - q1) - - pand xmm1, xmm2 ; mask filter values we don't care about - - ; xmm1 = vp8_filter, xmm4=hev, xmm6=ps0, xmm3=qs0 - movdqa xmm2, xmm1 ; vp8_filter - pand xmm2, xmm4; ; Filter2 = vp8_filter & hev - - movdqa xmm5, xmm2 - paddsb xmm5, [t3 GLOBAL] - - pxor xmm0, xmm0 ; 0 - pxor xmm7, xmm7 ; 0 - - punpcklbw xmm0, xmm5 ; e0f0g0h0 - psraw xmm0, 11 ; sign extended shift right by 3 - - punpckhbw xmm7, xmm5 ; a0b0c0d0 - psraw xmm7, 11 ; sign extended shift right by 3 - - packsswb xmm0, xmm7 ; Filter2 >>=3; - movdqa xmm5, xmm0 ; Filter2 - - paddsb xmm2, [t4 GLOBAL] ; vp8_signed_char_clamp(Filter2 + 4) - pxor xmm0, xmm0 ; 0 - - pxor xmm7, xmm7 ; 0 - punpcklbw xmm0, xmm2 ; e0f0g0h0 - - 
psraw xmm0, 11 ; sign extended shift right by 3 - punpckhbw xmm7, xmm2 ; a0b0c0d0 - - psraw xmm7, 11 ; sign extended shift right by 3 - packsswb xmm0, xmm7 ; Filter2 >>=3; - - ; xmm0= filter2 xmm1 = vp8_filter, xmm3 =qs0 xmm5=s xmm4 =hev xmm6=ps0 - psubsb xmm3, xmm0 ; qs0 =qs0 - filter1 - paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 - - - ; xmm1=vp8_filter, xmm3=qs0, xmm4 =hev xmm6=ps0 - ; vp8_filter &= ~hev; - ; Filter2 = vp8_filter; - pandn xmm4, xmm1 ; vp8_filter&=~hev - - ; xmm3=qs0, xmm4=filter2, xmm6=ps0 - ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7); - ; s = vp8_signed_char_clamp(qs0 - u); - ; *oq0 = s^0x80; - ; s = vp8_signed_char_clamp(ps0 + u); - ; *op0 = s^0x80; - pxor xmm0, xmm0 - pxor xmm1, xmm1 - - pxor xmm2, xmm2 - punpcklbw xmm1, xmm4 - - punpckhbw xmm2, xmm4 - pmulhw xmm1, [s27 GLOBAL] - - pmulhw xmm2, [s27 GLOBAL] - paddw xmm1, [s63 GLOBAL] - - paddw xmm2, [s63 GLOBAL] - psraw xmm1, 7 - - psraw xmm2, 7 - packsswb xmm1, xmm2 - - psubsb xmm3, xmm1 - paddsb xmm6, xmm1 - - pxor xmm3, [t80 GLOBAL] - pxor xmm6, [t80 GLOBAL] - - movdqa [rdx+48], xmm6 - movdqa [rdx+64], xmm3 - - ; roughly 2/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7); - ; s = vp8_signed_char_clamp(qs1 - u); - ; *oq1 = s^0x80; - ; s = vp8_signed_char_clamp(ps1 + u); - ; *op1 = s^0x80; - pxor xmm1, xmm1 - pxor xmm2, xmm2 - - punpcklbw xmm1, xmm4 - punpckhbw xmm2, xmm4 - - pmulhw xmm1, [s18 GLOBAL] - pmulhw xmm2, [s18 GLOBAL] - - paddw xmm1, [s63 GLOBAL] - paddw xmm2, [s63 GLOBAL] - - psraw xmm1, 7 - psraw xmm2, 7 - - packsswb xmm1, xmm2 - - movdqa xmm3, [rdx + 80] ;/q1 - movdqa xmm6, [rdx + 32] ; p1 - - pxor xmm3, [t80 GLOBAL] - pxor xmm6, [t80 GLOBAL] - - paddsb xmm6, xmm1 - psubsb xmm3, xmm1 - - pxor xmm6, [t80 GLOBAL] - pxor xmm3, [t80 GLOBAL] - - movdqa [rdx + 80], xmm3 - movdqa [rdx + 32], xmm6 - - - ; roughly 1/7th difference across boundary - ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7); - ; s = vp8_signed_char_clamp(qs2 - u); - ; *oq2 
= s^0x80; - ; s = vp8_signed_char_clamp(ps2 + u); - ; *op2 = s^0x80; - pxor xmm1, xmm1 - pxor xmm2, xmm2 - - punpcklbw xmm1, xmm4 - punpckhbw xmm2, xmm4 - - pmulhw xmm1, [s9 GLOBAL] - pmulhw xmm2, [s9 GLOBAL] - - paddw xmm1, [s63 GLOBAL] - paddw xmm2, [s63 GLOBAL] - - psraw xmm1, 7 - psraw xmm2, 7 - - packsswb xmm1, xmm2 - - movdqa xmm6, [rdx+16] - movdqa xmm3, [rdx+96] - - pxor xmm6, [t80 GLOBAL] - pxor xmm3, [t80 GLOBAL] - - paddsb xmm6, xmm1 - psubsb xmm3, xmm1 - - pxor xmm6, [t80 GLOBAL] ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 - pxor xmm3, [t80 GLOBAL] ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06 - - ; transpose and write back - movdqa xmm0, [rdx] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 - movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 + MBV_TRANSPOSE - punpcklbw xmm0, xmm6 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpckhbw xmm1, xmm6 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 + neg rax - movdqa xmm2, [rdx+32] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 - movdqa xmm6, xmm2 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 + MBV_WRITEBACK_1 - punpcklbw xmm2, [rdx+48] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 - punpckhbw xmm6, [rdx+48] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 - - movdqa xmm5, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 - punpcklwd xmm0, xmm2 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckhwd xmm5, xmm2 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 - - punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 - - movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpcklbw xmm2, [rdx+80] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - - movdqa xmm6, xmm3 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 - punpcklbw xmm6, [rdx+112] ; 77 76 67 
66 57 56 47 46 37 36 27 26 17 16 07 06 - - movdqa xmm7, xmm2 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 - punpcklwd xmm2, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 - - punpckhwd xmm7, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 - movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 - - punpckldq xmm0, xmm2 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 - punpckhdq xmm6, xmm2 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 - - lea rsi, [rsi+rcx*8] - lea rdi, [rdi+rcx*8] - - movq QWORD PTR [rsi+rcx*4], xmm0 - psrldq xmm0, 8 - - movq QWORD PTR [rsi+rcx*2], xmm6 - psrldq xmm6, 8 - - movq QWORD PTR [rdi+rcx*4], xmm0 - movq QWORD PTR [rsi+rcx], xmm6 - - movdqa xmm0, xmm5 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 - punpckldq xmm0, xmm7 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 - - punpckhdq xmm5, xmm7 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 - - movq QWORD PTR [rsi], xmm0 - psrldq xmm0, 8 - - movq QWORD PTR [rsi+rax*2], xmm5 - psrldq xmm5, 8 - - movq QWORD PTR [rsi+rax], xmm0 - movq QWORD PTR [rdi+rax*2], xmm5 - - movdqa xmm2, [rdx+64] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 - punpckhbw xmm2, [rdx+80] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 - - punpckhbw xmm3, [rdx+112] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 - movdqa xmm0, xmm2 - - punpcklwd xmm0, xmm3 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 - punpckhwd xmm2, xmm3 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 - - movdqa xmm3, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 - punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 - - punpckhdq xmm3, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 - - lea rsi, [rsi+rax*8] - lea rdi, [rdi+rax*8] - - movq QWORD PTR [rsi+rcx*4], xmm1 - psrldq xmm1, 8 - - movq QWORD PTR [rsi+rcx*2], xmm3 - psrldq xmm3, 8 - - movq QWORD PTR [rdi+rcx*4], xmm1 - movq QWORD PTR [rsi+rcx], xmm3 - - movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 
d0 c3 c2 c1 c0 - punpckldq xmm1, xmm2 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0 - - punpckhdq xmm4, xmm2 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 - movq QWORD PTR [rsi], xmm1 - - psrldq xmm1, 8 - - movq QWORD PTR [rsi+rax*2], xmm4 - psrldq xmm4, 8 - - movq QWORD PTR [rsi+rax], xmm1 - movq QWORD PTR [rdi+rax*2], xmm4 + lea rsi, [rsi+rax*8] + lea rdi, [rdi+rax*8] + MBV_WRITEBACK_2 add rsp, 160 pop rsp @@ -1563,6 +1298,75 @@ sym(vp8_mbloop_filter_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_mbloop_filter_vertical_edge_uv_sse2 +;( +; unsigned char *u, +; int src_pixel_step, +; const char *flimit, +; const char *limit, +; const char *thresh, +; unsigned char *v +;) +global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) +sym(vp8_mbloop_filter_vertical_edge_uv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 160 ; reserve 160 bytes + %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; + %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; + %define srct [rsp + 32] ;__declspec(align(16)) char srct[128]; + + mov rsi, arg(0) ; u_ptr + movsxd rax, dword ptr arg(1) ; src_pixel_step + + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing + lea rcx, [rax+2*rax] + + lea rdx, srct + + ; Transpose + TRANSPOSE_16X8 0, 0 + + ; calculate filter mask and high edge variance + LFV_FILTER_MASK_HEV_MASK 0 + + ; start work on filters + MB_FILTER_AND_WRITEBACK 2 + + ; transpose and write back + MBV_TRANSPOSE + + mov rsi, arg(0) ;u_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] + MBV_WRITEBACK_1 + mov rsi, arg(5) ;v_ptr + lea rsi, [rsi - 4] + lea rdi, [rsi + rax] + MBV_WRITEBACK_2 + + add rsp, 160 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1582,6 +1386,7 @@ 
sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -1610,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): psubusb xmm0, xmm1 ; q1-=p1 psubusb xmm1, xmm4 ; p1-=q1 por xmm1, xmm0 ; abs(p1-q1) - pand xmm1, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm1, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm1, 1 ; abs(p1-q1)/2 movdqu xmm5, [rsi+rax] ; p0 @@ -1628,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): pcmpeqb xmm5, xmm3 ; start work on filters - pxor xmm2, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor xmm2, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values - pxor xmm0, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) @@ -1642,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): pand xmm5, xmm2 ; mask filter values we don't care about ; do + 4 side - paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1655,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): por xmm0, xmm1 ; put the two together to get result psubsb xmm3, xmm0 ; q0-= q0 add - pxor xmm3, [t80 GLOBAL] ; unoffset + pxor xmm3, [GLOBAL(t80)] ; unoffset movdqu [rsi], xmm3 ; write back ; now do +3 side - psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift 
left 8 @@ -1671,13 +1476,14 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2): paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset + pxor xmm6, [GLOBAL(t80)] ; unoffset movdqu [rsi+rax], xmm6 ; write back ; begin epilog pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1697,6 +1503,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx ; save callee-saved reg push rsi push rdi @@ -1789,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): psubusb xmm7, xmm0 ; q1-=p1 psubusb xmm6, xmm3 ; p1-=q1 por xmm6, xmm7 ; abs(p1-q1) - pand xmm6, [tfe GLOBAL] ; set lsb of each byte to zero + pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm6, 1 ; abs(p1-q1)/2 movdqa xmm5, xmm1 ; p0 @@ -1815,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): movdqa t0, xmm0 movdqa t1, xmm3 - pxor xmm0, [t80 GLOBAL] ; p1 offset to convert to signed values - pxor xmm3, [t80 GLOBAL] ; q1 offset to convert to signed values + pxor xmm0, [GLOBAL(t80)] ; p1 offset to convert to signed values + pxor xmm3, [GLOBAL(t80)] ; q1 offset to convert to signed values psubsb xmm0, xmm3 ; p1 - q1 movdqa xmm6, xmm1 ; p0 movdqa xmm7, xmm2 ; q0 - pxor xmm6, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm6, [GLOBAL(t80)] ; offset to convert to signed values - pxor xmm7, [t80 GLOBAL] ; offset to convert to signed values + pxor xmm7, [GLOBAL(t80)] ; offset to convert to signed values movdqa xmm3, xmm7 ; offseted ; q0 psubsb xmm7, xmm6 ; q0 - p0 @@ -1836,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pand xmm5, xmm0 ; mask filter values we don't care about - paddsb xmm5, [t4 GLOBAL] ; 3* (q0 - p0) + (p1 - q1) + 4 + paddsb xmm5, [GLOBAL(t4)] ; 3* (q0 - p0) + (p1 - q1) + 4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1851,10 +1658,10 @@ 
sym(vp8_loop_filter_simple_vertical_edge_sse2): por xmm0, xmm7 ; put the two together to get result psubsb xmm3, xmm0 ; q0-= q0sz add - pxor xmm3, [t80 GLOBAL] ; unoffset q0 + pxor xmm3, [GLOBAL(t80)] ; unoffset q0 ; now do +3 side - psubsb xmm5, [t1s GLOBAL] ; +3 instead of +4 + psubsb xmm5, [GLOBAL(t1s)] ; +3 instead of +4 movdqa xmm0, xmm5 ; get a copy of filters psllw xmm0, 8 ; shift left 8 @@ -1867,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): por xmm0, xmm5 ; put the two together to get result paddsb xmm6, xmm0 ; p0+= p0 add - pxor xmm6, [t80 GLOBAL] ; unoffset p0 + pxor xmm6, [GLOBAL(t80)] ; unoffset p0 movdqa xmm0, t0 ; p1 movdqa xmm4, t1 ; q1 @@ -1941,6 +1748,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2): pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -1965,12 +1773,6 @@ align 16 ones: times 8 dw 0x0001 align 16 -s27: - times 8 dw 0x1b00 -align 16 -s18: - times 8 dw 0x1200 -align 16 s9: times 8 dw 0x0900 align 16 diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c index 143ee7469..93107e179 100644 --- a/vp8/common/x86/loopfilter_x86.c +++ b/vp8/common/x86/loopfilter_x86.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -33,8 +34,13 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2); prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2); prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2); +extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; +extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; +extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; +extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; + #if HAVE_MMX -// Horizontal MB filtering +/* Horizontal MB filtering */ void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -60,7 +66,7 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign } -// Vertical MB Filtering +/* Vertical MB Filtering */ void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -86,7 +92,7 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign } -// Horizontal B Filtering +/* Horizontal B Filtering */ void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -116,7 +122,7 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne } -// Vertical B Filtering +/* Vertical B Filtering */ void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -147,7 +153,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne #endif -// Horizontal MB filtering +/* Horizontal MB filtering */ #if HAVE_SSE2 void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int 
y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) @@ -156,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); if (u_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); - - if (v_ptr) - vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); } @@ -174,7 +177,7 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig } -// Vertical MB Filtering +/* Vertical MB Filtering */ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -182,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2); if (u_ptr) - vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); - - if (v_ptr) - vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1); + vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr); } @@ -200,7 +200,7 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig } -// Horizontal B Filtering +/* Horizontal B Filtering */ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -210,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); 
if (u_ptr) - vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); - - if (v_ptr) - vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride); } @@ -230,7 +227,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign } -// Vertical B Filtering +/* Vertical B Filtering */ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf) { @@ -240,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2); if (u_ptr) - vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); - - if (v_ptr) - vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1); + vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4); } diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h index c87f38a31..80dbebc8d 100644 --- a/vp8/common/x86/loopfilter_x86.h +++ b/vp8/common/x86/loopfilter_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm index 721c8d612..787e83268 100644 --- a/vp8/common/x86/postproc_mmx.asm +++ b/vp8/common/x86/postproc_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -36,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx): %if ABI_IS_32BIT=1 && CONFIG_PIC=1 ; move the global rd onto the stack, since we don't have enough registers ; to do PIC addressing - movq mm0, [rd GLOBAL] + movq mm0, [GLOBAL(rd)] sub rsp, 8 movq [rsp], mm0 %define RD [rsp] %else -%define RD [rd GLOBAL] +%define RD [GLOBAL(rd)] %endif push rbx - lea rbx, [Blur GLOBAL] + lea rbx, [GLOBAL(Blur)] movd mm2, dword ptr arg(6) ;flimit punpcklwd mm2, mm2 punpckldq mm2, mm2 @@ -285,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx): %define flimit2 [rsp+128] %if ABI_IS_32BIT=0 - lea r8, [sym(vp8_rv) GLOBAL] + lea r8, [GLOBAL(sym(vp8_rv))] %endif ;rows +=8; @@ -403,7 +404,7 @@ loop_row: and rcx, 127 %if ABI_IS_32BIT=1 && CONFIG_PIC=1 push rax - lea rax, [sym(vp8_rv) GLOBAL] + lea rax, [GLOBAL(sym(vp8_rv))] movq mm4, [rax + rcx*2] ;vp8_rv[rcx*2] pop rax %elif ABI_IS_32BIT=0 diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c index 095797b1e..6b6321ace 100644 --- a/vp8/common/x86/postproc_mmx.c +++ b/vp8/common/x86/postproc_mmx.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm index bfa36fa70..30b4bf53a 100644 --- a/vp8/common/x86/postproc_sse2.asm +++ b/vp8/common/x86/postproc_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -25,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -34,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm): ALIGN_STACK 16, rax ; move the global rd onto the stack, since we don't have enough registers ; to do PIC addressing - movdqa xmm0, [rd42 GLOBAL] + movdqa xmm0, [GLOBAL(rd42)] sub rsp, 16 movdqa [rsp], xmm0 %define RD42 [rsp] %else -%define RD42 [rd42 GLOBAL] +%define RD42 [GLOBAL(rd42)] %endif @@ -239,6 +241,7 @@ acrossnextcol: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -253,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -271,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm): %define flimit4 [rsp+128] %if ABI_IS_32BIT=0 - lea r8, [sym(vp8_rv) GLOBAL] + lea r8, [GLOBAL(sym(vp8_rv))] %endif ;rows +=8; @@ -389,7 +393,7 @@ loop_row: and rcx, 127 %if ABI_IS_32BIT=1 && CONFIG_PIC=1 push rax - lea rax, [sym(vp8_rv) GLOBAL] + lea rax, 
[GLOBAL(sym(vp8_rv))] movdqu xmm4, [rax + rcx*2] ;vp8_rv[rcx*2] pop rax %elif ABI_IS_32BIT=0 @@ -438,6 +442,7 @@ loop_row: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -451,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -573,7 +579,7 @@ nextcol4: punpcklwd xmm1, xmm0 paddd xmm1, xmm6 - paddd xmm1, [four8s GLOBAL] + paddd xmm1, [GLOBAL(four8s)] psrad xmm1, 4 packssdw xmm1, xmm0 @@ -611,6 +617,7 @@ nextcol4: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h index 49a190793..899dd2f89 100644 --- a/vp8/common/x86/postproc_x86.h +++ b/vp8/common/x86/postproc_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm index ba60c5db7..e7211fccb 100644 --- a/vp8/common/x86/recon_mmx.asm +++ b/vp8/common/x86/recon_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm index f2685a76f..4ad3973ec 100644 --- a/vp8/common/x86/recon_sse2.asm +++ b/vp8/common/x86/recon_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -66,6 +67,7 @@ sym(vp8_recon4b_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 + SAVE_XMM push rsi push rdi ; end prolog @@ -118,6 +120,7 @@ sym(vp8_recon4b_sse2): ; begin epilog pop rdi pop rsi + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h index c46977842..40ee65a12 100644 --- a/vp8/common/x86/recon_x86.h +++ b/vp8/common/x86/recon_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm index c50211813..23ed4e208 100644 --- a/vp8/common/x86/subpixel_mmx.asm +++ b/vp8/common/x86/subpixel_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -83,7 +84,7 @@ nextrow: pmullw mm5, [rdx] ; mm5 *= kernel 5 modifiers paddsw mm3, mm5 ; mm3 += mm5 - paddsw mm3, [rd GLOBAL] ; mm3 += round value + paddsw mm3, [GLOBAL(rd)] ; mm3 += round value psraw mm3, VP8_FILTER_SHIFT ; mm3 /= 128 packuswb mm3, mm0 ; pack and unpack to saturate punpcklbw mm3, mm0 ; @@ -135,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx): push rdi ; end prolog - movq mm5, [rd GLOBAL] + movq mm5, [GLOBAL(rd)] push rbx mov rbx, arg(6) ;vp8_filter movq mm1, [rbx + 16] ; do both the negative taps first!!! @@ -224,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx): push rdi ; end prolog - movq mm5, [rd GLOBAL] + movq mm5, [GLOBAL(rd)] push rbx mov rbx, arg(7) ;vp8_filter movq mm1, [rbx + 16] ; do both the negative taps first!!! @@ -319,7 +320,7 @@ sym(vp8_bilinear_predict8x8_mmx): mov rdi, arg(4) ;dst_ptr ; shl rax, 5 ; offset * 32 - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] add rax, rcx ; HFilter mov rsi, arg(0) ;src_ptr ; @@ -362,10 +363,10 @@ sym(vp8_bilinear_predict8x8_mmx): paddw mm3, mm5 ; paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -403,10 +404,10 @@ next_row_8x8: pmullw mm5, [rax] ; pmullw mm6, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -420,10 +421,10 @@ next_row_8x8: paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; packuswb mm3, mm4 @@ -475,7 +476,7 @@ sym(vp8_bilinear_predict8x4_mmx): movsxd 
rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] shl rax, 5 mov rsi, arg(0) ;src_ptr ; @@ -517,10 +518,10 @@ sym(vp8_bilinear_predict8x4_mmx): paddw mm3, mm5 ; paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -558,10 +559,10 @@ next_row_8x4: pmullw mm5, [rax] ; pmullw mm6, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; movq mm7, mm3 ; @@ -575,10 +576,10 @@ next_row_8x4: paddw mm4, mm6 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw mm4, [rd GLOBAL] ; + paddw mm4, [GLOBAL(rd)] ; psraw mm4, VP8_FILTER_SHIFT ; packuswb mm3, mm4 @@ -630,7 +631,7 @@ sym(vp8_bilinear_predict4x4_mmx): movsxd rax, dword ptr arg(2) ;xoffset mov rdi, arg(4) ;dst_ptr ; - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] shl rax, 5 add rax, rcx ; HFilter @@ -661,7 +662,7 @@ sym(vp8_bilinear_predict4x4_mmx): pmullw mm5, mm2 ; paddw mm3, mm5 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 @@ -685,7 +686,7 @@ next_row_4x4: punpcklbw mm5, mm0 ; pmullw mm5, [rax] ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movq mm7, mm3 ; @@ -696,7 +697,7 @@ next_row_4x4: paddw mm3, mm5 ; - paddw mm3, [rd GLOBAL] ; xmm3 += round value + paddw mm3, [GLOBAL(rd)] ; xmm3 += round value psraw mm3, 
VP8_FILTER_SHIFT ; xmm3 /= 128 packuswb mm3, mm0 @@ -730,7 +731,7 @@ rd: times 4 dw 0x40 align 16 -global sym(vp8_six_tap_mmx) +global HIDDEN_DATA(sym(vp8_six_tap_mmx)) sym(vp8_six_tap_mmx): times 8 dw 0 times 8 dw 0 @@ -790,7 +791,7 @@ sym(vp8_six_tap_mmx): align 16 -global sym(vp8_bilinear_filters_mmx) +global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx)) sym(vp8_bilinear_filters_mmx): times 8 dw 128 times 8 dw 0 diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm index dee04f2d9..b87cad259 100644 --- a/vp8/common/x86/subpixel_sse2.asm +++ b/vp8/common/x86/subpixel_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -36,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -105,7 +107,7 @@ filter_block1d8_h6_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -128,6 +130,7 @@ filter_block1d8_h6_rowloop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -154,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -227,7 +231,7 @@ filter_block1d16_h6_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm1 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -280,7 +284,7 @@ filter_block1d16_h6_sse2_rowloop: paddsw xmm4, xmm6 paddsw xmm4, xmm2 - paddsw xmm4, [rd GLOBAL] + paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 @@ -303,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -328,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -345,7 +351,7 @@ sym(vp8_filter_block1d8_v6_sse2): movsxd rcx, DWORD PTR arg(5) ;[output_height] pxor xmm0, xmm0 ; clear xmm0 - movdqa xmm7, XMMWORD PTR [rd GLOBAL] + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(2) ; dst_ptich %endif @@ -396,6 +402,494 @@ vp8_filter_block1d8_v6_sse2_loop: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d16_v6_sse2 +;( +; unsigned short *src_ptr, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int pixels_per_line, +; unsigned int pixel_step, +; unsigned int output_height, +; unsigned int output_width, +; const short *vp8_filter +;) +;/************************************************************************************ +; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. 
The +; input pixel array has output_height rows. +;*************************************************************************************/ +global sym(vp8_filter_block1d16_v6_sse2) +sym(vp8_filter_block1d16_v6_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 8 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rax, arg(7) ;vp8_filter + movsxd rdx, dword ptr arg(3) ;pixels_per_line + + mov rdi, arg(1) ;output_ptr + mov rsi, arg(0) ;src_ptr + + sub rsi, rdx + sub rsi, rdx + + movsxd rcx, DWORD PTR arg(5) ;[output_height] +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(2) ; dst_ptich +%endif + +vp8_filter_block1d16_v6_sse2_loop: +; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order. + movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2 + movdqa xmm2, XMMWORD PTR [rsi + rdx + 16] + pmullw xmm1, [rax + 16] + pmullw xmm2, [rax + 16] + + movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5 + movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16] + pmullw xmm3, [rax + 64] + pmullw xmm4, [rax + 64] + + movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3 + movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16] + pmullw xmm5, [rax + 32] + pmullw xmm6, [rax + 32] + + movdqa xmm7, XMMWORD PTR [rsi] ; line 1 + movdqa xmm0, XMMWORD PTR [rsi + 16] + pmullw xmm7, [rax] + pmullw xmm0, [rax] + + paddsw xmm1, xmm3 + paddsw xmm2, xmm4 + paddsw xmm1, xmm5 + paddsw xmm2, xmm6 + paddsw xmm1, xmm7 + paddsw xmm2, xmm0 + + add rsi, rdx + + movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4 + movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16] + pmullw xmm3, [rax + 48] + pmullw xmm4, [rax + 48] + + movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6 + movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16] + pmullw xmm5, [rax + 80] + pmullw xmm6, [rax + 80] + + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] + pxor xmm0, xmm0 ; clear xmm0 + + paddsw xmm1, xmm3 + paddsw xmm2, xmm4 + paddsw xmm1, xmm5 + paddsw xmm2, xmm6 + + paddsw xmm1, xmm7 + paddsw xmm2, xmm7 + + psraw xmm1, 7 + psraw xmm2, 7 + + packuswb 
xmm1, xmm2 ; pack and saturate + movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(2) ;[dst_ptich] +%else + add rdi, r8 +%endif + dec rcx ; decrement count + jnz vp8_filter_block1d16_v6_sse2_loop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d8_h6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; First-pass filter only when yoffset==0 +global sym(vp8_filter_block1d8_h6_only_sse2) +sym(vp8_filter_block1d8_h6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(5) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ;dst_ptich +%endif + pxor xmm0, xmm0 ; clear xmm0 for unpack + +filter_block1d8_h6_only_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 
; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 + + movq QWORD PTR [rdi], xmm4 ; store the results in the destination + lea rsi, [rsi + rax] + +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(3) ;dst_ptich +%else + add rdi, r8 +%endif + dec rcx + + jnz filter_block1d8_h6_only_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d16_h6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; First-pass filter only when yoffset==0 +global sym(vp8_filter_block1d16_h6_only_sse2) +sym(vp8_filter_block1d16_h6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(5) ;vp8_filter + mov rsi, arg(0) ;src_ptr + + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ;dst_ptich +%endif + + pxor xmm0, xmm0 ; clear xmm0 for unpack + +filter_block1d16_h6_only_sse2_rowloop: + movq xmm3, MMWORD PTR [rsi - 2] + movq xmm1, MMWORD PTR [rsi + 6] + + movq xmm2, MMWORD PTR [rsi +14] + pslldq xmm2, 8 + + por xmm2, xmm1 + prefetcht2 [rsi+rax-2] + + pslldq xmm1, 8 + por 
xmm1, xmm3 + + movdqa xmm4, xmm1 + movdqa xmm5, xmm1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm1 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm1 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 ; lower 8 bytes + + movq QWORD Ptr [rdi], xmm4 ; store the results in the destination + + movdqa xmm3, xmm2 + movdqa xmm4, xmm2 + + movdqa xmm5, xmm2 + movdqa xmm6, xmm2 + + movdqa xmm7, xmm2 + + punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 + psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 + + pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 + punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 + + psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 + pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 + + punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 + psrldq xmm6, 3 ; xx xx xx 0d 0c 
0b 0a 09 08 07 06 05 04 03 02 01 + + pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 + + punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 + psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 + + pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 + + punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 + psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 + + pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 + + punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 + pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 + + paddsw xmm4, xmm7 + paddsw xmm4, xmm5 + + paddsw xmm4, xmm3 + paddsw xmm4, xmm6 + + paddsw xmm4, xmm2 + paddsw xmm4, [GLOBAL(rd)] + + psraw xmm4, 7 + + packuswb xmm4, xmm0 ; higher 8 bytes + + movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination + + lea rsi, [rsi + rax] +%if ABI_IS_32BIT + add rdi, DWORD Ptr arg(3) ;dst_ptich +%else + add rdi, r8 +%endif + + dec rcx + jnz filter_block1d16_h6_only_sse2_rowloop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_filter_block1d8_v6_only_sse2 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; int dst_ptich, +; unsigned int output_height, +; const short *vp8_filter +;) +; Second-pass filter only when xoffset==0 +global sym(vp8_filter_block1d8_v6_only_sse2) +sym(vp8_filter_block1d8_v6_only_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + mov rax, arg(5) ;vp8_filter + + pxor xmm0, xmm0 ; clear xmm0 + + movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(3) ; dst_ptich +%endif + +vp8_filter_block1d8_v6_only_sse2_loop: + movq xmm1, MMWORD PTR [rsi] + movq xmm2, MMWORD 
PTR [rsi + rdx] + movq xmm3, MMWORD PTR [rsi + rdx * 2] + movq xmm5, MMWORD PTR [rsi + rdx * 4] + add rsi, rdx + movq xmm4, MMWORD PTR [rsi + rdx * 2] + movq xmm6, MMWORD PTR [rsi + rdx * 4] + + punpcklbw xmm1, xmm0 + pmullw xmm1, [rax] + + punpcklbw xmm2, xmm0 + pmullw xmm2, [rax + 16] + + punpcklbw xmm3, xmm0 + pmullw xmm3, [rax + 32] + + punpcklbw xmm5, xmm0 + pmullw xmm5, [rax + 64] + + punpcklbw xmm4, xmm0 + pmullw xmm4, [rax + 48] + + punpcklbw xmm6, xmm0 + pmullw xmm6, [rax + 80] + + paddsw xmm2, xmm5 + paddsw xmm2, xmm3 + + paddsw xmm2, xmm1 + paddsw xmm2, xmm4 + + paddsw xmm2, xmm6 + paddsw xmm2, xmm7 + + psraw xmm2, 7 + packuswb xmm2, xmm0 ; pack and saturate + + movq QWORD PTR [rdi], xmm2 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[dst_ptich] +%else + add rdi, r8 +%endif + dec rcx ; decrement count + jnz vp8_filter_block1d8_v6_only_sse2_loop ; next row + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -414,6 +908,7 @@ sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 + ;SAVE_XMM ;xmm6, xmm7 are not used here. 
GET_GOT rbx push rsi push rdi @@ -453,164 +948,7 @@ unpack_block1d16_h6_sse2_rowloop: pop rdi pop rsi RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_unpack_block1d8_h6_sse2 -;( -; unsigned char *src_ptr, -; unsigned short *output_ptr, -; unsigned int src_pixels_per_line, -; unsigned int output_height, -; unsigned int output_width -;) -global sym(vp8_unpack_block1d8_h6_sse2) -sym(vp8_unpack_block1d8_h6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(1) ;output_ptr - - movsxd rcx, dword ptr arg(3) ;output_height - movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source - - pxor xmm0, xmm0 ; clear xmm0 for unpack -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source -%endif - -unpack_block1d8_h6_sse2_rowloop: - movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2 - lea rsi, [rsi + rax] - - punpcklbw xmm1, xmm0 - movdqa XMMWORD Ptr [rdi], xmm1 - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(4) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx - jnz unpack_block1d8_h6_sse2_rowloop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_pack_block1d8_v6_sse2 -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int output_height, -; unsigned int output_width -;) -global sym(vp8_pack_block1d8_v6_sse2) -sym(vp8_pack_block1d8_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, dword ptr arg(3) ;pixels_per_line - mov rdi, arg(1) ;output_ptr - - mov rsi, arg(0) ;src_ptr - movsxd rcx, DWORD PTR arg(4) ;[output_height] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(5) ;output_width ; Pitch for Source -%endif - -pack_block1d8_v6_sse2_loop: - movdqa xmm0, XMMWORD PTR [rsi] - packuswb xmm0, xmm0 - - movq 
QWORD PTR [rdi], xmm0 ; store the results in the destination - lea rsi, [rsi+rdx] - -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(5) ;[output_width] -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz pack_block1d8_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_pack_block1d16_v6_sse2 -;( -; short *src_ptr, -; unsigned char *output_ptr, -; int dst_ptich, -; unsigned int pixels_per_line, -; unsigned int output_height, -; unsigned int output_width -;) -global sym(vp8_pack_block1d16_v6_sse2) -sym(vp8_pack_block1d16_v6_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - GET_GOT rbx - push rsi - push rdi - ; end prolog - - movsxd rdx, dword ptr arg(3) ;pixels_per_line - mov rdi, arg(1) ;output_ptr - - mov rsi, arg(0) ;src_ptr - movsxd rcx, DWORD PTR arg(4) ;[output_height] -%if ABI_IS_32BIT=0 - movsxd r8, dword ptr arg(2) ;dst_pitch -%endif - -pack_block1d16_v6_sse2_loop: - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm1, XMMWORD PTR [rsi+16] - - packuswb xmm0, xmm1 - movdqa XMMWORD PTR [rdi], xmm0 ; store the results in the destination - - add rsi, rdx -%if ABI_IS_32BIT - add rdi, DWORD Ptr arg(2) ;dst_pitch -%else - add rdi, r8 -%endif - dec rcx ; decrement count - jnz pack_block1d16_v6_sse2_loop ; next row - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT + ;RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -631,6 +969,7 @@ sym(vp8_bilinear_predict16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -639,7 +978,7 @@ sym(vp8_bilinear_predict16x16_sse2): ;const short *HFilter = bilinear_filters_mmx[xoffset] ;const short *VFilter = bilinear_filters_mmx[yoffset] - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] movsxd rax, dword ptr arg(2) ;xoffset cmp rax, 0 ;skip first_pass filter if xoffset=0 @@ -694,10 +1033,10 @@ sym(vp8_bilinear_predict16x16_sse2): paddw xmm3, xmm5 paddw xmm4, xmm6 - 
paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT movdqa xmm7, xmm3 @@ -735,10 +1074,10 @@ next_row: pmullw xmm5, [rax] pmullw xmm6, [rax] - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT movdqa xmm7, xmm3 @@ -750,10 +1089,10 @@ next_row: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -815,10 +1154,10 @@ next_row_spo: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -859,10 +1198,10 @@ next_row_fpo: paddw xmm3, xmm5 paddw xmm4, xmm6 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 - paddw xmm4, [rd GLOBAL] + paddw xmm4, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT packuswb xmm3, xmm4 @@ -878,6 +1217,7 @@ done: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret @@ -898,6 +1238,7 @@ sym(vp8_bilinear_predict8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 + SAVE_XMM GET_GOT rbx push rsi push rdi @@ -908,7 +1249,7 @@ sym(vp8_bilinear_predict8x8_sse2): ;const short *HFilter = bilinear_filters_mmx[xoffset] ;const short *VFilter = bilinear_filters_mmx[yoffset] - lea rcx, [sym(vp8_bilinear_filters_mmx) GLOBAL] + lea rcx, [GLOBAL(sym(vp8_bilinear_filters_mmx))] mov rsi, 
arg(0) ;src_ptr movsxd rdx, dword ptr arg(1) ;src_pixels_per_line @@ -974,7 +1315,7 @@ sym(vp8_bilinear_predict8x8_sse2): paddw xmm3, xmm4 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm7, xmm3 @@ -993,7 +1334,7 @@ next_row8x8: paddw xmm3, xmm4 pmullw xmm7, xmm5 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm4, xmm3 @@ -1003,7 +1344,7 @@ next_row8x8: movdqa xmm7, xmm4 - paddw xmm3, [rd GLOBAL] ; xmm3 += round value + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 packuswb xmm3, xmm0 @@ -1021,6 +1362,7 @@ next_row8x8: pop rdi pop rsi RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm new file mode 100644 index 000000000..7f6fd93e4 --- /dev/null +++ b/vp8/common/x86/subpixel_ssse3.asm @@ -0,0 +1,1554 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +%define BLOCK_HEIGHT_WIDTH 4 +%define VP8_FILTER_WEIGHT 128 +%define VP8_FILTER_SHIFT 7 + + +;/************************************************************************************ +; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The +; input pixel array has output_height rows. This routine assumes that output_height is an +; even number. 
This function handles 8 pixels in horizontal direction, calculating ONE +; rows each iteration to take advantage of the 128 bits operations. +; +; This is an implementation of some of the SSE optimizations first seen in ffvp8 +; +;*************************************************************************************/ +;void vp8_filter_block1d8_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d8_h6_ssse3) +sym(vp8_filter_block1d8_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 + + movdqa xmm7, [GLOBAL(rd)] + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + mov rdi, arg(2) ;output_ptr + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d8_h4_ssse3 + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + + sub rdi, rdx +;xmm3 free +filter_block1d8_h6_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm1, xmm0 + pmaddubsw xmm0, xmm4 + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + pmaddubsw xmm1, xmm5 + + lea rdi, [rdi + rdx] + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] + dec rcx + + paddsw xmm0, xmm1 + paddsw xmm2, xmm7 + + paddsw xmm0, xmm2 + + psraw xmm0, 7 + + packuswb xmm0, xmm0 + + movq MMWORD Ptr [rdi], xmm0 + jnz filter_block1d8_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + 
UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d8_h4_ssse3: + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] + movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] + + mov rsi, arg(0) ;src_ptr + + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + + sub rdi, rdx + +filter_block1d8_h4_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm2, xmm0 + pshufb xmm0, xmm3 + + pshufb xmm2, xmm4 + pmaddubsw xmm0, xmm5 + + lea rdi, [rdi + rdx] + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] + dec rcx + + paddsw xmm0, xmm7 + + paddsw xmm0, xmm2 + + psraw xmm0, 7 + + packuswb xmm0, xmm0 + + movq MMWORD Ptr [rdi], xmm0 + + jnz filter_block1d8_h4_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret +; Horizontal 6-tap filter pass writing 16 output pixels per row. +; vp8_filter_index selects one 16-byte entry from each of the k0_k5, k1_k3 +; and k2_k4 tap-pair tables. Every row is computed as two 8-pixel halves: +; source bytes are interleaved/shuffled into tap pairs, multiplied with +; pmaddubsw, summed, rounded with rd, shifted right by 7 and saturated to +; bytes. The 16 results are stored with movdqa, so each output row must be +; 16-byte aligned. src advances by src_pixels_per_line and dst by +; output_pitch for output_height rows. +; The prolog's SAVE_XMM is matched by RESTORE_XMM in every epilog. +; NOTE(review): SAVE_XMM/RESTORE_XMM are defined outside this file; +; presumably they spill/reload xmm6-xmm7 where the ABI requires - confirm. +;void vp8_filter_block1d16_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d16_h6_ssse3) +sym(vp8_filter_block1d16_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + mov rdi, arg(2) ;output_ptr + +;; +;; cmp esi, DWORD PTR [rax] +;; je vp8_filter_block1d16_h4_ssse3 +;; (4-tap shortcut above is disabled: this function never branches to the h4 tail) + + mov rsi, arg(0) ;src_ptr + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rdx, dword ptr
arg(3) ;output_pitch + +filter_block1d16_h6_rowloop_ssse3: + movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 + + movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 + + punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 + + movdqa xmm1, xmm0 + pmaddubsw xmm0, xmm4 + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + movq xmm3, MMWORD PTR [rsi + 6] + + pmaddubsw xmm1, xmm5 + movq xmm7, MMWORD PTR [rsi + 11] + + pmaddubsw xmm2, xmm6 + punpcklbw xmm3, xmm7 + + paddsw xmm0, xmm1 + movdqa xmm1, xmm3 + + pmaddubsw xmm3, xmm4 + paddsw xmm0, xmm2 + + movdqa xmm2, xmm1 + paddsw xmm0, [GLOBAL(rd)] + + pshufb xmm1, [GLOBAL(shuf2bfrom1)] + pshufb xmm2, [GLOBAL(shuf3bfrom1)] + + psraw xmm0, 7 + pmaddubsw xmm1, xmm5 + + pmaddubsw xmm2, xmm6 + packuswb xmm0, xmm0 + + lea rsi, [rsi + rax] + paddsw xmm3, xmm1 + + paddsw xmm3, xmm2 + + paddsw xmm3, [GLOBAL(rd)] + + psraw xmm3, 7 + + packuswb xmm3, xmm3 + + punpcklqdq xmm0, xmm3 + + movdqa XMMWORD Ptr [rdi], xmm0 + + lea rdi, [rdi + rdx] + dec rcx + jnz filter_block1d16_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM ; pairs with SAVE_XMM in the prolog + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d16_h4_ssse3: + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + movsxd rdx, dword ptr arg(3) ;output_pitch + +filter_block1d16_h4_rowloop_ssse3: + movdqu xmm1, XMMWORD PTR [rsi - 2] + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2b)] + pshufb xmm2, [GLOBAL(shuf3b)] + pmaddubsw xmm1, xmm5 + + movdqu xmm3, XMMWORD PTR [rsi + 6] + + pmaddubsw xmm2, xmm6 + movdqa xmm0, xmm3 + pshufb xmm3, [GLOBAL(shuf3b)] + pshufb xmm0, [GLOBAL(shuf2b)] + + paddsw xmm1, [GLOBAL(rd)] + paddsw xmm1, xmm2 + + pmaddubsw xmm0, xmm5 + pmaddubsw xmm3, xmm6 + + psraw xmm1, 7 + packuswb xmm1, xmm1 + lea rsi, [rsi + rax] + paddsw xmm3, xmm0 + paddsw xmm3,
[GLOBAL(rd)] + psraw xmm3, 7 + packuswb xmm3, xmm3 + + punpcklqdq xmm1, xmm3 + + movdqa XMMWORD Ptr [rdi], xmm1 + + add rdi, rdx + dec rcx + jnz filter_block1d16_h4_rowloop_ssse3 + + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM ; pairs with SAVE_XMM in the vp8_filter_block1d16_h6_ssse3 prolog + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_filter_block1d4_h6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pixels_per_line, +; unsigned char *output_ptr, +; unsigned int output_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d4_h6_ssse3) +sym(vp8_filter_block1d4_h6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + movdqa xmm7, [GLOBAL(rd)] + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d4_h4_ssse3 + + movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + +;xmm3 free +filter_block1d4_h6_rowloop_ssse3: + movdqu xmm0, XMMWORD PTR [rsi - 2] + + movdqa xmm1, xmm0 + pshufb xmm0, [GLOBAL(shuf1b)] + + movdqa xmm2, xmm1 + pshufb xmm1, [GLOBAL(shuf2b)] + pmaddubsw xmm0, xmm4 + pshufb xmm2, [GLOBAL(shuf3b)] + pmaddubsw xmm1, xmm5 + +;-- + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] +;-- + paddsw xmm0, xmm1 + paddsw xmm0, xmm7 + pxor xmm1, xmm1 + paddsw xmm0, xmm2 + psraw xmm0, 7 + packuswb xmm0, xmm0 + + movd DWORD PTR [rdi], xmm0 + + add rdi, rdx + dec rcx + jnz filter_block1d4_h6_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d4_h4_ssse3: + movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 + movdqa xmm0, XMMWORD
PTR [GLOBAL(shuf2b)] + movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;output_ptr + movsxd rax, dword ptr arg(1) ;src_pixels_per_line + movsxd rcx, dword ptr arg(4) ;output_height + + movsxd rdx, dword ptr arg(3) ;output_pitch + +filter_block1d4_h4_rowloop_ssse3: + movdqu xmm1, XMMWORD PTR [rsi - 2] + + movdqa xmm2, xmm1 + pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] + pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] + pmaddubsw xmm1, xmm5 + +;-- + pmaddubsw xmm2, xmm6 + + lea rsi, [rsi + rax] +;-- + paddsw xmm1, xmm7 + paddsw xmm1, xmm2 + psraw xmm1, 7 + packuswb xmm1, xmm1 + + movd DWORD PTR [rdi], xmm1 + + add rdi, rdx + dec rcx + jnz filter_block1d4_h4_rowloop_ssse3 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + + +;void vp8_filter_block1d16_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d16_v6_ssse3) +sym(vp8_filter_block1d16_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d16_v4_ssse3 + + movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr + +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ;out_pitch +%endif + mov rax, rsi + movsxd rcx, DWORD PTR arg(4) ;output_height + add rax, rdx + + +vp8_filter_block1d16_v6_ssse3_loop: + movq xmm1, MMWORD PTR [rsi] ;A + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 
4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR [rax + rdx * 4] ;F + + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, [GLOBAL(rd)] + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 ;store the results + + movq xmm1, MMWORD PTR [rsi + 8] ;A + movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, [GLOBAL(rd)] + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi+8], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;out_pitch +%else + add rdi, r8 +%endif + dec rcx + jnz vp8_filter_block1d16_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d16_v4_ssse3: + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr + +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ;out_pitch +%endif + mov rax, rsi + movsxd rcx, DWORD PTR arg(4) ;output_height + add rax, rdx + +vp8_filter_block1d16_v4_ssse3_loop: + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + pmaddubsw xmm3, xmm6 + pmaddubsw xmm2, xmm7 + movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B + movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C + movq xmm4, MMWORD 
PTR [rax + rdx * 2 + 8] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E + + paddsw xmm2, [GLOBAL(rd)] + paddsw xmm2, xmm3 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + punpcklbw xmm5, xmm4 ;B D + punpcklbw xmm1, xmm0 ;C E + + pmaddubsw xmm1, xmm6 + pmaddubsw xmm5, xmm7 + + movdqa xmm4, [GLOBAL(rd)] + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm5, xmm1 + paddsw xmm5, xmm4 + psraw xmm5, 7 + packuswb xmm5, xmm5 + + punpcklqdq xmm2, xmm5 + + movdqa XMMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;out_pitch +%else + add rdi, r8 +%endif + dec rcx + jnz vp8_filter_block1d16_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_filter_block1d8_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d8_v6_ssse3) +sym(vp8_filter_block1d8_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + movsxd rdx, DWORD PTR arg(1) ;pixels_per_line + mov rdi, arg(2) ;output_ptr +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ; out_pitch +%endif + movsxd rcx, DWORD PTR arg(4) ;[output_height] + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d8_v4_ssse3 + + movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +vp8_filter_block1d8_v6_ssse3_loop: + movq xmm1, MMWORD PTR [rsi] ;A + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + movq xmm0, MMWORD PTR 
[rax + rdx * 4] ;F + movdqa xmm4, [GLOBAL(rd)] + + pmaddubsw xmm3, xmm6 + punpcklbw xmm1, xmm0 ;A F + pmaddubsw xmm2, xmm7 + pmaddubsw xmm1, xmm5 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm1 + paddsw xmm2, xmm4 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz vp8_filter_block1d8_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d8_v4_ssse3: + movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 + movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 + movdqa xmm5, [GLOBAL(rd)] + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +vp8_filter_block1d8_v4_ssse3_loop: + movq xmm2, MMWORD PTR [rsi + rdx] ;B + movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C + movq xmm4, MMWORD PTR [rax + rdx * 2] ;D + movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E + + punpcklbw xmm2, xmm4 ;B D + punpcklbw xmm3, xmm0 ;C E + + pmaddubsw xmm3, xmm6 + pmaddubsw xmm2, xmm7 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw xmm2, xmm3 + paddsw xmm2, xmm5 + psraw xmm2, 7 + packuswb xmm2, xmm2 + + movq MMWORD PTR [rdi], xmm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz vp8_filter_block1d8_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret +;void vp8_filter_block1d4_v6_ssse3 +;( +; unsigned char *src_ptr, +; unsigned int src_pitch, +; unsigned char *output_ptr, +; unsigned int out_pitch, +; unsigned int output_height, +; unsigned int vp8_filter_index +;) +global sym(vp8_filter_block1d4_v6_ssse3) +sym(vp8_filter_block1d4_v6_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + movsxd rdx, DWORD PTR arg(5) ;table index + xor rsi, rsi + shl rdx, 4 ; + + lea rax, [GLOBAL(k0_k5)] + add rax, rdx + + movsxd rdx, DWORD PTR arg(1) 
;pixels_per_line + mov rdi, arg(2) ;output_ptr +%if ABI_IS_32BIT=0 + movsxd r8, DWORD PTR arg(3) ; out_pitch +%endif + movsxd rcx, DWORD PTR arg(4) ;[output_height] + + cmp esi, DWORD PTR [rax] + je vp8_filter_block1d4_v4_ssse3 + + movq mm5, MMWORD PTR [rax] ;k0_k5 + movq mm6, MMWORD PTR [rax+256] ;k2_k4 + movq mm7, MMWORD PTR [rax+128] ;k1_k3 + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +vp8_filter_block1d4_v6_ssse3_loop: + movd mm1, DWORD PTR [rsi] ;A + movd mm2, DWORD PTR [rsi + rdx] ;B + movd mm3, DWORD PTR [rsi + rdx * 2] ;C + movd mm4, DWORD PTR [rax + rdx * 2] ;D + movd mm0, DWORD PTR [rsi + rdx * 4] ;E + + punpcklbw mm2, mm4 ;B D + punpcklbw mm3, mm0 ;C E + + movd mm0, DWORD PTR [rax + rdx * 4] ;F + + movq mm4, [GLOBAL(rd)] + + pmaddubsw mm3, mm6 + punpcklbw mm1, mm0 ;A F + pmaddubsw mm2, mm7 + pmaddubsw mm1, mm5 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw mm2, mm3 + paddsw mm2, mm1 + paddsw mm2, mm4 + psraw mm2, 7 + packuswb mm2, mm2 + + movd DWORD PTR [rdi], mm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz vp8_filter_block1d4_v6_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +vp8_filter_block1d4_v4_ssse3: + movq mm6, MMWORD PTR [rax+256] ;k2_k4 + movq mm7, MMWORD PTR [rax+128] ;k1_k3 + movq mm5, MMWORD PTR [GLOBAL(rd)] + + mov rsi, arg(0) ;src_ptr + + mov rax, rsi + add rax, rdx + +vp8_filter_block1d4_v4_ssse3_loop: + movd mm2, DWORD PTR [rsi + rdx] ;B + movd mm3, DWORD PTR [rsi + rdx * 2] ;C + movd mm4, DWORD PTR [rax + rdx * 2] ;D + movd mm0, DWORD PTR [rsi + rdx * 4] ;E + + punpcklbw mm2, mm4 ;B D + punpcklbw mm3, mm0 ;C E + + pmaddubsw mm3, mm6 + pmaddubsw mm2, mm7 + add rsi, rdx + add rax, rdx +;-- +;-- + paddsw mm2, mm3 + paddsw mm2, mm5 + psraw mm2, 7 + packuswb mm2, mm2 + + movd DWORD PTR [rdi], mm2 + +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(3) ;[out_pitch] +%else + add rdi, r8 +%endif + dec rcx + jnz 
vp8_filter_block1d4_v4_ssse3_loop + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_bilinear_predict16x16_ssse3 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict16x16_ssse3) +sym(vp8_bilinear_predict16x16_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] + movsxd rax, dword ptr arg(2) ; xoffset + + cmp rax, 0 ; skip first_pass filter if xoffset=0 + je b16x16_sp_only + + shl rax, 4 + lea rax, [rax + rcx] ; HFilter + + mov rdi, arg(4) ; dst_ptr + mov rsi, arg(0) ; src_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm1, [rax] + + movsxd rax, dword ptr arg(3) ; yoffset + + cmp rax, 0 ; skip second_pass filter if yoffset=0 + je b16x16_fp_only + + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rdx, dword ptr arg(1) ; src_pixels_per_line + + movdqa xmm2, [rax] + +%if ABI_IS_32BIT=0 + movsxd r8, dword ptr arg(5) ; dst_pitch +%endif + movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 + movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 + movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 + + movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 + + lea rsi, [rsi + rdx] ; next line + + pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 + + punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 + pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value + psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 + + movdqa xmm7, xmm3 + packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + +.next_row: + movq xmm6, [rsi] ; 00 01 02 
03 04 05 06 07 + movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm6, xmm5 + movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 + + movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 + lea rsi, [rsi + rdx] ; next line + + pmaddubsw xmm6, xmm1 + + punpcklbw xmm4, xmm5 + pmaddubsw xmm4, xmm1 + + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value + psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 + + paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value + psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 + + packuswb xmm6, xmm4 + movdqa xmm5, xmm7 + + punpcklbw xmm5, xmm6 + pmaddubsw xmm5, xmm2 + + punpckhbw xmm7, xmm6 + pmaddubsw xmm7, xmm2 + + paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value + psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 + + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value + psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 + + packuswb xmm5, xmm7 + movdqa xmm7, xmm6 + + movdqa [rdi], xmm5 ; store the results in the destination +%if ABI_IS_32BIT + add rdi, DWORD PTR arg(5) ; dst_pitch +%else + add rdi, r8 +%endif + + cmp rdi, rcx + jne .next_row + + jmp done + +b16x16_sp_only: + movsxd rax, dword ptr arg(3) ; yoffset + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + mov rdi, arg(4) ; dst_ptr + mov rsi, arg(0) ; src_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm1, [rax] ; VFilter + + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ; src_pixels_per_line + + ; get the first horizontal line done + movq xmm4, [rsi] ; load row 0 + movq xmm2, [rsi + 8] ; load row 0 + + lea rsi, [rsi + rax] ; next line +.next_row: + movq xmm3, [rsi] ; load row + 1 + movq xmm5, [rsi + 8] ; load row + 1 + + punpcklbw xmm4, xmm3 + punpcklbw xmm2, xmm5 + + pmaddubsw xmm4, xmm1 + movq xmm7, [rsi + rax] ; load row + 2 + + pmaddubsw xmm2, xmm1 + movq xmm6, [rsi + rax + 8] ; load row + 2 + + punpcklbw xmm3, xmm7 + punpcklbw xmm5, xmm6 + + pmaddubsw xmm3, xmm1 + paddw xmm4, [GLOBAL(rd)] + + pmaddubsw xmm5, xmm1 + paddw xmm2, [GLOBAL(rd)] + + psraw xmm4, VP8_FILTER_SHIFT + psraw 
xmm2, VP8_FILTER_SHIFT + + packuswb xmm4, xmm2 + paddw xmm3, [GLOBAL(rd)] + + movdqa [rdi], xmm4 ; store row 0 + paddw xmm5, [GLOBAL(rd)] + + psraw xmm3, VP8_FILTER_SHIFT + psraw xmm5, VP8_FILTER_SHIFT + + packuswb xmm3, xmm5 + movdqa xmm4, xmm7 + + movdqa [rdi + rdx],xmm3 ; store row 1 + lea rsi, [rsi + 2*rax] + + movdqa xmm2, xmm6 + lea rdi, [rdi + 2*rdx] + + cmp rdi, rcx + jne .next_row + + jmp done + +b16x16_fp_only: + lea rcx, [rdi+rdx*8] + lea rcx, [rcx+rdx*8] + movsxd rax, dword ptr arg(1) ; src_pixels_per_line + +.next_row: + movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 + movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 + + punpcklbw xmm2, xmm4 + movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 + + pmaddubsw xmm2, xmm1 + movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 + + lea rsi, [rsi + rax] ; next line + punpcklbw xmm3, xmm4 + + pmaddubsw xmm3, xmm1 + movq xmm5, [rsi] + + paddw xmm2, [GLOBAL(rd)] + movq xmm7, [rsi+1] + + movq xmm6, [rsi+8] + psraw xmm2, VP8_FILTER_SHIFT + + punpcklbw xmm5, xmm7 + movq xmm7, [rsi+9] + + paddw xmm3, [GLOBAL(rd)] + pmaddubsw xmm5, xmm1 + + psraw xmm3, VP8_FILTER_SHIFT + punpcklbw xmm6, xmm7 + + packuswb xmm2, xmm3 + pmaddubsw xmm6, xmm1 + + movdqa [rdi], xmm2 ; store the results in the destination + paddw xmm5, [GLOBAL(rd)] + + lea rdi, [rdi + rdx] ; dst_pitch + psraw xmm5, VP8_FILTER_SHIFT + + paddw xmm6, [GLOBAL(rd)] + psraw xmm6, VP8_FILTER_SHIFT + + packuswb xmm5, xmm6 + lea rsi, [rsi + rax] ; next line + + movdqa [rdi], xmm5 ; store the results in the destination + lea rdi, [rdi + rdx] ; dst_pitch + + cmp rdi, rcx + + jne .next_row + +done: + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +;void vp8_bilinear_predict8x8_ssse3 +;( +; unsigned char *src_ptr, +; int src_pixels_per_line, +; int xoffset, +; int yoffset, +; unsigned char *dst_ptr, +; int dst_pitch +;) +global sym(vp8_bilinear_predict8x8_ssse3) +sym(vp8_bilinear_predict8x8_ssse3): + push rbp + mov rbp, rsp + 
SHADOW_ARGS_TO_STACK 6 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + ALIGN_STACK 16, rax + sub rsp, 144 ; reserve 144 bytes + + lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] + + mov rsi, arg(0) ;src_ptr + movsxd rdx, dword ptr arg(1) ;src_pixels_per_line + + ;Read 9-line unaligned data in and put them on stack. This gives a big + ;performance boost. + movdqu xmm0, [rsi] + lea rax, [rdx + rdx*2] + movdqu xmm1, [rsi+rdx] + movdqu xmm2, [rsi+rdx*2] + add rsi, rax + movdqu xmm3, [rsi] + movdqu xmm4, [rsi+rdx] + movdqu xmm5, [rsi+rdx*2] + add rsi, rax + movdqu xmm6, [rsi] + movdqu xmm7, [rsi+rdx] + + movdqa XMMWORD PTR [rsp], xmm0 + + movdqu xmm0, [rsi+rdx*2] + + movdqa XMMWORD PTR [rsp+16], xmm1 + movdqa XMMWORD PTR [rsp+32], xmm2 + movdqa XMMWORD PTR [rsp+48], xmm3 + movdqa XMMWORD PTR [rsp+64], xmm4 + movdqa XMMWORD PTR [rsp+80], xmm5 + movdqa XMMWORD PTR [rsp+96], xmm6 + movdqa XMMWORD PTR [rsp+112], xmm7 + movdqa XMMWORD PTR [rsp+128], xmm0 + + movsxd rax, dword ptr arg(2) ; xoffset + cmp rax, 0 ; skip first_pass filter if xoffset=0 + je b8x8_sp_only + + shl rax, 4 + add rax, rcx ; HFilter + + mov rdi, arg(4) ; dst_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm0, [rax] + + movsxd rax, dword ptr arg(3) ; yoffset + cmp rax, 0 ; skip second_pass filter if yoffset=0 + je b8x8_fp_only + + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + lea rcx, [rdi+rdx*8] + + movdqa xmm1, [rax] + + ; get the first horizontal line done + movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx + + psrldq xmm5, 1 + lea rsp, [rsp + 16] ; next line + + punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 + pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 + + paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value + psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 + + movdqa xmm7, xmm3 + packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + +.next_row: + 
movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 + lea rsp, [rsp + 16] ; next line + + movdqa xmm5, xmm6 + + psrldq xmm5, 1 + + punpcklbw xmm6, xmm5 + pmaddubsw xmm6, xmm0 + + paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value + psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 + + packuswb xmm6, xmm6 + + punpcklbw xmm7, xmm6 + pmaddubsw xmm7, xmm1 + + paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value + psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 + + packuswb xmm7, xmm7 + + movq [rdi], xmm7 ; store the results in the destination + lea rdi, [rdi + rdx] + + movdqa xmm7, xmm6 + + cmp rdi, rcx + jne .next_row + + jmp done8x8 + +b8x8_sp_only: + movsxd rax, dword ptr arg(3) ; yoffset + shl rax, 4 + lea rax, [rax + rcx] ; VFilter + + mov rdi, arg(4) ;dst_ptr + movsxd rdx, dword ptr arg(5) ; dst_pitch + + movdqa xmm0, [rax] ; VFilter + + movq xmm1, XMMWORD PTR [rsp] + movq xmm2, XMMWORD PTR [rsp+16] + + movq xmm3, XMMWORD PTR [rsp+32] + punpcklbw xmm1, xmm2 + + movq xmm4, XMMWORD PTR [rsp+48] + punpcklbw xmm2, xmm3 + + movq xmm5, XMMWORD PTR [rsp+64] + punpcklbw xmm3, xmm4 + + movq xmm6, XMMWORD PTR [rsp+80] + punpcklbw xmm4, xmm5 + + movq xmm7, XMMWORD PTR [rsp+96] + punpcklbw xmm5, xmm6 + + pmaddubsw xmm1, xmm0 + pmaddubsw xmm2, xmm0 + + pmaddubsw xmm3, xmm0 + pmaddubsw xmm4, xmm0 + + pmaddubsw xmm5, xmm0 + punpcklbw xmm6, xmm7 + + pmaddubsw xmm6, xmm0 + paddw xmm1, [GLOBAL(rd)] + + paddw xmm2, [GLOBAL(rd)] + psraw xmm1, VP8_FILTER_SHIFT + + paddw xmm3, [GLOBAL(rd)] + psraw xmm2, VP8_FILTER_SHIFT + + paddw xmm4, [GLOBAL(rd)] + psraw xmm3, VP8_FILTER_SHIFT + + paddw xmm5, [GLOBAL(rd)] + psraw xmm4, VP8_FILTER_SHIFT + + paddw xmm6, [GLOBAL(rd)] + psraw xmm5, VP8_FILTER_SHIFT + + psraw xmm6, VP8_FILTER_SHIFT + packuswb xmm1, xmm1 + + packuswb xmm2, xmm2 + movq [rdi], xmm1 + + packuswb xmm3, xmm3 + movq [rdi+rdx], xmm2 + + packuswb xmm4, xmm4 + movq xmm1, XMMWORD PTR [rsp+112] + + lea rdi, [rdi + 2*rdx] + movq xmm2, XMMWORD PTR [rsp+128] + + packuswb xmm5, xmm5 + movq 
[rdi], xmm3 + + packuswb xmm6, xmm6 + movq [rdi+rdx], xmm4 + + lea rdi, [rdi + 2*rdx] + punpcklbw xmm7, xmm1 + + movq [rdi], xmm5 + pmaddubsw xmm7, xmm0 + + movq [rdi+rdx], xmm6 + punpcklbw xmm1, xmm2 + + pmaddubsw xmm1, xmm0 + paddw xmm7, [GLOBAL(rd)] + + psraw xmm7, VP8_FILTER_SHIFT + paddw xmm1, [GLOBAL(rd)] + + psraw xmm1, VP8_FILTER_SHIFT + packuswb xmm7, xmm7 + + packuswb xmm1, xmm1 + lea rdi, [rdi + 2*rdx] + + movq [rdi], xmm7 + + movq [rdi+rdx], xmm1 + lea rsp, [rsp + 144] + + jmp done8x8 + +b8x8_fp_only: + lea rcx, [rdi+rdx*8] + +.next_row: + movdqa xmm1, XMMWORD PTR [rsp] + movdqa xmm3, XMMWORD PTR [rsp+16] + + movdqa xmm2, xmm1 + movdqa xmm5, XMMWORD PTR [rsp+32] + + psrldq xmm2, 1 + movdqa xmm7, XMMWORD PTR [rsp+48] + + movdqa xmm4, xmm3 + psrldq xmm4, 1 + + movdqa xmm6, xmm5 + psrldq xmm6, 1 + + punpcklbw xmm1, xmm2 + pmaddubsw xmm1, xmm0 + + punpcklbw xmm3, xmm4 + pmaddubsw xmm3, xmm0 + + punpcklbw xmm5, xmm6 + pmaddubsw xmm5, xmm0 + + movdqa xmm2, xmm7 + psrldq xmm2, 1 + + punpcklbw xmm7, xmm2 + pmaddubsw xmm7, xmm0 + + paddw xmm1, [GLOBAL(rd)] + psraw xmm1, VP8_FILTER_SHIFT + + paddw xmm3, [GLOBAL(rd)] + psraw xmm3, VP8_FILTER_SHIFT + + paddw xmm5, [GLOBAL(rd)] + psraw xmm5, VP8_FILTER_SHIFT + + paddw xmm7, [GLOBAL(rd)] + psraw xmm7, VP8_FILTER_SHIFT + + packuswb xmm1, xmm1 + packuswb xmm3, xmm3 + + packuswb xmm5, xmm5 + movq [rdi], xmm1 + + packuswb xmm7, xmm7 + movq [rdi+rdx], xmm3 + + lea rdi, [rdi + 2*rdx] + movq [rdi], xmm5 + + lea rsp, [rsp + 4*16] + movq [rdi+rdx], xmm7 + + lea rdi, [rdi + 2*rdx] + cmp rdi, rcx + + jne .next_row + + lea rsp, [rsp + 16] + +done8x8: + ;add rsp, 144 + pop rsp + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +shuf1b: + db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 +shuf2b: + db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11 +shuf3b: + db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10 + +align 16 +shuf2bfrom1: + db 4, 8, 6, 1, 8, 
3, 1, 5, 3, 7, 5, 9, 7,11, 9,13 +align 16 +shuf3bfrom1: + db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11 + +align 16 +rd: + times 8 dw 0x40 + +align 16 +k0_k5: + times 8 db 0, 0 ;placeholder + times 8 db 0, 0 + times 8 db 2, 1 + times 8 db 0, 0 + times 8 db 3, 3 + times 8 db 0, 0 + times 8 db 1, 2 + times 8 db 0, 0 +k1_k3: + times 8 db 0, 0 ;placeholder + times 8 db -6, 12 + times 8 db -11, 36 + times 8 db -9, 50 + times 8 db -16, 77 + times 8 db -6, 93 + times 8 db -8, 108 + times 8 db -1, 123 +k2_k4: + times 8 db 128, 0 ;placeholder + times 8 db 123, -1 + times 8 db 108, -8 + times 8 db 93, -6 + times 8 db 77, -16 + times 8 db 50, -9 + times 8 db 36, -11 + times 8 db 12, -6 +align 16 +vp8_bilinear_filters_ssse3: + times 8 db 128, 0 + times 8 db 112, 16 + times 8 db 96, 32 + times 8 db 80, 48 + times 8 db 64, 64 + times 8 db 48, 80 + times 8 db 32, 96 + times 8 db 16, 112 + diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h index efa7b2e09..75991cc4f 100644 --- a/vp8/common/x86/subpixel_x86.h +++ b/vp8/common/x86/subpixel_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -85,4 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2); #endif #endif +#if HAVE_SSSE3 +extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3); +extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3); +extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3); +extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3); +extern prototype_subpixel_predict(vp8_bilinear_predict16x16_ssse3); +extern prototype_subpixel_predict(vp8_bilinear_predict8x8_ssse3); + +#if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_subpix_sixtap16x16 +#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3 + +#undef vp8_subpix_sixtap8x8 +#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3 + +#undef vp8_subpix_sixtap8x4 +#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3 + +#undef vp8_subpix_sixtap4x4 +#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3 + + +#undef vp8_subpix_bilinear16x16 +#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_ssse3 + +#undef vp8_subpix_bilinear8x8 +#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_ssse3 + +#endif +#endif + + + #endif diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c index 68454f709..8dd07c90d 100644 --- a/vp8/common/x86/vp8_asm_stubs.c +++ b/vp8/common/x86/vp8_asm_stubs.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -67,6 +68,17 @@ extern void vp8_filter_block1d8_v6_sse2 unsigned int output_width, const short *vp8_filter ); +extern void vp8_filter_block1d16_v6_sse2 +( + unsigned short *src_ptr, + unsigned char *output_ptr, + int dst_ptich, + unsigned int pixels_per_line, + unsigned int pixel_step, + unsigned int output_height, + unsigned int output_width, + const short *vp8_filter +); extern void vp8_unpack_block1d16_h6_sse2 ( unsigned char *src_ptr, @@ -75,31 +87,32 @@ extern void vp8_unpack_block1d16_h6_sse2 unsigned int output_height, unsigned int output_width ); -extern void vp8_unpack_block1d8_h6_sse2 +extern void vp8_filter_block1d8_h6_only_sse2 ( unsigned char *src_ptr, - unsigned short *output_ptr, unsigned int src_pixels_per_line, + unsigned char *output_ptr, + int dst_ptich, unsigned int output_height, - unsigned int output_width + const short *vp8_filter ); -extern void vp8_pack_block1d8_v6_sse2 +extern void vp8_filter_block1d16_h6_only_sse2 ( - unsigned short *src_ptr, + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + int dst_ptich, + unsigned int output_height, + const short *vp8_filter +); +extern void vp8_filter_block1d8_v6_only_sse2 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, unsigned char *output_ptr, int dst_ptich, - unsigned int pixels_per_line, - unsigned int output_height, - unsigned int output_width -); -extern void vp8_pack_block1d16_v6_sse2 -( - unsigned short *src_ptr, - unsigned char *output_ptr, - int dst_ptich, - unsigned int pixels_per_line, - unsigned int output_height, - unsigned int output_width + unsigned int output_height, + const short *vp8_filter ); extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx); @@ -115,7 +128,7 @@ void vp8_sixtap_predict4x4_mmx int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); // Temp data bufffer used in 
filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; HFilter = vp8_six_tap_mmx[xoffset]; vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); @@ -136,7 +149,7 @@ void vp8_sixtap_predict16x16_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -168,7 +181,7 @@ void vp8_sixtap_predict8x8_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -194,7 +207,7 @@ void vp8_sixtap_predict8x4_mmx ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; @@ -240,29 +253,32 @@ void vp8_sixtap_predict16x16_sse2 ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = 
vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter); + } } else { - vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); - } - - if (yoffset) - { + /* Second-pass only */ VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, 16, VFilter); - vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter); - } - else - { - vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16); + vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); + vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16 , 16, dst_pitch, VFilter); } } @@ -277,30 +293,31 @@ void vp8_sixtap_predict8x8_sse2 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter); + } } else { - vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 13, 16); - } - - if (yoffset) - { + /* Second-pass only */ VFilter = vp8_six_tap_mmx[yoffset]; - 
vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 8, dst_pitch, VFilter); + vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter); } - else - { - vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, dst_pitch); - } - - } @@ -314,29 +331,223 @@ void vp8_sixtap_predict8x4_sse2 int dst_pitch ) { - DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); // Temp data bufffer used in filtering + DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { - HFilter = vp8_six_tap_mmx[xoffset]; - vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); + if (yoffset) + { + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); + VFilter = vp8_six_tap_mmx[yoffset]; + vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter); + } + else + { + /* First-pass only */ + HFilter = vp8_six_tap_mmx[xoffset]; + vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter); + } } else { - vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 9, 16); - } - - if (yoffset) - { + /* Second-pass only */ VFilter = vp8_six_tap_mmx[yoffset]; - vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8 , 4, dst_pitch, VFilter); + vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter); + } +} + +#endif + +#if HAVE_SSSE3 + +extern void vp8_filter_block1d8_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void 
vp8_filter_block1d16_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d16_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d8_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d4_h6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pixels_per_line, + unsigned char *output_ptr, + unsigned int output_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +extern void vp8_filter_block1d4_v6_ssse3 +( + unsigned char *src_ptr, + unsigned int src_pitch, + unsigned char *output_ptr, + unsigned int out_pitch, + unsigned int output_height, + unsigned int vp8_filter_index +); + +void vp8_sixtap_predict16x16_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch + +) +{ + DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); + vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset); + } + else + { + /* First-pass only */ + vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); + } } else { - vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 4, dst_pitch); + /* Second-pass only */ + vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); } +} +void vp8_sixtap_predict8x8_ssse3 
+( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); + } + else + { + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); + } + } + else + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); + } +} + + +void vp8_sixtap_predict8x4_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); + vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); + } + else + { + /* First-pass only */ + vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { + /* Second-pass only */ + vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + } +} + +void vp8_sixtap_predict4x4_ssse3 +( + unsigned char *src_ptr, + int src_pixels_per_line, + int xoffset, + int yoffset, + unsigned char *dst_ptr, + int dst_pitch +) +{ + DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9); + + if (xoffset) + { + if (yoffset) + { + vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); + vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); + } + else + { + vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, 
dst_ptr, dst_pitch, 4, xoffset); + } + } + else + { + vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); + } } + #endif diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c index 5312e06da..38500fd01 100644 --- a/vp8/common/x86/x86_systemdependent.c +++ b/vp8/common/x86/x86_systemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -26,6 +27,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) int mmx_enabled = flags & HAS_MMX; int xmm_enabled = flags & HAS_SSE; int wmt_enabled = flags & HAS_SSE2; + int SSSE3Enabled = flags & HAS_SSSE3; /* Note: * @@ -41,7 +43,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) { rtcd->idct.idct1 = vp8_short_idct4x4llm_1_mmx; rtcd->idct.idct16 = vp8_short_idct4x4llm_mmx; - rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx; + rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx; rtcd->idct.iwalsh16 = vp8_short_inv_walsh4x4_mmx; rtcd->idct.iwalsh1 = vp8_short_inv_walsh4x4_1_mmx; @@ -72,7 +74,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) #if CONFIG_POSTPROC rtcd->postproc.down = vp8_mbpost_proc_down_mmx; - //rtcd->postproc.across = vp8_mbpost_proc_across_ip_c; + /*rtcd->postproc.across = vp8_mbpost_proc_across_ip_c;*/ rtcd->postproc.downacross = vp8_post_proc_down_and_across_mmx; rtcd->postproc.addnoise = vp8_plane_add_noise_mmx; #endif @@ -113,5 +115,19 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx) } #endif + +#if HAVE_SSSE3 + + if (SSSE3Enabled) + { + rtcd->subpix.sixtap16x16 = vp8_sixtap_predict16x16_ssse3; + rtcd->subpix.sixtap8x8 = vp8_sixtap_predict8x8_ssse3; + rtcd->subpix.sixtap8x4 = vp8_sixtap_predict8x4_ssse3; + rtcd->subpix.sixtap4x4 = vp8_sixtap_predict4x4_ssse3; + rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3; + rtcd->subpix.bilinear8x8 = vp8_bilinear_predict8x8_ssse3; + } +#endif + #endif } diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c new file mode 100644 index 000000000..e9741e286 --- /dev/null +++ b/vp8/decoder/arm/arm_dsystemdependent.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "blockd.h" +#include "pragmas.h" +#include "postproc.h" +#include "dboolhuff.h" +#include "dequantize.h" +#include "onyxd_int.h" + +void vp8_arch_arm_decode_init(VP8D_COMP *pbi) +{ +#if CONFIG_RUNTIME_CPU_DETECT + int flags = pbi->common.rtcd.flags; + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + +#if HAVE_ARMV6 + if (has_media) + { + pbi->dequant.block = vp8_dequantize_b_v6; + pbi->dequant.idct_add = vp8_dequant_idct_add_v6; + pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_v6; + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_v6; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_v6; +#if 0 /*For use with RTCD, when implemented*/ + pbi->dboolhuff.start = vp8dx_start_decode_c; + pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; + pbi->dboolhuff.debool = vp8dx_decode_bool_c; + pbi->dboolhuff.devalue = vp8dx_decode_value_c; +#endif + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + pbi->dequant.block = vp8_dequantize_b_neon; + pbi->dequant.idct_add = vp8_dequant_idct_add_neon; + /*This is not used: NEON always dequants two blocks at once. 
+ pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_neon;*/ + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_neon; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon; +#if 0 /*For use with RTCD, when implemented*/ + pbi->dboolhuff.start = vp8dx_start_decode_c; + pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; + pbi->dboolhuff.debool = vp8dx_decode_bool_c; + pbi->dboolhuff.devalue = vp8dx_decode_value_c; +#endif + } +#endif +#endif +} diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm index eb3f0307c..de3648ae2 100644 --- a/vp8/decoder/arm/armv5/dequantize_v5.asm +++ b/vp8/decoder/arm/armv5/dequantize_v5.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm index 143e33e46..6515804bb 100644 --- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm +++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm new file mode 100644 index 000000000..6bebda24f --- /dev/null +++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm @@ -0,0 +1,218 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + + EXPORT |vp8_dequant_dc_idct_add_v6| + + AREA |.text|, CODE, READONLY + +;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred, +; unsigned char *dest, int pitch, int stride, int Dc) +; r0 = input +; r1 = dq +; r2 = pred +; r3 = dest +; sp + 36 = pitch ; +4 = 40 +; sp + 40 = stride ; +4 = 44 +; sp + 44 = Dc ; +4 = 48 + + +|vp8_dequant_dc_idct_add_v6| PROC + stmdb sp!, {r4-r11, lr} + + ldr r6, [sp, #44] + + ldr r4, [r0] ;input + ldr r5, [r1], #4 ;dq + + sub sp, sp, #4 + str r3, [sp] + + smultt r7, r4, r5 + + ldr r4, [r0, #4] ;input + ldr r5, [r1], #4 ;dq + + strh r6, [r0], #2 + strh r7, [r0], #2 + + smulbb r6, r4, r5 + smultt r7, r4, r5 + + ldr r4, [r0, #4] ;input + ldr r5, [r1], #4 ;dq + + strh r6, [r0], #2 + strh r7, [r0], #2 + + mov r12, #3 + +vp8_dequant_dc_add_loop + smulbb r6, r4, r5 + smultt r7, r4, r5 + + ldr r4, [r0, #4] ;input + ldr r5, [r1], #4 ;dq + + strh r6, [r0], #2 + strh r7, [r0], #2 + + smulbb r6, r4, r5 + smultt r7, r4, r5 + + subs r12, r12, #1 + + ldrne r4, [r0, #4] + ldrne r5, [r1], #4 + + strh r6, [r0], #2 + strh r7, [r0], #2 + + bne vp8_dequant_dc_add_loop + + sub r0, r0, #32 + mov r1, r0 + +; short_idct4x4llm_v6_dual + ldr r3, cospi8sqrt2minus1 + ldr r4, sinpi8sqrt2 + ldr r6, [r0, #8] + mov r5, #2 +vp8_dequant_dc_idct_loop1_v6 + ldr r12, [r0, #24] + ldr r14, [r0, #16] + smulwt r9, r3, r6 + smulwb r7, r3, r6 + smulwt r10, r4, r6 + smulwb r8, r4, r6 + pkhbt r7, r7, r9, lsl #16 + smulwt r11, r3, r12 + pkhbt r8, r8, r10, lsl #16 + uadd16 r6, r6, r7 + smulwt r7, r4, r12 + smulwb r9, r3, r12 + smulwb r10, r4, r12 + subs r5, r5, #1 + pkhbt r9, r9, r11, lsl #16 + ldr r11, [r0], #4 + pkhbt r10, r10, r7, lsl #16 + uadd16 r7, r12, r9 + usub16 r7, r8, r7 + uadd16 r6, r6, r10 + uadd16 r10, r11, r14 + usub16 r8, r11, r14 + uadd16 r9, r10, r6 + usub16 r10, r10, r6 + uadd16 r6, r8, r7 + usub16 r7, r8, r7 + str r6, [r1, #8] + ldrne r6, [r0, #8] + str r7, [r1, #16] + str r10, [r1, #24] + str r9, [r1], #4 + bne 
vp8_dequant_dc_idct_loop1_v6 + + mov r5, #2 + sub r0, r1, #8 +vp8_dequant_dc_idct_loop2_v6 + ldr r6, [r0], #4 + ldr r7, [r0], #4 + ldr r8, [r0], #4 + ldr r9, [r0], #4 + smulwt r1, r3, r6 + smulwt r12, r4, r6 + smulwt lr, r3, r8 + smulwt r10, r4, r8 + pkhbt r11, r8, r6, lsl #16 + pkhbt r1, lr, r1, lsl #16 + pkhbt r12, r10, r12, lsl #16 + pkhtb r6, r6, r8, asr #16 + uadd16 r6, r1, r6 + pkhbt lr, r9, r7, lsl #16 + uadd16 r10, r11, lr + usub16 lr, r11, lr + pkhtb r8, r7, r9, asr #16 + subs r5, r5, #1 + smulwt r1, r3, r8 + smulwb r7, r3, r8 + smulwt r11, r4, r8 + smulwb r9, r4, r8 + pkhbt r1, r7, r1, lsl #16 + uadd16 r8, r1, r8 + pkhbt r11, r9, r11, lsl #16 + usub16 r1, r12, r8 + uadd16 r8, r11, r6 + ldr r9, c0x00040004 + ldr r12, [sp, #40] + uadd16 r6, r10, r8 + usub16 r7, r10, r8 + uadd16 r7, r7, r9 + uadd16 r6, r6, r9 + uadd16 r10, r14, r1 + usub16 r1, r14, r1 + uadd16 r10, r10, r9 + uadd16 r1, r1, r9 + ldr r11, [r2], r12 + mov r8, r7, asr #3 + pkhtb r9, r8, r10, asr #19 + mov r8, r1, asr #3 + pkhtb r8, r8, r6, asr #19 + uxtb16 lr, r11, ror #8 + qadd16 r9, r9, lr + uxtb16 lr, r11 + qadd16 r8, r8, lr + usat16 r9, #8, r9 + usat16 r8, #8, r8 + orr r9, r8, r9, lsl #8 + ldr r11, [r2], r12 + ldr lr, [sp] + ldr r12, [sp, #44] + mov r7, r7, lsl #16 + mov r1, r1, lsl #16 + mov r10, r10, lsl #16 + mov r6, r6, lsl #16 + mov r7, r7, asr #3 + pkhtb r7, r7, r10, asr #19 + mov r1, r1, asr #3 + pkhtb r1, r1, r6, asr #19 + uxtb16 r8, r11, ror #8 + qadd16 r7, r7, r8 + uxtb16 r8, r11 + qadd16 r1, r1, r8 + usat16 r7, #8, r7 + usat16 r1, #8, r1 + orr r1, r1, r7, lsl #8 + str r9, [lr], r12 + str r1, [lr], r12 + str lr, [sp] + bne vp8_dequant_dc_idct_loop2_v6 + +; vpx_memset + sub r0, r0, #32 + add sp, sp, #4 + + mov r12, #0 + str r12, [r0] + str r12, [r0, #4] + str r12, [r0, #8] + str r12, [r0, #12] + str r12, [r0, #16] + str r12, [r0, #20] + str r12, [r0, #24] + str r12, [r0, #28] + + ldmia sp!, {r4 - r11, pc} + ENDP ; |vp8_dequant_dc_idct_add_v6| + +; Constant Pool +cospi8sqrt2minus1 
DCD 0x00004E7B +sinpi8sqrt2 DCD 0x00008A8C +c0x00040004 DCD 0x00040004 + + END diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm new file mode 100644 index 000000000..47b671ca6 --- /dev/null +++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm @@ -0,0 +1,196 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + EXPORT |vp8_dequant_idct_add_v6| + + AREA |.text|, CODE, READONLY +;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred, +; unsigned char *dest, int pitch, int stride) +; r0 = input +; r1 = dq +; r2 = pred +; r3 = dest +; sp + 36 = pitch ; +4 = 40 +; sp + 40 = stride ; +4 = 44 + + +|vp8_dequant_idct_add_v6| PROC + stmdb sp!, {r4-r11, lr} + + ldr r4, [r0] ;input + ldr r5, [r1], #4 ;dq + + sub sp, sp, #4 + str r3, [sp] + + mov r12, #4 + +vp8_dequant_add_loop + smulbb r6, r4, r5 + smultt r7, r4, r5 + + ldr r4, [r0, #4] ;input + ldr r5, [r1], #4 ;dq + + strh r6, [r0], #2 + strh r7, [r0], #2 + + smulbb r6, r4, r5 + smultt r7, r4, r5 + + subs r12, r12, #1 + + ldrne r4, [r0, #4] + ldrne r5, [r1], #4 + + strh r6, [r0], #2 + strh r7, [r0], #2 + + bne vp8_dequant_add_loop + + sub r0, r0, #32 + mov r1, r0 + +; short_idct4x4llm_v6_dual + ldr r3, cospi8sqrt2minus1 + ldr r4, sinpi8sqrt2 + ldr r6, [r0, #8] + mov r5, #2 +vp8_dequant_idct_loop1_v6 + ldr r12, [r0, #24] + ldr r14, [r0, #16] + smulwt r9, r3, r6 + smulwb r7, r3, r6 + smulwt r10, r4, r6 + smulwb r8, r4, r6 + pkhbt r7, r7, r9, lsl #16 + smulwt r11, r3, r12 + pkhbt r8, r8, r10, lsl #16 + uadd16 r6, r6, r7 + smulwt r7, r4, r12 + smulwb r9, r3, r12 + smulwb r10, r4, r12 + subs r5, r5, #1 + pkhbt r9, r9, r11, lsl #16 + ldr r11, [r0], #4 + pkhbt r10, r10, r7, lsl #16 + uadd16 
r7, r12, r9 + usub16 r7, r8, r7 + uadd16 r6, r6, r10 + uadd16 r10, r11, r14 + usub16 r8, r11, r14 + uadd16 r9, r10, r6 + usub16 r10, r10, r6 + uadd16 r6, r8, r7 + usub16 r7, r8, r7 + str r6, [r1, #8] + ldrne r6, [r0, #8] + str r7, [r1, #16] + str r10, [r1, #24] + str r9, [r1], #4 + bne vp8_dequant_idct_loop1_v6 + + mov r5, #2 + sub r0, r1, #8 +vp8_dequant_idct_loop2_v6 + ldr r6, [r0], #4 + ldr r7, [r0], #4 + ldr r8, [r0], #4 + ldr r9, [r0], #4 + smulwt r1, r3, r6 + smulwt r12, r4, r6 + smulwt lr, r3, r8 + smulwt r10, r4, r8 + pkhbt r11, r8, r6, lsl #16 + pkhbt r1, lr, r1, lsl #16 + pkhbt r12, r10, r12, lsl #16 + pkhtb r6, r6, r8, asr #16 + uadd16 r6, r1, r6 + pkhbt lr, r9, r7, lsl #16 + uadd16 r10, r11, lr + usub16 lr, r11, lr + pkhtb r8, r7, r9, asr #16 + subs r5, r5, #1 + smulwt r1, r3, r8 + smulwb r7, r3, r8 + smulwt r11, r4, r8 + smulwb r9, r4, r8 + pkhbt r1, r7, r1, lsl #16 + uadd16 r8, r1, r8 + pkhbt r11, r9, r11, lsl #16 + usub16 r1, r12, r8 + uadd16 r8, r11, r6 + ldr r9, c0x00040004 + ldr r12, [sp, #40] + uadd16 r6, r10, r8 + usub16 r7, r10, r8 + uadd16 r7, r7, r9 + uadd16 r6, r6, r9 + uadd16 r10, r14, r1 + usub16 r1, r14, r1 + uadd16 r10, r10, r9 + uadd16 r1, r1, r9 + ldr r11, [r2], r12 + mov r8, r7, asr #3 + pkhtb r9, r8, r10, asr #19 + mov r8, r1, asr #3 + pkhtb r8, r8, r6, asr #19 + uxtb16 lr, r11, ror #8 + qadd16 r9, r9, lr + uxtb16 lr, r11 + qadd16 r8, r8, lr + usat16 r9, #8, r9 + usat16 r8, #8, r8 + orr r9, r8, r9, lsl #8 + ldr r11, [r2], r12 + ldr lr, [sp] + ldr r12, [sp, #44] + mov r7, r7, lsl #16 + mov r1, r1, lsl #16 + mov r10, r10, lsl #16 + mov r6, r6, lsl #16 + mov r7, r7, asr #3 + pkhtb r7, r7, r10, asr #19 + mov r1, r1, asr #3 + pkhtb r1, r1, r6, asr #19 + uxtb16 r8, r11, ror #8 + qadd16 r7, r7, r8 + uxtb16 r8, r11 + qadd16 r1, r1, r8 + usat16 r7, #8, r7 + usat16 r1, #8, r1 + orr r1, r1, r7, lsl #8 + str r9, [lr], r12 + str r1, [lr], r12 + str lr, [sp] + bne vp8_dequant_idct_loop2_v6 + +; vpx_memset + sub r0, r0, #32 + add sp, sp, #4 + + mov 
r12, #0 + str r12, [r0] + str r12, [r0, #4] + str r12, [r0, #8] + str r12, [r0, #12] + str r12, [r0, #16] + str r12, [r0, #20] + str r12, [r0, #24] + str r12, [r0, #28] + + ldmia sp!, {r4 - r11, pc} + ENDP ; |vp8_dequant_idct_add_v6| + +; Constant Pool +cospi8sqrt2minus1 DCD 0x00004E7B +sinpi8sqrt2 DCD 0x00008A8C +c0x00040004 DCD 0x00040004 + + END diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm deleted file mode 100644 index 3daa9b34f..000000000 --- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm +++ /dev/null @@ -1,202 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_dequant_dc_idct_v6| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code -;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc) -|vp8_dequant_dc_idct_v6| PROC - stmdb sp!, {r4-r11, lr} - - ldr r6, [sp, #36] ;load Dc - - ldr r4, [r0] ;input - ldr r5, [r1], #4 ;dq - - sub sp, sp, #4 - str r0, [sp] - - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - mov r12, #3 - -dequant_dc_idct_loop - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - subs r12, r12, #1 - - ldrne r4, [r0, #4] - ldrne r5, [r1], #4 - - strh r6, [r0], #2 - strh r7, [r0], #2 - - bne dequant_dc_idct_loop - - sub r0, r0, #32 - mov r1, r2 - mov r2, r3 - -; short_idct4x4llm_v6_dual - - mov r3, 
#0x00004E00 ; cos - orr r3, r3, #0x0000007B ; cospi8sqrt2minus1 - mov r4, #0x00008A00 ; sin - orr r4, r4, #0x0000008C ; sinpi8sqrt2 - mov r5, #0x2 ; i=2 i -loop1_dual_11 - ldr r6, [r0, #(4*2)] ; i5 | i4 5|4 - ldr r12, [r0, #(12*2)] ; i13 | i12 13|12 - ldr r14, [r0, #(8*2)] ; i9 | i8 9|8 - - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s - pkhbt r7, r7, r9, lsl #16 ; 5c | 4c - smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 13c - pkhbt r8, r8, r10, lsl #16 ; 5s | 4s - uadd16 r6, r6, r7 ; 5c+5 | 4c+4 - smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s - smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c - smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s - subs r5, r5, #0x1 ; i-- -- - pkhbt r9, r9, r11, lsl #16 ; 13c | 12c - ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0 - pkhbt r10, r10, r7, lsl #16 ; 13s | 12s - uadd16 r7, r12, r9 ; 13c+13 | 12c+12 - usub16 r7, r8, r7 ; c c - uadd16 r6, r6, r10 ; d d - uadd16 r10, r11, r14 ; a a - usub16 r8, r11, r14 ; b b - uadd16 r9, r10, r6 ; a+d a+d - usub16 r10, r10, r6 ; a-d a-d - uadd16 r6, r8, r7 ; b+c b+c - usub16 r7, r8, r7 ; b-c b-c - str r6, [r1, r2] ; o5 | o4 - add r6, r2, r2 ; pitch * 2 p2 - str r7, [r1, r6] ; o9 | o8 - add r6, r6, r2 ; pitch * 3 p3 - str r10, [r1, r6] ; o13 | o12 - str r9, [r1], #0x4 ; o1 | o0 ++ - bne loop1_dual_11 ; - mov r5, #0x2 ; i=2 i - sub r0, r1, #8 ; reset input/output i/o -loop2_dual_22 - ldr r6, [r0, r2] ; i5 | i4 5|4 - ldr r1, [r0] ; i1 | i0 1|0 - ldr r12, [r0, #0x4] ; i3 | i2 3|2 - add r14, r2, #0x4 ; pitch + 2 p+2 - ldr r14, [r0, r14] ; i7 | i6 7|6 - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s - pkhbt r11, r6, r1, lsl #16 
; i0 | i4 0|4 - pkhbt r7, r9, r7, lsl #16 ; 1c | 5c - pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1 - pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5 - uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2 - pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6 - uadd16 r10, r11, r9 ; a a - usub16 r9, r11, r9 ; b b - pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7 - subs r5, r5, #0x1 ; i-- -- - smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c - smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s - smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c - smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s - - pkhbt r7, r12, r7, lsl #16 ; 3c | 7c - pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1 - uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2 - usub16 r12, r8, r6 ; c (o1 | o5) c - uadd16 r6, r11, r1 ; d (o3 | o7) d - uadd16 r7, r10, r6 ; a+d a+d - mov r8, #0x4 ; set up 4's 4 - orr r8, r8, #0x40000 ; 4|4 - usub16 r6, r10, r6 ; a-d a-d - uadd16 r6, r6, r8 ; a-d+4 3|7 - uadd16 r7, r7, r8 ; a+d+4 0|4 - uadd16 r10, r9, r12 ; b+c b+c - usub16 r1, r9, r12 ; b-c b-c - uadd16 r10, r10, r8 ; b+c+4 1|5 - uadd16 r1, r1, r8 ; b-c+4 2|6 - mov r8, r10, asr #19 ; o1 >> 3 - strh r8, [r0, #2] ; o1 - mov r8, r1, asr #19 ; o2 >> 3 - strh r8, [r0, #4] ; o2 - mov r8, r6, asr #19 ; o3 >> 3 - strh r8, [r0, #6] ; o3 - mov r8, r7, asr #19 ; o0 >> 3 - strh r8, [r0], r2 ; o0 +p - sxth r10, r10 ; - mov r8, r10, asr #3 ; o5 >> 3 - strh r8, [r0, #2] ; o5 - sxth r1, r1 ; - mov r8, r1, asr #3 ; o6 >> 3 - strh r8, [r0, #4] ; o6 - sxth r6, r6 ; - mov r8, r6, asr #3 ; o7 >> 3 - strh r8, [r0, #6] ; o7 - sxth r7, r7 ; - mov r8, r7, asr #3 ; o4 >> 3 - strh r8, [r0], r2 ; o4 +p -;;;;; subs r5, r5, #0x1 ; i-- -- - bne loop2_dual_22 ; - - -;vpx_memset - ldr r0, [sp] - add sp, sp, #4 - - mov r12, #0 - str r12, [r0] - str r12, [r0, #4] - str r12, [r0, #8] - str r12, [r0, #12] - str r12, [r0, #16] - str r12, [r0, #20] - str r12, [r0, #24] - str r12, [r0, #28] - - ldmia sp!, {r4 - r11, pc} ; replace vars, return 
restore - - ENDP ;|vp8_dequant_dc_idct_v68| - - END diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm deleted file mode 100644 index 61bb48d04..000000000 --- a/vp8/decoder/arm/armv6/dequantidct_v6.asm +++ /dev/null @@ -1,183 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. -; - - - EXPORT |vp8_dequant_idct_v6| - ; ARM - ; REQUIRE8 - ; PRESERVE8 - - AREA |.text|, CODE, READONLY ; name this block of code -;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch) -|vp8_dequant_idct_v6| PROC - stmdb sp!, {r4-r11, lr} - - ldr r4, [r0] ;input - ldr r5, [r1], #4 ;dq - - sub sp, sp, #4 - str r0, [sp] - - mov r12, #4 - -dequant_idct_loop - smulbb r6, r4, r5 - smultt r7, r4, r5 - - ldr r4, [r0, #4] ;input - ldr r5, [r1], #4 ;dq - - strh r6, [r0], #2 - strh r7, [r0], #2 - - smulbb r6, r4, r5 - smultt r7, r4, r5 - - subs r12, r12, #1 - - ldrne r4, [r0, #4] - ldrne r5, [r1], #4 - - strh r6, [r0], #2 - strh r7, [r0], #2 - - bne dequant_idct_loop - - sub r0, r0, #32 - mov r1, r2 - mov r2, r3 - -; short_idct4x4llm_v6_dual - - mov r3, #0x00004E00 ; cos - orr r3, r3, #0x0000007B ; cospi8sqrt2minus1 - mov r4, #0x00008A00 ; sin - orr r4, r4, #0x0000008C ; sinpi8sqrt2 - mov r5, #0x2 ; i=2 i -loop1_dual_1 - ldr r6, [r0, #(4*2)] ; i5 | i4 5|4 - ldr r12, [r0, #(12*2)] ; i13 | i12 13|12 - ldr r14, [r0, #(8*2)] ; i9 | i8 9|8 - - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwb r7, r3, r6 ; (ip[4] * cospi8sqrt2minus1) >> 16 4c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwb r8, r4, r6 ; (ip[4] * sinpi8sqrt2) >> 16 4s - pkhbt r7, r7, r9, lsl #16 ; 5c | 4c - smulwt r11, r3, r12 ; (ip[13] * cospi8sqrt2minus1) >> 16 
13c - pkhbt r8, r8, r10, lsl #16 ; 5s | 4s - uadd16 r6, r6, r7 ; 5c+5 | 4c+4 - smulwt r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16 13s - smulwb r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16 12c - smulwb r10, r4, r12 ; (ip[12] * sinpi8sqrt2) >> 16 12s - subs r5, r5, #0x1 ; i-- -- - pkhbt r9, r9, r11, lsl #16 ; 13c | 12c - ldr r11, [r0], #0x4 ; i1 | i0 ++ 1|0 - pkhbt r10, r10, r7, lsl #16 ; 13s | 12s - uadd16 r7, r12, r9 ; 13c+13 | 12c+12 - usub16 r7, r8, r7 ; c c - uadd16 r6, r6, r10 ; d d - uadd16 r10, r11, r14 ; a a - usub16 r8, r11, r14 ; b b - uadd16 r9, r10, r6 ; a+d a+d - usub16 r10, r10, r6 ; a-d a-d - uadd16 r6, r8, r7 ; b+c b+c - usub16 r7, r8, r7 ; b-c b-c - str r6, [r1, r2] ; o5 | o4 - add r6, r2, r2 ; pitch * 2 p2 - str r7, [r1, r6] ; o9 | o8 - add r6, r6, r2 ; pitch * 3 p3 - str r10, [r1, r6] ; o13 | o12 - str r9, [r1], #0x4 ; o1 | o0 ++ - bne loop1_dual_1 ; - mov r5, #0x2 ; i=2 i - sub r0, r1, #8 ; reset input/output i/o -loop2_dual_2 - ldr r6, [r0, r2] ; i5 | i4 5|4 - ldr r1, [r0] ; i1 | i0 1|0 - ldr r12, [r0, #0x4] ; i3 | i2 3|2 - add r14, r2, #0x4 ; pitch + 2 p+2 - ldr r14, [r0, r14] ; i7 | i6 7|6 - smulwt r9, r3, r6 ; (ip[5] * cospi8sqrt2minus1) >> 16 5c - smulwt r7, r3, r1 ; (ip[1] * cospi8sqrt2minus1) >> 16 1c - smulwt r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16 5s - smulwt r8, r4, r1 ; (ip[1] * sinpi8sqrt2) >> 16 1s - pkhbt r11, r6, r1, lsl #16 ; i0 | i4 0|4 - pkhbt r7, r9, r7, lsl #16 ; 1c | 5c - pkhbt r8, r10, r8, lsl #16 ; 1s | 5s = temp1 © tc1 - pkhtb r1, r1, r6, asr #16 ; i1 | i5 1|5 - uadd16 r1, r7, r1 ; 1c+1 | 5c+5 = temp2 (d) td2 - pkhbt r9, r14, r12, lsl #16 ; i2 | i6 2|6 - uadd16 r10, r11, r9 ; a a - usub16 r9, r11, r9 ; b b - pkhtb r6, r12, r14, asr #16 ; i3 | i7 3|7 - subs r5, r5, #0x1 ; i-- -- - smulwt r7, r3, r6 ; (ip[3] * cospi8sqrt2minus1) >> 16 3c - smulwt r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16 3s - smulwb r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16 7c - smulwb r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16 7s - - pkhbt r7, 
r12, r7, lsl #16 ; 3c | 7c - pkhbt r11, r14, r11, lsl #16 ; 3s | 7s = temp1 (d) td1 - uadd16 r6, r7, r6 ; 3c+3 | 7c+7 = temp2 (c) tc2 - usub16 r12, r8, r6 ; c (o1 | o5) c - uadd16 r6, r11, r1 ; d (o3 | o7) d - uadd16 r7, r10, r6 ; a+d a+d - mov r8, #0x4 ; set up 4's 4 - orr r8, r8, #0x40000 ; 4|4 - usub16 r6, r10, r6 ; a-d a-d - uadd16 r6, r6, r8 ; a-d+4 3|7 - uadd16 r7, r7, r8 ; a+d+4 0|4 - uadd16 r10, r9, r12 ; b+c b+c - usub16 r1, r9, r12 ; b-c b-c - uadd16 r10, r10, r8 ; b+c+4 1|5 - uadd16 r1, r1, r8 ; b-c+4 2|6 - mov r8, r10, asr #19 ; o1 >> 3 - strh r8, [r0, #2] ; o1 - mov r8, r1, asr #19 ; o2 >> 3 - strh r8, [r0, #4] ; o2 - mov r8, r6, asr #19 ; o3 >> 3 - strh r8, [r0, #6] ; o3 - mov r8, r7, asr #19 ; o0 >> 3 - strh r8, [r0], r2 ; o0 +p - sxth r10, r10 ; - mov r8, r10, asr #3 ; o5 >> 3 - strh r8, [r0, #2] ; o5 - sxth r1, r1 ; - mov r8, r1, asr #3 ; o6 >> 3 - strh r8, [r0, #4] ; o6 - sxth r6, r6 ; - mov r8, r6, asr #3 ; o7 >> 3 - strh r8, [r0, #6] ; o7 - sxth r7, r7 ; - mov r8, r7, asr #3 ; o4 >> 3 - strh r8, [r0], r2 ; o4 +p -;;;;; subs r5, r5, #0x1 ; i-- -- - bne loop2_dual_2 ; - ; - -;vpx_memset - ldr r0, [sp] - add sp, sp, #4 - - mov r12, #0 - str r12, [r0] - str r12, [r0, #4] - str r12, [r0, #8] - str r12, [r0, #12] - str r12, [r0, #16] - str r12, [r0, #20] - str r12, [r0, #24] - str r12, [r0, #28] - - ldmia sp!, {r4 - r11, pc} ; replace vars, return restore - - ENDP ;|vp8_dequant_idct_v6| - - END diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm index 95e38594f..72f7e0ee5 100644 --- a/vp8/decoder/arm/armv6/dequantize_v6.asm +++ b/vp8/decoder/arm/armv6/dequantize_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c new file mode 100644 index 000000000..3c7bc502f --- /dev/null +++ b/vp8/decoder/arm/armv6/idct_blk_v6.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "vpx_ports/config.h" +#include "idct.h" +#include "dequantize.h" + +void vp8_dequant_dc_idct_add_y_block_v6 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (eobs[0] > 1) + vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]); + else + vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride); + + if (eobs[1] > 1) + vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]); + else + vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride); + + if (eobs[2] > 1) + vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]); + else + vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride); + + if (eobs[3] > 1) + vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]); + else + vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride); + + q += 64; + dc += 4; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_y_block_v6 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride); + else + { + vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride); + ((int *)q)[0] = 0; + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride); + else + { + vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride); + ((int *)(q+16))[0] = 0; + } + + if (eobs[2] > 1) + vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride); + else + { + vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride); + ((int *)(q+32))[0] = 0; + } + + if (eobs[3] > 1) + vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride); + else + { + vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride); + ((int *)(q+48))[0] = 0; + } + + q += 64; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void 
vp8_dequant_idct_add_uv_block_v6 + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride); + else + { + vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride); + ((int *)q)[0] = 0; + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride); + else + { + vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride); + ((int *)(q+16))[0] = 0; + } + + q += 32; + pre += 32; + dstu += 4*stride; + eobs += 2; + } + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride); + else + { + vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride); + ((int *)q)[0] = 0; + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride); + else + { + vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride); + ((int *)(q+16))[0] = 0; + } + + q += 32; + pre += 32; + dstv += 4*stride; + eobs += 2; + } +} diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h index 495004f9c..985951c7c 100644 --- a/vp8/decoder/arm/dboolhuff_arm.h +++ b/vp8/decoder/arm/dboolhuff_arm.h @@ -11,14 +11,11 @@ * to be useless. However, its been left (for now) * for reference. 
*/ -/* +#if 0 #if HAVE_ARMV6 #undef vp8_dbool_start #define vp8_dbool_start vp8dx_start_decode_v6 -#undef vp8_dbool_stop -#define vp8_dbool_stop vp8dx_stop_decode_v6 - #undef vp8_dbool_fill #define vp8_dbool_fill vp8_bool_decoder_fill_v6 @@ -27,15 +24,12 @@ #undef vp8_dbool_devalue #define vp8_dbool_devalue vp8_decode_value_v6 -#endif // HAVE_ARMV6 +#endif /* HAVE_ARMV6 */ #if HAVE_ARMV7 #undef vp8_dbool_start #define vp8_dbool_start vp8dx_start_decode_neon -#undef vp8_dbool_stop -#define vp8_dbool_stop vp8dx_stop_decode_neon - #undef vp8_dbool_fill #define vp8_dbool_fill vp8_bool_decoder_fill_neon @@ -44,6 +38,6 @@ #undef vp8_dbool_devalue #define vp8_dbool_devalue vp8_decode_value_neon -#endif // HAVE_ARMV7 -*/ -#endif // DBOOLHUFF_ARM_H +#endif /* HAVE_ARMV7 */ +#endif +#endif /* DBOOLHUFF_ARM_H */ diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c index 54006a921..b3e14b793 100644 --- a/vp8/decoder/arm/dequantize_arm.c +++ b/vp8/decoder/arm/dequantize_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -29,7 +30,7 @@ void vp8_dequantize_b_neon(BLOCKD *d) int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = &d->dequant[0][0]; + short *DQC = d->dequant; vp8_dequantize_b_loop_neon(Q, DQC, DQ); } @@ -41,7 +42,7 @@ void vp8_dequantize_b_v6(BLOCKD *d) int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = &d->dequant[0][0]; + short *DQC = d->dequant; vp8_dequantize_b_loop_v6(Q, DQC, DQ); } diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h index c8a61a4a7..b7d800d26 100644 --- a/vp8/decoder/arm/dequantize_arm.h +++ b/vp8/decoder/arm/dequantize_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -13,32 +14,60 @@ #if HAVE_ARMV6 extern prototype_dequant_block(vp8_dequantize_b_v6); -extern prototype_dequant_idct(vp8_dequant_idct_v6); -extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6); +extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6); +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6); +extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6); +extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6); +extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_v6 -#undef vp8_dequant_idct -#define vp8_dequant_idct vp8_dequant_idct_v6 +#undef vp8_dequant_idct_add +#define vp8_dequant_idct_add vp8_dequant_idct_add_v6 -#undef vp8_dequant_idct_dc -#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6 +#undef vp8_dequant_dc_idct_add +#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6 + +#undef vp8_dequant_dc_idct_add_y_block +#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6 + +#undef vp8_dequant_idct_add_y_block +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6 + +#undef vp8_dequant_idct_add_uv_block +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6 +#endif #endif #if HAVE_ARMV7 extern prototype_dequant_block(vp8_dequantize_b_neon); -extern prototype_dequant_idct(vp8_dequant_idct_neon); -extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon); +extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon); +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon); +extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon); +extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon); +extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_dequant_block #define vp8_dequant_block 
vp8_dequantize_b_neon -#undef vp8_dequant_idct -#define vp8_dequant_idct vp8_dequant_idct_neon +#undef vp8_dequant_idct_add +#define vp8_dequant_idct_add vp8_dequant_idct_add_neon -#undef vp8_dequant_idct_dc -#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon +#undef vp8_dequant_dc_idct_add +#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon + +#undef vp8_dequant_dc_idct_add_y_block +#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon + +#undef vp8_dequant_idct_add_y_block +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon + +#undef vp8_dequant_idct_add_uv_block +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon +#endif #endif #endif diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm new file mode 100644 index 000000000..45e068a9f --- /dev/null +++ b/vp8/decoder/arm/detokenize.asm @@ -0,0 +1,320 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |vp8_decode_mb_tokens_v6| + + AREA |.text|, CODE, READONLY ; name this block of code + + INCLUDE vpx_asm_offsets.asm + +l_qcoeff EQU 0 +l_i EQU 4 +l_type EQU 8 +l_stop EQU 12 +l_c EQU 16 +l_l_ptr EQU 20 +l_a_ptr EQU 24 +l_bc EQU 28 +l_coef_ptr EQU 32 +l_stacksize EQU 64 + + +;; constant offsets -- these should be created at build time +c_block2above_offset EQU 25 +c_entropy_nodes EQU 11 +c_dct_eob_token EQU 11 + +|vp8_decode_mb_tokens_v6| PROC + stmdb sp!, {r4 - r11, lr} + sub sp, sp, #l_stacksize + mov r7, r1 ; type + mov r9, r0 ; detoken + + ldr r1, [r9, #detok_current_bc] + ldr r0, [r9, #detok_qcoeff_start_ptr] + mov r11, #0 ; i + mov r3, #16 ; stop + + cmp r7, #1 ; type ?= 1 + addeq r11, r11, #24 ; i = 24 + addeq r3, r3, #8 ; stop = 24 + addeq r0, r0, #3, 24 ; qcoefptr += 24*16 + + str r0, [sp, #l_qcoeff] + str r11, [sp, #l_i] + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + str r1, [sp, #l_bc] + + add lr, r9, r7, lsl #2 ; detoken + type*4 + + ldr r8, [r1, #bool_decoder_user_buffer] + + ldr r10, [lr, #detok_coef_probs] + ldr r5, [r1, #bool_decoder_count] + ldr r6, [r1, #bool_decoder_range] + ldr r4, [r1, #bool_decoder_value] + + str r10, [sp, #l_coef_ptr] + +BLOCK_LOOP + ldr r3, [r9, #detok_ptr_block2leftabove] + ldr r1, [r9, #detok_L] + ldr r2, [r9, #detok_A] + ldrb r12, [r3, r11]! ; block2left[i] + ldrb r3, [r3, #c_block2above_offset]; block2above[i] + + cmp r7, #0 ; c = !type + moveq r7, #1 + movne r7, #0 + + ldrb r0, [r1, r12]! ; *(L += block2left[i]) + ldrb r3, [r2, r3]! 
; *(A += block2above[i]) + mov lr, #c_entropy_nodes ; ENTROPY_NODES = 11 + +; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0) + cmp r0, #0 ; *l ?= 0 + movne r0, #1 + cmp r3, #0 ; *a ?= 0 + addne r0, r0, #1 ; t + + str r1, [sp, #l_l_ptr] ; save &l + str r2, [sp, #l_a_ptr] ; save &a + smlabb r0, r0, lr, r10 ; Prob = coef_probs + (t * ENTROPY_NODES) + mov r1, #0 ; t = 0 + str r7, [sp, #l_c] + + ;align 4 +COEFF_LOOP + ldr r3, [r9, #detok_ptr_coef_bands_x] + ldr lr, [r9, #detok_coef_tree_ptr] + ;STALL + ldrb r3, [r3, r7] ; coef_bands_x[c] + ;STALL + ;STALL + add r0, r0, r3 ; Prob += coef_bands_x[c] + +get_token_loop + ldrb r2, [r0, +r1, asr #1] ; Prob[t >> 1] + mov r3, r6, lsl #8 ; range << 8 + sub r3, r3, #256 ; (range << 8) - (1 << 8) + mov r10, #1 ; 1 + + smlawb r2, r3, r2, r10 ; split = 1 + (((range-1) * probability) >> 8) + + ldrb r12, [r8] ; load cx data byte in stall slot : r8 = bufptr + ;++ + + subs r3, r4, r2, lsl #24 ; value-(split<<24): used later to calculate shift for NORMALIZE + addhs r1, r1, #1 ; t += 1 + movhs r4, r3 ; value -= bigsplit (split << 24) + subhs r2, r6, r2 ; range -= split + ; movlo r6, r2 ; range = split + + ldrsb r1, [lr, r1] ; t = onyx_coef_tree_ptr[t] + +; NORMALIZE + clz r3, r2 ; vp8dx_bitreader_norm[range] + 24 + sub r3, r3, #24 ; vp8dx_bitreader_norm[range] + subs r5, r5, r3 ; count -= shift + mov r6, r2, lsl r3 ; range <<= shift + mov r4, r4, lsl r3 ; value <<= shift + +; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16 + addle r5, r5, #8 ; count += 8 + rsble r3, r5, #24 ; 24 - count + addle r8, r8, #1 ; bufptr++ + orrle r4, r4, r12, lsl r3 ; value |= *bufptr << shift + 16 + + cmp r1, #0 ; t ?= 0 + bgt get_token_loop ; while (t > 0) + + cmn r1, #c_dct_eob_token ; if(t == -DCT_EOB_TOKEN) + beq END_OF_BLOCK ; break + + rsb lr, r1, #0 ; v = -t; + + cmp lr, #4 ; if(v > FOUR_TOKEN) + ble SKIP_EXTRABITS + + ldr r3, [r9, #detok_teb_base_ptr] + mov 
r11, #1 ; 1 in split = 1 + ... nope, v+= 1 << bits_count + add r7, r3, lr, lsl #4 ; detok_teb_base_ptr + (v << 4) + + ldrsh lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val + ldrsh r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length + +extrabits_loop + add r3, r0, r7 ; &teb_ptr->Probs[bits_count] + + ldrb r2, [r3, #4] ; probability. why +4? + mov r3, r6, lsl #8 ; range << 8 + sub r3, r3, #256 ; range << 8 + 1 << 8 + + smlawb r2, r3, r2, r11 ; split = 1 + (((range-1) * probability) >> 8) + + ldrb r12, [r8] ; *bufptr + ;++ + + subs r10, r4, r2, lsl #24 ; value - (split<<24) + movhs r4, r10 ; value = value - (split << 24) + subhs r2, r6, r2 ; range = range - split + addhs lr, lr, r11, lsl r0 ; v += ((UINT16)1<> 1 + + subs r3, r4, r2, lsl #24 ; value - (split<<24) + movhs r4, r3 ; value -= (split << 24) + subhs r2, r6, r2 ; range -= split + mvnhs r3, lr ; -v + addhs lr, r3, #1 ; v = (v ^ -1) + 1 + +; NORMALIZE + clz r3, r2 ; leading 0s in split + sub r3, r3, #24 ; shift + subs r5, r5, r3 ; count -= shift + mov r6, r2, lsl r3 ; range <<= shift + mov r4, r4, lsl r3 ; value <<= shift + ldrleb r2, [r8], #1 ; *(bufptr++) + addle r5, r5, #8 ; count += 8 + rsble r3, r5, #24 ; BR_COUNT - count + orrle r4, r4, r2, lsl r3 ; value |= *bufptr << (BR_COUNT - count) + + add r0, r0, #11 ; Prob += ENTROPY_NODES (11) + + cmn r1, #1 ; t < -ONE_TOKEN + + addlt r0, r0, #11 ; Prob += ENTROPY_NODES (11) + + mvn r1, #1 ; t = -1 ???? 
C is -2 + +SKIP_EOB_CHECK + ldr r7, [sp, #l_c] ; c + ldr r3, [r9, #detok_scan] + add r1, r1, #2 ; t+= 2 + cmp r7, #15 ; c should will be one higher + + ldr r3, [r3, +r7, lsl #2] ; scan[c] this needs pre-inc c value + add r7, r7, #1 ; c++ + add r3, r11, r3, lsl #1 ; qcoeff + scan[c] + + str r7, [sp, #l_c] ; store c + strh lr, [r3] ; qcoef_ptr[scan[c]] = v + + blt COEFF_LOOP + + sub r7, r7, #1 ; if(t != -DCT_EOB_TOKEN) --c + +END_OF_BLOCK + ldr r3, [sp, #l_type] ; type + ldr r10, [sp, #l_coef_ptr] ; coef_ptr + ldr r0, [sp, #l_qcoeff] ; qcoeff + ldr r11, [sp, #l_i] ; i + ldr r12, [sp, #l_stop] ; stop + + cmp r3, #0 ; type ?= 0 + moveq r1, #1 + movne r1, #0 + add r3, r11, r9 ; detok + i + + cmp r7, r1 ; c ?= !type + strb r7, [r3, #detok_eob] ; eob[i] = c + + ldr r7, [sp, #l_l_ptr] ; l + ldr r2, [sp, #l_a_ptr] ; a + movne r3, #1 ; t + moveq r3, #0 + + add r0, r0, #32 ; qcoeff += 32 (16 * 2?) + add r11, r11, #1 ; i++ + strb r3, [r7] ; *l = t + strb r3, [r2] ; *a = t + str r0, [sp, #l_qcoeff] ; qcoeff + str r11, [sp, #l_i] ; i + + cmp r11, r12 ; i < stop + ldr r7, [sp, #l_type] ; type + + blt BLOCK_LOOP + + cmp r11, #25 ; i ?= 25 + bne ln2_decode_mb_to + + ldr r12, [r9, #detok_qcoeff_start_ptr] + ldr r10, [r9, #detok_coef_probs] + mov r7, #0 ; type/i = 0 + mov r3, #16 ; stop = 16 + str r12, [sp, #l_qcoeff] ; qcoeff_ptr = qcoeff_start_ptr + str r7, [sp, #l_i] + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + + str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type=0] + + b BLOCK_LOOP + +ln2_decode_mb_to + cmp r11, #16 ; i ?= 16 + bne ln1_decode_mb_to + + mov r10, #detok_coef_probs + add r10, r10, #2*4 ; coef_probs[type] + ldr r10, [r9, r10] ; detok + detok_coef_probs[type] + + mov r7, #2 ; type = 2 + mov r3, #24 ; stop = 24 + + str r7, [sp, #l_type] + str r3, [sp, #l_stop] + + str r10, [sp, #l_coef_ptr] ; coef_probs = coef_probs[type] + b BLOCK_LOOP + +ln1_decode_mb_to + ldr r2, [sp, #l_bc] + mov r0, #0 + nop + + str r8, [r2, #bool_decoder_user_buffer] + str r5, [r2, 
#bool_decoder_count] + str r4, [r2, #bool_decoder_value] + str r6, [r2, #bool_decoder_range] + + add sp, sp, #l_stacksize + ldmia sp!, {r4 - r11, pc} + + ENDP ; |vp8_decode_mb_tokens_v6| + + END diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h new file mode 100644 index 000000000..9bb19b6cf --- /dev/null +++ b/vp8/decoder/arm/detokenize_arm.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef DETOKENIZE_ARM_H +#define DETOKENIZE_ARM_H + +#if HAVE_ARMV6 +#if CONFIG_ARM_ASM_DETOK +void vp8_init_detokenizer(VP8D_COMP *dx); +void vp8_decode_mb_tokens_v6(DETOK *detoken, int type); +#endif +#endif + +#endif diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c deleted file mode 100644 index c714452a6..000000000 --- a/vp8/decoder/arm/detokenizearm_sjl.c +++ /dev/null @@ -1,730 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "type_aliases.h" -#include "blockd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "vpx_ports/mem.h" - -#define BR_COUNT 8 -#define BOOL_DATA UINT8 - -#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES -//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X}; -DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X}; - -#define EOB_CONTEXT_NODE 0 -#define ZERO_CONTEXT_NODE 1 -#define ONE_CONTEXT_NODE 2 -#define LOW_VAL_CONTEXT_NODE 3 -#define TWO_CONTEXT_NODE 4 -#define THREE_CONTEXT_NODE 5 -#define HIGH_LOW_CONTEXT_NODE 6 -#define CAT_ONE_CONTEXT_NODE 7 -#define CAT_THREEFOUR_CONTEXT_NODE 8 -#define CAT_THREE_CONTEXT_NODE 9 -#define CAT_FIVE_CONTEXT_NODE 10 - - - - -DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) = -{ - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN - { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN - { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN - { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN - { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN - { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1 - { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2 - { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3 - { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4 - { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY5 - { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6 - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN -}; - -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left - 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above -}; - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -void vp8_reset_mb_tokens_context(MACROBLOCKD *x) -{ - ENTROPY_CONTEXT **const A = x->above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; - - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int i; - - for (i = 0; i < 24; i++) - { - - a = A[ vp8_block2context[i] ] + vp8_block2above[i]; - l = L[ vp8_block2context[i] ] + vp8_block2left[i]; - - *a = *l = 0; - } - - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) - { - a = A[Y2CONTEXT] + vp8_block2above[24]; - l = L[Y2CONTEXT] + vp8_block2left[24]; - *a = *l = 0; - } - - -} - -#define ONYXBLOCK2CONTEXT_OFFSET 0 -#define ONYXBLOCK2LEFT_OFFSET 25 -#define ONYXBLOCK2ABOVE_OFFSET 50 - -DECLARE_ALIGNED(16, const static unsigned char, norm[128]) = -{ - 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 -}; - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -void init_detokenizer(VP8D_COMP *dx) -{ - const 
VP8_COMMON *const oc = & dx->common; - MACROBLOCKD *x = & dx->mb; - - dx->detoken.norm_ptr = (unsigned char *)norm; - dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree; - dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove; - dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x; - dx->detoken.scan = (int *)vp8_default_zig_zag1d; - dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2; - - dx->detoken.qcoeff_start_ptr = &x->qcoeff[0]; - - - dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]); - dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]); - dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]); - dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]); - -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - - -//shift = norm[range]; \ -// shift = norm_ptr[range]; \ - -#define NORMALIZE \ - /*if(range < 0x80)*/ \ - { \ - shift = detoken->norm_ptr[range]; \ - range <<= shift; \ - value <<= shift; \ - count -= shift; \ - if(count <= 0) \ - { \ - count += BR_COUNT ; \ - value |= (*bufptr) << (BR_COUNT-count); \ - bufptr++; \ - } \ - } -#if 1 -#define DECODE_AND_APPLYSIGN(value_to_sign) \ - split = (range + 1) >> 1; \ - if ( (value >> 24) < split ) \ - { \ - range = split; \ - v= value_to_sign; \ - } \ - else \ - { \ - range = range-split; \ - value = value-(split<<24); \ - v = -value_to_sign; \ - } \ - range +=range; \ - value +=value; \ - if (!--count) \ - { \ - count = BR_COUNT; \ - value |= *bufptr; \ - bufptr++; \ - } - -#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \ - { \ - split = 1 + ((( probability*(range-1) ) )>> 8); \ - if ( (value >> 24) < split ) \ - { \ - range = split; \ - NORMALIZE \ - goto branch; \ - } \ - value -= (split<<24); \ - range = range - split; \ - NORMALIZE \ - } - 
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \ - { \ - split = 1 + ((( probability*(range-1) ) ) >> 8); \ - if ( (value >> 24) < split ) \ - { \ - range = split; \ - NORMALIZE \ - Prob = coef_probs; \ - ++c; \ - Prob += vp8_coef_bands_x[c]; \ - goto branch; \ - } \ - value -= (split<<24); \ - range = range - split; \ - NORMALIZE \ - } - -#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \ - DECODE_AND_APPLYSIGN(val) \ - Prob = coef_probs + (ENTROPY_NODES*2); \ - if(c < 15){\ - qcoeff_ptr [ scan[c] ] = (INT16) v; \ - ++c; \ - goto DO_WHILE; }\ - qcoeff_ptr [ scan[15] ] = (INT16) v; \ - goto BLOCK_FINISHED; - - -#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\ - split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \ - if(value >= (split<<24))\ - {\ - range = range-split;\ - value = value-(split<<24);\ - val += ((UINT16)1<above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; - const VP8_COMMON *const oc = & dx->common; - - BOOL_DECODER *bc = x->current_bc; - - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int i; - - int eobtotal = 0; - - register int count; - - BOOL_DATA *bufptr; - register unsigned int range; - register unsigned int value; - const int *scan; - register unsigned int shift; - UINT32 split; - INT16 *qcoeff_ptr; - - UINT8 *coef_probs; - int type; - int stop; - INT16 val, bits_count; - INT16 c; - INT16 t; - INT16 v; - vp8_prob *Prob; - - //int *scan; - type = 3; - i = 0; - stop = 16; - - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) - { - i = 24; - stop = 24; - type = 1; - qcoeff_ptr = &x->qcoeff[24*16]; - scan = vp8_default_zig_zag1d; - eobtotal -= 16; - } - else - { - scan = vp8_default_zig_zag1d; - qcoeff_ptr = &x->qcoeff[0]; - } - - count = bc->count; - range = bc->range; - value = bc->value; - bufptr = &bc->buffer[bc->pos]; - - - coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]); - -BLOCK_LOOP: - a = A[ vp8_block2context[i] ] + vp8_block2above[i]; - l = L[ 
vp8_block2context[i] ] + vp8_block2left[i]; - c = (INT16)(!type); - - VP8_COMBINEENTROPYCONTEXTS(t, *a, *l); - Prob = coef_probs; - Prob += t * ENTROPY_NODES; - -DO_WHILE: - Prob += vp8_coef_bands_x[c]; - DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED); - -CHECK_0_: - DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_); - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val; - bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length; - - do - { - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count); - bits_count -- ; - } - while (bits_count >= 0); - - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_FIVE_CONTEXT_NODE_0_: - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_THREEFOUR_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_); - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_THREE_CONTEXT_NODE_0_: - val = 
vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -HIGH_LOW_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_); - - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1); - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -CAT_ONE_CONTEXT_NODE_0_: - val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val; - DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val); - -LOW_VAL_CONTEXT_NODE_0_: - DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_); - DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_); - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4); - -THREE_CONTEXT_NODE_0_: - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3); - -TWO_CONTEXT_NODE_0_: - DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2); - -ONE_CONTEXT_NODE_0_: - DECODE_AND_APPLYSIGN(1); - Prob = coef_probs + ENTROPY_NODES; - - if (c < 15) - { - qcoeff_ptr [ scan[c] ] = (INT16) v; - ++c; - goto DO_WHILE; - } - - qcoeff_ptr [ scan[15] ] = (INT16) v; -BLOCK_FINISHED: - t = ((x->Block[i].eob = c) != !type); // any nonzero data? 
- eobtotal += x->Block[i].eob; - *a = *l = t; - qcoeff_ptr += 16; - - i++; - - if (i < stop) - goto BLOCK_LOOP; - - if (i == 25) - { - scan = vp8_default_zig_zag1d;//x->scan_order1d; - type = 0; - i = 0; - stop = 16; - coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]); - qcoeff_ptr = &x->qcoeff[0]; - goto BLOCK_LOOP; - } - - if (i == 16) - { - type = 2; - coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]); - stop = 24; - goto BLOCK_LOOP; - } - - bc->count = count; - bc->value = value; - bc->range = range; - bc->pos = bufptr - bc->buffer; - return eobtotal; - -} -//#endif -#else -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -#if 0 -//uses relative offsets - -const vp8_tree_index vp8_coef_tree_x[ 22] = /* corresponding _CONTEXT_NODEs */ -{ - -DCT_EOB_TOKEN, 1, /* 0 = EOB */ - -ZERO_TOKEN, 1, /* 1 = ZERO */ - -ONE_TOKEN, 1, /* 2 = ONE */ - 2, 5, /* 3 = LOW_VAL */ - -TWO_TOKEN, 1, /* 4 = TWO */ - -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ - 2, 3, /* 6 = HIGH_LOW */ - -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ - 2, 3, /* 8 = CAT_THREEFOUR */ - -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ - -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ -}; -#endif - -#define _SCALEDOWN 8 //16 //8 - -int vp8_decode_mb_tokens_v5(DETOK *detoken, int type); - -int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type) -{ - BOOL_DECODER *bc = detoken->current_bc; - - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int i; - - register int count; - - BOOL_DATA *bufptr; - register unsigned int range; - register unsigned int value; - register unsigned int shift; - UINT32 split; - INT16 *qcoeff_ptr; - - UINT8 *coef_probs; -// int type; - int stop; - INT16 c; - INT16 t; - INT16 v; - 
vp8_prob *Prob; - - - -// type = 3; - i = 0; - stop = 16; - qcoeff_ptr = detoken->qcoeff_start_ptr; - -// if( detoken->mode != B_PRED && detoken->mode != SPLITMV) - if (type == 1) - { - i += 24; - stop += 8; //24; -// type = 1; - qcoeff_ptr += 24 * 16; -// eobtotal-=16; - } - - count = bc->count; - range = bc->range; - value = bc->value; - bufptr = &bc->buffer[bc->pos]; - - - coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]); - -BLOCK_LOOP: - a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ]; - l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ]; - c = !type; - a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET]; - l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET]; - - //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \ - //Dest = ((A)!=0) + ((B)!=0); - - VP8_COMBINEENTROPYCONTEXTS(t, *a, *l); - - Prob = coef_probs; - Prob += t * ENTROPY_NODES; - t = 0; - - do - { - - { -// onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x; - - Prob += detoken->ptr_onyx_coef_bands_x[c]; - - GET_TOKEN_START: - - do - { - split = 1 + (((range - 1) * (Prob[t>>1])) >> 8); - - if (value >> 24 >= split) - { - range = range - split; - value = value - (split << 24); - t += 1; - - //used to eliminate else branch - split = range; - } - - range = split; - - t = detoken->vp8_coef_tree_ptr[ t ]; - - NORMALIZE - - } - while (t > 0) ; - } - GET_TOKEN_STOP: - - if (t == -DCT_EOB_TOKEN) - { - break; - } - - v = -t; - - if (v > FOUR_TOKEN) - { - INT16 bits_count; - TOKENEXTRABITS *teb_ptr; - -// teb_ptr = &onyxd_token_extra_bits2[t]; -// teb_ptr = &onyxd_token_extra_bits2[v]; - teb_ptr = &detoken->teb_base_ptr[v]; - - - v = teb_ptr->min_val; - bits_count = teb_ptr->Length; - - do - { - split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN); - - if ((value >> 24) >= split) - { - range = range - split; - value = value - (split << 24); - v += ((UINT16)1 << 
bits_count); - - //used to eliminate else branch - split = range; - } - - range = split; - - NORMALIZE - - bits_count -- ; - } - while (bits_count >= 0); - } - - Prob = coef_probs; - - if (t) - { - split = 1 + (((range - 1) * vp8_prob_half) >> 8); - - if ((value >> 24) >= split) - { - range = range - split; - value = value - (split << 24); - v = (v ^ -1) + 1; /* negate w/out conditionals */ - - //used to eliminate else branch - split = range; - } - - range = split; - - NORMALIZE - Prob += ENTROPY_NODES; - - if (t < -ONE_TOKEN) - Prob += ENTROPY_NODES; - - t = -2; - } - - //if t is zero, we will skip the eob table check - t += 2; - qcoeff_ptr [detoken->scan [c] ] = (INT16) v; - - } - while (++c < 16); - - if (t != -DCT_EOB_TOKEN) - { - --c; - } - - t = ((detoken->eob[i] = c) != !type); // any nonzero data? -// eobtotal += detoken->eob[i]; - *a = *l = t; - qcoeff_ptr += 16; - - i++; - - if (i < stop) - goto BLOCK_LOOP; - - if (i == 25) - { - type = 0; - i = 0; - stop = 16; -// coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]); - coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]); - qcoeff_ptr = detoken->qcoeff_start_ptr; - goto BLOCK_LOOP; - } - - if (i == 16) - { - type = 2; -// coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]); - coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]); - stop = 24; - goto BLOCK_LOOP; - } - - bc->count = count; - bc->value = value; - bc->range = range; - bc->pos = bufptr - bc->buffer; - return 0; -} -//#if 0 -int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) -{ -// const ONYX_COMMON * const oc = & dx->common; - int eobtotal = 0; - int i, type; - /* - dx->detoken.norm_ptr = norm; - dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree; - dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE; - dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x; - dx->detoken.scan = default_zig_zag1d; - 
dx->detoken.teb_base_ptr = onyxd_token_extra_bits2; - - dx->detoken.qcoeff_start_ptr = &x->qcoeff[0]; - - dx->detoken.A = x->above_context; - dx->detoken.L = x->left_context; - - dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]); - dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]); - dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]); - dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]); - */ - - dx->detoken.current_bc = x->current_bc; - dx->detoken.A = x->above_context; - dx->detoken.L = x->left_context; - - type = 3; - - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) - { - type = 1; - eobtotal -= 16; - } - - vp8_decode_mb_tokens_v5(&dx->detoken, type); - - for (i = 0; i < 25; i++) - { - x->Block[i].eob = dx->detoken.eob[i]; - eobtotal += dx->detoken.eob[i]; - } - - return eobtotal; -} -#endif diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm deleted file mode 100644 index 4d87ee5bd..000000000 --- a/vp8/decoder/arm/detokenizearm_v6.asm +++ /dev/null @@ -1,364 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp8_decode_mb_tokens_v5| - - AREA |.text|, CODE, READONLY ; name this block of code - - INCLUDE vpx_asm_offsets.asm - -l_qcoeff EQU 0 -l_i EQU 4 -l_type EQU 8 -l_stop EQU 12 -l_c EQU 16 -l_l_ptr EQU 20 -l_a_ptr EQU 24 -l_bc EQU 28 -l_coef_ptr EQU 32 -l_stacksize EQU 64 - - -;; constant offsets -- these should be created at build time -c_onyxblock2left_offset EQU 25 -c_onyxblock2above_offset EQU 50 -c_entropy_nodes EQU 11 -c_dct_eob_token EQU 11 - -|vp8_decode_mb_tokens_v5| PROC - stmdb sp!, {r4 - r11, lr} - sub sp, sp, #l_stacksize - mov r7, r1 - mov r9, r0 ;DETOK *detoken - - ldr r1, [r9, #detok_current_bc] - ldr r0, [r9, #detok_qcoeff_start_ptr] - mov r11, #0 - mov r3, #0x10 - - cmp r7, #1 - addeq r11, r11, #24 - addeq r3, r3, #8 - addeq r0, r0, #3, 24 - - str r0, [sp, #l_qcoeff] - str r11, [sp, #l_i] - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - str r1, [sp, #l_bc] - - add lr, r9, r7, lsl #2 - - ldr r2, [r1, #bool_decoder_buffer] - ldr r3, [r1, #bool_decoder_pos] - - ldr r10, [lr, #detok_coef_probs] - ldr r5, [r1, #bool_decoder_count] - ldr r6, [r1, #bool_decoder_range] - ldr r4, [r1, #bool_decoder_value] - add r8, r2, r3 - - str r10, [sp, #l_coef_ptr] - - - ;align 4 -BLOCK_LOOP - ldr r3, [r9, #detok_ptr_onyxblock2context_leftabove] - ldr r2, [r9, #DETOK_A] - ldr r1, [r9, #DETOK_L] - ldrb r12, [r3, +r11] ; detoken->ptr_onyxblock2context_leftabove[i] - - cmp r7, #0 ; check type - moveq r7, #1 - movne r7, #0 - - ldr r0, [r2, +r12, lsl #2] ; a - add r1, r1, r12, lsl #4 - add r3, r3, r11 - - ldrb r2, [r3, #c_onyxblock2above_offset] - ldrb r3, [r3, #c_onyxblock2left_offset] - mov lr, #c_entropy_nodes -;; ;++ - - ldr r2, [r0, +r2, lsl #2]! 
- add r3, r1, r3, lsl #2 - str r3, [sp, #l_l_ptr] - ldr r3, [r3] - - cmp r2, #0 - movne r2, #1 - cmp r3, #0 - addne r2, r2, #1 - - str r0, [sp, #l_a_ptr] - smlabb r0, r2, lr, r10 - mov r1, #0 ; t = 0 - str r7, [sp, #l_c] - - ;align 4 -COEFF_LOOP - ldr r3, [r9, #detok_ptr_onyx_coef_bands_x] - ldr lr, [r9, #detok_onyx_coef_tree_ptr] - -;;the following two lines are used if onyx_coef_bands_x is UINT16 -;; add r3, r3, r7, lsl #1 -;; ldrh r3, [r3] - -;;the following line is used if onyx_coef_bands_x is UINT8 - ldrb r3, [r7, +r3] - - -;; ;++ -;; pld [r8] - ;++ - add r0, r0, r3 - - ;align 4 -get_token_loop - ldrb r2, [r0, +r1, asr #1] - mov r3, r6, lsl #8 - sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8) - mov r10, #1 - - smlawb r2, r3, r2, r10 - ldrb r12, [r8] ;load cx data byte in stall slot - ;++ - - subs r3, r4, r2, lsl #24 ;x = value-(split<<24) - addhs r1, r1, #1 ;t += 1 - movhs r4, r3 ;update value - subhs r2, r6, r2 ;range = range - split - movlo r6, r2 - -;;; ldrsbhs r1, [r1, +lr] - ldrsb r1, [r1, +lr] - - -;; use branch for short pipelines ??? -;; cmp r2, #0x80 -;; bcs |$LN22@decode_mb_to| - - clz r3, r2 - sub r3, r3, #24 - subs r5, r5, r3 - mov r6, r2, lsl r3 - mov r4, r4, lsl r3 - -;; use branch for short pipelines ??? 
-;; bgt |$LN22@decode_mb_to| - - addle r5, r5, #8 - rsble r3, r5, #8 - addle r8, r8, #1 - orrle r4, r4, r12, lsl r3 - -;;|$LN22@decode_mb_to| - - cmp r1, #0 - bgt get_token_loop - - cmn r1, #c_dct_eob_token ;if(t == -DCT_EOB_TOKEN) - beq END_OF_BLOCK - - rsb lr, r1, #0 ;v = -t; - - cmp lr, #4 ;if(v > FOUR_TOKEN) - ble SKIP_EXTRABITS - - ldr r3, [r9, #detok_teb_base_ptr] - mov r11, #1 - add r7, r3, lr, lsl #4 - - ldrsh lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val - ldrsh r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length - -extrabits_loop - add r3, r0, r7 - - ldrb r2, [r3, #4] - mov r3, r6, lsl #8 - sub r3, r3, #256 ;split = 1 + (((range-1) * probability) >> 8) - mov r10, #1 - - smlawb r2, r3, r2, r10 - ldrb r12, [r8] - ;++ - - subs r10, r4, r2, lsl #24 ;x = value-(split<<24) - movhs r4, r10 ;update value - subhs r2, r6, r2 ;range = range - split - addhs lr, lr, r11, lsl r0 ;v += ((UINT16)1<= stop ? - ldr r7, [sp, #l_type] - mov lr, #0xB - - blt BLOCK_LOOP - - cmp r11, #0x19 - bne ln2_decode_mb_to - - ldr r12, [r9, #detok_qcoeff_start_ptr] - ldr r10, [r9, #detok_coef_probs] - mov r7, #0 - mov r3, #0x10 - str r12, [sp, #l_qcoeff] - str r7, [sp, #l_i] - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - - str r10, [sp, #l_coef_ptr] - - b BLOCK_LOOP - -ln2_decode_mb_to - cmp r11, #0x10 - bne ln1_decode_mb_to - - ldr r10, [r9, #0x30] - - mov r7, #2 - mov r3, #0x18 - - str r7, [sp, #l_type] - str r3, [sp, #l_stop] - - str r10, [sp, #l_coef_ptr] - b BLOCK_LOOP - -ln1_decode_mb_to - ldr r2, [sp, #l_bc] - mov r0, #0 - nop - - ldr r3, [r2, #bool_decoder_buffer] - str r5, [r2, #bool_decoder_count] - str r4, [r2, #bool_decoder_value] - sub r3, r8, r3 - str r3, [r2, #bool_decoder_pos] - str r6, [r2, #bool_decoder_range] - - add sp, sp, #l_stacksize - ldmia sp!, {r4 - r11, pc} - - ENDP ; |vp8_decode_mb_tokens_v5| - - END diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c deleted file mode 100644 index 455c83a9c..000000000 --- 
a/vp8/decoder/arm/dsystemdependent.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "blockd.h" -#include "pragmas.h" -#include "postproc.h" -#include "dboolhuff.h" -#include "dequantize.h" -#include "onyxd_int.h" - -void vp8_dmachine_specific_config(VP8D_COMP *pbi) -{ -#if CONFIG_RUNTIME_CPU_DETECT - pbi->mb.rtcd = &pbi->common.rtcd; -#if HAVE_ARMV7 - pbi->dequant.block = vp8_dequantize_b_neon; - pbi->dequant.idct = vp8_dequant_idct_neon; - pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.stop = vp8dx_stop_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; - -#elif HAVE_ARMV6 - pbi->dequant.block = vp8_dequantize_b_v6; - pbi->dequant.idct = vp8_dequant_idct_v6; - pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.stop = vp8dx_stop_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; - pbi->dboolhuff.debool = vp8dx_decode_bool_c; - pbi->dboolhuff.devalue = vp8dx_decode_value_c; -#endif -#endif -} diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm index 7ec62a3d8..ff3ffda97 100644 --- a/vp8/decoder/arm/neon/dboolhuff_neon.asm +++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm similarity index 56% rename from vp8/decoder/arm/neon/dequantidct_neon.asm rename to vp8/decoder/arm/neon/dequant_idct_neon.asm index bba4d5dfb..1923be42a 100644 --- a/vp8/decoder/arm/neon/dequantidct_neon.asm +++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm @@ -1,29 +1,41 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; - EXPORT |vp8_dequant_idct_neon| + EXPORT |vp8_dequant_idct_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch); +;void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *pred, +; unsigned char *dest, int pitch, int stride) ; r0 short *input, ; r1 short *dq, -; r2 short *output, -; r3 int pitch, -|vp8_dequant_idct_neon| PROC +; r2 unsigned char *pred +; r3 unsigned char *dest +; sp int pitch +; sp+4 int stride + +|vp8_dequant_idct_add_neon| PROC vld1.16 {q3, q4}, [r0] vld1.16 {q5, q6}, [r1] + ldr r1, [sp] ; pitch + vld1.32 {d14[0]}, [r2], r1 + vld1.32 {d14[1]}, [r2], r1 + vld1.32 {d15[0]}, [r2], r1 + vld1.32 {d15[1]}, [r2] - ldr r12, _didct_coeff_ + ldr r1, [sp, #4] ; stride + + ldr r12, _CONSTANTS_ vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon vmul.i16 q2, q4, q6 @@ -41,14 +53,9 @@ vshr.s16 q3, q3, #1 vshr.s16 q4, q4, #1 - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number) + vqadd.s16 q3, q3, q2 vqadd.s16 q4, q4, q2 - ;d6 - c1:temp1 - ;d7 - d1:temp2 - ;d8 - d1:temp1 - ;d9 - c1:temp2 - vqsub.s16 d10, d6, d9 ;c1 vqadd.s16 d11, d7, d8 ;d1 @@ -77,7 +84,7 @@ vshr.s16 q3, q3, #1 vshr.s16 q4, q4, #1 - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number) + vqadd.s16 q3, q3, q2 vqadd.s16 q4, q4, q2 vqsub.s16 d10, d6, d9 ;c1 @@ -95,34 +102,29 @@ vrshr.s16 d4, d4, #3 vrshr.s16 d5, d5, #3 - add r1, r2, r3 - add r12, r1, r3 - add r0, r12, r3 - vtrn.32 d2, d4 vtrn.32 d3, d5 vtrn.16 d2, d3 vtrn.16 d4, d5 - vst1.16 {d2}, [r2] - vst1.16 {d3}, [r1] - vst1.16 {d4}, [r12] - vst1.16 {d5}, [r0] + vaddw.u8 q1, q1, d14 + vaddw.u8 q2, q2, d15 + + vqmovun.s16 d0, q1 + vqmovun.s16 d1, q2 + + vst1.32 {d0[0]}, [r3], r1 + vst1.32 {d0[1]}, [r3], r1 + vst1.32 {d1[0]}, [r3], r1 + vst1.32 {d1[1]}, [r3] bx lr - ENDP + ENDP ; |vp8_dequant_idct_add_neon| -;----------------- - AREA didct4x4_dat, DATA, READWRITE ;read/write by default -;Data
section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_didct_coeff_ - DCD didct_coeff -didct_coeff - DCD 0x4e7b4e7b, 0x8a8c8a8c - -;20091, 20091, 35468, 35468 +; Constant Pool +_CONSTANTS_ DCD cospi8sqrt2minus1 +cospi8sqrt2minus1 DCD 0x4e7b4e7b +sinpi8sqrt2 DCD 0x8a8c8a8c END diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm deleted file mode 100644 index 3392f2c2b..000000000 --- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm +++ /dev/null @@ -1,133 +0,0 @@ -; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
-; - - - EXPORT |vp8_dequant_dc_idct_neon| - ARM - REQUIRE8 - PRESERVE8 - - AREA ||.text||, CODE, READONLY, ALIGN=2 -;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc); -; r0 short *input, -; r1 short *dq, -; r2 short *output, -; r3 int pitch, -; (stack) int Dc -|vp8_dequant_dc_idct_neon| PROC - vld1.16 {q3, q4}, [r0] - vld1.16 {q5, q6}, [r1] - - ldr r1, [sp] ;load Dc from stack - - ldr r12, _dcidct_coeff_ - - vmul.i16 q1, q3, q5 ;input for short_idct4x4llm_neon - vmul.i16 q2, q4, q6 - - vmov.16 d2[0], r1 - -;|short_idct4x4llm_neon| PROC - vld1.16 {d0}, [r12] - vswp d3, d4 ;q2(vp[4] vp[12]) - - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number) - vqadd.s16 q4, q4, q2 - - ;d6 - c1:temp1 - ;d7 - d1:temp2 - ;d8 - d1:temp1 - ;d9 - c1:temp2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - -; memset(input, 0, 32) -- 32bytes - vmov.i16 q14, #0 - - vswp d3, d4 - vqdmulh.s16 q3, q2, d0[2] - vqdmulh.s16 q4, q2, d0[0] - - vqadd.s16 d12, d2, d3 ;a1 - vqsub.s16 d13, d2, d3 ;b1 - - vmov q15, q14 - - vshr.s16 q3, q3, #1 - vshr.s16 q4, q4, #1 - - vqadd.s16 q3, q3, q2 ;modify since sinpi8sqrt2 > 65536/2 (negtive number) - vqadd.s16 q4, q4, q2 - - vqsub.s16 d10, d6, d9 ;c1 - vqadd.s16 d11, d7, d8 ;d1 - - vqadd.s16 d2, d12, d11 - vqadd.s16 d3, d13, d10 - vqsub.s16 d4, d13, d10 - vqsub.s16 d5, d12, d11 - - vst1.16 {q14, q15}, [r0] - - vrshr.s16 d2, d2, #3 - vrshr.s16 d3, d3, #3 - vrshr.s16 d4, d4, #3 - vrshr.s16 d5, d5, #3 - - add r1, r2, r3 - add r12, r1, r3 - add r0, r12, r3 - - vtrn.32 d2, d4 - vtrn.32 d3, d5 - vtrn.16 d2, d3 - vtrn.16 d4, d5 - - vst1.16 {d2}, [r2] - vst1.16 {d3}, [r1] 
- vst1.16 {d4}, [r12] - vst1.16 {d5}, [r0] - - bx lr - - ENDP - -;----------------- - AREA dcidct4x4_dat, DATA, READWRITE ;read/write by default -;Data section with name data_area is specified. DCD reserves space in memory for 48 data. -;One word each is reserved. Label filter_coeff can be used to access the data. -;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ... -_dcidct_coeff_ - DCD dcidct_coeff -dcidct_coeff - DCD 0x4e7b4e7b, 0x8a8c8a8c - -;20091, 20091, 35468, 35468 - - END diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm index 1bde94607..c8e0c31f2 100644 --- a/vp8/decoder/arm/neon/dequantizeb_neon.asm +++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c new file mode 100644 index 000000000..fe4f2e0d4 --- /dev/null +++ b/vp8/decoder/arm/neon/idct_blk_neon.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/config.h" +#include "idct.h" +#include "dequantize.h" + +/* place these declarations here because we don't want to maintain them + * outside of this scope + */ +void idct_dequant_dc_full_2x_neon + (short *input, short *dq, unsigned char *pre, unsigned char *dst, + int stride, short *dc); +void idct_dequant_dc_0_2x_neon + (short *dc, unsigned char *pre, unsigned char *dst, int stride); +void idct_dequant_full_2x_neon + (short *q, short *dq, unsigned char *pre, unsigned char *dst, + int pitch, int stride); +void idct_dequant_0_2x_neon + (short *q, short dq, unsigned char *pre, int pitch, + unsigned char *dst, int stride); + +void vp8_dequant_dc_idct_add_y_block_neon + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)eobs)[0] & 0xfefe) + idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc); + else + idct_dequant_dc_0_2x_neon(dc, pre, dst, stride); + + if (((short *)eobs)[1] & 0xfefe) + idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2); + else + idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride); + + q += 64; + dc += 4; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_y_block_neon + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)eobs)[0] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride); + else + idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride); + + if (((short *)eobs)[1] & 0xfefe) + idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride); + else + idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride); + + q += 64; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_neon + (short *q, short *dq, 
unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + if (((short *)eobs)[0] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); + else + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); + + q += 32; + pre += 32; + dstu += 4*stride; + + if (((short *)eobs)[1] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride); + else + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride); + + q += 32; + pre += 32; + + if (((short *)eobs)[2] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); + else + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); + + q += 32; + pre += 32; + dstv += 4*stride; + + if (((short *)eobs)[3] & 0xfefe) + idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride); + else + idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride); +} diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm new file mode 100644 index 000000000..456f8e1d4 --- /dev/null +++ b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm @@ -0,0 +1,79 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + + EXPORT |idct_dequant_0_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *pre, +; int pitch, unsigned char *dst, int stride); +; r0 *q +; r1 dq +; r2 *pre +; r3 pitch +; sp *dst +; sp+4 stride +|idct_dequant_0_2x_neon| PROC + add r12, r2, #4 + vld1.32 {d2[0]}, [r2], r3 + vld1.32 {d2[1]}, [r2], r3 + vld1.32 {d4[0]}, [r2], r3 + vld1.32 {d4[1]}, [r2] + vld1.32 {d8[0]}, [r12], r3 + vld1.32 {d8[1]}, [r12], r3 + vld1.32 {d10[0]}, [r12], r3 + vld1.32 {d10[1]}, [r12] + + ldrh r12, [r0] ; lo q + ldrh r2, [r0, #32] ; hi q + mov r3, #0 + strh r3, [r0] + strh r3, [r0, #32] + + sxth r12, r12 ; lo + mul r0, r12, r1 + add r0, r0, #4 + asr r0, r0, #3 + vdup.16 q0, r0 + sxth r2, r2 ; hi + mul r0, r2, r1 + add r0, r0, #4 + asr r0, r0, #3 + vdup.16 q3, r0 + + vaddw.u8 q1, q0, d2 ; lo + vaddw.u8 q2, q0, d4 + vaddw.u8 q4, q3, d8 ; hi + vaddw.u8 q5, q3, d10 + + ldr r2, [sp] ; dst + ldr r3, [sp, #4] ; stride + + vqmovun.s16 d2, q1 ; lo + vqmovun.s16 d4, q2 + vqmovun.s16 d8, q4 ; hi + vqmovun.s16 d10, q5 + + add r0, r2, #4 + vst1.32 {d2[0]}, [r2], r3 ; lo + vst1.32 {d2[1]}, [r2], r3 + vst1.32 {d4[0]}, [r2], r3 + vst1.32 {d4[1]}, [r2] + vst1.32 {d8[0]}, [r0], r3 ; hi + vst1.32 {d8[1]}, [r0], r3 + vst1.32 {d10[0]}, [r0], r3 + vst1.32 {d10[1]}, [r0] + + bx lr + + ENDP ; |idct_dequant_0_2x_neon| + END diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm new file mode 100644 index 000000000..0dc036acb --- /dev/null +++ b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm @@ -0,0 +1,69 @@ +; +; Copyright (c) 2010 The Webm project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. 
+; + + + EXPORT |idct_dequant_dc_0_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre, +; unsigned char *dst, int stride); +; r0 *dc +; r1 *pre +; r2 *dst +; r3 stride +|idct_dequant_dc_0_2x_neon| PROC + ldr r0, [r0] ; *dc + mov r12, #16 + + vld1.32 {d2[0]}, [r1], r12 ; lo + vld1.32 {d2[1]}, [r1], r12 + vld1.32 {d4[0]}, [r1], r12 + vld1.32 {d4[1]}, [r1] + sub r1, r1, #44 + vld1.32 {d8[0]}, [r1], r12 ; hi + vld1.32 {d8[1]}, [r1], r12 + vld1.32 {d10[0]}, [r1], r12 + vld1.32 {d10[1]}, [r1] + + sxth r1, r0 ; lo *dc + add r1, r1, #4 + asr r1, r1, #3 + vdup.16 q0, r1 + sxth r0, r0, ror #16 ; hi *dc + add r0, r0, #4 + asr r0, r0, #3 + vdup.16 q3, r0 + + vaddw.u8 q1, q0, d2 ; lo + vaddw.u8 q2, q0, d4 + vaddw.u8 q4, q3, d8 ; hi + vaddw.u8 q5, q3, d10 + + vqmovun.s16 d2, q1 ; lo + vqmovun.s16 d4, q2 + vqmovun.s16 d8, q4 ; hi + vqmovun.s16 d10, q5 + + add r0, r2, #4 + vst1.32 {d2[0]}, [r2], r3 ; lo + vst1.32 {d2[1]}, [r2], r3 + vst1.32 {d4[0]}, [r2], r3 + vst1.32 {d4[1]}, [r2] + vst1.32 {d8[0]}, [r0], r3 ; hi + vst1.32 {d8[1]}, [r0], r3 + vst1.32 {d10[0]}, [r0], r3 + vst1.32 {d10[1]}, [r0] + + bx lr + + ENDP ;|idct_dequant_dc_0_2x_neon| + END diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm new file mode 100644 index 000000000..ad4364adc --- /dev/null +++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm @@ -0,0 +1,206 @@ +; +; Copyright (c) 2010 The Webm project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |idct_dequant_dc_full_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_dc_full_2x_neon(short *q, short *dq, unsigned char *pre, +; unsigned char *dst, int stride, short *dc); +; r0 *q, +; r1 *dq, +; r2 *pre +; r3 *dst +; sp stride +; sp+4 *dc +|idct_dequant_dc_full_2x_neon| PROC + vld1.16 {q0, q1}, [r1] ; dq (same l/r) + vld1.16 {q2, q3}, [r0] ; l q + mov r1, #16 ; pitch + add r0, r0, #32 + vld1.16 {q4, q5}, [r0] ; r q + add r12, r2, #4 + ; interleave the predictors + vld1.32 {d28[0]}, [r2], r1 ; l pre + vld1.32 {d28[1]}, [r12], r1 ; r pre + vld1.32 {d29[0]}, [r2], r1 + vld1.32 {d29[1]}, [r12], r1 + vld1.32 {d30[0]}, [r2], r1 + vld1.32 {d30[1]}, [r12], r1 + vld1.32 {d31[0]}, [r2] + ldr r1, [sp, #4] + vld1.32 {d31[1]}, [r12] + + ldr r2, _CONSTANTS_ + + ldrh r12, [r1], #2 ; lo *dc + ldrh r1, [r1] ; hi *dc + + ; dequant: q[i] = q[i] * dq[i] + vmul.i16 q2, q2, q0 + vmul.i16 q3, q3, q1 + vmul.i16 q4, q4, q0 + vmul.i16 q5, q5, q1 + + ; move dc up to neon and overwrite first element + vmov.16 d4[0], r12 + vmov.16 d8[0], r1 + + vld1.16 {d0}, [r2] + + ; q2: l0r0 q3: l8r8 + ; q4: l4r4 q5: l12r12 + vswp d5, d8 + vswp d7, d10 + + ; _CONSTANTS_ * 4,12 >> 16 + ; q6: 4 * sinpi : c1/temp1 + ; q7: 12 * sinpi : d1/temp2 + ; q8: 4 * cospi + ; q9: 12 * cospi + vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q7, q5, d0[2] + vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q9, q5, d0[0] + + vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 + vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 + + ; vqdmulh only accepts signed values. this was a problem because + ; our constant had the high bit set, and was treated as a negative value. + ; vqdmulh also doubles the value before it shifts by 16. we need to + ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0, + ; so we can shift the constant without losing precision. this avoids + ; shift again afterward, but also avoids the sign issue. win win! 
+ ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we + ; pre-shift it + vshr.s16 q8, q8, #1 + vshr.s16 q9, q9, #1 + + ; q4: 4 + 4 * cospi : d1/temp1 + ; q5: 12 + 12 * cospi : c1/temp2 + vqadd.s16 q4, q4, q8 + vqadd.s16 q5, q5, q9 + + ; c1 = temp1 - temp2 + ; d1 = temp1 + temp2 + vqsub.s16 q2, q6, q5 + vqadd.s16 q3, q4, q7 + + ; [0]: a1+d1 + ; [1]: b1+c1 + ; [2]: b1-c1 + ; [3]: a1-d1 + vqadd.s16 q4, q10, q3 + vqadd.s16 q5, q11, q2 + vqsub.s16 q6, q11, q2 + vqsub.s16 q7, q10, q3 + + ; rotate + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + ; idct loop 2 + ; q4: l 0, 4, 8,12 r 0, 4, 8,12 + ; q5: l 1, 5, 9,13 r 1, 5, 9,13 + ; q6: l 2, 6,10,14 r 2, 6,10,14 + ; q7: l 3, 7,11,15 r 3, 7,11,15 + + ; q8: 1 * sinpi : c1/temp1 + ; q9: 3 * sinpi : d1/temp2 + ; q10: 1 * cospi + ; q11: 3 * cospi + vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q9, q7, d0[2] + vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q11, q7, d0[0] + + vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 + vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 + + ; see note on shifting above + vshr.s16 q10, q10, #1 + vshr.s16 q11, q11, #1 + + ; q10: 1 + 1 * cospi : d1/temp1 + ; q11: 3 + 3 * cospi : c1/temp2 + vqadd.s16 q10, q5, q10 + vqadd.s16 q11, q7, q11 + + ; q8: c1 = temp1 - temp2 + ; q9: d1 = temp1 + temp2 + vqsub.s16 q8, q8, q11 + vqadd.s16 q9, q10, q9 + + ; a1+d1 + ; b1+c1 + ; b1-c1 + ; a1-d1 + vqadd.s16 q4, q2, q9 + vqadd.s16 q5, q3, q8 + vqsub.s16 q6, q3, q8 + vqsub.s16 q7, q2, q9 + + ; +4 >> 3 (rounding) + vrshr.s16 q4, q4, #3 ; lo + vrshr.s16 q5, q5, #3 + vrshr.s16 q6, q6, #3 ; hi + vrshr.s16 q7, q7, #3 + + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + ; adding pre + ; input is still packed. 
pre was read interleaved + vaddw.u8 q4, q4, d28 + vaddw.u8 q5, q5, d29 + vaddw.u8 q6, q6, d30 + vaddw.u8 q7, q7, d31 + + vmov.i16 q14, #0 + vmov q15, q14 + vst1.16 {q14, q15}, [r0] ; write over high input + sub r0, r0, #32 + vst1.16 {q14, q15}, [r0] ; write over low input + + ;saturate and narrow + vqmovun.s16 d0, q4 ; lo + vqmovun.s16 d1, q5 + vqmovun.s16 d2, q6 ; hi + vqmovun.s16 d3, q7 + + ldr r1, [sp] ; stride + add r2, r3, #4 ; hi + vst1.32 {d0[0]}, [r3], r1 ; lo + vst1.32 {d0[1]}, [r2], r1 ; hi + vst1.32 {d1[0]}, [r3], r1 + vst1.32 {d1[1]}, [r2], r1 + vst1.32 {d2[0]}, [r3], r1 + vst1.32 {d2[1]}, [r2], r1 + vst1.32 {d3[0]}, [r3] + vst1.32 {d3[1]}, [r2] + + bx lr + + ENDP ; |idct_dequant_dc_full_2x_neon| + +; Constant Pool +_CONSTANTS_ DCD cospi8sqrt2minus1 +cospi8sqrt2minus1 DCD 0x4e7b +; because the lowest bit in 0x8a8c is 0, we can pre-shift this +sinpi8sqrt2 DCD 0x4546 + + END diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm new file mode 100644 index 000000000..85fff11b3 --- /dev/null +++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm @@ -0,0 +1,198 @@ +; +; Copyright (c) 2010 The Webm project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + + EXPORT |idct_dequant_full_2x_neon| + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 +;void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *pre, +; unsigned char *dst, int pitch, int stride); +; r0 *q, +; r1 *dq, +; r2 *pre +; r3 *dst +; sp pitch +; sp+4 stride +|idct_dequant_full_2x_neon| PROC + vld1.16 {q0, q1}, [r1] ; dq (same l/r) + vld1.16 {q2, q3}, [r0] ; l q + ldr r1, [sp] ; pitch + add r0, r0, #32 + vld1.16 {q4, q5}, [r0] ; r q + add r12, r2, #4 + ; interleave the predictors + vld1.32 {d28[0]}, [r2], r1 ; l pre + vld1.32 {d28[1]}, [r12], r1 ; r pre + vld1.32 {d29[0]}, [r2], r1 + vld1.32 {d29[1]}, [r12], r1 + vld1.32 {d30[0]}, [r2], r1 + vld1.32 {d30[1]}, [r12], r1 + vld1.32 {d31[0]}, [r2] + vld1.32 {d31[1]}, [r12] + + ldr r2, _CONSTANTS_ + + ; dequant: q[i] = q[i] * dq[i] + vmul.i16 q2, q2, q0 + vmul.i16 q3, q3, q1 + vmul.i16 q4, q4, q0 + vmul.i16 q5, q5, q1 + + vld1.16 {d0}, [r2] + + ; q2: l0r0 q3: l8r8 + ; q4: l4r4 q5: l12r12 + vswp d5, d8 + vswp d7, d10 + + ; _CONSTANTS_ * 4,12 >> 16 + ; q6: 4 * sinpi : c1/temp1 + ; q7: 12 * sinpi : d1/temp2 + ; q8: 4 * cospi + ; q9: 12 * cospi + vqdmulh.s16 q6, q4, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q7, q5, d0[2] + vqdmulh.s16 q8, q4, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q9, q5, d0[0] + + vqadd.s16 q10, q2, q3 ; a1 = 0 + 8 + vqsub.s16 q11, q2, q3 ; b1 = 0 - 8 + + ; vqdmulh only accepts signed values. this was a problem because + ; our constant had the high bit set, and was treated as a negative value. + ; vqdmulh also doubles the value before it shifts by 16. we need to + ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0, + ; so we can shift the constant without losing precision. this avoids + ; shift again afterward, but also avoids the sign issue. win win! 
+ ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we + ; pre-shift it + vshr.s16 q8, q8, #1 + vshr.s16 q9, q9, #1 + + ; q4: 4 + 4 * cospi : d1/temp1 + ; q5: 12 + 12 * cospi : c1/temp2 + vqadd.s16 q4, q4, q8 + vqadd.s16 q5, q5, q9 + + ; c1 = temp1 - temp2 + ; d1 = temp1 + temp2 + vqsub.s16 q2, q6, q5 + vqadd.s16 q3, q4, q7 + + ; [0]: a1+d1 + ; [1]: b1+c1 + ; [2]: b1-c1 + ; [3]: a1-d1 + vqadd.s16 q4, q10, q3 + vqadd.s16 q5, q11, q2 + vqsub.s16 q6, q11, q2 + vqsub.s16 q7, q10, q3 + + ; rotate + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + ; idct loop 2 + ; q4: l 0, 4, 8,12 r 0, 4, 8,12 + ; q5: l 1, 5, 9,13 r 1, 5, 9,13 + ; q6: l 2, 6,10,14 r 2, 6,10,14 + ; q7: l 3, 7,11,15 r 3, 7,11,15 + + ; q8: 1 * sinpi : c1/temp1 + ; q9: 3 * sinpi : d1/temp2 + ; q10: 1 * cospi + ; q11: 3 * cospi + vqdmulh.s16 q8, q5, d0[2] ; sinpi8sqrt2 + vqdmulh.s16 q9, q7, d0[2] + vqdmulh.s16 q10, q5, d0[0] ; cospi8sqrt2minus1 + vqdmulh.s16 q11, q7, d0[0] + + vqadd.s16 q2, q4, q6 ; a1 = 0 + 2 + vqsub.s16 q3, q4, q6 ; b1 = 0 - 2 + + ; see note on shifting above + vshr.s16 q10, q10, #1 + vshr.s16 q11, q11, #1 + + ; q10: 1 + 1 * cospi : d1/temp1 + ; q11: 3 + 3 * cospi : c1/temp2 + vqadd.s16 q10, q5, q10 + vqadd.s16 q11, q7, q11 + + ; q8: c1 = temp1 - temp2 + ; q9: d1 = temp1 + temp2 + vqsub.s16 q8, q8, q11 + vqadd.s16 q9, q10, q9 + + ; a1+d1 + ; b1+c1 + ; b1-c1 + ; a1-d1 + vqadd.s16 q4, q2, q9 + vqadd.s16 q5, q3, q8 + vqsub.s16 q6, q3, q8 + vqsub.s16 q7, q2, q9 + + ; +4 >> 3 (rounding) + vrshr.s16 q4, q4, #3 ; lo + vrshr.s16 q5, q5, #3 + vrshr.s16 q6, q6, #3 ; hi + vrshr.s16 q7, q7, #3 + + vtrn.32 q4, q6 + vtrn.32 q5, q7 + vtrn.16 q4, q5 + vtrn.16 q6, q7 + + ; adding pre + ; input is still packed. 
pre was read interleaved + vaddw.u8 q4, q4, d28 + vaddw.u8 q5, q5, d29 + vaddw.u8 q6, q6, d30 + vaddw.u8 q7, q7, d31 + + vmov.i16 q14, #0 + vmov q15, q14 + vst1.16 {q14, q15}, [r0] ; write over high input + sub r0, r0, #32 + vst1.16 {q14, q15}, [r0] ; write over low input + + ;saturate and narrow + vqmovun.s16 d0, q4 ; lo + vqmovun.s16 d1, q5 + vqmovun.s16 d2, q6 ; hi + vqmovun.s16 d3, q7 + + ldr r1, [sp, #4] ; stride + add r2, r3, #4 ; hi + vst1.32 {d0[0]}, [r3], r1 ; lo + vst1.32 {d0[1]}, [r2], r1 ; hi + vst1.32 {d1[0]}, [r3], r1 + vst1.32 {d1[1]}, [r2], r1 + vst1.32 {d2[0]}, [r3], r1 + vst1.32 {d2[1]}, [r2], r1 + vst1.32 {d3[0]}, [r3] + vst1.32 {d3[1]}, [r2] + + bx lr + + ENDP ; |idct_dequant_full_2x_neon| + +; Constant Pool +_CONSTANTS_ DCD cospi8sqrt2minus1 +cospi8sqrt2minus1 DCD 0x4e7b +; because the lowest bit in 0x8a8c is 0, we can pre-shift this +sinpi8sqrt2 DCD 0x4546 + + END diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c index 442054ed3..57cba16a3 100644 --- a/vp8/decoder/dboolhuff.c +++ b/vp8/decoder/dboolhuff.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -12,7 +13,7 @@ #include "vpx_ports/mem.h" #include "vpx_mem/vpx_mem.h" -DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) = +DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -25,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) = }; -static void copy_in(BOOL_DECODER *br, unsigned int to_write) -{ - if (to_write > br->user_buffer_sz) - to_write = br->user_buffer_sz; - - memcpy(br->write_ptr, br->user_buffer, to_write); - br->user_buffer += to_write; - br->user_buffer_sz -= to_write; - br->write_ptr = br_ptr_advance(br->write_ptr, to_write); -} - int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source, unsigned int source_sz) { - br->lowvalue = 0; + br->user_buffer_end = source+source_sz; + br->user_buffer = source; + br->value = 0; + br->count = -8; br->range = 255; - br->count = 0; - br->user_buffer = source; - br->user_buffer_sz = source_sz; if (source_sz && !source) return 1; - /* Allocate the ring buffer backing store with alignment equal to the - * buffer size*2 so that a single pointer can be used for wrapping rather - * than a pointer+offset. 
- */ - br->decode_buffer = vpx_memalign(VP8_BOOL_DECODER_SZ * 2, - VP8_BOOL_DECODER_SZ); - - if (!br->decode_buffer) - return 1; - /* Populate the buffer */ - br->read_ptr = br->decode_buffer; - br->write_ptr = br->decode_buffer; - copy_in(br, VP8_BOOL_DECODER_SZ); + vp8dx_bool_decoder_fill_c(br); - /* Read the first byte */ - br->value = (*br->read_ptr++) << 8; return 0; } void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br) { - int left, right; + const unsigned char *bufptr; + const unsigned char *bufend; + VP8_BD_VALUE value; + int count; + bufend = br->user_buffer_end; + bufptr = br->user_buffer; + value = br->value; + count = br->count; - /* Find available room in the buffer */ - left = 0; - right = br->read_ptr - br->write_ptr; + VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); - if (right < 0) - { - /* Read pointer is behind the write pointer. We can write from the - * write pointer to the end of the buffer. - */ - right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer); - left = br->read_ptr - br->decode_buffer; - } - - if (right + left < 128) - return; - - if (right) - copy_in(br, right); - - if (left) - { - br->write_ptr = br->decode_buffer; - copy_in(br, left); - } - -} - - -void vp8dx_stop_decode_c(BOOL_DECODER *bc) -{ - vpx_free(bc->decode_buffer); - bc->decode_buffer = 0; + br->user_buffer = bufptr; + br->value = value; + br->count = count; } #if 0 @@ -119,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc) int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability) { unsigned int bit=0; + VP8_BD_VALUE value; unsigned int split; - unsigned int bigsplit; - register unsigned int range = br->range; - register unsigned int value = br->value; + VP8_BD_VALUE bigsplit; + int count; + unsigned int range; + + value = br->value; + count = br->count; + range = br->range; split = 1 + (((range-1) * probability) >> 8); - bigsplit = (split<<8); + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); range = split; if(value >= bigsplit) @@ -143,21 
+104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability) }*/ { - int count = br->count; register unsigned int shift = vp8dx_bitreader_norm[range]; range <<= shift; value <<= shift; count -= shift; - if(count <= 0) - { - value |= (*br->read_ptr) << (-count); - br->read_ptr = br_ptr_advance(br->read_ptr, 1); - count += 8 ; - } - br->count = count; } br->value = value; + br->count = count; br->range = range; + if (count < 0) + vp8dx_bool_decoder_fill_c(br); return bit; } diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h index 772dbdb2e..c851aa7e5 100644 --- a/vp8/decoder/dboolhuff.h +++ b/vp8/decoder/dboolhuff.h @@ -1,60 +1,41 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #ifndef DBOOLHUFF_H #define DBOOLHUFF_H +#include <stddef.h> +#include <limits.h> #include "vpx_ports/config.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" -/* Size of the bool decoder backing storage - * - * This size was chosen to be greater than the worst case encoding of a - * single macroblock.
This was calcluated as follows (python): - * - * def max_cost(prob): - * return max(prob_costs[prob], prob_costs[255-prob]) / 256; - * - * tree_nodes_cost = 7 * max_cost(255) - * extra_bits_cost = sum([max_cost(bit) for bit in extra_bits]) - * sign_bit_cost = max_cost(128) - * total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost - * - * where the prob_costs table was taken from the C vp8_prob_cost table in - * boolhuff.c and the extra_bits table was taken from the 11 extrabits for - * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c - * - * This equation produced a maximum of 79 bits per coefficient. Scaling up - * to the macroblock level: - * - * 79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb - * - * 4096 bytes = 32768 bits > 31600 - */ -#define VP8_BOOL_DECODER_SZ 4096 -#define VP8_BOOL_DECODER_MASK (VP8_BOOL_DECODER_SZ-1) -#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ)) +typedef size_t VP8_BD_VALUE; + +# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT) +/*This is meant to be a large, positive constant that can still be efficiently + loaded as an immediate (on platforms like ARM, for example). 
+ Even relatively modest values like 100 would work fine.*/ +# define VP8_LOTS_OF_BITS (0x40000000) + + struct vp8_dboolhuff_rtcd_vtable; typedef struct { - unsigned int lowvalue; - unsigned int range; - unsigned int value; - int count; + const unsigned char *user_buffer_end; const unsigned char *user_buffer; - unsigned int user_buffer_sz; - unsigned char *decode_buffer; - const unsigned char *read_ptr; - unsigned char *write_ptr; + VP8_BD_VALUE value; + int count; + unsigned int range; #if CONFIG_RUNTIME_CPU_DETECT struct vp8_dboolhuff_rtcd_vtable *rtcd; #endif @@ -62,10 +43,9 @@ typedef struct #define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \ const unsigned char *source, unsigned int source_sz) -#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc) #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br) #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability) -#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits); +#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits) #if ARCH_ARM #include "arm/dboolhuff_arm.h" @@ -75,10 +55,6 @@ typedef struct #define vp8_dbool_start vp8dx_start_decode_c #endif -#ifndef vp8_dbool_stop -#define vp8_dbool_stop vp8dx_stop_decode_c -#endif - #ifndef vp8_dbool_fill #define vp8_dbool_fill vp8dx_bool_decoder_fill_c #endif @@ -92,48 +68,35 @@ typedef struct #endif extern prototype_dbool_start(vp8_dbool_start); -extern prototype_dbool_stop(vp8_dbool_stop); extern prototype_dbool_fill(vp8_dbool_fill); extern prototype_dbool_debool(vp8_dbool_debool); extern prototype_dbool_devalue(vp8_dbool_devalue); typedef prototype_dbool_start((*vp8_dbool_start_fn_t)); -typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t)); typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t)); typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t)); typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t)); typedef struct vp8_dboolhuff_rtcd_vtable { vp8_dbool_start_fn_t start; - 
vp8_dbool_stop_fn_t stop; vp8_dbool_fill_fn_t fill; vp8_dbool_debool_fn_t debool; vp8_dbool_devalue_fn_t devalue; } vp8_dboolhuff_rtcd_vtable_t; -// There are no processor-specific versions of these -// functions right now. Disable RTCD to avoid using -// function pointers which gives a speed boost -//#ifdef ENABLE_RUNTIME_CPU_DETECT -//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn -//#define IF_RTCD(x) (x) -//#else +/* There are no processor-specific versions of these + * functions right now. Disable RTCD to avoid using + * function pointers which gives a speed boost + */ +/*#ifdef ENABLE_RUNTIME_CPU_DETECT +#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn +#define IF_RTCD(x) (x) +#else*/ #define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn #define IF_RTCD(x) NULL -//#endif +/*#endif*/ -static unsigned char *br_ptr_advance(const unsigned char *_ptr, - unsigned int n) -{ - uintptr_t ptr = (uintptr_t)_ptr; - - ptr += n; - ptr &= VP8_BOOL_DECODER_PTR_MASK; - - return (void *)ptr; -} - -DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); +DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]); /* wrapper functions to hide RTCD. static means inline means hopefully no * penalty @@ -146,12 +109,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br, #endif return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz); } -static void vp8dx_stop_decode(BOOL_DECODER *br) { - DBOOLHUFF_INVOKE(br->rtcd, stop)(br); -} static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) { DBOOLHUFF_INVOKE(br->rtcd, fill)(br); } + +/*The refill loop is used in several places, so define it in a macro to make + sure they're all consistent. 
+ An inline function would be cleaner, but has a significant penalty, because + multiple BOOL_DECODER fields must be modified, and the compiler is not smart + enough to eliminate the stores to those fields and the subsequent reloads + from them when inlining the function.*/ +#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \ + do \ + { \ + int shift; \ + for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \ + { \ + if((_bufptr) >= (_bufend)) { \ + (_count) = VP8_LOTS_OF_BITS; \ + break; \ + } \ + (_count) += 8; \ + (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \ + shift -= 8; \ + } \ + } \ + while(0) + + static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { /* * Until optimized versions of this function are available, we @@ -160,13 +145,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability); */ unsigned int bit = 0; + VP8_BD_VALUE value; unsigned int split; - unsigned int bigsplit; - register unsigned int range = br->range; - register unsigned int value = br->value; + VP8_BD_VALUE bigsplit; + int count; + unsigned int range; + + value = br->value; + count = br->count; + range = br->range; split = 1 + (((range - 1) * probability) >> 8); - bigsplit = (split << 8); + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); range = split; @@ -185,23 +175,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { }*/ { - int count = br->count; register unsigned int shift = vp8dx_bitreader_norm[range]; range <<= shift; value <<= shift; count -= shift; - - if (count <= 0) - { - value |= (*br->read_ptr) << (-count); - br->read_ptr = br_ptr_advance(br->read_ptr, 1); - count += 8 ; - } - - br->count = count; } br->value = value; + br->count = count; br->range = range; + if(count < 0) + vp8dx_bool_decoder_fill(br); return bit; } diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c index f151ef3cc..415392b68 100644 --- 
a/vp8/decoder/decodemv.c +++ b/vp8/decoder/decodemv.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -13,10 +14,127 @@ #include "entropymode.h" #include "onyxd_int.h" #include "findnearmv.h" -#include "demode.h" + #if CONFIG_DEBUG #include <assert.h> #endif +static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_bmode_tree, p); + + return i; +} + + +static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_ymode_tree, p); + + return i; +} + +static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); + + return i; +} + + + +static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p) +{ + const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); + + return i; +} + +static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) +{ + /* Is segmentation enabled */ + if (x->segmentation_enabled && x->update_mb_segmentation_map) + { + /* If so then read the segment id.
*/ + if (vp8_read(r, x->mb_segment_tree_probs[0])) + mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); + else + mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); + } +} + +static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col) +{ + vp8_reader *const bc = & pbi->bc; + const int mis = pbi->common.mode_info_stride; + + { + MB_PREDICTION_MODE y_mode; + + /* Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default) + * By default on a key frame reset all MBs to segment 0 + */ + m->mbmi.segment_id = 0; + + if (pbi->mb.update_mb_segmentation_map) + vp8_read_mb_features(bc, &m->mbmi, &pbi->mb); + + /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */ + if (pbi->common.mb_no_coeff_skip) + m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false); + else + m->mbmi.mb_skip_coeff = 0; + + y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob); + + m->mbmi.ref_frame = INTRA_FRAME; + + if ((m->mbmi.mode = y_mode) == B_PRED) + { + int i = 0; + + do + { + const B_PREDICTION_MODE A = vp8_above_bmi(m, i, mis)->mode; + const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode; + + m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]); + } + while (++i < 16); + } + else + { + int BMode; + int i = 0; + + switch (y_mode) + { + case DC_PRED: + BMode = B_DC_PRED; + break; + case V_PRED: + BMode = B_VE_PRED; + break; + case H_PRED: + BMode = B_HE_PRED; + break; + case TM_PRED: + BMode = B_TM_PRED; + break; + default: + BMode = B_DC_PRED; + break; + } + + do + { + m->bmi[i].mode = (B_PREDICTION_MODE)BMode; + } + while (++i < 16); + } + + m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob); + } +} static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) { @@ -98,6 +216,8 @@ static MB_PREDICTION_MODE sub_mv_ref(vp8_reader 
*bc, const vp8_prob *p) return (MB_PREDICTION_MODE)i; } + +#ifdef VPX_MODE_COUNT unsigned int vp8_mv_cont_count[5][4] = { { 0, 0, 0, 0 }, @@ -106,87 +226,108 @@ unsigned int vp8_mv_cont_count[5][4] = { 0, 0, 0, 0 }, { 0, 0, 0, 0 } }; - -void vp8_decode_mode_mvs(VP8D_COMP *pbi) -{ - const MV Zero = { 0, 0}; - - VP8_COMMON *const pc = & pbi->common; - vp8_reader *const bc = & pbi->bc; - MACROBLOCKD *xd = &pbi->mb; - MODE_INFO *mi = pc->mi, *ms; - const int mis = pc->mode_info_stride; - - MV_CONTEXT *const mvc = pc->fc.mvc; - - int mb_row = -1; -#if CONFIG_SEGMENTATION - int left_id, above_id; - int i; - int sum; - int index = 0; #endif - vp8_prob prob_intra; - vp8_prob prob_last; - vp8_prob prob_gf; - vp8_prob prob_skip_false = 0; - if (pc->mb_no_coeff_skip) - prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); +unsigned char vp8_mbsplit_offset[4][16] = { + { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; - prob_intra = (vp8_prob)vp8_read_literal(bc, 8); - prob_last = (vp8_prob)vp8_read_literal(bc, 8); - prob_gf = (vp8_prob)vp8_read_literal(bc, 8); +unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1}; +unsigned char vp8_mbsplit_fill_offset[4][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15}, + { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; - ms = pc->mi - 1; - if (vp8_read_bit(bc)) + +void vp8_mb_mode_mv_init(VP8D_COMP *pbi) +{ + vp8_reader *const bc = & pbi->bc; + MV_CONTEXT *const mvc = pbi->common.fc.mvc; +#if CONFIG_SEGMENTATION + MACROBLOCKD *const xd = & pbi->mb; +#endif + + pbi->prob_skip_false = 0; + if (pbi->common.mb_no_coeff_skip) + pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); + + if(pbi->common.frame_type != KEY_FRAME) { - 
int i = 0; + pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); + pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); - do + if (vp8_read_bit(bc)) { - pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); + int i = 0; + + do + { + pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); + } + while (++i < 4); } - while (++i < 4); - } - if (vp8_read_bit(bc)) - { - int i = 0; - - do + if (vp8_read_bit(bc)) { - pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); - } - while (++i < 3); - } + int i = 0; - read_mvcontexts(bc, mvc); + do + { + pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8); + } + while (++i < 3); + } + + read_mvcontexts(bc, mvc); #if CONFIG_SEGMENTATION xd->temporal_update = vp8_read_bit(bc); #endif - while (++mb_row < pc->mb_rows) - { - int mb_col = -1; + } +} - while (++mb_col < pc->mb_cols) - { - MB_MODE_INFO *const mbmi = & mi->mbmi; - MV *const mv = & mbmi->mv.as_mv; - VP8_COMMON *const pc = &pbi->common; - // MACROBLOCKD *xd = &pbi->mb; +void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, + int mb_row, int mb_col) +{ + const MV Zero = { 0, 0}; + vp8_reader *const bc = & pbi->bc; + MV_CONTEXT *const mvc = pbi->common.fc.mvc; + const int mis = pbi->common.mode_info_stride; +#if CONFIG_SEGMENTATION + MACROBLOCKD *const xd = & pbi->mb; + int sum; + int index = mb_row * pbi->common.mb_cols + mb_col; +#endif + MV *const mv = & mbmi->mv.as_mv; + int mb_to_left_edge; + int mb_to_right_edge; + int mb_to_top_edge; + int mb_to_bottom_edge; - vp8dx_bool_decoder_fill(bc); - // Distance of Mb to the various image edges. 
- // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + mb_to_top_edge = pbi->mb.mb_to_top_edge; + mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; + mb_to_top_edge -= LEFT_TOP_MARGIN; + mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; + mbmi->need_to_clamp_mvs = 0; + /* Distance of Mb to the various image edges. + * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units + */ + pbi->mb.mb_to_left_edge = + mb_to_left_edge = -((mb_col * 16) << 3); + mb_to_left_edge -= LEFT_TOP_MARGIN; - // If required read in new segmentation data for this MB - if (pbi->mb.update_mb_segmentation_map) + pbi->mb.mb_to_right_edge = + mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3; + mb_to_right_edge += RIGHT_BOTTOM_MARGIN; + + /* If required read in new segmentation data for this MB */ + if (pbi->mb.update_mb_segmentation_map) { #if CONFIG_SEGMENTATION if (xd->temporal_update) @@ -196,7 +337,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) if (mb_col != 0) sum += (mi-1)->mbmi.segment_flag; if (mb_row != 0) - sum += (mi-pc->mb_cols)->mbmi.segment_flag; + sum += (mi-pbi->common.mb_cols)->mbmi.segment_flag; if (vp8_read(bc, xd->mb_segment_tree_probs[3+sum]) == 0) { @@ -223,236 +364,237 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi) } - // Read the macroblock coeff skip flag if this feature is in use, else default to 0 - if (pc->mb_no_coeff_skip) - mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false); - else - mbmi->mb_skip_coeff = 0; + /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */ + if (pbi->common.mb_no_coeff_skip) + mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false); + else + mbmi->mb_skip_coeff = 0; - mbmi->uv_mode = DC_PRED; + if 
((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra))) /* inter MB */ + { + int rct[4]; + vp8_prob mv_ref_p [VP8_MVREFS-1]; + MV nearest, nearby, best_mv; - if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra))) /* inter MB */ + if (vp8_read(bc, pbi->prob_last)) + { + mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf))); + } + + vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias); + + vp8_mv_ref_probs(mv_ref_p, rct); + + mbmi->uv_mode = DC_PRED; + switch (mbmi->mode = read_mv_ref(bc, mv_ref_p)) + { + case SPLITMV: + { + const int s = mbmi->partitioning = + vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs); + const int num_p = vp8_mbsplit_count [s]; + int j = 0; + + do /* for each subset j */ { - int rct[4]; - vp8_prob mv_ref_p [VP8_MVREFS-1]; - MV nearest, nearby, best_mv; + B_MODE_INFO bmi; + MV *const mv = & bmi.mv.as_mv; - if (vp8_read(bc, prob_last)) + int k; /* first block in subset j */ + int mv_contz; + k = vp8_mbsplit_offset[s][j]; + + mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv)); + + switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/ { - mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf))); - } - - vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias); - - vp8_mv_ref_probs(mv_ref_p, rct); - - switch (mbmi->mode = read_mv_ref(bc, mv_ref_p)) - { - case SPLITMV: - { - const int s = mbmi->partitioning = vp8_treed_read( - bc, vp8_mbsplit_tree, vp8_mbsplit_probs - ); - const int num_p = vp8_mbsplit_count [s]; - const int *const L = vp8_mbsplits [s]; - int j = 0; - - do /* for each subset j */ - { - B_MODE_INFO *const bmi = mbmi->partition_bmi + j; - MV *const mv = & bmi->mv.as_mv; - - int k = -1; /* 
first block in subset j */ - int mv_contz; - - while (j != L[++k]) - if (k >= 16) -#if CONFIG_DEBUG - assert(0); - -#else - ; -#endif - - mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv)); - - switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob)) - { - case NEW4X4: - read_mv(bc, mv, (const MV_CONTEXT *) mvc); - mv->row += best_mv.row; - mv->col += best_mv.col; -#ifdef VPX_MODE_COUNT - vp8_mv_cont_count[mv_contz][3]++; -#endif - break; - case LEFT4X4: - *mv = vp8_left_bmi(mi, k)->mv.as_mv; -#ifdef VPX_MODE_COUNT - vp8_mv_cont_count[mv_contz][0]++; -#endif - break; - case ABOVE4X4: - *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv; -#ifdef VPX_MODE_COUNT - vp8_mv_cont_count[mv_contz][1]++; -#endif - break; - case ZERO4X4: - *mv = Zero; -#ifdef VPX_MODE_COUNT - vp8_mv_cont_count[mv_contz][2]++; -#endif - break; - default: - break; - } - - /* Fill (uniform) modes, mvs of jth subset. - Must do it here because ensuing subsets can - refer back to us via "left" or "above". 
*/ - do - if (j == L[k]) - mi->bmi[k] = *bmi; - - while (++k < 16); - } - while (++j < num_p); - } - - *mv = mi->bmi[15].mv.as_mv; - - break; /* done with SPLITMV */ - - case NEARMV: - *mv = nearby; - - // Clip "next_nearest" so that it does not extend to far out of image - if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - - goto propagate_mv; - - case NEARESTMV: - *mv = nearest; - - // Clip "next_nearest" so that it does not extend to far out of image - if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - - goto propagate_mv; - - case ZEROMV: - *mv = Zero; - goto propagate_mv; - - case NEWMV: + case NEW4X4: read_mv(bc, mv, (const MV_CONTEXT *) mvc); mv->row += best_mv.row; mv->col += best_mv.col; - /* Encoder should not produce invalid motion vectors, but since - * arbitrary length MVs can be parsed from the bitstream, we - * need to clamp them here in case we're reading bad data to - * avoid a crash. 
- */ -#if CONFIG_DEBUG - assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN)); - assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)); - assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN)); - assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)); -#endif - - if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) - mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; - else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) - mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; - - if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) - mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; - else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) - mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; - - propagate_mv: /* same MV throughout */ - { - //int i=0; - //do - //{ - // mi->bmi[i].mv.as_mv = *mv; - //} - //while( ++i < 16); - - mi->bmi[0].mv.as_mv = *mv; - mi->bmi[1].mv.as_mv = *mv; - mi->bmi[2].mv.as_mv = *mv; - mi->bmi[3].mv.as_mv = *mv; - mi->bmi[4].mv.as_mv = *mv; - mi->bmi[5].mv.as_mv = *mv; - mi->bmi[6].mv.as_mv = *mv; - mi->bmi[7].mv.as_mv = *mv; - mi->bmi[8].mv.as_mv = *mv; - mi->bmi[9].mv.as_mv = *mv; - mi->bmi[10].mv.as_mv = *mv; - mi->bmi[11].mv.as_mv = *mv; - mi->bmi[12].mv.as_mv = *mv; - mi->bmi[13].mv.as_mv = *mv; - mi->bmi[14].mv.as_mv = *mv; - mi->bmi[15].mv.as_mv = *mv; - } - + #ifdef VPX_MODE_COUNT + vp8_mv_cont_count[mv_contz][3]++; + #endif break; + case LEFT4X4: + *mv = vp8_left_bmi(mi, k)->mv.as_mv; + #ifdef VPX_MODE_COUNT + vp8_mv_cont_count[mv_contz][0]++; + #endif + break; + case ABOVE4X4: + *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv; + #ifdef VPX_MODE_COUNT + vp8_mv_cont_count[mv_contz][1]++; + #endif + break; + case ZERO4X4: + *mv = Zero; + #ifdef VPX_MODE_COUNT + vp8_mv_cont_count[mv_contz][2]++; + #endif + break; + default: + break; + } - default:; -#if CONFIG_DEBUG - assert(0); -#endif + mbmi->need_to_clamp_mvs |= (mv->col < mb_to_left_edge) ? 
1 : 0; + mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0; + mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0; + mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0; + + { + /* Fill (uniform) modes, mvs of jth subset. + Must do it here because ensuing subsets can + refer back to us via "left" or "above". */ + unsigned char *fill_offset; + unsigned int fill_count = vp8_mbsplit_fill_count[s]; + + fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]]; + + do { + mi->bmi[ *fill_offset] = bmi; + fill_offset++; + + }while (--fill_count); } } - else - { - /* MB is intra coded */ - - int j = 0; - - do - { - mi->bmi[j].mv.as_mv = Zero; - } - while (++j < 16); - - *mv = Zero; - - if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED) - { - int j = 0; - - do - { - mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob); - } - while (++j < 16); - } - - mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob); - } - - mi++; // next macroblock + while (++j < num_p); } - mi++; // skip left predictor each row + *mv = mi->bmi[15].mv.as_mv; + + break; /* done with SPLITMV */ + + case NEARMV: + *mv = nearby; + /* Clip "next_nearest" so that it does not extend to far out of image */ + mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col; + mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col; + mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row; + mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row; + goto propagate_mv; + + case NEARESTMV: + *mv = nearest; + /* Clip "next_nearest" so that it does not extend to far out of image */ + mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col; + mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col; + mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row; + mv->row = (mv->row > mb_to_bottom_edge) ? 
mb_to_bottom_edge : mv->row; + goto propagate_mv; + + case ZEROMV: + *mv = Zero; + goto propagate_mv; + + case NEWMV: + read_mv(bc, mv, (const MV_CONTEXT *) mvc); + mv->row += best_mv.row; + mv->col += best_mv.col; + + /* Don't need to check this on NEARMV and NEARESTMV modes + * since those modes clamp the MV. The NEWMV mode does not, + * so signal to the prediction stage whether special + * handling may be required. + */ + mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0; + mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0; + mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0; + mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0; + + propagate_mv: /* same MV throughout */ + { + /*int i=0; + do + { + mi->bmi[i].mv.as_mv = *mv; + } + while( ++i < 16);*/ + + mi->bmi[0].mv.as_mv = *mv; + mi->bmi[1].mv.as_mv = *mv; + mi->bmi[2].mv.as_mv = *mv; + mi->bmi[3].mv.as_mv = *mv; + mi->bmi[4].mv.as_mv = *mv; + mi->bmi[5].mv.as_mv = *mv; + mi->bmi[6].mv.as_mv = *mv; + mi->bmi[7].mv.as_mv = *mv; + mi->bmi[8].mv.as_mv = *mv; + mi->bmi[9].mv.as_mv = *mv; + mi->bmi[10].mv.as_mv = *mv; + mi->bmi[11].mv.as_mv = *mv; + mi->bmi[12].mv.as_mv = *mv; + mi->bmi[13].mv.as_mv = *mv; + mi->bmi[14].mv.as_mv = *mv; + mi->bmi[15].mv.as_mv = *mv; + } + break; + default:; + #if CONFIG_DEBUG + assert(0); + #endif + } + } + else + { + /* MB is intra coded */ + int j = 0; + do + { + mi->bmi[j].mv.as_mv = Zero; + } + while (++j < 16); + + if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED) + { + j = 0; + do + { + mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob); + } + while (++j < 16); + } + + mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob); + } + +} + +void vp8_decode_mode_mvs(VP8D_COMP *pbi) +{ + MODE_INFO *mi = pbi->common.mi; + int mb_row = -1; + + vp8_mb_mode_mv_init(pbi); + + while (++mb_row < pbi->common.mb_rows) + { + int 
mb_col = -1; + int mb_to_top_edge; + int mb_to_bottom_edge; + + pbi->mb.mb_to_top_edge = + mb_to_top_edge = -((mb_row * 16)) << 3; + mb_to_top_edge -= LEFT_TOP_MARGIN; + + pbi->mb.mb_to_bottom_edge = + mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3; + mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; + + while (++mb_col < pbi->common.mb_cols) + { + /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/ + if(pbi->common.frame_type == KEY_FRAME) + vp8_kfread_modes(pbi, mi, mb_row, mb_col); + else + vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col); + + mi++; /* next macroblock */ + } + + mi++; /* skip left predictor each row */ } } + diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h index 403007183..940342447 100644 --- a/vp8/decoder/decodemv.h +++ b/vp8/decoder/decodemv.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h index ebc5c27b2..25dee8fe8 100644 --- a/vp8/decoder/decoderthreading.h +++ b/vp8/decoder/decoderthreading.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,11 +15,12 @@ #ifndef _DECODER_THREADING_H #define _DECODER_THREADING_H - -extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi, - MACROBLOCKD *xd); -extern void vp8_stop_lfthread(VP8D_COMP *pbi); -extern void vp8_start_lfthread(VP8D_COMP *pbi); +#if CONFIG_MULTITHREAD +extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); extern void vp8_decoder_remove_threads(VP8D_COMP *pbi); extern void vp8_decoder_create_threads(VP8D_COMP *pbi); +extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); +extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); +#endif + #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 01cd7aedf..06204fec6 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -20,9 +21,10 @@ #include "alloccommon.h" #include "entropymode.h" #include "quant_common.h" -#include "segmentation_common.h" +#include "vpx_scale/vpxscale.h" +#include "vpx_scale/yv12extend.h" #include "setupintrarecon.h" -#include "demode.h" + #include "decodemv.h" #include "extend.h" #include "vpx_mem/vpx_mem.h" @@ -38,56 +40,53 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi) { - int r, c; int i; int Q; VP8_COMMON *const pc = & pbi->common; for (Q = 0; Q < QINDEX_RANGE; Q++) { - pc->Y1dequant[Q][0][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); - pc->Y2dequant[Q][0][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); - pc->UVdequant[Q][0][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); + pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); + pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); + pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); - // all the ac values = ; + /* all the ac values = ; */ for (i = 1; i < 16; i++) { int rc = vp8_default_zig_zag1d[i]; - r = (rc >> 2); - c = (rc & 3); - pc->Y1dequant[Q][r][c] = (short)vp8_ac_yquant(Q); - pc->Y2dequant[Q][r][c] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); - pc->UVdequant[Q][r][c] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); + pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q); + pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); + pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); } } } -static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) +void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) { int i; int QIndex; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; VP8_COMMON *const pc = & pbi->common; - // Decide whether to use the default or alternate baseline Q value. 
+ /* Decide whether to use the default or alternate baseline Q value. */ if (xd->segmentation_enabled) { - // Abs Value + /* Abs Value */ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - // Delta Value + /* Delta Value */ else { QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; - QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range + QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ } } else QIndex = pc->base_qindex; - // Set up the block level dequant pointers + /* Set up the block level dequant pointers */ for (i = 0; i < 16; i++) { xd->block[i].dequant = pc->Y1dequant[QIndex]; @@ -108,11 +107,12 @@ static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) #define RTCD_VTABLE(x) NULL #endif -//skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it -// to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. +/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it + * to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy. 
+ */ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) { - if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME) + if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_build_intra_predictors_mbuv_s(xd); @@ -125,42 +125,114 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) } } -static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd) +static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { - if (xd->frame_type == KEY_FRAME || xd->mbmi.ref_frame == INTRA_FRAME) + /* If the MV points so far into the UMV border that no visible pixels + * are used for reconstruction, the subpel part of the MV can be + * discarded and the MV limited to 16 pixels with equivalent results. + * + * This limit kicks in at 19 pixels for the top and left edges, for + * the 16 pixels plus 3 taps right of the central pixel when subpel + * filtering. The bottom and right edges use 16 pixels plus 2 pixels + * left of the central pixel when filtering. + */ + if (mv->col < (xd->mb_to_left_edge - (19 << 3))) + mv->col = xd->mb_to_left_edge - (16 << 3); + else if (mv->col > xd->mb_to_right_edge + (18 << 3)) + mv->col = xd->mb_to_right_edge + (16 << 3); + + if (mv->row < (xd->mb_to_top_edge - (19 << 3))) + mv->row = xd->mb_to_top_edge - (16 << 3); + else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) + mv->row = xd->mb_to_bottom_edge + (16 << 3); +} + +/* A version of the above function for chroma block MVs.*/ +static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) +{ + mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; + mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; + + mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; + mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? 
(xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; +} + +void clamp_mvs(MACROBLOCKD *xd) +{ + if (xd->mode_info_context->mbmi.mode == SPLITMV) + { + int i; + + for (i=0; i<16; i++) + clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd); + for (i=16; i<24; i++) + clamp_uvmv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd); + } + else + { + clamp_mv_to_umv_border(&xd->mode_info_context->mbmi.mv.as_mv, xd); + clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd); + } + +} + +void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd) +{ + int eobtotal = 0; + int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs; + + if (xd->mode_info_context->mbmi.mb_skip_coeff) + { + vp8_reset_mb_tokens_context(xd); + } + else + { + eobtotal = vp8_decode_mb_tokens(pbi, xd); + } + + /* Perform temporary clamping of the MV to be used for prediction */ + if (do_clamp) + { + clamp_mvs(xd); + } + + xd->mode_info_context->mbmi.dc_diff = 1; + + if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0) + { + xd->mode_info_context->mbmi.dc_diff = 0; + skip_recon_mb(pbi, xd); + return; + } + + if (xd->segmentation_enabled) + mb_init_dequantizer(pbi, xd); + + /* do prediction */ + if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_build_intra_predictors_mbuv(xd); - if (xd->mbmi.mode != B_PRED) + if (xd->mode_info_context->mbmi.mode != B_PRED) { vp8_build_intra_predictors_mby_ptr(xd); - vp8_recon16x16mb(RTCD_VTABLE(recon), xd); - } - else - { - vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd); + } else { + vp8_intra_prediction_down_copy(xd); } } else { vp8_build_inter_predictors_mb(xd); - vp8_recon16x16mb(RTCD_VTABLE(recon), xd); } -} - -static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int i; - BLOCKD *b = &xd->block[24]; - - - if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV) + /* dequantization and idct */ + if (xd->mode_info_context->mbmi.mode != B_PRED && 
xd->mode_info_context->mbmi.mode != SPLITMV) { + BLOCKD *b = &xd->block[24]; DEQUANT_INVOKE(&pbi->dequant, block)(b); - // do 2nd order transform on the dc block - if (b->eob > 1) + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) { IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; @@ -178,85 +250,49 @@ static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd) ((int *)b->qcoeff)[0] = 0; } - + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff); + } + else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED) + { for (i = 0; i < 16; i++) { - b = &xd->block[i]; + BLOCKD *b = &xd->block[i]; + vp8_predict_intra4x4(b, b->bmi.mode, b->predictor); - if (b->eob > 1) + if (xd->eobs[i] > 1) { - DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]); + DEQUANT_INVOKE(&pbi->dequant, idct_add) + (b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } else { - IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32); - } - } - - for (i = 16; i < 24; i++) - { - b = &xd->block[i]; - - if (b->eob > 1) - { - DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16); - } - else - { - IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16); + IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) + (b->qcoeff[0] * b->dequant[0], b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); ((int *)b->qcoeff)[0] = 0; } } + } else { - for (i = 0; i < 24; i++) - { - - b = &xd->block[i]; - - if (b->eob > 1) - { - DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16))); - } - else - { - IDCT_INVOKE(RTCD_VTABLE(idct), 
idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16))); - ((int *)b->qcoeff)[0] = 0; - } - } + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); } + + DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); } -void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd) -{ - int eobtotal = 0; - - if (xd->mbmi.mb_skip_coeff) - { - vp8_reset_mb_tokens_context(xd); - } - else - { - eobtotal = vp8_decode_mb_tokens(pbi, xd); - } - - xd->mode_info_context->mbmi.dc_diff = 1; - - if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0) - { - xd->mode_info_context->mbmi.dc_diff = 0; - skip_recon_mb(pbi, xd); - return; - } - - if (xd->segmentation_enabled) - mb_init_dequantizer(pbi, xd); - - de_quantand_idct(pbi, xd); - reconstruct_mb(pbi, xd); -} static int get_delta_q(vp8_reader *bc, int prev, int *q_update) { @@ -293,18 +329,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi, int i; int recon_yoffset, recon_uvoffset; int mb_col; - int recon_y_stride = pc->last_frame.y_stride; - int recon_uv_stride = pc->last_frame.uv_stride; + int ref_fb_idx = pc->lst_fb_idx; + int dst_fb_idx = pc->new_fb_idx; + int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); + vpx_memset(&pc->left_context, 0, sizeof(pc->left_context)); recon_yoffset = mb_row * recon_y_stride * 16; recon_uvoffset = mb_row * recon_uv_stride * 8; - // reset above block coeffs + /* reset above block coeffs */ - xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT]; - xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT]; - xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT]; - xd->above_context[Y2CONTEXT] = 
pc->above_context[Y2CONTEXT]; + xd->above_context = pc->above_context; xd->up_available = (mb_row != 0); xd->mb_to_top_edge = -((mb_row * 16)) << 3; @@ -312,10 +347,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi, for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi - vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) ); - if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED) + if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED) { for (i = 0; i < 16; i++) { @@ -324,48 +357,38 @@ void vp8_decode_mb_row(VP8D_COMP *pbi, } } - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to values that are in 1/8th pel units + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset; - xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset; - xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset; + xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); - // Select the appropriate reference frame for this MB - if (xd->mbmi.ref_frame == LAST_FRAME) - { - xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset; - } - else if (xd->mbmi.ref_frame == GOLDEN_FRAME) - { - // Golden frame reconstruction buffer - xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = 
pc->golden_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset; - } + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = pc->gld_fb_idx; else - { - // Alternate reference frame reconstruction buffer - xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset; - } + ref_fb_idx = pc->alt_fb_idx; + + xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; vp8_build_uvmvs(xd, pc->full_pixel); /* - if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0) + if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0) pbi->debugoutput =1; else pbi->debugoutput =0; */ - vp8dx_bool_decoder_fill(xd->current_bc); vp8_decode_macroblock(pbi, xd); @@ -374,25 +397,17 @@ void vp8_decode_mb_row(VP8D_COMP *pbi, ++xd->mode_info_context; /* next mb */ - xd->gf_active_ptr++; // GF useage flag for next MB + xd->above_context++; - xd->above_context[Y1CONTEXT] += 4; - xd->above_context[UCONTEXT ] += 2; - xd->above_context[VCONTEXT ] += 2; - xd->above_context[Y2CONTEXT] ++; - - pbi->current_mb_col_main = mb_col; } - // adjust to the next row of mbs + /* adjust to the next row of mbs */ vp8_extend_mb_row( - &pc->new_frame, + &pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 ); ++xd->mode_info_context; /* skip prediction column */ - - pbi->last_mb_row_decoded = mb_row; } @@ -432,7 +447,7 @@ static void setup_token_decoder(VP8D_COMP *pbi, for (i = 0; i < num_part; i++) { const unsigned char *partition_size_ptr = cx_data + i * 3; - unsigned int 
partition_size; + ptrdiff_t partition_size; /* Calculate the length of this partition. The last partition * size is implicit. @@ -446,7 +461,8 @@ static void setup_token_decoder(VP8D_COMP *pbi, partition_size = user_data_end - partition; } - if (partition + partition_size > user_data_end) + if (partition + partition_size > user_data_end + || partition + partition_size < partition) vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition " "%d length", i + 1); @@ -473,18 +489,7 @@ static void stop_token_decoder(VP8D_COMP *pbi) VP8_COMMON *pc = &pbi->common; if (pc->multi_token_partition != ONE_PARTITION) - { - int num_part = (1 << pc->multi_token_partition); - - for (i = 0; i < num_part; i++) - { - vp8dx_stop_decode(&pbi->mbc[i]); - } - vpx_free(pbi->mbc); - } - else - vp8dx_stop_decode(& pbi->bc2); } static void init_frame(VP8D_COMP *pbi) @@ -494,7 +499,7 @@ static void init_frame(VP8D_COMP *pbi) if (pc->frame_type == KEY_FRAME) { - // Various keyframe initializations + /* Various keyframe initializations */ vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_init_mbmode_probs(pc); @@ -502,22 +507,23 @@ static void init_frame(VP8D_COMP *pbi) vp8_default_coef_probs(pc); vp8_kf_default_bmode_probs(pc->kf_bmode_prob); - // reset the segment feature data to 0 with delta coding (Default state). + /* reset the segment feature data to 0 with delta coding (Default state). */ vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); xd->mb_segement_abs_delta = SEGMENT_DELTADATA; - // reset the mode ref deltasa for loop filter + /* reset the mode ref deltasa for loop filter */ vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); - // All buffers are implicitly updated on key frames. + /* All buffers are implicitly updated on key frames. 
*/ pc->refresh_golden_frame = 1; pc->refresh_alt_ref_frame = 1; pc->copy_buffer_to_gf = 0; pc->copy_buffer_to_arf = 0; - // Note that Golden and Altref modes cannot be used on a key frame so - // ref_frame_sign_bias[] is undefined and meaningless + /* Note that Golden and Altref modes cannot be used on a key frame so + * ref_frame_sign_bias[] is undefined and meaningless + */ pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; } @@ -528,7 +534,7 @@ static void init_frame(VP8D_COMP *pbi) else pc->mcomp_filter_type = BILINEAR; - // To enable choice of different interploation filters + /* To enable choice of different interploation filters */ if (pc->mcomp_filter_type == SIXTAP) { xd->subpixel_predict = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4); @@ -548,7 +554,7 @@ static void init_frame(VP8D_COMP *pbi) xd->left_context = &pc->left_context; xd->mode_info_context = pc->mi; xd->frame_type = pc->frame_type; - xd->mbmi.mode = DC_PRED; + xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_stride = pc->mode_info_stride; } @@ -559,11 +565,14 @@ int vp8_decode_frame(VP8D_COMP *pbi) MACROBLOCKD *const xd = & pbi->mb; const unsigned char *data = (const unsigned char *)pbi->Source; const unsigned char *const data_end = data + pbi->source_sz; - int first_partition_length_in_bytes; + ptrdiff_t first_partition_length_in_bytes; int mb_row; int i, j, k, l; const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; + if (data_end - data < 3) + vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet"); pc->frame_type = (FRAME_TYPE)(data[0] & 1); pc->version = (data[0] >> 1) & 7; pc->show_frame = (data[0] >> 4) & 1; @@ -571,7 +580,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5; data += 3; - if (data + first_partition_length_in_bytes > data_end) + if (data + first_partition_length_in_bytes > data_end + || data + first_partition_length_in_bytes < data) 
vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition 0 length"); vp8_setup_version(pc); @@ -581,7 +591,7 @@ int vp8_decode_frame(VP8D_COMP *pbi) const int Width = pc->Width; const int Height = pc->Height; - // vet via sync code + /* vet via sync code */ if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a) vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); @@ -594,6 +604,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) if (Width != pc->Width || Height != pc->Height) { + int prev_mb_rows = pc->mb_rows; + if (pc->Width <= 0) { pc->Width = Width; @@ -608,9 +620,14 @@ int vp8_decode_frame(VP8D_COMP *pbi) "Invalid frame height"); } - if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height)) + if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); + +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd) + vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); +#endif } } @@ -630,11 +647,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); } - // Is segmentation enabled + /* Is segmentation enabled */ xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); if (xd->segmentation_enabled) { - // Signal whether or not the segmentation map is being explicitly updated this frame. + /* Signal whether or not the segmentation map is being explicitly updated this frame. 
*/ xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); @@ -644,12 +661,12 @@ int vp8_decode_frame(VP8D_COMP *pbi) vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); - // For each segmentation feature (Quant and loop filter level) + /* For each segmentation feature (Quant and loop filter level) */ for (i = 0; i < MB_LVL_MAX; i++) { for (j = 0; j < MAX_MB_SEGMENTS; j++) { - // Frame level data + /* Frame level data */ if (vp8_read_bit(bc)) { xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); @@ -665,60 +682,60 @@ int vp8_decode_frame(VP8D_COMP *pbi) if (xd->update_mb_segmentation_map) { - // Which macro block level features are enabled + /* Which macro block level features are enabled */ vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); #if CONFIG_SEGMENTATION - // Read the probs used to decode the segment id for each macro block. + /* Read the probs used to decode the segment id for each macro block. */ for (i = 0; i < MB_FEATURE_TREE_PROBS+3; i++) #else for (i = 0; i < MB_FEATURE_TREE_PROBS; i++) #endif { - // If not explicitly set value is defaulted to 255 by memset above + /* If not explicitly set value is defaulted to 255 by memset above */ if (vp8_read_bit(bc)) xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); } } } - // Read the loop filter level and type + /* Read the loop filter level and type */ pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc); pc->filter_level = vp8_read_literal(bc, 6); pc->sharpness_level = vp8_read_literal(bc, 3); - // Read in loop filter deltas applied at the MB level based on mode or ref frame. + /* Read in loop filter deltas applied at the MB level based on mode or ref frame. 
*/ xd->mode_ref_lf_delta_update = 0; xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); if (xd->mode_ref_lf_delta_enabled) { - // Do the deltas need to be updated + /* Do the deltas need to be updated */ xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); if (xd->mode_ref_lf_delta_update) { - // Send update + /* Send update */ for (i = 0; i < MAX_REF_LF_DELTAS; i++) { if (vp8_read_bit(bc)) { - //sign = vp8_read_bit( bc ); + /*sign = vp8_read_bit( bc );*/ xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - if (vp8_read_bit(bc)) // Apply sign + if (vp8_read_bit(bc)) /* Apply sign */ xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; } } - // Send update + /* Send update */ for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { if (vp8_read_bit(bc)) { - //sign = vp8_read_bit( bc ); + /*sign = vp8_read_bit( bc );*/ xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); - if (vp8_read_bit(bc)) // Apply sign + if (vp8_read_bit(bc)) /* Apply sign */ xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; } } @@ -728,11 +745,11 @@ int vp8_decode_frame(VP8D_COMP *pbi) setup_token_decoder(pbi, data + first_partition_length_in_bytes); xd->current_bc = &pbi->bc2; - // Read the default quantizers. + /* Read the default quantizers. */ { int Q, q_update; - Q = vp8_read_literal(bc, 7); // AC 1st order Q = default + Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ pc->base_qindex = Q; q_update = 0; pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); @@ -744,20 +761,21 @@ int vp8_decode_frame(VP8D_COMP *pbi) if (q_update) vp8cx_init_de_quantizer(pbi); - // MB level dequantizer setup + /* MB level dequantizer setup */ mb_init_dequantizer(pbi, &pbi->mb); } - // Determine if the golden frame or ARF buffer should be updated and how. - // For all non key frames the GF and ARF refresh flags and sign bias - // flags must be set explicitly. + /* Determine if the golden frame or ARF buffer should be updated and how. 
+ * For all non key frames the GF and ARF refresh flags and sign bias + * flags must be set explicitly. + */ if (pc->frame_type != KEY_FRAME) { - // Should the GF or ARF be updated from the current frame + /* Should the GF or ARF be updated from the current frame */ pc->refresh_golden_frame = vp8_read_bit(bc); pc->refresh_alt_ref_frame = vp8_read_bit(bc); - // Buffer to buffer copy flags. + /* Buffer to buffer copy flags. */ pc->copy_buffer_to_gf = 0; if (!pc->refresh_golden_frame) @@ -793,9 +811,8 @@ int vp8_decode_frame(VP8D_COMP *pbi) fclose(z); } - vp8dx_bool_decoder_fill(bc); { - // read coef probability tree + /* read coef probability tree */ for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < COEF_BANDS; j++) @@ -813,57 +830,54 @@ int vp8_decode_frame(VP8D_COMP *pbi) } } - vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG)); - vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG)); + vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG)); + vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG)); #if CONFIG_SEGMENTATION // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(pbi->segmentation_map, vpx_calloc((pc->mb_rows * pc->mb_cols), 1)); #endif - // set up frame new frame for intra coded blocks - vp8_setup_intra_recon(&pc->new_frame); + /* set up frame new frame for intra coded blocks */ + if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level)) + vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]); vp8_setup_block_dptrs(xd); vp8_build_block_doffsets(xd); - // clear out the coeff buffer + /* clear out the coeff buffer */ vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); - // Read the mb_no_coeff_skip flag + /* Read the mb_no_coeff_skip flag */ pc->mb_no_coeff_skip = (int)vp8_read_bit(bc); - if (pc->frame_type == KEY_FRAME) - vp8_kfread_modes(pbi); - else - vp8_decode_mode_mvs(pbi); - // reset since these guys are used as 
iterators - vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4); - vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2); - vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2); - vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols); - - xd->gf_active_ptr = (signed char *)pc->gf_active_flags; // Point to base of GF active flags data structure + vp8_decode_mode_mvs(pbi); + vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO)); - - if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0) - vp8_start_lfthread(pbi); - - if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION) + if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION) { - vp8_mtdecode_mb_rows(pbi, xd); + vp8mt_decode_mb_rows(pbi, xd); + if(pbi->common.filter_level) + { + /*vp8_mt_loop_filter_frame(pbi);*/ /*cm, &pbi->mb, cm->filter_level);*/ + + pc->last_frame_type = pc->frame_type; + pc->last_filter_type = pc->filter_type; + pc->last_sharpness_level = pc->sharpness_level; + } + vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]); /*cm->frame_to_show);*/ } else { int ibc = 0; - int num_part = 1 << pbi->common.multi_token_partition; + int num_part = 1 << pc->multi_token_partition; - // Decode the individual macro block + /* Decode the individual macro block */ for (mb_row = 0; mb_row < pc->mb_rows; mb_row++) { @@ -878,20 +892,19 @@ int vp8_decode_frame(VP8D_COMP *pbi) vp8_decode_mb_row(pbi, pc, mb_row, xd); } - - pbi->last_mb_row_decoded = mb_row; } stop_token_decoder(pbi); - vp8dx_stop_decode(bc); + /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); */ - // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes \n",bc->pos+pbi->bc2.pos); - - // If this was a kf or Gf note 
the Q used - if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame) + /* If this was a kf or Gf note the Q used */ + if ((pc->frame_type == KEY_FRAME) || + pc->refresh_golden_frame || pc->refresh_alt_ref_frame) + { pc->last_kf_gf_q = pc->base_qindex; + } if (pc->refresh_entropy_probs == 0) { diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c deleted file mode 100644 index 74fe91803..000000000 --- a/vp8/decoder/demode.c +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "onyxd_int.h" -#include "entropymode.h" -#include "findnearmv.h" - - -int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_bmode_tree, p); - - return i; -} - - -int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_ymode_tree, p); - - return i; -} - -int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); - - return i; -} - - - -int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p) -{ - const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); - - return i; -} - -void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) -{ - // Is segmentation enabled - if (x->segmentation_enabled && x->update_mb_segmentation_map) - { - // If so then read the segment id. 
- if (vp8_read(r, x->mb_segment_tree_probs[0])) - mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); - else - mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); - } -} - -void vp8_kfread_modes(VP8D_COMP *pbi) -{ - VP8_COMMON *const cp = & pbi->common; - vp8_reader *const bc = & pbi->bc; - - MODE_INFO *m = cp->mi; - const int ms = cp->mode_info_stride; -#if CONFIG_SEGMENTATION - int left_id,above_id; - int i; -#endif - int mb_row = -1; - vp8_prob prob_skip_false = 0; - - if (cp->mb_no_coeff_skip) - prob_skip_false = (vp8_prob)(vp8_read_literal(bc, 8)); - - while (++mb_row < cp->mb_rows) - { - int mb_col = -1; - - while (++mb_col < cp->mb_cols) - { - MB_PREDICTION_MODE y_mode; - vp8dx_bool_decoder_fill(bc); - - // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default) - // By default on a key frame reset all MBs to segment 0 - m->mbmi.segment_id = 0; - - if (pbi->mb.update_mb_segmentation_map) - { - -#if CONFIG_SEGMENTATION - vp8_read_mb_features(bc, &m->mbmi, &pbi->mb); - pbi->segmentation_map[(mb_row * cp->mb_cols) + mb_col] = m->mbmi.segment_id; -#else - vp8_read_mb_features(bc, &m->mbmi, &pbi->mb); -#endif - } - - // Read the macroblock coeff skip flag if this feature is in use, else default to 0 - if (cp->mb_no_coeff_skip) - m->mbmi.mb_skip_coeff = vp8_read(bc, prob_skip_false); - else - m->mbmi.mb_skip_coeff = 0; - - y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, cp->kf_ymode_prob); - - m->mbmi.ref_frame = INTRA_FRAME; - - if ((m->mbmi.mode = y_mode) == B_PRED) - { - int i = 0; - - do - { - const B_PREDICTION_MODE A = vp8_above_bmi(m, i, ms)->mode; - const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode; - - m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, cp->kf_bmode_prob [A] [L]); - } - while (++i < 16); - } - else - { - int BMode; - int i = 0; - - switch (y_mode) - { - case DC_PRED: - BMode = B_DC_PRED; - break; - case V_PRED: - BMode = 
B_VE_PRED; - break; - case H_PRED: - BMode = B_HE_PRED; - break; - case TM_PRED: - BMode = B_TM_PRED; - break; - default: - BMode = B_DC_PRED; - break; - } - - do - { - m->bmi[i].mode = (B_PREDICTION_MODE)BMode; - } - while (++i < 16); - } - - (m++)->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, cp->kf_uv_mode_prob); - } - - m++; // skip the border - } -} diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h deleted file mode 100644 index 51bbc5e7a..000000000 --- a/vp8/decoder/demode.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "onyxd_int.h" - -/* Read (intra) modes for all blocks in a keyframe */ - -void vp8_kfread_modes(VP8D_COMP *pbi); - -/* Intra mode for a Y subblock */ - -int vp8_read_bmode(vp8_reader *, const vp8_prob *); - -/* MB intra Y mode trees differ for key and inter frames. */ - -int vp8_read_ymode(vp8_reader *, const vp8_prob *); -int vp8_kfread_ymode(vp8_reader *, const vp8_prob *); - -/* MB intra UV mode trees are the same for key and inter frames. */ - -int vp8_read_uv_mode(vp8_reader *, const vp8_prob *); - -/* Read any macroblock-level features that may be present. */ - -void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *); diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index 14798d9af..84a9fd943 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -23,7 +24,7 @@ void vp8_dequantize_b_c(BLOCKD *d) int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; - short *DQC = &d->dequant[0][0]; + short *DQC = d->dequant; for (i = 0; i < 16; i++) { @@ -31,8 +32,12 @@ void vp8_dequantize_b_c(BLOCKD *d) } } -void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch) +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride) { + short output[16]; + short *diff_ptr = output; + int r, c; int i; for (i = 0; i < 16; i++) @@ -40,13 +45,40 @@ void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch) input[i] = dq[i] * input[i]; } - vp8_short_idct4x4llm_c(input, output, pitch); + /* the idct halves ( >> 1) the pitch */ + vp8_short_idct4x4llm_c(input, output, 4 << 1); + vpx_memset(input, 0, 32); + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 4; + pred += pitch; + } } -void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc) +void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride, + int Dc) { int i; + short output[16]; + short *diff_ptr = output; + int r, c; input[0] = (short)Dc; @@ -55,6 
+87,28 @@ void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, in input[i] = dq[i] * input[i]; } - vp8_short_idct4x4llm_c(input, output, pitch); + /* the idct halves ( >> 1) the pitch */ + vp8_short_idct4x4llm_c(input, output, 4 << 1); + vpx_memset(input, 0, 32); + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int a = diff_ptr[c] + pred[c]; + + if (a < 0) + a = 0; + + if (a > 255) + a = 255; + + dest[c] = (unsigned char) a; + } + + dest += stride; + diff_ptr += 4; + pred += pitch; + } } diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h index d16b02e58..b78e39c1d 100644 --- a/vp8/decoder/dequantize.h +++ b/vp8/decoder/dequantize.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -15,11 +16,31 @@ #define prototype_dequant_block(sym) \ void sym(BLOCKD *x) -#define prototype_dequant_idct(sym) \ - void sym(short *input, short *dq, short *output, int pitch) +#define prototype_dequant_idct_add(sym) \ + void sym(short *input, short *dq, \ + unsigned char *pred, unsigned char *output, \ + int pitch, int stride) -#define prototype_dequant_idct_dc(sym) \ - void sym(short *input, short *dq, short *output, int pitch, int dc) +#define prototype_dequant_dc_idct_add(sym) \ + void sym(short *input, short *dq, \ + unsigned char *pred, unsigned char *output, \ + int pitch, int stride, \ + int dc) + +#define prototype_dequant_dc_idct_add_y_block(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs, short *dc) + +#define prototype_dequant_idct_add_y_block(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst, \ + int stride, char *eobs) + +#define prototype_dequant_idct_add_uv_block(sym) \ + void sym(short *q, short *dq, \ + unsigned char *pre, unsigned char *dst_u, \ + unsigned char *dst_v, int stride, char *eobs) #if ARCH_X86 || ARCH_X86_64 #include "x86/dequantize_x86.h" @@ -34,25 +55,52 @@ #endif extern prototype_dequant_block(vp8_dequant_block); -#ifndef vp8_dequant_idct -#define vp8_dequant_idct vp8_dequant_idct_c +#ifndef vp8_dequant_idct_add +#define vp8_dequant_idct_add vp8_dequant_idct_add_c #endif -extern prototype_dequant_idct(vp8_dequant_idct); +extern prototype_dequant_idct_add(vp8_dequant_idct_add); -#ifndef vp8_dequant_idct_dc -#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c +#ifndef vp8_dequant_dc_idct_add +#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c #endif -extern prototype_dequant_idct_dc(vp8_dequant_idct_dc); +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add); + +#ifndef vp8_dequant_dc_idct_add_y_block +#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_c +#endif +extern 
prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block); + +#ifndef vp8_dequant_idct_add_y_block +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c +#endif +extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block); + +#ifndef vp8_dequant_idct_add_uv_block +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c +#endif +extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block); typedef prototype_dequant_block((*vp8_dequant_block_fn_t)); -typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t)); -typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t)); + +typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t)); + +typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t)); + +typedef prototype_dequant_dc_idct_add_y_block((*vp8_dequant_dc_idct_add_y_block_fn_t)); + +typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t)); + +typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t)); + typedef struct { - vp8_dequant_block_fn_t block; - vp8_dequant_idct_fn_t idct; - vp8_dequant_idct_dc_fn_t idct_dc; + vp8_dequant_block_fn_t block; + vp8_dequant_idct_add_fn_t idct_add; + vp8_dequant_dc_idct_add_fn_t dc_idct_add; + vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; + vp8_dequant_idct_add_y_block_fn_t idct_add_y_block; + vp8_dequant_idct_add_uv_block_fn_t idct_add_uv_block; } vp8_dequant_rtcd_vtable_t; #if CONFIG_RUNTIME_CPU_DETECT diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index a42f18dd7..7d013d240 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -13,12 +14,12 @@ #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" +#include "detokenize.h" -#define BR_COUNT 8 #define BOOL_DATA UINT8 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES -DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X}; +DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X}; #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 @@ -43,49 +44,72 @@ typedef struct DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) = { - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ZERO_TOKEN - { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //ONE_TOKEN - { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //TWO_TOKEN - { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //THREE_TOKEN - { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //FOUR_TOKEN - { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY1 - { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY2 - { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY3 - { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, //DCT_VAL_CATEGORY4 - { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 
0, 0, 0 } }, //DCT_VAL_CATEGORY5 - { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, //DCT_VAL_CATEGORY6 - { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, // EOB TOKEN + { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ZERO_TOKEN */ + { 1, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* ONE_TOKEN */ + { 2, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* TWO_TOKEN */ + { 3, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* THREE_TOKEN */ + { 4, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* FOUR_TOKEN */ + { 5, 0, { 159, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY1 */ + { 7, 1, { 145, 165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY2 */ + { 11, 2, { 140, 148, 173, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY3 */ + { 19, 3, { 135, 140, 155, 176, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY4 */ + { 35, 4, { 130, 134, 141, 157, 180, 0, 0, 0, 0, 0, 0, 0 } }, /* DCT_VAL_CATEGORY5 */ + { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0 } }, /* DCT_VAL_CATEGORY6 */ + { 0, -1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, /* EOB TOKEN */ }; void vp8_reset_mb_tokens_context(MACROBLOCKD *x) { - ENTROPY_CONTEXT **const A = x->above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; - - ENTROPY_CONTEXT *a; - ENTROPY_CONTEXT *l; - int i; - - for (i = 0; i < 24; i++) + /* Clear entropy contexts for Y2 blocks */ + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { - - a = A[ vp8_block2context[i] ] + vp8_block2above[i]; - l = L[ vp8_block2context[i] ] + vp8_block2left[i]; - - *a = *l = 0; + vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } - - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) + else { - a = A[Y2CONTEXT] + vp8_block2above[24]; - l = L[Y2CONTEXT] + vp8_block2left[24]; - *a = *l = 0; + vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + 
vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); } - - } -DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); + +#if CONFIG_ARM_ASM_DETOK +/* mashup of vp8_block2left and vp8_block2above so we only need one pointer + * for the assembly version. + */ +DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) = +{ + /* vp8_block2left */ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + /* vp8_block2above */ + 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 +}; + +void vp8_init_detokenizer(VP8D_COMP *dx) +{ + const VP8_COMMON *const oc = & dx->common; + MACROBLOCKD *x = & dx->mb; + + dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree; + dx->detoken.ptr_block2leftabove = vp8_block2leftabove; + dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x; + dx->detoken.scan = vp8_default_zig_zag1d; + dx->detoken.teb_base_ptr = vp8d_token_extra_bits2; + dx->detoken.qcoeff_start_ptr = &x->qcoeff[0]; + + dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]); + dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]); + dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]); + dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]); +} +#endif + +DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]); +#define FILL \ + if(count < 0) \ + VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend); + #define NORMALIZE \ /*if(range < 0x80)*/ \ { \ @@ -93,17 +117,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); range <<= shift; \ value <<= shift; \ count -= shift; \ - if(count <= 0) \ - { \ - count += BR_COUNT ; \ - value |= (*bufptr) << (BR_COUNT-count); \ - bufptr = br_ptr_advance(bufptr, 1); \ - } \ } #define DECODE_AND_APPLYSIGN(value_to_sign) \ split = (range + 1) >> 1; \ - if ( (value >> 8) < split ) \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ { \ range = split; \ v= 
value_to_sign; \ @@ -111,28 +131,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); else \ { \ range = range-split; \ - value = value-(split<<8); \ + value = value-bigsplit; \ v = -value_to_sign; \ } \ range +=range; \ value +=value; \ - if (!--count) \ - { \ - count = BR_COUNT; \ - value |= *bufptr; \ - bufptr = br_ptr_advance(bufptr, 1); \ - } + count--; #define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \ { \ split = 1 + ((( probability*(range-1) ) )>> 8); \ - if ( (value >> 8) < split ) \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ { \ range = split; \ NORMALIZE \ goto branch; \ } \ - value -= (split<<8); \ + value -= bigsplit; \ range = range - split; \ NORMALIZE \ } @@ -140,7 +157,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); #define DECODE_AND_LOOP_IF_ZERO(probability,branch) \ { \ split = 1 + ((( probability*(range-1) ) ) >> 8); \ - if ( (value >> 8) < split ) \ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if ( value < bigsplit ) \ { \ range = split; \ NORMALIZE \ @@ -151,7 +170,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); goto branch; \ } goto BLOCK_FINISHED; /*for malformed input */\ } \ - value -= (split<<8); \ + value -= bigsplit; \ range = range - split; \ NORMALIZE \ } @@ -169,10 +188,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]); #define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\ split = 1 + (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \ - if(value >= (split<<8))\ + bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \ + FILL \ + if(value >= bigsplit)\ {\ range = range-split;\ - value = value-(split<<8);\ + value = value-bigsplit;\ val += ((UINT16)1<above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; + int eobtotal = 0; + int i, type; + + dx->detoken.current_bc = x->current_bc; + 
dx->detoken.A = x->above_context; + dx->detoken.L = x->left_context; + + type = 3; + + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + { + type = 1; + eobtotal -= 16; + } + + vp8_decode_mb_tokens_v6(&dx->detoken, type); + + for (i = 0; i < 25; i++) + { + x->eobs[i] = dx->detoken.eob[i]; + eobtotal += dx->detoken.eob[i]; + } + + return eobtotal; +} +#else +int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) +{ + ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; const VP8_COMMON *const oc = & dx->common; BOOL_DECODER *bc = x->current_bc; + char *eobs = x->eobs; + ENTROPY_CONTEXT *a; ENTROPY_CONTEXT *l; int i; @@ -198,11 +250,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) register int count; const BOOL_DATA *bufptr; + const BOOL_DATA *bufend; register unsigned int range; - register unsigned int value; + VP8_BD_VALUE value; const int *scan; register unsigned int shift; UINT32 split; + VP8_BD_VALUE bigsplit; INT16 *qcoeff_ptr; const vp8_prob *coef_probs; @@ -210,46 +264,44 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) int stop; INT16 val, bits_count; INT16 c; - INT16 t; INT16 v; const vp8_prob *Prob; - //int *scan; type = 3; i = 0; stop = 16; - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) + scan = vp8_default_zig_zag1d; + qcoeff_ptr = &x->qcoeff[0]; + + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { i = 24; stop = 24; type = 1; - qcoeff_ptr = &x->qcoeff[24*16]; - scan = vp8_default_zig_zag1d; + qcoeff_ptr += 24*16; eobtotal -= 16; } - else - { - scan = vp8_default_zig_zag1d; - qcoeff_ptr = &x->qcoeff[0]; - } + bufend = bc->user_buffer_end; + bufptr = bc->user_buffer; + value = bc->value; count = bc->count; range = bc->range; - value = bc->value; - bufptr = bc->read_ptr; coef_probs = oc->fc.coef_probs [type] [ 0 ] [0]; BLOCK_LOOP: - a = A[ vp8_block2context[i] ] + 
vp8_block2above[i]; - l = L[ vp8_block2context[i] ] + vp8_block2left[i]; + a = A + vp8_block2above[i]; + l = L + vp8_block2left[i]; + c = (INT16)(!type); - VP8_COMBINEENTROPYCONTEXTS(t, *a, *l); + /*Dest = ((A)!=0) + ((B)!=0);*/ + VP8_COMBINEENTROPYCONTEXTS(v, *a, *l); Prob = coef_probs; - Prob += t * ENTROPY_NODES; + Prob += v * ENTROPY_NODES; DO_WHILE: Prob += vp8_coef_bands_x[c]; @@ -336,9 +388,8 @@ ONE_CONTEXT_NODE_0_: qcoeff_ptr [ scan[15] ] = (INT16) v; BLOCK_FINISHED: - t = ((x->block[i].eob = c) != !type); // any nonzero data? - eobtotal += x->block[i].eob; - *a = *l = t; + *a = *l = ((eobs[i] = c) != !type); /* any nonzero data? */ + eobtotal += c; qcoeff_ptr += 16; i++; @@ -348,12 +399,11 @@ BLOCK_FINISHED: if (i == 25) { - scan = vp8_default_zig_zag1d;//x->scan_order1d; type = 0; i = 0; stop = 16; coef_probs = oc->fc.coef_probs [type] [ 0 ] [0]; - qcoeff_ptr = &x->qcoeff[0]; + qcoeff_ptr -= (24*16 + 16); goto BLOCK_LOOP; } @@ -365,10 +415,12 @@ BLOCK_FINISHED: goto BLOCK_LOOP; } - bc->count = count; + FILL + bc->user_buffer = bufptr; bc->value = value; + bc->count = count; bc->range = range; - bc->read_ptr = bufptr; return eobtotal; } +#endif /*!CONFIG_ASM_DETOK*/ diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h index 6a9a47607..294a4a55d 100644 --- a/vp8/decoder/detokenize.h +++ b/vp8/decoder/detokenize.h @@ -1,19 +1,24 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ -#ifndef detokenize_h -#define detokenize_h 1 +#ifndef DETOKENIZE_H +#define DETOKENIZE_H #include "onyxd_int.h" +#if ARCH_ARM +#include "arm/detokenize_arm.h" +#endif + void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); -#endif /* detokenize_h */ +#endif /* DETOKENIZE_H */ diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c index 302b64bf8..2e284729b 100644 --- a/vp8/decoder/generic/dsystemdependent.c +++ b/vp8/decoder/generic/dsystemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -13,19 +14,22 @@ #include "onyxd_int.h" extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi); +extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi); void vp8_dmachine_specific_config(VP8D_COMP *pbi) { - // Pure C: + /* Pure C: */ #if CONFIG_RUNTIME_CPU_DETECT - pbi->mb.rtcd = &pbi->common.rtcd; - pbi->dequant.block = vp8_dequantize_b_c; - pbi->dequant.idct = vp8_dequant_idct_c; - pbi->dequant.idct_dc = vp8_dequant_dc_idct_c; - pbi->dboolhuff.start = vp8dx_start_decode_c; - pbi->dboolhuff.stop = vp8dx_stop_decode_c; - pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; -#if 0 //For use with RTCD, when implemented + pbi->mb.rtcd = &pbi->common.rtcd; + pbi->dequant.block = vp8_dequantize_b_c; + pbi->dequant.idct_add = vp8_dequant_idct_add_c; + pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_c; + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_c; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_c; + pbi->dboolhuff.start = vp8dx_start_decode_c; + pbi->dboolhuff.fill = vp8dx_bool_decoder_fill_c; +#if 0 /*For use with RTCD, when implemented*/ pbi->dboolhuff.debool = vp8dx_decode_bool_c; pbi->dboolhuff.devalue = vp8dx_decode_value_c; #endif @@ -34,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi) #if ARCH_X86 || ARCH_X86_64 vp8_arch_x86_decode_init(pbi); #endif + +#if ARCH_ARM + vp8_arch_arm_decode_init(pbi); +#endif } diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c new file mode 100644 index 000000000..c98bd5bb8 --- /dev/null +++ b/vp8/decoder/idct_blk.c @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/config.h" +#include "idct.h" +#include "dequantize.h" + +void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride, + int Dc); +void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, + unsigned char *dest, int pitch, int stride); +void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, + unsigned char *dst_ptr, int pitch, int stride); + +void vp8_dequant_dc_idct_add_y_block_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc) +{ + int i, j; + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + if (*eobs++ > 1) + vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]); + else + vp8_dc_only_idct_add_c (dc[0], pre, dst, 16, stride); + + q += 16; + pre += 4; + dst += 4; + dc ++; + } + + pre += 64 - 16; + dst += 4*stride - 16; + } +} + +void vp8_dequant_idct_add_y_block_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs) +{ + int i, j; + + for (i = 0; i < 4; i++) + { + for (j = 0; j < 4; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride); + ((int *)q)[0] = 0; + } + + q += 16; + pre += 4; + dst += 4; + } + + pre += 64 - 16; + dst += 4*stride - 16; + } +} + +void vp8_dequant_idct_add_uv_block_c + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + int i, j; + + for (i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstu, 8, stride); + ((int *)q)[0] = 0; + } + + q += 16; + pre += 4; + dstu += 4; + } + + pre += 32 - 8; + dstu += 4*stride - 8; + } + + for 
(i = 0; i < 2; i++) + { + for (j = 0; j < 2; j++) + { + if (*eobs++ > 1) + vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride); + else + { + vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstv, 8, stride); + ((int *)q)[0] = 0; + } + + q += 16; + pre += 4; + dstv += 4; + } + + pre += 32 - 8; + dstv += 4*stride - 8; + } +} diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c index 8d2b267a9..063b6a468 100644 --- a/vp8/decoder/onyxd_if.c +++ b/vp8/decoder/onyxd_if.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -23,18 +24,19 @@ #include "threading.h" #include "decoderthreading.h" #include -#include "segmentation_common.h" + #include "quant_common.h" #include "vpx_scale/vpxscale.h" #include "systemdependent.h" #include "vpx_ports/vpx_timer.h" - +#include "detokenize.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif extern void vp8_init_loop_filter(VP8_COMMON *cm); - extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); -// DEBUG code #if CONFIG_DEBUG void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s) { @@ -110,12 +112,13 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) pbi->common.current_video_frame = 0; pbi->ready_for_new_data = 1; - pbi->CPUFreq = 0; //vp8_get_processor_freq(); + pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/ pbi->max_threads = oxcf->max_threads; vp8_decoder_create_threads(pbi); - //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid - // unnecessary calling of vp8cx_init_de_quantizer() for every frame. + /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid + * unnecessary calling of vp8cx_init_de_quantizer() for every frame. 
+ */ vp8cx_init_de_quantizer(pbi); { @@ -127,6 +130,9 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) cm->last_sharpness_level = cm->sharpness_level; } +#if CONFIG_ARM_ASM_DETOK + vp8_init_detokenizer(pbi); +#endif pbi->common.error.setjmp = 0; return (VP8D_PTR) pbi; } @@ -142,6 +148,11 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr) if (pbi->segmentation_map != 0) vpx_free(pbi->segmentation_map); #endif + +#if CONFIG_MULTITHREAD + if (pbi->b_multithreaded_rd) + vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); +#endif vp8_decoder_remove_threads(pbi); vp8_remove_common(&pbi->common); vpx_free(pbi); @@ -181,57 +192,143 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C { VP8D_COMP *pbi = (VP8D_COMP *) ptr; VP8_COMMON *cm = &pbi->common; + int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(&cm->last_frame, sd); - + ref_fb_idx = cm->lst_fb_idx; else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd); - + ref_fb_idx = cm->gld_fb_idx; else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd); - + ref_fb_idx = cm->alt_fb_idx; else return -1; + vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd); + return 0; } int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8D_COMP *pbi = (VP8D_COMP *) ptr; VP8_COMMON *cm = &pbi->common; + int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->last_frame); - + ref_fb_idx = cm->lst_fb_idx; else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame); - + ref_fb_idx = cm->gld_fb_idx; else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame); - + ref_fb_idx = cm->alt_fb_idx; else return -1; + vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]); + return 0; } -//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us. 
+/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/ #if HAVE_ARMV7 extern void vp8_push_neon(INT64 *store); extern void vp8_pop_neon(INT64 *store); -static INT64 dx_store_reg[8]; #endif + +static int get_free_fb (VP8_COMMON *cm) +{ + int i; + for (i = 0; i < NUM_YV12_BUFFERS; i++) + if (cm->fb_idx_ref_cnt[i] == 0) + break; + + cm->fb_idx_ref_cnt[i] = 1; + return i; +} + +static void ref_cnt_fb (int *buf, int *idx, int new_idx) +{ + if (buf[*idx] > 0) + buf[*idx]--; + + *idx = new_idx; + + buf[new_idx]++; +} + +/* If any buffer copy / swapping is signalled it should be done here. */ +static int swap_frame_buffers (VP8_COMMON *cm) +{ + int fb_to_update_with, err = 0; + + if (cm->refresh_last_frame) + fb_to_update_with = cm->lst_fb_idx; + else + fb_to_update_with = cm->new_fb_idx; + + /* The alternate reference frame or golden frame can be updated + * using the new, last, or golden/alt ref frame. If it + * is updated using the newly decoded frame it is a refresh. + * An update using the last or golden/alt ref frame is a copy. 
+ */ + if (cm->copy_buffer_to_arf) + { + int new_fb = 0; + + if (cm->copy_buffer_to_arf == 1) + new_fb = fb_to_update_with; + else if (cm->copy_buffer_to_arf == 2) + new_fb = cm->gld_fb_idx; + else + err = -1; + + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); + } + + if (cm->copy_buffer_to_gf) + { + int new_fb = 0; + + if (cm->copy_buffer_to_gf == 1) + new_fb = fb_to_update_with; + else if (cm->copy_buffer_to_gf == 2) + new_fb = cm->alt_fb_idx; + else + err = -1; + + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); + } + + if (cm->refresh_golden_frame) + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); + + if (cm->refresh_alt_ref_frame) + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); + + if (cm->refresh_last_frame) + { + ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); + + cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; + } + else + cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; + + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; + + return err; +} + int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp) { +#if HAVE_ARMV7 + INT64 dx_store_reg[8]; +#endif VP8D_COMP *pbi = (VP8D_COMP *) ptr; VP8_COMMON *cm = &pbi->common; int retcode = 0; - struct vpx_usec_timer timer; -// if(pbi->ready_for_new_data == 0) -// return -1; + /*if(pbi->ready_for_new_data == 0) + return -1;*/ if (ptr == 0) { @@ -240,21 +337,38 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign pbi->common.error.error_code = VPX_CODEC_OK; +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(dx_store_reg); + } +#endif + + cm->new_fb_idx = get_free_fb (cm); + if (setjmp(pbi->common.error.jmp)) { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif pbi->common.error.setjmp = 0; + if 
(cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; return -1; } pbi->common.error.setjmp = 1; -#if HAVE_ARMV7 - vp8_push_neon(dx_store_reg); -#endif - vpx_usec_timer_start(&timer); - //cm->current_video_frame++; + /*cm->current_video_frame++;*/ pbi->Source = source; pbi->source_sz = size; @@ -263,103 +377,80 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign if (retcode < 0) { #if HAVE_ARMV7 - vp8_pop_neon(dx_store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } #endif pbi->common.error.error_code = VPX_CODEC_ERROR; pbi->common.error.setjmp = 0; + if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) + cm->fb_idx_ref_cnt[cm->new_fb_idx]--; return retcode; } - // Update the GF useage maps. - vp8_update_gf_useage_maps(cm, &pbi->mb); - - if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0) - vp8_stop_lfthread(pbi); - - if (cm->refresh_last_frame) + if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION) { - vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame); - - cm->frame_to_show = &cm->last_frame; - } - else - { - cm->frame_to_show = &cm->new_frame; - } - - if (!pbi->b_multithreaded_lf) - { - struct vpx_usec_timer lpftimer; - vpx_usec_timer_start(&lpftimer); - // Apply the loop filter if appropriate. 
- - if (cm->filter_level > 0) + if (swap_frame_buffers (cm)) { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif + pbi->common.error.error_code = VPX_CODEC_ERROR; + pbi->common.error.setjmp = 0; + return -1; + } + } else + { + if (swap_frame_buffers (cm)) + { +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } +#endif + pbi->common.error.error_code = VPX_CODEC_ERROR; + pbi->common.error.setjmp = 0; + return -1; + } + + if(pbi->common.filter_level) + { + struct vpx_usec_timer lpftimer; + vpx_usec_timer_start(&lpftimer); + /* Apply the loop filter if appropriate. */ + vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level); + + vpx_usec_timer_mark(&lpftimer); + pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer); + cm->last_frame_type = cm->frame_type; cm->last_filter_type = cm->filter_type; cm->last_sharpness_level = cm->sharpness_level; - } - - vpx_usec_timer_mark(&lpftimer); - pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer); + vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); } - vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - - - // DEBUG code #if 0 + /* DEBUG code */ + /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/ vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show); if (cm->current_video_frame <= 5) write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame); #endif - // If any buffer copy / swaping is signalled it should be done here. 
- if (cm->copy_buffer_to_arf) - { - if (cm->copy_buffer_to_arf == 1) - { - if (cm->refresh_last_frame) - vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame); - } - else if (cm->copy_buffer_to_arf == 2) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame); - } - - if (cm->copy_buffer_to_gf) - { - if (cm->copy_buffer_to_gf == 1) - { - if (cm->refresh_last_frame) - vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame); - } - else if (cm->copy_buffer_to_gf == 2) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame); - } - - // Should the golden or alternate reference frame be refreshed? - if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) - { - if (cm->refresh_golden_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame); - - if (cm->refresh_alt_ref_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame); - - //vpx_log("Decoder: recovery frame received \n"); - - // Update data structures that monitors GF useage - vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cm->gf_active_count = cm->mb_rows * cm->mb_cols; - } - vp8_clear_system_state(); vpx_usec_timer_mark(&timer); @@ -367,7 +458,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign pbi->time_decoding += pbi->decode_microseconds; -// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame); + /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/ if (cm->show_frame) cm->current_video_frame++; @@ -410,12 +501,17 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign #endif #if HAVE_ARMV7 - vp8_pop_neon(dx_store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(dx_store_reg); + } #endif 
pbi->common.error.setjmp = 0; return retcode; } -int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags) +int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags) { int ret = -1; VP8D_COMP *pbi = (VP8D_COMP *) ptr; @@ -423,7 +519,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, if (pbi->ready_for_new_data == 1) return ret; - // ie no raw frame to show!!! + /* ie no raw frame to show!!! */ if (pbi->common.show_frame == 0) return ret; @@ -433,7 +529,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, sd->clrtype = pbi->common.clr_type; #if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags); + ret = vp8_post_proc_frame(&pbi->common, sd, flags); #else if (pbi->common.frame_to_show) @@ -449,7 +545,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, ret = -1; } -#endif //!CONFIG_POSTPROC +#endif /*!CONFIG_POSTPROC*/ vp8_clear_system_state(); return ret; } diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c deleted file mode 100644 index 363ad5d72..000000000 --- a/vp8/decoder/onyxd_if_sjl.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "onyxc_int.h" -#include "postproc.h" -#include "onyxd.h" -#include "onyxd_int.h" -#include "vpx_mem/vpx_mem.h" -#include "alloccommon.h" -#include "vpx_scale/yv12extend.h" -#include "loopfilter.h" -#include "swapyv12buffer.h" -#include "g_common.h" -#include "threading.h" -#include "decoderthreading.h" -#include -#include "segmentation_common.h" -#include "quant_common.h" -#include "vpx_scale/vpxscale.h" -#include "systemdependent.h" -#include "vpx_ports/vpx_timer.h" - - -#ifndef VPX_NO_GLOBALS -static int init_ct = 0; -#else -# include "vpx_global_handling.h" -# define init_ct ((int)vpxglobalm(onyxd,init_ct)) -#endif - -extern void vp8_init_loop_filter(VP8_COMMON *cm); - -extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi); -extern void init_detokenizer(VP8D_COMP *dx); - -// DEBUG code -void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s) -{ - FILE *yuv_file = fopen((char *)name, "ab"); - unsigned char *src = s->y_buffer; - int h = s->y_height; - - do - { - fwrite(src, s->y_width, 1, yuv_file); - src += s->y_stride; - } - while (--h); - - src = s->u_buffer; - h = s->uv_height; - - do - { - fwrite(src, s->uv_width, 1, yuv_file); - src += s->uv_stride; - } - while (--h); - - src = s->v_buffer; - h = s->uv_height; - - do - { - fwrite(src, s->uv_width, 1, yuv_file); - src += s->uv_stride; - } - while (--h); - - fclose(yuv_file); -} - -void vp8dx_initialize() -{ - if (!init_ct++) - { - vp8_initialize_common(); - vp8_scale_machine_specific_config(); - } -} - -void vp8dx_shutdown() -{ - if (!--init_ct) - { - vp8_shutdown_common(); - } -} - - -VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) -{ - VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); - - if (!pbi) - return NULL; - - vpx_memset(pbi, 0, sizeof(VP8D_COMP)); - - vp8dx_initialize(); - - vp8_create_common(&pbi->common); - vp8_dmachine_specific_config(pbi); - - pbi->common.current_video_frame = 0; - pbi->ready_for_new_data = 1; - - pbi->CPUFreq = 0; 
//vp8_get_processor_freq(); - pbi->max_threads = oxcf->max_threads; - vp8_decoder_create_threads(pbi); - - //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid - // unnecessary calling of vp8cx_init_de_quantizer() for every frame. - vp8cx_init_de_quantizer(pbi); - - { - VP8_COMMON *cm = &pbi->common; - - vp8_init_loop_filter(cm); - cm->last_frame_type = KEY_FRAME; - cm->last_filter_type = cm->filter_type; - cm->last_sharpness_level = cm->sharpness_level; - } - - init_detokenizer(pbi); - - return (VP8D_PTR) pbi; -} -void vp8dx_remove_decompressor(VP8D_PTR ptr) -{ - VP8D_COMP *pbi = (VP8D_COMP *) ptr; - - if (!pbi) - return; - - vp8_decoder_remove_threads(pbi); - vp8_remove_common(&pbi->common); - vpx_free(pbi); - vp8dx_shutdown(); - -} - -void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x) -{ - VP8D_COMP *pbi = (VP8D_COMP *) comp; - - (void) pbi; - (void) x; - - switch (oxst) - { - case VP8D_OK: - break; - } -} - -int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst) -{ - VP8D_COMP *pbi = (VP8D_COMP *) comp; - - (void) pbi; - - switch (oxst) - { - case VP8D_OK: - break; - } - - return -1; -} - -int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8D_COMP *pbi = (VP8D_COMP *) ptr; - VP8_COMMON *cm = &pbi->common; - - if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(&cm->last_frame, sd); - - else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd); - - else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd); - - else - return -1; - - return 0; -} -int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) -{ - VP8D_COMP *pbi = (VP8D_COMP *) ptr; - VP8_COMMON *cm = &pbi->common; - - if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->last_frame); - - else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(sd, 
&cm->golden_frame); - - else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame); - - else - return -1; - - return 0; -} -int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp) -{ - VP8D_COMP *pbi = (VP8D_COMP *) ptr; - VP8_COMMON *cm = &pbi->common; - int retcode = 0; - - struct vpx_usec_timer timer; - (void) size; - -// if(pbi->ready_for_new_data == 0) -// return -1; - - vpx_usec_timer_start(&timer); - - if (ptr == 0) - { - return -1; - } - - //cm->current_video_frame++; - pbi->Source = source; - - retcode = vp8_decode_frame(pbi); - - if (retcode < 0) - return retcode; - - // Update the GF useage maps. - vp8_update_gf_useage_maps(cm, &pbi->mb); - - if (pbi->b_multithreaded) - vp8_stop_lfthread(pbi); - - if (cm->refresh_last_frame) - { - vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame); - - cm->frame_to_show = &cm->last_frame; - } - else - { - cm->frame_to_show = &cm->new_frame; - } - - if (!pbi->b_multithreaded) - { - struct vpx_usec_timer lpftimer; - vpx_usec_timer_start(&lpftimer); - // Apply the loop filter if appropriate. - - if (cm->filter_level > 0) - { - vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level); - cm->last_frame_type = cm->frame_type; - cm->last_filter_type = cm->filter_type; - cm->last_sharpness_level = cm->sharpness_level; - - } - - vpx_usec_timer_mark(&lpftimer); - pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer); - } - - vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show); - -#if 0 - // DEBUG code - //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show); - if (cm->current_video_frame <= 5) - write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame); -#endif - - // If any buffer copy / swaping is signalled it should be done here. 
- if (cm->copy_buffer_to_arf) - { - if (cm->copy_buffer_to_arf == 1) - { - if (cm->refresh_last_frame) - vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame); - } - else if (cm->copy_buffer_to_arf == 2) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame); - } - - if (cm->copy_buffer_to_gf) - { - if (cm->copy_buffer_to_gf == 1) - { - if (cm->refresh_last_frame) - vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame); - } - else if (cm->copy_buffer_to_gf == 2) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame); - } - - // Should the golden or alternate reference frame be refreshed? - if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) - { - if (cm->refresh_golden_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame); - - if (cm->refresh_alt_ref_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame); - - //vpx_log("Decoder: recovery frame received \n"); - - // Update data structures that monitors GF useage - vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cm->gf_active_count = cm->mb_rows * cm->mb_cols; - } - - vp8_clear_system_state(); - - vpx_usec_timer_mark(&timer); - pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer); - - pbi->time_decoding += pbi->decode_microseconds; - -// vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame); - - cm->current_video_frame++; - pbi->ready_for_new_data = 0; - pbi->last_time_stamp = time_stamp; - - { - int i; - INT64 earliest_time = pbi->dr[0].time_stamp; - INT64 latest_time = pbi->dr[0].time_stamp; - INT64 time_diff = 0; - int bytes = 0; - - pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;; - pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp; - - for (i = 0; i < 16; i++) - { - - bytes += 
pbi->dr[i].size; - - if (pbi->dr[i].time_stamp < earliest_time) - earliest_time = pbi->dr[i].time_stamp; - - if (pbi->dr[i].time_stamp > latest_time) - latest_time = pbi->dr[i].time_stamp; - } - - time_diff = latest_time - earliest_time; - - if (time_diff > 0) - { - pbi->common.bitrate = 80000.00 * bytes / time_diff ; - pbi->common.framerate = 160000000.00 / time_diff ; - } - - } - return retcode; -} -int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level, int noise_level, int flags) -{ - int ret = -1; - VP8D_COMP *pbi = (VP8D_COMP *) ptr; - - if (pbi->ready_for_new_data == 1) - return ret; - - // ie no raw frame to show!!! - if (pbi->common.show_frame == 0) - return ret; - - pbi->ready_for_new_data = 1; - *time_stamp = pbi->last_time_stamp; - *time_end_stamp = 0; - - sd->clrtype = pbi->common.clr_type; - ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags); - vp8_clear_system_state(); - return ret; -} diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h index e8b5d409a..fc1811d7f 100644 --- a/vp8/decoder/onyxd_int.h +++ b/vp8/decoder/onyxd_int.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -47,21 +48,20 @@ typedef struct typedef struct { - int *scan; - UINT8 *ptr_onyxblock2context_leftabove; - vp8_tree_index *vp8_coef_tree_ptr; //onyx_coef_tree_ptr; ??? - TOKENEXTRABITS *teb_base_ptr; + int const *scan; + UINT8 const *ptr_block2leftabove; + vp8_tree_index const *vp8_coef_tree_ptr; + TOKENEXTRABITS const *teb_base_ptr; unsigned char *norm_ptr; -// UINT16 *ptr_onyx_coef_bands_x; - UINT8 *ptr_onyx_coef_bands_x; + UINT8 *ptr_coef_bands_x; - ENTROPY_CONTEXT **A; - ENTROPY_CONTEXT(*L)[4]; + ENTROPY_CONTEXT_PLANES *A; + ENTROPY_CONTEXT_PLANES *L; INT16 *qcoeff_start_ptr; BOOL_DECODER *current_bc; - UINT8 *coef_probs[4]; + vp8_prob const *coef_probs[4]; UINT8 eob[25]; @@ -88,27 +88,32 @@ typedef struct VP8Decompressor unsigned int time_loop_filtering; volatile int b_multithreaded_rd; - volatile int b_multithreaded_lf; int max_threads; - int last_mb_row_decoded; int current_mb_col_main; int decoding_thread_count; int allocated_decoding_thread_count; - // variable for threading - DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb); + /* variable for threading */ #if CONFIG_MULTITHREAD - pthread_t h_thread_lpf; // thread for postprocessing - sem_t h_event_lpf; // Event for post_proc completed - sem_t h_event_start_lpf; -#endif + int mt_baseline_filter_level[MAX_MB_SEGMENTS]; + int sync_range; + int *mt_current_mb_col; /* Each row remembers its already decoded column. 
*/ + + unsigned char **mt_yabove_row; /* mb_rows x width */ + unsigned char **mt_uabove_row; + unsigned char **mt_vabove_row; + unsigned char **mt_yleft_col; /* mb_rows x 16 */ + unsigned char **mt_uleft_col; /* mb_rows x 8 */ + unsigned char **mt_vleft_col; /* mb_rows x 8 */ + MB_ROW_DEC *mb_row_di; - DECODETHREAD_DATA *de_thread_data; -#if CONFIG_MULTITHREAD + DECODETHREAD_DATA *de_thread_data; + pthread_t *h_decoding_thread; - sem_t *h_event_mbrdecoding; - sem_t h_event_main; - // end of threading data + sem_t *h_event_start_decoding; + sem_t h_event_end_decoding; + /* end of threading data */ #endif + vp8_reader *mbc; INT64 last_time_stamp; int ready_for_new_data; @@ -122,6 +127,12 @@ typedef struct VP8Decompressor struct vp8_dboolhuff_rtcd_vtable dboolhuff; #endif + + vp8_prob prob_intra; + vp8_prob prob_last; + vp8_prob prob_gf; + vp8_prob prob_skip_false; + } VP8D_COMP; int vp8_decode_frame(VP8D_COMP *cpi); diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c new file mode 100644 index 000000000..ad4324b27 --- /dev/null +++ b/vp8/decoder/reconintra_mt.c @@ -0,0 +1,982 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "vpx_ports/config.h" +#include "recon.h" +#include "reconintra.h" +#include "vpx_mem/vpx_mem.h" +#include "onyxd_int.h" + +/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and + * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x). 
+ */ + /* Build the 16x16 luma intra predictor for one macroblock into x->predictor (rows 16 bytes apart). Edge pixels come from the per-row pbi->mt_yabove_row / pbi->mt_yleft_col buffers when pbi->common.filter_level is non-zero, otherwise from the destination frame itself. The body is compiled out (arguments only voided) unless CONFIG_MULTITHREAD is set. */ +void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) +{ +#if CONFIG_MULTITHREAD + unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ + unsigned char *yleft_col; + unsigned char yleft_buf[16]; + unsigned char ytop_left; /* = yabove_row[-1]; */ + unsigned char *ypred_ptr = x->predictor; + int r, c, i; + + if (pbi->common.filter_level) + { + yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32; /* this MB's 16 pixels sit 32 bytes into the saved row */ + yleft_col = pbi->mt_yleft_col[mb_row]; + } else + { + yabove_row = x->dst.y_buffer - x->dst.y_stride; + + for (i = 0; i < 16; i++) + yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1]; /* gather the pixel column just left of the MB */ + yleft_col = yleft_buf; + } + + ytop_left = yabove_row[-1]; + + /* for Y */ + switch (x->mode_info_context->mbmi.mode) + { + case DC_PRED: /* flat block: rounded mean of the available edge pixels, 128 when neither edge is available */ + { + int expected_dc; + int i; + int shift; + int average = 0; + + + if (x->up_available || x->left_available) + { + if (x->up_available) + { + for (i = 0; i < 16; i++) + { + average += yabove_row[i]; + } + } + + if (x->left_available) + { + + for (i = 0; i < 16; i++) + { + average += yleft_col[i]; + } + + } + + + + shift = 3 + x->up_available + x->left_available; /* 4 or 5 when the flags are 0/1: log2 of the 16 or 32 pixels summed */ + expected_dc = (average + (1 << (shift - 1))) >> shift; + } + else + { + expected_dc = 128; + } + + vpx_memset(ypred_ptr, expected_dc, 256); + } + break; + case V_PRED: /* every row repeats the above row */ + { + + for (r = 0; r < 16; r++) + { + + ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0]; /* NOTE(review): four int stores cover the 16-byte row only if int is 4 bytes - confirm */ + ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; + ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; + ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; + ypred_ptr += 16; + } + } + break; + case H_PRED: /* every row is filled with its own left-edge pixel */ + { + + for (r = 0; r < 16; r++) + { + + vpx_memset(ypred_ptr, yleft_col[r], 16); + ypred_ptr += 16; + } + + } + break; + case TM_PRED: /* left plus above minus top_left, clamped to 0..255 */ + { + + for (r = 0; r < 16; r++) + { + for (c = 0; c < 16; c++) + { + int pred = yleft_col[r] + yabove_row[ c] - ytop_left; + + if (pred < 0) + pred = 0; + + if (pred > 255) + pred = 255; + + ypred_ptr[c] = pred; + } + + ypred_ptr += 16; 
+ } + + } + break; + case B_PRED: /* remaining modes: no 16x16 luma intra predictor is built here */ + case NEARESTMV: + case NEARMV: + case ZEROMV: + case NEWMV: + case SPLITMV: + case MB_MODE_COUNT: + break; + } +#else + (void) pbi; /* parameters are unused when CONFIG_MULTITHREAD is off */ + (void) x; + (void) mb_row; + (void) mb_col; +#endif +} + /* Same prediction as vp8mt_build_intra_predictors_mby, but written straight into the destination frame at x->dst.y_buffer (row pitch x->dst.y_stride) instead of into x->predictor. */ +void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) +{ +#if CONFIG_MULTITHREAD + unsigned char *yabove_row; /* = x->dst.y_buffer - x->dst.y_stride; */ + unsigned char *yleft_col; + unsigned char yleft_buf[16]; + unsigned char ytop_left; /* = yabove_row[-1]; */ + unsigned char *ypred_ptr = x->predictor; + int r, c, i; + + int y_stride = x->dst.y_stride; + ypred_ptr = x->dst.y_buffer; /*x->predictor;*/ + + if (pbi->common.filter_level) + { + yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32; + yleft_col = pbi->mt_yleft_col[mb_row]; + } else + { + yabove_row = x->dst.y_buffer - x->dst.y_stride; + + for (i = 0; i < 16; i++) + yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1]; /* copied out before the block is overwritten in place */ + yleft_col = yleft_buf; + } + + ytop_left = yabove_row[-1]; + + /* for Y */ + switch (x->mode_info_context->mbmi.mode) + { + case DC_PRED: /* flat block: rounded mean of the available edge pixels, 128 when neither edge is available */ + { + int expected_dc; + int i; + int shift; + int average = 0; + + + if (x->up_available || x->left_available) + { + if (x->up_available) + { + for (i = 0; i < 16; i++) + { + average += yabove_row[i]; + } + } + + if (x->left_available) + { + + for (i = 0; i < 16; i++) + { + average += yleft_col[i]; + } + + } + + + + shift = 3 + x->up_available + x->left_available; + expected_dc = (average + (1 << (shift - 1))) >> shift; + } + else + { + expected_dc = 128; + } + + /*vpx_memset(ypred_ptr, expected_dc, 256);*/ + for (r = 0; r < 16; r++) + { + vpx_memset(ypred_ptr, expected_dc, 16); + ypred_ptr += y_stride; /*16;*/ + } + } + break; + case V_PRED: /* every row repeats the above row */ + { + + for (r = 0; r < 16; r++) + { + + ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0]; + ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1]; + ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2]; + ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3]; + 
ypred_ptr += y_stride; /*16;*/ + } + } + break; + case H_PRED: /* every row is filled with its own left-edge pixel */ + { + + for (r = 0; r < 16; r++) + { + + vpx_memset(ypred_ptr, yleft_col[r], 16); + ypred_ptr += y_stride; /*16;*/ + } + + } + break; + case TM_PRED: /* left plus above minus top_left, clamped to 0..255 */ + { + + for (r = 0; r < 16; r++) + { + for (c = 0; c < 16; c++) + { + int pred = yleft_col[r] + yabove_row[ c] - ytop_left; + + if (pred < 0) + pred = 0; + + if (pred > 255) + pred = 255; + + ypred_ptr[c] = pred; + } + + ypred_ptr += y_stride; /*16;*/ + } + + } + break; + case B_PRED: /* remaining modes: nothing is written here */ + case NEARESTMV: + case NEARMV: + case ZEROMV: + case NEWMV: + case SPLITMV: + case MB_MODE_COUNT: + break; + } +#else + (void) pbi; + (void) x; + (void) mb_row; + (void) mb_col; +#endif +} + /* Build the 8x8 U and V intra predictors for one macroblock into x->predictor, U at offset 256 and V at offset 320, 8 bytes per row. Edge pixels come from the pbi->mt_u* / pbi->mt_v* above-row and left-column buffers when pbi->common.filter_level is non-zero, otherwise from the destination frame. Compiled out unless CONFIG_MULTITHREAD is set. */ +void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) +{ +#if CONFIG_MULTITHREAD + unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ + unsigned char *uleft_col; /*[16];*/ + unsigned char uleft_buf[8]; + unsigned char utop_left; /* = uabove_row[-1]; */ + unsigned char *vabove_row; /* = x->dst.v_buffer - x->dst.uv_stride; */ + unsigned char *vleft_col; /*[20];*/ + unsigned char vleft_buf[8]; + unsigned char vtop_left; /* = vabove_row[-1]; */ + unsigned char *upred_ptr = &x->predictor[256]; + unsigned char *vpred_ptr = &x->predictor[320]; + int i, j; + + if (pbi->common.filter_level) + { + uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16; /* this MB's 8 chroma pixels sit 16 bytes into the saved row */ + vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16; + uleft_col = pbi->mt_uleft_col[mb_row]; + vleft_col = pbi->mt_vleft_col[mb_row]; + } else + { + uabove_row = x->dst.u_buffer - x->dst.uv_stride; + vabove_row = x->dst.v_buffer - x->dst.uv_stride; + + for (i = 0; i < 8; i++) + { + uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1]; /* gather the pixel column just left of the block */ + vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1]; + } + uleft_col = uleft_buf; + vleft_col = vleft_buf; + } + utop_left = uabove_row[-1]; + vtop_left = vabove_row[-1]; + + switch (x->mode_info_context->mbmi.uv_mode) + { + case DC_PRED: /* flat blocks: rounded mean of the available edge pixels per plane, 128 when neither edge is available */ + { 
+ int expected_udc; + int expected_vdc; + int i; + int shift; + int Uaverage = 0; + int Vaverage = 0; + + if (x->up_available) + { + for (i = 0; i < 8; i++) + { + Uaverage += uabove_row[i]; + Vaverage += vabove_row[i]; + } + } + + if (x->left_available) + { + for (i = 0; i < 8; i++) + { + Uaverage += uleft_col[i]; + Vaverage += vleft_col[i]; + } + } + + if (!x->up_available && !x->left_available) + { + expected_udc = 128; + expected_vdc = 128; + } + else + { + shift = 2 + x->up_available + x->left_available; /* 3 or 4 when the flags are 0/1: log2 of the 8 or 16 pixels summed */ + expected_udc = (Uaverage + (1 << (shift - 1))) >> shift; + expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift; + } + + + vpx_memset(upred_ptr, expected_udc, 64); + vpx_memset(vpred_ptr, expected_vdc, 64); + + + } + break; + case V_PRED: /* every row repeats the above row */ + { + int i; + + for (i = 0; i < 8; i++) + { + vpx_memcpy(upred_ptr, uabove_row, 8); + vpx_memcpy(vpred_ptr, vabove_row, 8); + upred_ptr += 8; + vpred_ptr += 8; + } + + } + break; + case H_PRED: /* every row is filled with its own left-edge pixel */ + { + int i; + + for (i = 0; i < 8; i++) + { + vpx_memset(upred_ptr, uleft_col[i], 8); + vpx_memset(vpred_ptr, vleft_col[i], 8); + upred_ptr += 8; + vpred_ptr += 8; + } + } + + break; + case TM_PRED: /* left plus above minus top_left per plane, clamped to 0..255 */ + { + int i; + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + int predu = uleft_col[i] + uabove_row[j] - utop_left; + int predv = vleft_col[i] + vabove_row[j] - vtop_left; + + if (predu < 0) + predu = 0; + + if (predu > 255) + predu = 255; + + if (predv < 0) + predv = 0; + + if (predv > 255) + predv = 255; + + upred_ptr[j] = predu; + vpred_ptr[j] = predv; + } + + upred_ptr += 8; + vpred_ptr += 8; + } + + } + break; + case B_PRED: /* remaining modes: nothing is written here */ + case NEARESTMV: + case NEARMV: + case ZEROMV: + case NEWMV: + case SPLITMV: + case MB_MODE_COUNT: + break; + } +#else + (void) pbi; /* parameters are unused when CONFIG_MULTITHREAD is off */ + (void) x; + (void) mb_row; + (void) mb_col; +#endif +} + /* Same prediction as vp8mt_build_intra_predictors_mbuv, but written straight into the destination frame at x->dst.u_buffer / x->dst.v_buffer (row pitch x->dst.uv_stride) instead of into x->predictor. */ +void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) +{ +#if CONFIG_MULTITHREAD + unsigned char *uabove_row; /* = x->dst.u_buffer - x->dst.uv_stride; */ + 
unsigned char *uleft_col; /*[16];*/ + unsigned char uleft_buf[8]; + unsigned char utop_left; /* = uabove_row[-1]; */ + unsigned char *vabove_row; /* = x->dst.v_buffer - x->dst.uv_stride; */ + unsigned char *vleft_col; /*[20];*/ + unsigned char vleft_buf[8]; + unsigned char vtop_left; /* = vabove_row[-1]; */ + unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/ + unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/ + int uv_stride = x->dst.uv_stride; + int i, j; + + if (pbi->common.filter_level) + { + uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16; + vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16; + uleft_col = pbi->mt_uleft_col[mb_row]; + vleft_col = pbi->mt_vleft_col[mb_row]; + } else + { + uabove_row = x->dst.u_buffer - x->dst.uv_stride; + vabove_row = x->dst.v_buffer - x->dst.uv_stride; + + for (i = 0; i < 8; i++) + { + uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1]; /* copied out before the block is overwritten in place */ + vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1]; + } + uleft_col = uleft_buf; + vleft_col = vleft_buf; + } + utop_left = uabove_row[-1]; + vtop_left = vabove_row[-1]; + + switch (x->mode_info_context->mbmi.uv_mode) + { + case DC_PRED: /* flat blocks: rounded mean of the available edge pixels per plane, 128 when neither edge is available */ + { + int expected_udc; + int expected_vdc; + int i; + int shift; + int Uaverage = 0; + int Vaverage = 0; + + if (x->up_available) + { + for (i = 0; i < 8; i++) + { + Uaverage += uabove_row[i]; + Vaverage += vabove_row[i]; + } + } + + if (x->left_available) + { + for (i = 0; i < 8; i++) + { + Uaverage += uleft_col[i]; + Vaverage += vleft_col[i]; + } + } + + if (!x->up_available && !x->left_available) + { + expected_udc = 128; + expected_vdc = 128; + } + else + { + shift = 2 + x->up_available + x->left_available; + expected_udc = (Uaverage + (1 << (shift - 1))) >> shift; + expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift; + } + + + /*vpx_memset(upred_ptr,expected_udc,64); + vpx_memset(vpred_ptr,expected_vdc,64);*/ + for (i = 0; i < 8; i++) + { + vpx_memset(upred_ptr, expected_udc, 8); + 
vpx_memset(vpred_ptr, expected_vdc, 8); + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ + } + } + break; + case V_PRED: /* every row repeats the above row */ + { + int i; + + for (i = 0; i < 8; i++) + { + vpx_memcpy(upred_ptr, uabove_row, 8); + vpx_memcpy(vpred_ptr, vabove_row, 8); + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ + } + + } + break; + case H_PRED: /* every row is filled with its own left-edge pixel */ + { + int i; + + for (i = 0; i < 8; i++) + { + vpx_memset(upred_ptr, uleft_col[i], 8); + vpx_memset(vpred_ptr, vleft_col[i], 8); + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ + } + } + + break; + case TM_PRED: /* left plus above minus top_left per plane, clamped to 0..255 */ + { + int i; + + for (i = 0; i < 8; i++) + { + for (j = 0; j < 8; j++) + { + int predu = uleft_col[i] + uabove_row[j] - utop_left; + int predv = vleft_col[i] + vabove_row[j] - vtop_left; + + if (predu < 0) + predu = 0; + + if (predu > 255) + predu = 255; + + if (predv < 0) + predv = 0; + + if (predv > 255) + predv = 255; + + upred_ptr[j] = predu; + vpred_ptr[j] = predv; + } + + upred_ptr += uv_stride; /*8;*/ + vpred_ptr += uv_stride; /*8;*/ + } + + } + break; + case B_PRED: /* remaining modes: nothing is written here */ + case NEARESTMV: + case NEARMV: + case ZEROMV: + case NEWMV: + case SPLITMV: + case MB_MODE_COUNT: + break; + } +#else + (void) pbi; + (void) x; + (void) mb_row; + (void) mb_col; +#endif +} + + /* Predict one 4x4 luma sub-block into predictor (rows 16 bytes apart). num indexes the sub-block inside the macroblock, four per row: num < 4 is the top row and num % 4 == 0 the left column. Edges lying on the macroblock's top or left border are read from pbi->mt_yabove_row / pbi->mt_yleft_col when pbi->common.filter_level is non-zero; every other edge pixel comes from the destination frame through xd->block[num]. Compiled out unless CONFIG_MULTITHREAD is set. */ +void vp8mt_predict_intra4x4(VP8D_COMP *pbi, + MACROBLOCKD *xd, + int b_mode, + unsigned char *predictor, + int mb_row, + int mb_col, + int num) +{ +#if CONFIG_MULTITHREAD + int i, r, c; + + unsigned char *Above; /* = *(x->base_dst) + x->dst - x->dst_stride; */ + unsigned char Left[4]; + unsigned char top_left; /* = Above[-1]; */ + + BLOCKD *x = &xd->block[num]; + + /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/ + if (num < 4 && pbi->common.filter_level) + Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32; + else + Above = *(x->base_dst) + x->dst - x->dst_stride; + + if (num%4==0 && pbi->common.filter_level) + { + for (i=0; i<4; i++) + Left[i] = pbi->mt_yleft_col[mb_row][num + i]; /* for num = 0, 4, 8, 12 this selects saved left pixels num..num+3 */ + 
}else + { + Left[0] = (*(x->base_dst))[x->dst - 1]; + Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride]; + Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride]; + Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride]; + } + + if ((num==4 || num==8 || num==12) && pbi->common.filter_level) + top_left = pbi->mt_yleft_col[mb_row][num-1]; /* corner = saved left pixel one row above this block's first row */ + else + top_left = Above[-1]; + + switch (b_mode) + { + case B_DC_PRED: /* flat block: rounded mean of the 4 above and 4 left pixels */ + { + int expected_dc = 0; + + for (i = 0; i < 4; i++) + { + expected_dc += Above[i]; + expected_dc += Left[i]; + } + + expected_dc = (expected_dc + 4) >> 3; + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + predictor[c] = expected_dc; + } + + predictor += 16; + } + } + break; + case B_TM_PRED: + { + /* prediction similar to true_motion prediction */ + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + int pred = Above[c] - top_left + Left[r]; + + if (pred < 0) + pred = 0; + + if (pred > 255) + pred = 255; + + predictor[c] = pred; + } + + predictor += 16; + } + } + break; + + case B_VE_PRED: /* each column holds the above row smoothed by a rounded (1,2,1) filter; reads top_left and Above[4] */ + { + + unsigned int ap[4]; + ap[0] = (top_left + 2 * Above[0] + Above[1] + 2) >> 2; + ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2; + ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2; + ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2; + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + + predictor[c] = ap[c]; + } + + predictor += 16; + } + + } + break; + + + case B_HE_PRED: /* each row holds the left column smoothed by a rounded (1,2,1) filter; Left[3] is repeated for the last tap */ + { + + unsigned int lp[4]; + lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2; + lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2; + lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2; + lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2; + + for (r = 0; r < 4; r++) + { + for (c = 0; c < 4; c++) + { + predictor[c] = lp[r]; + } + + predictor += 16; + } + } + break; + case B_LD_PRED: /* rounded (1,2,1) filter over Above[0..7]; each value is shared by all positions with the same row-plus-column sum */ + { + unsigned char *ptr = Above; + predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2; + predictor[0 * 16 + 1] = + predictor[1 * 16 + 0] 
= (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2; + predictor[0 * 16 + 2] = + predictor[1 * 16 + 1] = + predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2; + predictor[0 * 16 + 3] = + predictor[1 * 16 + 2] = + predictor[2 * 16 + 1] = + predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2; + predictor[1 * 16 + 3] = + predictor[2 * 16 + 2] = + predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2; + predictor[2 * 16 + 3] = + predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2; + predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2; + + } + break; + case B_RD_PRED: /* pp[] is the edge read from bottom-left, through the corner, to top-right; rounded (1,2,1) values are shared by positions with the same column-minus-row difference */ + { + + unsigned char pp[9]; + + pp[0] = Left[3]; + pp[1] = Left[2]; + pp[2] = Left[1]; + pp[3] = Left[0]; + pp[4] = top_left; + pp[5] = Above[0]; + pp[6] = Above[1]; + pp[7] = Above[2]; + pp[8] = Above[3]; + + predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[3 * 16 + 1] = + predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[3 * 16 + 2] = + predictor[2 * 16 + 1] = + predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[3 * 16 + 3] = + predictor[2 * 16 + 2] = + predictor[1 * 16 + 1] = + predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * 16 + 3] = + predictor[1 * 16 + 2] = + predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[1 * 16 + 3] = + predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; + + } + break; + case B_VR_PRED: /* same pp[] edge as B_RD_PRED; mixes rounded 2-tap averages with rounded (1,2,1) values, pp[0] is not read */ + { + + unsigned char pp[9]; + + pp[0] = Left[3]; + pp[1] = Left[2]; + pp[2] = Left[1]; + pp[3] = Left[0]; + pp[4] = top_left; + pp[5] = Above[0]; + pp[6] = Above[1]; + pp[7] = Above[2]; + pp[8] = Above[3]; + + + predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[3 * 16 + 1] = + predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * 16 + 1] = + 
predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1; + predictor[3 * 16 + 2] = + predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[2 * 16 + 2] = + predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1; + predictor[3 * 16 + 3] = + predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + predictor[2 * 16 + 3] = + predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1; + predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2; + predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1; + + } + break; + case B_VL_PRED: /* built from Above[0..7] only; mixes rounded 2-tap averages with rounded (1,2,1) values */ + { + + unsigned char *pp = Above; + + predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[2 * 16 + 0] = + predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1; + predictor[1 * 16 + 1] = + predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * 16 + 1] = + predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1; + predictor[3 * 16 + 1] = + predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2; + predictor[0 * 16 + 3] = + predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1; + predictor[1 * 16 + 3] = + predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + } + break; + + case B_HD_PRED: /* same pp[] edge as B_RD_PRED; mixes rounded 2-tap averages with rounded (1,2,1) values, pp[8] is not read */ + { + unsigned char pp[9]; + pp[0] = Left[3]; + pp[1] = Left[2]; + pp[2] = Left[1]; + pp[3] = Left[0]; + pp[4] = top_left; + pp[5] = Above[0]; + pp[6] = Above[1]; + pp[7] = Above[2]; + pp[8] = Above[3]; + + + predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[2 * 16 + 0] = + predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1; + predictor[2 * 16 + 1] = + predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[2 * 16 + 2] = + predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[2 * 16 + 3] = + predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + 
pp[4] + 2) >> 2; + predictor[1 * 16 + 2] = + predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1; + predictor[1 * 16 + 3] = + predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2; + predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2; + predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2; + } + break; + + + case B_HU_PRED: /* built from Left[0..3] only; the last six positions are plain copies of Left[3] */ + { + unsigned char *pp = Left; + predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1; + predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2; + predictor[0 * 16 + 2] = + predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1; + predictor[0 * 16 + 3] = + predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2; + predictor[1 * 16 + 2] = + predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1; + predictor[1 * 16 + 3] = + predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2; + predictor[2 * 16 + 2] = + predictor[2 * 16 + 3] = + predictor[3 * 16 + 0] = + predictor[3 * 16 + 1] = + predictor[3 * 16 + 2] = + predictor[3 * 16 + 3] = pp[3]; + } + break; + + + } +#else + (void) pbi; /* parameters are unused when CONFIG_MULTITHREAD is off */ + (void) xd; + (void) b_mode; + (void) predictor; + (void) mb_row; + (void) mb_col; + (void) num; +#endif +} + +/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and + * to the right prediction have filled in pixels to use. 
+ */ +void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col) +{ /* Replicates one unsigned-int-sized word (the 4 pixels above-right of the macroblock, per the comment above - assumes a 4-byte unsigned int) into the destination frame at column 16 of rows 3, 7 and 11 of this macroblock. Compiled out unless CONFIG_MULTITHREAD is set. */ +#if CONFIG_MULTITHREAD + unsigned char *above_right; /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */ + unsigned int *src_ptr; + unsigned int *dst_ptr0; + unsigned int *dst_ptr1; + unsigned int *dst_ptr2; + + if (pbi->common.filter_level) + above_right = pbi->mt_yabove_row[mb_row] + mb_col*16 + 32 +16; /* saved above row: this MB's start (32 lead-in bytes) plus 16 pixels */ + else + above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; /* frame row directly above the MB, 16 pixels in */ + + src_ptr = (unsigned int *)above_right; /* NOTE(review): word-sized access assumes the address is suitably aligned - confirm */ + /*dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride); + dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride); + dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);*/ + dst_ptr0 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 3 * x->block[0].dst_stride); /* targets are always in the frame, even when the source is the saved row */ + dst_ptr1 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 7 * x->block[0].dst_stride); + dst_ptr2 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 11 * x->block[0].dst_stride); + *dst_ptr0 = *src_ptr; + *dst_ptr1 = *src_ptr; + *dst_ptr2 = *src_ptr; +#else + (void) pbi; /* parameters are unused when CONFIG_MULTITHREAD is off */ + (void) x; + (void) mb_row; + (void) mb_col; +#endif +} diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h new file mode 100644 index 000000000..d401295b2 --- /dev/null +++ b/vp8/decoder/reconintra_mt.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef __INC_RECONINTRA_MT_H +#define __INC_RECONINTRA_MT_H + +/* reconintra functions used in multi-threaded decoder */ +#if CONFIG_MULTITHREAD +extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); +extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); +extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); +extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); + +extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num); +extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col); +#endif + +#endif diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c index e35d1757f..fc2fad516 100644 --- a/vp8/decoder/threading.c +++ b/vp8/decoder/threading.c @@ -1,16 +1,20 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef WIN32 # include #endif +#ifdef __APPLE__ +#include +#endif #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "threading.h" @@ -18,20 +22,22 @@ #include "loopfilter.h" #include "extend.h" #include "vpx_ports/vpx_timer.h" +#include "detokenize.h" +#include "reconinter.h" +#include "reconintra_mt.h" -extern void vp8_decode_mb_row(VP8D_COMP *pbi, - VP8_COMMON *pc, - int mb_row, - MACROBLOCKD *xd); - +extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); +extern void clamp_mvs(MACROBLOCKD *xd); extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel); -extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd); + +#if CONFIG_RUNTIME_CPU_DETECT +#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x) +#else +#define RTCD_VTABLE(x) NULL +#endif void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) { - - - #if CONFIG_MULTITHREAD VP8_COMMON *const pc = & pbi->common; int i, j; @@ -42,15 +48,11 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC #if CONFIG_RUNTIME_CPU_DETECT mbd->rtcd = xd->rtcd; #endif - - mbd->subpixel_predict = xd->subpixel_predict; mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; - mbd->gf_active_ptr = xd->gf_active_ptr; - mbd->mode_info = pc->mi - 1; mbd->mode_info_context = pc->mi + pc->mode_info_stride * (i + 1); mbd->mode_info_stride = pc->mode_info_stride; @@ -58,11 +60,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC mbd->frames_since_golden = pc->frames_since_golden; mbd->frames_till_alt_ref_frame = pc->frames_till_alt_ref_frame; - mbd->pre = pc->last_frame; - mbd->dst = pc->new_frame; - - - + mbd->pre = pc->yv12_fb[pc->lst_fb_idx]; + mbd->dst = pc->yv12_fb[pc->new_fb_idx]; vp8_setup_block_dptrs(mbd); vp8_build_block_doffsets(mbd); @@ -70,8 +69,14 @@ void vp8_setup_decoding_thread_data(VP8D_COMP 
*pbi, MACROBLOCKD *xd, MB_ROW_DEC mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - mbd->mbmi.mode = DC_PRED; - mbd->mbmi.uv_mode = DC_PRED; + /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ + vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); + /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ + vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); + /*unsigned char mode_ref_lf_delta_enabled; + unsigned char mode_ref_lf_delta_update;*/ + mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; + mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; mbd->current_bc = &pbi->bc2; @@ -81,6 +86,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC } } + for (i=0; i< pc->mb_rows; i++) + pbi->mt_current_mb_col[i]=-1; #else (void) pbi; (void) xd; @@ -90,162 +97,347 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC } +void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col) +{ /* Decode and reconstruct one macroblock for the multi-threaded decoder: decode its tokens unless mb_skip_coeff is set, build the intra or inter prediction, then dequantise and add the inverse-transformed residual into xd->dst. mb_row and mb_col are only forwarded to the vp8mt_* intra predictors. Compiled out unless CONFIG_MULTITHREAD is set. */ +#if CONFIG_MULTITHREAD + int eobtotal = 0; + int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs; + VP8_COMMON *pc = &pbi->common; + + if (xd->mode_info_context->mbmi.mb_skip_coeff) + { + vp8_reset_mb_tokens_context(xd); + } + else + { + eobtotal = vp8_decode_mb_tokens(pbi, xd); + } + + /* Perform temporary clamping of the MV to be used for prediction */ + if (do_clamp) + { + clamp_mvs(xd); + } + + xd->mode_info_context->mbmi.dc_diff = 1; /* default; cleared below on the eobtotal == 0 path */ + + if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0) + { /* eobtotal is 0 for a whole-MB mode: predict straight into the frame with the _s builders and return without any dequant/idct step */ + xd->mode_info_context->mbmi.dc_diff = 0; + + /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/ + if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + { + vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col); + 
vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col); + } + else + { + vp8_build_inter_predictors_mb_s(xd); + } + return; + } + + if (xd->segmentation_enabled) + mb_init_dequantizer(pbi, xd); + + /* do prediction */ + if (xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) + { + vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col); + + if (xd->mode_info_context->mbmi.mode != B_PRED) + { + vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col); + } else { /* B_PRED: luma is predicted per 4x4 block further down; only replicate the above-right pixels here */ + vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col); + } + } + else + { + vp8_build_inter_predictors_mb(xd); + } + + /* dequantization and idct */ + if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) + { + BLOCKD *b = &xd->block[24]; /* block 24 is the dc block fed to the 2nd order transform below */ + DEQUANT_INVOKE(&pbi->dequant, block)(b); + + /* do 2nd order transform on the dc block */ + if (xd->eobs[24] > 1) + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; /* clear the consumed coefficients, eight int-sized stores */ + ((int *)b->qcoeff)[1] = 0; + ((int *)b->qcoeff)[2] = 0; + ((int *)b->qcoeff)[3] = 0; + ((int *)b->qcoeff)[4] = 0; + ((int *)b->qcoeff)[5] = 0; + ((int *)b->qcoeff)[6] = 0; + ((int *)b->qcoeff)[7] = 0; + } + else + { + IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff); + ((int *)b->qcoeff)[0] = 0; + } + + DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs, xd->block[24].diff); + } + else if ((xd->frame_type == KEY_FRAME || xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED) + { + for (i = 0; i < 16; i++) /* predict and reconstruct the 16 luma sub-blocks one at a time, in index order */ + { + BLOCKD *b = &xd->block[i]; + vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i); + + if (xd->eobs[i] > 1) + { + DEQUANT_INVOKE(&pbi->dequant, idct_add) + (b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + } + else + { + 
IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add) + (b->qcoeff[0] * b->dequant[0], b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); + ((int *)b->qcoeff)[0] = 0; + } + } + } + else + { /* remaining case (SPLITMV, or B_PRED on a non-intra MB): luma residual is added without the dc block */ + DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block) + (xd->qcoeff, xd->block[0].dequant, + xd->predictor, xd->dst.y_buffer, + xd->dst.y_stride, xd->eobs); + } + + DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block) /* chroma: predictor offset 16*16 is where the U predictor starts; presumably the same layout holds for qcoeff and eobs - confirm */ + (xd->qcoeff+16*16, xd->block[16].dequant, + xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs+16); +#else + (void) pbi; /* parameters are unused when CONFIG_MULTITHREAD is off */ + (void) xd; + (void) mb_row; + (void) mb_col; +#endif +} + + THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) { #if CONFIG_MULTITHREAD int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT mb_row_left_context[4][4]; + ENTROPY_CONTEXT_PLANES mb_row_left_context; while (1) { if (pbi->b_multithreaded_rd == 0) break; - //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0) + /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/ + if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { if (pbi->b_multithreaded_rd == 0) break; else { VP8_COMMON *pc = &pbi->common; - int mb_row = mbrd->mb_row; MACROBLOCKD *xd = &mbrd->mbd; - //printf("ithread:%d mb_row %d\n", ithread, mb_row); - int i; - int recon_yoffset, recon_uvoffset; - int mb_col; - int recon_y_stride = pc->last_frame.y_stride; - int recon_uv_stride = pc->last_frame.uv_stride; - + int mb_row; + int num_part = 1 << pbi->common.multi_token_partition; volatile int *last_row_current_mb_col; + int nsync = pbi->sync_range; - if (ithread > 0) - last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col; - else - last_row_current_mb_col = &pbi->current_mb_col_main; 
- recon_yoffset = mb_row * recon_y_stride * 16; - recon_uvoffset = mb_row * recon_uv_stride * 8; - // reset above block coeffs - - xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT]; - xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT]; - xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT]; - xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT]; - xd->left_context = mb_row_left_context; - vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context)); - xd->up_available = (mb_row != 0); - - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - - for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) + for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { + int i; + int recon_yoffset, recon_uvoffset; + int mb_col; + int ref_fb_idx = pc->lst_fb_idx; + int dst_fb_idx = pc->new_fb_idx; + int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1) + int filter_level; + loop_filter_info *lfi = pc->lf_info; + int alt_flt_enabled = xd->segmentation_enabled; + int Segment; + + pbi->mb_row_di[ithread].mb_row = mb_row; + pbi->mb_row_di[ithread].mbd.current_bc = &pbi->mbc[mb_row%num_part]; + + last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; + + recon_yoffset = mb_row * recon_y_stride * 16; + recon_uvoffset = mb_row * recon_uv_stride * 8; + /* reset above block coeffs */ + + xd->above_context = pc->above_context; + xd->left_context = &mb_row_left_context; + vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context)); + xd->up_available = (mb_row != 0); + + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) { - x86_pause_hint(); - thread_sleep(0); - } - - // Take a copy of the mode 
and Mv information for this macroblock into the xd->mbmi - vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) ); - - if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED) - { - for (i = 0; i < 16; i++) + if ((mb_col & (nsync-1)) == 0) { - BLOCKD *d = &xd->block[i]; - vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO)); + while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) + { + x86_pause_hint(); + thread_sleep(0); + } } + + if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED) + { + for (i = 0; i < 16; i++) + { + BLOCKD *d = &xd->block[i]; + vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO)); + } + } + + if(pbi->common.filter_level) + { + /*update loopfilter info*/ + Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0; + filter_level = pbi->mt_baseline_filter_level[Segment]; + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + * Apply any context driven MB level adjustment + */ + vp8_adjust_mb_lf_value(xd, &filter_level); + } + + /* Distance of Mb to the various image edges. 
+ * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + + xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + + xd->left_available = (mb_col != 0); + + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = pc->gld_fb_idx; + else + ref_fb_idx = pc->alt_fb_idx; + + xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + + vp8_build_uvmvs(xd, pc->full_pixel); + vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col); + + if (pbi->common.filter_level) + { + if( mb_row != pc->mb_rows-1 ) + { + /* Save decoded MB last row data for next-row decoding */ + vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); + vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); + vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); + } + + /* save left_col for next MB decoding */ + if(mb_col != pc->mb_cols-1) + { + MODE_INFO *next = xd->mode_info_context +1; + + if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME) + { + for (i = 0; i < 16; i++) + pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; + for (i = 0; i < 8; i++) + { + pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; + pbi->mt_vleft_col[mb_row][i] = 
xd->dst.v_buffer [i* recon_uv_stride + 7]; + } + } + } + + /* loopfilter on this macroblock. */ + if (filter_level) + { + if (mb_col > 0) + pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + if (xd->mode_info_context->mbmi.dc_diff > 0) + pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + /* don't apply across umv border */ + if (mb_row > 0) + pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + if (xd->mode_info_context->mbmi.dc_diff > 0) + pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + } + } + + recon_yoffset += 16; + recon_uvoffset += 8; + + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; + + /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/ + pbi->mt_current_mb_col[mb_row] = mb_col; } - // Distance of Mb to the various image edges. 
- // These specified to 8th pel as they are always compared to values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - - xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset; - xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset; - xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset; - - xd->left_available = (mb_col != 0); - - // Select the appropriate reference frame for this MB - if (xd->mbmi.ref_frame == LAST_FRAME) + /* adjust to the next row of mbs */ + if (pbi->common.filter_level) { - xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset; - } - else if (xd->mbmi.ref_frame == GOLDEN_FRAME) - { - // Golden frame reconstruction buffer - xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset; - } - else - { - // Alternate reference frame reconstruction buffer - xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset; - } + if(mb_row != pc->mb_rows-1) + { + int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; + int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); - vp8_build_uvmvs(xd, pc->full_pixel); + for (i = 0; i < 4; i++) + { + pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; + pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; + pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; + } + } + } else + vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); - 
vp8dx_bool_decoder_fill(xd->current_bc); - vp8_decode_macroblock(pbi, xd); - - - recon_yoffset += 16; - recon_uvoffset += 8; - - ++xd->mode_info_context; /* next mb */ - - xd->gf_active_ptr++; // GF useage flag for next MB - - xd->above_context[Y1CONTEXT] += 4; - xd->above_context[UCONTEXT ] += 2; - xd->above_context[VCONTEXT ] += 2; - xd->above_context[Y2CONTEXT] ++; - pbi->mb_row_di[ithread].current_mb_col = mb_col; - - } - - // adjust to the next row of mbs - vp8_extend_mb_row( - &pc->new_frame, - xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8 - ); - - ++xd->mode_info_context; /* skip prediction column */ - - // since we have multithread - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - - //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb)); - if ((mb_row & 1) == 1) - { - pbi->last_mb_row_decoded = mb_row; - //printf("S%d", pbi->last_mb_row_decoded); - } - - if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1) - { - //SetEvent(pbi->h_event_main); - sem_post(&pbi->h_event_main); + ++xd->mode_info_context; /* skip prediction column */ + /* since we have multithread */ + xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; } } } + /* add this to each frame */ + if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1)) + { + /*SetEvent(pbi->h_event_end_decoding);*/ + sem_post(&pbi->h_event_end_decoding); + } } - #else (void) p_data; #endif @@ -253,185 +445,42 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data) return 0 ; } -THREAD_FUNCTION vp8_thread_loop_filter(void *p_data) -{ -#if CONFIG_MULTITHREAD - VP8D_COMP *pbi = (VP8D_COMP *)p_data; - - while (1) - { - if (pbi->b_multithreaded_lf == 0) - break; - - //printf("before waiting for start_lpf\n"); - - //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0) - if (sem_wait(&pbi->h_event_start_lpf) == 0) - 
{ - if (pbi->b_multithreaded_lf == 0) // we're shutting down - break; - else - { - - VP8_COMMON *cm = &pbi->common; - MACROBLOCKD *mbd = &pbi->lpfmb; - int default_filt_lvl = pbi->common.filter_level; - - YV12_BUFFER_CONFIG *post = &cm->new_frame; - loop_filter_info *lfi = cm->lf_info; - - int mb_row; - int mb_col; - - - int baseline_filter_level[MAX_MB_SEGMENTS]; - int filter_level; - int alt_flt_enabled = mbd->segmentation_enabled; - - int i; - unsigned char *y_ptr, *u_ptr, *v_ptr; - - volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded; - - //MODE_INFO * this_mb_mode_info = cm->mi; - mbd->mode_info_context = cm->mi; // Point at base of Mb MODE_INFO list - - // Note the baseline filter values for each segment - if (alt_flt_enabled) - { - for (i = 0; i < MAX_MB_SEGMENTS; i++) - { - if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) - baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - else - { - baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; - baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; // Clamp to valid range - } - } - } - else - { - for (i = 0; i < MAX_MB_SEGMENTS; i++) - baseline_filter_level[i] = default_filt_lvl; - } - - // Initialize the loop filter for this frame. - vp8_init_loop_filter(cm); - - // Set up the buffer pointers - y_ptr = post->y_buffer; - u_ptr = post->u_buffer; - v_ptr = post->v_buffer; - - // vp8_filter each macro block - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) - { - - while (mb_row >= *last_mb_row_decoded) - { - x86_pause_hint(); - thread_sleep(0); - } - - //printf("R%d", mb_row); - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) - { - int Segment = (alt_flt_enabled) ? 
mbd->mode_info_context->mbmi.segment_id : 0; - - filter_level = baseline_filter_level[Segment]; - - // Apply any context driven MB level adjustment - vp8_adjust_mb_lf_value(mbd, &filter_level); - - if (filter_level) - { - if (mb_col > 0) - cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); - - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); - - // don't apply across umv border - if (mb_row > 0) - cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); - - if (mbd->mode_info_context->mbmi.dc_diff > 0) - cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf); - } - - y_ptr += 16; - u_ptr += 8; - v_ptr += 8; - - mbd->mode_info_context++; // step to next MB - - } - - y_ptr += post->y_stride * 16 - post->y_width; - u_ptr += post->uv_stride * 8 - post->uv_width; - v_ptr += post->uv_stride * 8 - post->uv_width; - - mbd->mode_info_context++; // Skip border mb - } - - //printf("R%d\n", mb_row); - // When done, signal main thread that ME is finished - //SetEvent(pbi->h_event_lpf); - sem_post(&pbi->h_event_lpf); - } - - } - } - -#else - (void) p_data; -#endif - return 0; -} void vp8_decoder_create_threads(VP8D_COMP *pbi) { #if CONFIG_MULTITHREAD int core_count = 0; int ithread; + int i; pbi->b_multithreaded_rd = 0; - pbi->b_multithreaded_lf = 0; pbi->allocated_decoding_thread_count = 0; - core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count(); - if (core_count > 1) - { - sem_init(&pbi->h_event_lpf, 0, 0); - sem_init(&pbi->h_event_start_lpf, 0, 0); - pbi->b_multithreaded_lf = 1; - pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi)); - } + core_count = (pbi->max_threads > 16) ? 
16 : pbi->max_threads; if (core_count > 1) { pbi->b_multithreaded_rd = 1; - pbi->decoding_thread_count = core_count - 1; + pbi->decoding_thread_count = core_count -1; CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count)); - CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); + CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count)); CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count)); vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count); CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count)); for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++) { - sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0); + sem_init(&pbi->h_event_start_decoding[ithread], 0, 0); pbi->de_thread_data[ithread].ithread = ithread; pbi->de_thread_data[ithread].ptr1 = (void *)pbi; pbi->de_thread_data[ithread].ptr2 = (void *) &pbi->mb_row_di[ithread]; pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread])); - } - sem_init(&pbi->h_event_main, 0, 0); + sem_init(&pbi->h_event_end_decoding, 0, 0); + pbi->allocated_decoding_thread_count = pbi->decoding_thread_count; } @@ -440,45 +489,196 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi) #endif } + +void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) +{ +#if CONFIG_MULTITHREAD + VP8_COMMON *const pc = & pbi->common; + int i; + + if (pbi->b_multithreaded_rd) + { + if (pbi->mt_current_mb_col) + { + vpx_free(pbi->mt_current_mb_col); + pbi->mt_current_mb_col = NULL ; + } + + /* Free above_row buffers. 
*/ + if (pbi->mt_yabove_row) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_yabove_row[i]) + { + vpx_free(pbi->mt_yabove_row[i]); + pbi->mt_yabove_row[i] = NULL ; + } + } + vpx_free(pbi->mt_yabove_row); + pbi->mt_yabove_row = NULL ; + } + + if (pbi->mt_uabove_row) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_uabove_row[i]) + { + vpx_free(pbi->mt_uabove_row[i]); + pbi->mt_uabove_row[i] = NULL ; + } + } + vpx_free(pbi->mt_uabove_row); + pbi->mt_uabove_row = NULL ; + } + + if (pbi->mt_vabove_row) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_vabove_row[i]) + { + vpx_free(pbi->mt_vabove_row[i]); + pbi->mt_vabove_row[i] = NULL ; + } + } + vpx_free(pbi->mt_vabove_row); + pbi->mt_vabove_row = NULL ; + } + + /* Free left_col buffers. */ + if (pbi->mt_yleft_col) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_yleft_col[i]) + { + vpx_free(pbi->mt_yleft_col[i]); + pbi->mt_yleft_col[i] = NULL ; + } + } + vpx_free(pbi->mt_yleft_col); + pbi->mt_yleft_col = NULL ; + } + + if (pbi->mt_uleft_col) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_uleft_col[i]) + { + vpx_free(pbi->mt_uleft_col[i]); + pbi->mt_uleft_col[i] = NULL ; + } + } + vpx_free(pbi->mt_uleft_col); + pbi->mt_uleft_col = NULL ; + } + + if (pbi->mt_vleft_col) + { + for (i=0; i< mb_rows; i++) + { + if (pbi->mt_vleft_col[i]) + { + vpx_free(pbi->mt_vleft_col[i]); + pbi->mt_vleft_col[i] = NULL ; + } + } + vpx_free(pbi->mt_vleft_col); + pbi->mt_vleft_col = NULL ; + } + } +#else + (void) pbi; +#endif +} + + +int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) +{ +#if CONFIG_MULTITHREAD + VP8_COMMON *const pc = & pbi->common; + int i; + int uv_width; + + if (pbi->b_multithreaded_rd) + { + vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); + + /* our internal buffers are always multiples of 16 */ + if ((width & 0xf) != 0) + width += 16 - (width & 0xf); + + if (width < 640) pbi->sync_range = 1; + else if (width <= 1280) pbi->sync_range = 8; + else if (width <= 2560) pbi->sync_range =16; 
+ else pbi->sync_range = 32; + + uv_width = width >>1; + + /* Allocate an int for each mb row. */ + CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows)); + + /* Allocate memory for above_row buffers. */ + CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1)); + + CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1)); + + CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1)); + + /* Allocate memory for left_col buffers. */ + CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); + + CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); + + CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows)); + for (i=0; i< pc->mb_rows; i++) + CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); + } + return 0; +#else + (void) pbi; + (void) width; +#endif +} + + void vp8_decoder_remove_threads(VP8D_COMP *pbi) { #if CONFIG_MULTITHREAD - if (pbi->b_multithreaded_lf) - { - pbi->b_multithreaded_lf = 0; - sem_post(&pbi->h_event_start_lpf); - pthread_join(pbi->h_thread_lpf, 0); - sem_destroy(&pbi->h_event_start_lpf); - } - - //shutdown MB Decoding thread; + /* shutdown MB Decoding 
thread; */ if (pbi->b_multithreaded_rd) { + int i; + pbi->b_multithreaded_rd = 0; - // allow all threads to exit + + /* allow all threads to exit */ + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) { - int i; - - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - - sem_post(&pbi->h_event_mbrdecoding[i]); - pthread_join(pbi->h_decoding_thread[i], NULL); - } - } - { - - int i; - for (i = 0; i < pbi->allocated_decoding_thread_count; i++) - { - sem_destroy(&pbi->h_event_mbrdecoding[i]); - } - - + sem_post(&pbi->h_event_start_decoding[i]); + pthread_join(pbi->h_decoding_thread[i], NULL); } - sem_destroy(&pbi->h_event_main); + for (i = 0; i < pbi->allocated_decoding_thread_count; i++) + { + sem_destroy(&pbi->h_event_start_decoding[i]); + } + + sem_destroy(&pbi->h_event_end_decoding); if (pbi->h_decoding_thread) { @@ -486,10 +686,10 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) pbi->h_decoding_thread = NULL; } - if (pbi->h_event_mbrdecoding) + if (pbi->h_event_start_decoding) { - vpx_free(pbi->h_event_mbrdecoding); - pbi->h_event_mbrdecoding = NULL; + vpx_free(pbi->h_event_start_decoding); + pbi->h_event_start_decoding = NULL; } if (pbi->mb_row_di) @@ -504,43 +704,65 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi) pbi->de_thread_data = NULL; } } - #else (void) pbi; #endif } -void vp8_start_lfthread(VP8D_COMP *pbi) +void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl) { #if CONFIG_MULTITHREAD - memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb)); - pbi->last_mb_row_decoded = 0; - sem_post(&pbi->h_event_start_lpf); -#else - (void) pbi; -#endif -} - -void vp8_stop_lfthread(VP8D_COMP *pbi) -{ -#if CONFIG_MULTITHREAD - struct vpx_usec_timer timer; - - vpx_usec_timer_start(&timer); - - sem_wait(&pbi->h_event_lpf); - - vpx_usec_timer_mark(&timer); - pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer); + VP8_COMMON *cm = &pbi->common; + MACROBLOCKD *mbd = &pbi->mb; + /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/ /*frame_to_show;*/ + 
loop_filter_info *lfi = cm->lf_info; + FRAME_TYPE frame_type = cm->frame_type; + + /*int mb_row; + int mb_col; + int baseline_filter_level[MAX_MB_SEGMENTS];*/ + int filter_level; + int alt_flt_enabled = mbd->segmentation_enabled; + + int i; + /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/ + + /* Note the baseline filter values for each segment */ + if (alt_flt_enabled) + { + for (i = 0; i < MAX_MB_SEGMENTS; i++) + { + /* Abs value */ + if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) + pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i]; + /* Delta Value */ + else + { + pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i]; + pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0; /* Clamp to valid range */ + } + } + } + else + { + for (i = 0; i < MAX_MB_SEGMENTS; i++) + pbi->mt_baseline_filter_level[i] = default_filt_lvl; + } + + /* Initialize the loop filter for this frame. 
*/ + if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level)) + vp8_init_loop_filter(cm); + else if (frame_type != cm->last_frame_type) + vp8_frame_init_loop_filter(lfi, frame_type); #else (void) pbi; + (void) default_filt_lvl; #endif } -void vp8_mtdecode_mb_rows(VP8D_COMP *pbi, - MACROBLOCKD *xd) +void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd) { #if CONFIG_MULTITHREAD int mb_row; @@ -548,47 +770,212 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi, int ibc = 0; int num_part = 1 << pbi->common.multi_token_partition; + int i, j; + volatile int *last_row_current_mb_col = NULL; + int nsync = pbi->sync_range; + + int filter_level; + loop_filter_info *lfi = pc->lf_info; + int alt_flt_enabled = xd->segmentation_enabled; + int Segment; + + if(pbi->common.filter_level) + { + /* Set above_row buffer to 127 for decoding first MB row */ + vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5); + vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); + vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5); + + for (i=1; i<pc->mb_rows; i++) + { + vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1); + vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1); + } + + /* Set left_col to 129 initially */ + for (i=0; i<pc->mb_rows; i++) + { + vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16); + vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8); + vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8); + } + vp8mt_lpf_init(pbi, pc->filter_level); + } vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); + for (i = 0; i < pbi->decoding_thread_count; i++) + sem_post(&pbi->h_event_start_decoding[i]); + for
(mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { int i; - pbi->current_mb_col_main = -1; - xd->current_bc = &pbi->mbc[ibc]; - ibc++ ; + xd->current_bc = &pbi->mbc[mb_row%num_part]; - if (ibc == num_part) - ibc = 0; - - for (i = 0; i < pbi->decoding_thread_count; i++) + /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */ { - if ((mb_row + i + 1) >= pc->mb_rows) - break; + int i; + int recon_yoffset, recon_uvoffset; + int mb_col; + int ref_fb_idx = pc->lst_fb_idx; + int dst_fb_idx = pc->new_fb_idx; + int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - pbi->mb_row_di[i].mb_row = mb_row + i + 1; - pbi->mb_row_di[i].mbd.current_bc = &pbi->mbc[ibc]; - ibc++; + /* volatile int *last_row_current_mb_col = NULL; */ + if (mb_row > 0) + last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1]; - if (ibc == num_part) - ibc = 0; + vpx_memset(&pc->left_context, 0, sizeof(pc->left_context)); + recon_yoffset = mb_row * recon_y_stride * 16; + recon_uvoffset = mb_row * recon_uv_stride * 8; + /* reset above block coeffs */ - pbi->mb_row_di[i].current_mb_col = -1; - sem_post(&pbi->h_event_mbrdecoding[i]); + xd->above_context = pc->above_context; + xd->up_available = (mb_row != 0); + + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; + + for (mb_col = 0; mb_col < pc->mb_cols; mb_col++) + { + if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){ + while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1) + { + x86_pause_hint(); + thread_sleep(0); + } + } + + if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED) + { + for (i = 0; i < 16; i++) + { + BLOCKD *d = &xd->block[i]; + vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO)); + } + } + + if(pbi->common.filter_level) + { + /* update loopfilter info */ + Segment = (alt_flt_enabled) ? 
xd->mode_info_context->mbmi.segment_id : 0; + filter_level = pbi->mt_baseline_filter_level[Segment]; + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + * Apply any context driven MB level adjustment + */ + vp8_adjust_mb_lf_value(xd, &filter_level); + } + + /* Distance of Mb to the various image edges. + * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units + */ + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; + + xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + + xd->left_available = (mb_col != 0); + + /* Select the appropriate reference frame for this MB */ + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = pc->lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = pc->gld_fb_idx; + else + ref_fb_idx = pc->alt_fb_idx; + + xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + + vp8_build_uvmvs(xd, pc->full_pixel); + vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col); + + if (pbi->common.filter_level) + { + /* Save decoded MB last row data for next-row decoding */ + if(mb_row != pc->mb_rows-1) + { + vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); + vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); + vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); + } + + /* save left_col for next MB decoding */ + if(mb_col != 
pc->mb_cols-1) + { + MODE_INFO *next = xd->mode_info_context +1; + + if (xd->frame_type == KEY_FRAME || next->mbmi.ref_frame == INTRA_FRAME) + { + for (i = 0; i < 16; i++) + pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15]; + for (i = 0; i < 8; i++) + { + pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7]; + pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7]; + } + } + } + + /* loopfilter on this macroblock. */ + if (filter_level) + { + if (mb_col > 0) + pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + if (xd->mode_info_context->mbmi.dc_diff > 0) + pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + /* don't apply across umv border */ + if (mb_row > 0) + pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + + if (xd->mode_info_context->mbmi.dc_diff > 0) + pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf); + } + } + + recon_yoffset += 16; + recon_uvoffset += 8; + + ++xd->mode_info_context; /* next mb */ + + xd->above_context++; + + pbi->mt_current_mb_col[mb_row] = mb_col; + } + + /* adjust to the next row of mbs */ + if (pbi->common.filter_level) + { + if(mb_row != pc->mb_rows-1) + { + int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS; + int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1); + + for (i = 0; i < 4; i++) + { + pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1]; + pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1]; + pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1]; + } + } + }else + 
vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + + ++xd->mode_info_context; /* skip prediction column */ } - - vp8_decode_mb_row(pbi, pc, mb_row, xd); - xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; - - if (mb_row < pc->mb_rows - 1) - { - sem_wait(&pbi->h_event_main); - } } - pbi->last_mb_row_decoded = mb_row; + sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ #else (void) pbi; (void) xd; diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h index eb10e2460..277842896 100644 --- a/vp8/decoder/treereader.h +++ b/vp8/decoder/treereader.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm index 02be4872e..0d6133a46 100644 --- a/vp8/decoder/x86/dequantize_mmx.asm +++ b/vp8/decoder/x86/dequantize_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -49,12 +50,12 @@ sym(vp8_dequantize_b_impl_mmx): ret -;void dequant_idct_mmx(short *input, short *dq, short *output, int pitch) -global sym(vp8_dequant_idct_mmx) -sym(vp8_dequant_idct_mmx): +;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride) +global sym(vp8_dequant_idct_add_mmx) +sym(vp8_dequant_idct_add_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 4 + SHADOW_ARGS_TO_STACK 6 GET_GOT rbx push rsi push rdi @@ -76,7 +77,8 @@ sym(vp8_dequant_idct_mmx): movq mm3, [rax+24] pmullw mm3, [rdx+24] - mov rdx, arg(2) ;output + mov rdx, arg(3) ;dest + mov rsi, arg(2) ;pred pxor mm7, mm7 @@ -87,7 +89,8 @@ sym(vp8_dequant_idct_mmx): movq [rax+24],mm7 - movsxd rax, dword ptr arg(3) ;pitch + movsxd rax, dword ptr arg(4) ;pitch + movsxd rdi, dword ptr arg(5) ;stride psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; @@ -95,11 +98,11 @@ sym(vp8_dequant_idct_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -107,10 +110,10 @@ sym(vp8_dequant_idct_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -150,11 +153,11 @@ sym(vp8_dequant_idct_mmx): movq mm5, mm1 paddw mm2, 
mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -162,16 +165,16 @@ sym(vp8_dequant_idct_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, [fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -206,13 +209,34 @@ sym(vp8_dequant_idct_mmx): punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 - movq [rdx], mm0 + pxor mm7, mm7 - movq [rdx+rax], mm1 - movq [rdx+rax*2], mm2 + movd mm4, [rsi] + punpcklbw mm4, mm7 + paddsw mm0, mm4 + packuswb mm0, mm7 + movd [rdx], mm0 - add rdx, rax - movq [rdx+rax*2], mm5 + movd mm4, [rsi+rax] + punpcklbw mm4, mm7 + paddsw mm1, mm4 + packuswb mm1, mm7 + movd [rdx+rdi], mm1 + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm2, mm4 + packuswb mm2, mm7 + movd [rdx+rdi*2], mm2 + + add rdx, rdi + add rsi, rax + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm5, mm4 + packuswb mm5, mm7 + movd [rdx+rdi*2], mm5 ; begin epilog pop rdi @@ -223,12 +247,12 @@ sym(vp8_dequant_idct_mmx): ret -;void dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc) -global sym(vp8_dequant_dc_idct_mmx) -sym(vp8_dequant_dc_idct_mmx): +;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc) +global sym(vp8_dequant_dc_idct_add_mmx) +sym(vp8_dequant_dc_idct_add_mmx): push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 + SHADOW_ARGS_TO_STACK 7 GET_GOT rbx push rsi push rdi @@ -237,8 +261,6 @@ sym(vp8_dequant_dc_idct_mmx): mov rax, arg(0) ;input mov rdx, 
arg(1) ;dq - movsxd rcx, dword ptr arg(4) ;Dc - movq mm0, [rax ] pmullw mm0, [rdx] @@ -251,7 +273,8 @@ sym(vp8_dequant_dc_idct_mmx): movq mm3, [rax+24] pmullw mm3, [rdx+24] - mov rdx, arg(2) ;output + mov rdx, arg(3) ;dest + mov rsi, arg(2) ;pred pxor mm7, mm7 @@ -261,8 +284,15 @@ sym(vp8_dequant_dc_idct_mmx): movq [rax+16],mm7 movq [rax+24],mm7 - pinsrw mm0, rcx, 0 - movsxd rax, dword ptr arg(3) ;pitch + ; move lower word of Dc to lower word of mm0 + psrlq mm0, 16 + movzx rcx, word ptr arg(6) ;Dc + psllq mm0, 16 + movq mm7, rcx + por mm0, mm7 + + movsxd rax, dword ptr arg(4) ;pitch + movsxd rdi, dword ptr arg(5) ;stride psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; @@ -270,11 +300,11 @@ sym(vp8_dequant_dc_idct_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -282,10 +312,10 @@ sym(vp8_dequant_dc_idct_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 @@ -325,11 +355,11 @@ sym(vp8_dequant_dc_idct_mmx): movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 - pmulhw mm5, [x_s1sqr2 GLOBAL]; + pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; - pmulhw mm7, [x_c1sqr2less1 GLOBAL]; + pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 @@ -337,16 +367,16 @@ sym(vp8_dequant_dc_idct_mmx): movq mm5, mm1 movq mm4, mm3 - pmulhw mm5, [x_c1sqr2less1 GLOBAL] + pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 - pmulhw mm3, [x_s1sqr2 GLOBAL] + pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 - paddw mm0, [fours GLOBAL] + paddw mm0, [GLOBAL(fours)] - paddw mm2, 
[fours GLOBAL] + paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 @@ -381,13 +411,34 @@ sym(vp8_dequant_dc_idct_mmx): punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 - movq [rdx], mm0 + pxor mm7, mm7 - movq [rdx+rax], mm1 - movq [rdx+rax*2], mm2 + movd mm4, [rsi] + punpcklbw mm4, mm7 + paddsw mm0, mm4 + packuswb mm0, mm7 + movd [rdx], mm0 - add rdx, rax - movq [rdx+rax*2], mm5 + movd mm4, [rsi+rax] + punpcklbw mm4, mm7 + paddsw mm1, mm4 + packuswb mm1, mm7 + movd [rdx+rdi], mm1 + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm2, mm4 + packuswb mm2, mm7 + movd [rdx+rdi*2], mm2 + + add rdx, rdi + add rsi, rax + + movd mm4, [rsi+2*rax] + punpcklbw mm4, mm7 + paddsw mm5, mm4 + packuswb mm5, mm7 + movd [rdx+rdi*2], mm5 ; begin epilog pop rdi diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h index 5def406d3..dc68daab3 100644 --- a/vp8/decoder/x86/dequantize_x86.h +++ b/vp8/decoder/x86/dequantize_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -20,19 +21,48 @@ */ #if HAVE_MMX extern prototype_dequant_block(vp8_dequantize_b_mmx); -extern prototype_dequant_idct(vp8_dequant_idct_mmx); -extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx); - +extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx); +extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx); +extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx); +extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx); +extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_dequant_block #define vp8_dequant_block vp8_dequantize_b_mmx -#undef vp8_dequant_idct -#define vp8_dequant_idct vp8_dequant_idct_mmx +#undef vp8_dequant_idct_add +#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx -#undef vp8_dequant_idct_dc -#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx +#undef vp8_dequant_dc_idct_add +#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx + +#undef vp8_dequant_dc_idct_add_y_block +#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_mmx + +#undef vp8_dequant_idct_add_y_block +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx + +#undef vp8_dequant_idct_add_uv_block +#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx + +#endif +#endif + +#if HAVE_SSE2 +extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_sse2); +extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2); +extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2); + +#if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_dequant_dc_idct_add_y_block +#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_sse2 + +#undef vp8_dequant_idct_add_y_block +#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2 + +#undef vp8_dequant_idct_add_uv_block +#define vp8_dequant_idct_add_uv_block 
vp8_dequant_idct_add_uv_block_sse2 #endif #endif diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c new file mode 100644 index 000000000..78c91d3d2 --- /dev/null +++ b/vp8/decoder/x86/idct_blk_mmx.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/config.h" +#include "idct.h" +#include "dequantize.h" + +void vp8_dequant_dc_idct_add_y_block_mmx + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (eobs[0] > 1) + vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]); + else + vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride); + + if (eobs[1] > 1) + vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]); + else + vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride); + + if (eobs[2] > 1) + vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]); + else + vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride); + + if (eobs[3] > 1) + vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]); + else + vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride); + + q += 64; + dc += 4; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_y_block_mmx + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride); + else + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride); + ((int *)q)[0] = 0; + } 
+ + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride); + else + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride); + ((int *)(q+16))[0] = 0; + } + + if (eobs[2] > 1) + vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride); + else + { + vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride); + ((int *)(q+32))[0] = 0; + } + + if (eobs[3] > 1) + vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride); + else + { + vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride); + ((int *)(q+48))[0] = 0; + } + + q += 64; + pre += 64; + dst += 4*stride; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_mmx + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride); + else + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride); + ((int *)q)[0] = 0; + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride); + else + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride); + ((int *)(q+16))[0] = 0; + } + + q += 32; + pre += 32; + dstu += 4*stride; + eobs += 2; + } + + for (i = 0; i < 2; i++) + { + if (eobs[0] > 1) + vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride); + else + { + vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride); + ((int *)q)[0] = 0; + } + + if (eobs[1] > 1) + vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride); + else + { + vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride); + ((int *)(q+16))[0] = 0; + } + + q += 32; + pre += 32; + dstv += 4*stride; + eobs += 2; + } +} diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c new file mode 100644 index 000000000..0273d6ed2 --- /dev/null +++ b/vp8/decoder/x86/idct_blk_sse2.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2010 The WebM project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_ports/config.h" +#include "idct.h" +#include "dequantize.h" + +void idct_dequant_dc_0_2x_sse2 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int dst_stride, short *dc); +void idct_dequant_dc_full_2x_sse2 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int dst_stride, short *dc); + +void idct_dequant_0_2x_sse2 + (short *q, short *dq ,unsigned char *pre, + unsigned char *dst, int dst_stride, int blk_stride); +void idct_dequant_full_2x_sse2 + (short *q, short *dq ,unsigned char *pre, + unsigned char *dst, int dst_stride, int blk_stride); + +void vp8_dequant_dc_idct_add_y_block_sse2 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs, short *dc) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)(eobs))[0] & 0xfefe) + idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc); + else + idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc); + + if (((short *)(eobs))[1] & 0xfefe) + idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2); + else + idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2); + + q += 64; + dc += 4; + pre += 64; + dst += stride*4; + eobs += 4; + } +} + +void vp8_dequant_idct_add_y_block_sse2 + (short *q, short *dq, unsigned char *pre, + unsigned char *dst, int stride, char *eobs) +{ + int i; + + for (i = 0; i < 4; i++) + { + if (((short *)(eobs))[0] & 0xfefe) + idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16); + else + idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16); + + if (((short *)(eobs))[1] & 0xfefe) + idct_dequant_full_2x_sse2 (q+32, dq, pre+8, 
dst+8, stride, 16); + else + idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16); + + q += 64; + pre += 64; + dst += stride*4; + eobs += 4; + } +} + +void vp8_dequant_idct_add_uv_block_sse2 + (short *q, short *dq, unsigned char *pre, + unsigned char *dstu, unsigned char *dstv, int stride, char *eobs) +{ + if (((short *)(eobs))[0] & 0xfefe) + idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8); + else + idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8); + + q += 32; + pre += 32; + dstu += stride*4; + + if (((short *)(eobs))[1] & 0xfefe) + idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8); + else + idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8); + + q += 32; + pre += 32; + + if (((short *)(eobs))[2] & 0xfefe) + idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8); + else + idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8); + + q += 32; + pre += 32; + dstv += stride*4; + + if (((short *)(eobs))[3] & 0xfefe) + idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8); + else + idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8); +} diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c index 75a676a07..50293c792 100644 --- a/vp8/decoder/x86/onyxdxv.c +++ b/vp8/decoder/x86/onyxdxv.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c index 6d7cc3666..47e346dd9 100644 --- a/vp8/decoder/x86/x86_dsystemdependent.c +++ b/vp8/decoder/x86/x86_dsystemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -38,14 +39,24 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi) #if CONFIG_RUNTIME_CPU_DETECT /* Override default functions with fastest ones for this CPU. 
*/ #if HAVE_MMX - if (flags & HAS_MMX) { - pbi->dequant.block = vp8_dequantize_b_mmx; - pbi->dequant.idct = vp8_dequant_idct_mmx; - pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx; + pbi->dequant.block = vp8_dequantize_b_mmx; + pbi->dequant.idct_add = vp8_dequant_idct_add_mmx; + pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx; + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_mmx; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx; } +#endif +#if HAVE_SSE2 + if (flags & HAS_SSE2) + { + pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2; + pbi->dequant.idct_add_y_block = vp8_dequant_idct_add_y_block_sse2; + pbi->dequant.idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2; + } +#endif -#endif #endif } diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c deleted file mode 100644 index cb2221c15..000000000 --- a/vp8/decoder/xprintf.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : xprintf.cpp -* -* Description : Display a printf style message on the current video frame. 
-* -****************************************************************************/ - -/**************************************************************************** -* Header Files -****************************************************************************/ - -#include -#include -#ifdef _WIN32_WCE -#include -#endif -#include "xprintf.h" - -/**************************************************************************** - * - * ROUTINE : xprintf - * - * INPUTS : const PB_INSTANCE *ppbi : Pointer to decoder instance. - * long n_pixel : Offset into buffer to write text. - * const char *format : Format string for print. - * ... : Variable length argument list. - * - * OUTPUTS : None. - * - * RETURNS : int: Size (in bytes) of the formatted text. - * - * FUNCTION : Display a printf style message on the current video frame. - * - * SPECIAL NOTES : None. - * - ****************************************************************************/ -int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...) -{ - BOOL b_rc; - va_list arglist; - HFONT hfont, hfonto; - - int rc = 0; - char sz_formatted[256] = ""; - unsigned char *p_dest = &ppbuffer[n_pixel]; - -#ifdef _WIN32_WCE - // Set up temporary bitmap - HDC hdc_memory = NULL; - HBITMAP hbm_temp = NULL; - HBITMAP hbm_orig = NULL; - - RECT rect; - - // Copy bitmap to video frame - long x; - long y; - - // Format text - va_start(arglist, format); - _vsnprintf(sz_formatted, sizeof(sz_formatted), format, arglist); - va_end(arglist); - - rect.left = 0; - rect.top = 0; - rect.right = 8 * strlen(sz_formatted); - rect.bottom = 8; - - hdc_memory = create_compatible_dc(NULL); - - if (hdc_memory == NULL) - goto Exit; - - hbm_temp = create_bitmap(rect.right, rect.bottom, 1, 1, NULL); - - if (hbm_temp == NULL) - goto Exit; - - hbm_orig = (HBITMAP)(select_object(hdc_memory, hbm_temp)); - - if (!hbm_orig) - goto Exit; - - // Write text into bitmap - // font? 
- hfont = create_font(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, ""); - - if (hfont == NULL) - goto Exit; - - hfonto = (HFONT)(select_object(hdc_memory, hbm_temp)); - - if (!hfonto) - goto Exit; - - select_object(hdc_memory, hfont); - set_text_color(hdc_memory, 1); - set_bk_color(hdc_memory, 0); - set_bk_mode(hdc_memory, TRANSPARENT); - - b_rc = bit_blt(hdc_memory, rect.left, rect.top, rect.right, rect.bottom, hdc_memory, rect.left, rect.top, BLACKNESS); - - if (!b_rc) - goto Exit; - - b_rc = ext_text_out(hdc_memory, 0, 0, ETO_CLIPPED, &rect, sz_formatted, strlen(sz_formatted), NULL); - - if (!b_rc) - goto Exit; - - for (y = rect.top; y < rect.bottom; ++y) - { - for (x = rect.left; x < rect.right; ++x) - { - if (get_pixel(hdc_memory, x, rect.bottom - 1 - y)) - p_dest[x] = 255; - } - - p_dest += n_stride; - } - - rc = strlen(sz_formatted); - -Exit: - - if (hbm_temp != NULL) - { - if (hbm_orig != NULL) - { - select_object(hdc_memory, hbm_orig); - } - - delete_object(hbm_temp); - } - - if (hfont != NULL) - { - if (hfonto != NULL) - select_object(hdc_memory, hfonto); - - delete_object(hfont); - } - - if (hdc_memory != NULL) - delete_dc(hdc_memory); - - hdc_memory = 0; - -#endif - - return rc; -} diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h deleted file mode 100644 index 2f175e943..000000000 --- a/vp8/decoder/xprintf.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -/**************************************************************************** -* -* Module Title : xprintf.h -* -* Description : Debug print interface header file. 
-* -****************************************************************************/ -#ifndef __INC_XPRINTF_H -#define __INC_XPRINTF_H - -/**************************************************************************** -* Header Files -****************************************************************************/ - -/**************************************************************************** -* Functions -****************************************************************************/ - -// Display a printf style message on the current video frame -extern int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...); - -#endif diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c new file mode 100644 index 000000000..a1f110260 --- /dev/null +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "vpx_ports/config.h" +#include "vpx_ports/arm.h" +#include "variance.h" +#include "onyx_int.h" + +extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); +extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); + +void vp8_arch_arm_encoder_init(VP8_COMP *cpi) +{ +#if CONFIG_RUNTIME_CPU_DETECT + int flags = cpi->common.rtcd.flags; + int has_edsp = flags & HAS_EDSP; + int has_media = flags & HAS_MEDIA; + int has_neon = flags & HAS_NEON; + +#if HAVE_ARMV6 + if (has_media) + { + /*cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c;*/ + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; + cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_c;*/ + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; + cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;*/ + + /*cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; + cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + + /*cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; + cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c;*/ + + /*cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; + cpi->rtcd.fdct.short8x4 = 
vp8_short_fdct8x4_c; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c;*/ + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; + cpi->rtcd.encodemb.subb = vp8_subtract_b_c; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c;*/ + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + } +#endif + +#if HAVE_ARMV7 + if (has_neon) + { + cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; + cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; + cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; + cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; + cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; + + /*cpi->rtcd.variance.var4x4 = vp8_variance4x4_c;*/ + cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; + cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; + cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; + cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; + + /*cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c;*/ + cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; + /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; + cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ + cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon; + + cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; + /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ + + cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; + /*cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; + 
cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;*/ + cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; + + cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; + cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon; + cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; + + /*cpi->rtcd.encodemb.berr = vp8_block_error_c; + cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; + cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c;*/ + cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; + + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c;*/ + /* The neon quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + */ + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon;*/ + } +#endif + +#if HAVE_ARMV7 +#if CONFIG_RUNTIME_CPU_DETECT + if (has_neon) +#endif + { + vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; + } +#endif +#endif +} diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm similarity index 92% rename from vp8/encoder/arm/neon/boolhuff_armv7.asm rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm index 9a5f36661..e78dc3322 100644 --- a/vp8/encoder/arm/neon/boolhuff_armv7.asm +++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -204,17 +205,10 @@ token_count_lt_zero_se ldr r5, [r0, #vp8_writer_range] ldr r3, [r0, #vp8_writer_count] - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r11, r1 rsb r4, r10, #32 ; 32-n ; v is kept in r1 during the token pack loop - lsr r1, r11, r4 ; v >>= 32 - n + lsl r1, r1, r4 ; r1 = v << 32 - n encode_value_loop sub r7, r5, #1 ; range-1 @@ -222,7 +216,7 @@ encode_value_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r1, r1, #1 ; bit = v >> n + lsls r1, r1, #1 ; bit = v >> n mov r4, r7, lsl #7 ; ((range-1) * 128) mov r7, #1 diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm similarity index 90% rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm index 9c52c52f6..3233d2a96 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. 
All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - EXPORT |vp8cx_pack_tokens_armv7| + EXPORT |vp8cx_pack_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -24,7 +25,7 @@ ; r3 vp8_coef_encodings ; s0 vp8_extra_bits ; s1 vp8_coef_tree -|vp8cx_pack_tokens_armv7| PROC +|vp8cx_pack_tokens_armv5| PROC push {r4-r11, lr} ; Add size of xcount * sizeof (TOKENEXTRA) to get stop @@ -56,18 +57,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #52] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -77,7 +71,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. 
So only execute this statement @@ -171,16 +165,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm similarity index 91% rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm index 92b098909..a9b552ae1 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; - EXPORT |vp8cx_pack_mb_row_tokens_armv7| + EXPORT |vp8cx_pack_mb_row_tokens_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -24,7 +25,7 @@ ; r3 vp8_extra_bits ; s0 vp8_coef_tree -|vp8cx_pack_mb_row_tokens_armv7| PROC +|vp8cx_pack_mb_row_tokens_armv5| PROC push {r4-r11, lr} sub sp, sp, #24 @@ -77,18 +78,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #60] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -98,7 +92,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. 
So only execute this statement @@ -192,16 +186,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm similarity index 93% rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm index 6d5f882ed..0835164e5 100644 --- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm +++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; - EXPORT |vp8cx_pack_tokens_into_partitions_armv7| + EXPORT |vp8cx_pack_tokens_into_partitions_armv5| INCLUDE vpx_vp8_enc_asm_offsets.asm @@ -26,7 +27,7 @@ ; s1 vp8_extra_bits, ; s2 const vp8_tree_index *, -|vp8cx_pack_tokens_into_partitions_armv7| PROC +|vp8cx_pack_tokens_into_partitions_armv5| PROC push {r4-r11, lr} sub sp, sp, #44 @@ -105,18 +106,11 @@ while_p_lt_stop movne lr, #2 ; i = 2 subne r8, r8, #1 ; --n - ; reverse the stream of bits to be packed. Normally - ; the most significant bit is peeled off and compared - ; in the form of (v >> --n) & 1. ARM architecture has - ; the ability to set a flag based on the value of the - ; bit shifted off the bottom of the register. To make - ; that happen the bitstream is reversed. - rbit r12, r6 rsb r4, r8, #32 ; 32-n ldr r10, [sp, #88] ; vp8_coef_tree ; v is kept in r12 during the token pack loop - lsr r12, r12, r4 ; v >>= 32 - n + lsl r12, r6, r4 ; r12 = v << 32 - n ; loop start token_loop @@ -126,7 +120,7 @@ token_loop ; Decisions are made based on the bit value shifted ; off of v, so set a flag here based on this. ; This value is refered to as "bb" - lsrs r12, r12, #1 ; bb = v >> n + lsls r12, r12, #1 ; bb = v >> n mul r4, r4, r7 ; ((range-1) * pp[i>>1])) ; bb can only be 0 or 1. So only execute this statement @@ -220,16 +214,15 @@ token_count_lt_zero ldr r10, [r12, #vp8_extra_bit_struct_tree] str r10, [sp, #4] ; b->tree - rbit r12, r7 ; reverse v rsb r4, r8, #32 - lsr r12, r12, r4 + lsl r12, r7, r4 mov lr, #0 ; i = 0 extra_bits_loop ldrb r4, [r9, lr, asr #1] ; pp[i>>1] sub r7, r5, #1 ; range-1 - lsrs r12, r12, #1 ; v >> n + lsls r12, r12, #1 ; v >> n mul r4, r4, r7 ; (range-1) * pp[i>>1] addcs lr, lr, #1 ; i + bb diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm index 608c9ae65..61ffdb315 100644 --- a/vp8/encoder/arm/armv6/walsh_v6.asm +++ b/vp8/encoder/arm/armv6/walsh_v6.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vp8_short_walsh4x4_armv6| diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c index e70b3ad47..fe8e70c16 100644 --- a/vp8/encoder/arm/boolhuff_arm.c +++ b/vp8/encoder/arm/boolhuff_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c deleted file mode 100644 index 003979680..000000000 --- a/vp8/encoder/arm/csystemdependent.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
- * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. - */ - - -#include "vpx_ports/config.h" -#include "variance.h" -#include "onyx_int.h" - -void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); -extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction); - -void vp8_cmachine_specific_config(VP8_COMP *cpi) -{ -#if CONFIG_RUNTIME_CPU_DETECT - cpi->rtcd.common = &cpi->common.rtcd; - -#if HAVE_ARMV7 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_neon; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_neon; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_neon; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_neon; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_neon; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_neon; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_neon; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_neon; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_neon; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_neon; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_neon; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - 
cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_neon; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_neon; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_neon; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_neon; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_neon; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_neon; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_neon; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_neon; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_neon; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_neon; -#elif HAVE_ARMV6 - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = 
vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_armv6; - - cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#else - //pure c - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; - - cpi->rtcd.variance.get16x16prederror = vp8_get16x16pred_error_c; - cpi->rtcd.variance.get8x8var = vp8_get8x8var_c; - cpi->rtcd.variance.get16x16var = vp8_get16x16var_c;; - cpi->rtcd.variance.get4x4sse_cs = vp8_get4x4sse_cs_c; - - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; - 
- cpi->rtcd.encodemb.berr = vp8_block_error_c; - cpi->rtcd.encodemb.mberr = vp8_mbblock_error_c; - cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_c; - cpi->rtcd.encodemb.subb = vp8_subtract_b_c; - cpi->rtcd.encodemb.submby = vp8_subtract_mby_c; - cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_c; - - cpi->rtcd.quantize.quantb = vp8_regular_quantize_b; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_c; -#endif -#endif - -#if HAVE_ARMV7 - vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon; -#else - vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; -#endif -} diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h index a671862fb..41fa5d192 100644 --- a/vp8/encoder/arm/dct_arm.h +++ b/vp8/encoder/arm/dct_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -14,9 +15,11 @@ #if HAVE_ARMV6 extern prototype_fdct(vp8_short_walsh4x4_armv6); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6 #endif +#endif #if HAVE_ARMV7 extern prototype_fdct(vp8_short_fdct4x4_neon); @@ -25,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon); extern prototype_fdct(vp8_fast_fdct8x4_neon); extern prototype_fdct(vp8_short_walsh4x4_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon @@ -39,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon); #undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon +#endif #endif diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c index 3f1d05391..cc9e014b2 100644 --- a/vp8/encoder/arm/encodemb_arm.c +++ b/vp8/encoder/arm/encodemb_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h index 28f9e5c5f..8fe453735 100644 --- a/vp8/encoder/arm/encodemb_arm.h +++ b/vp8/encoder/arm/encodemb_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -29,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); //#undef vp8_encodemb_mbuverr //#define vp8_encodemb_mbuverr vp8_mbuverror_c +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_encodemb_subb #define vp8_encodemb_subb vp8_subtract_b_neon @@ -37,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon); #undef vp8_encodemb_submbuv #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon +#endif #endif diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c deleted file mode 100644 index 07f218605..000000000 --- a/vp8/encoder/arm/mcomp_arm.c +++ /dev/null @@ -1,1662 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "mcomp.h" -#include "vpx_mem/vpx_mem.h" - -#include -#include -#include - -#ifdef ENTROPY_STATS -static int mv_ref_ct [31] [4] [2]; -static int mv_mode_cts [4] [2]; -#endif - -static int mv_bits_sadcost[256]; - -extern unsigned int vp8_sub_pixel_variance16x16s_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - int xoffset, - int yoffset, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); -extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon -( - unsigned char *src_ptr, - int src_pixels_per_line, - unsigned char *dst_ptr, - int dst_pixels_per_line, - unsigned int *sse -); - -void vp8cx_init_mv_bits_sadcost() -{ - int i; - - for (i = 0; i < 256; i++) - { - mv_bits_sadcost[i] = (int)sqrt(i * 16); - } -} - - -int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight) -{ - // MV costing is based on the distribution of vectors in the previous frame and as such will tend to - // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the - // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks. - // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors. 
- return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7; -} - -int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit) -{ - //int i; - //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8; - //return ( (vp8_mv_bit_cost(mv, ref, mvcost, 100) + 128) * error_per_bit) >> 8; - - //i = (vp8_mv_bit_cost(mv, ref, mvcost, 100) * error_per_bit + 128) >> 8; - return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8; - //return (vp8_mv_bit_cost(mv, ref, mvcost, 128) * error_per_bit + 128) >> 8; -} - - -static int mv_bits(MV *mv, MV *ref, int *mvcost[2]) -{ - // get the estimated number of bits for a motion vector, to be used for costing in SAD based - // motion estimation - return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8; -} - -void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) -{ - int Len; - int search_site_count = 0; - - - // Generate offsets for 4 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) - { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. 
- x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 4; -} - -void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) -{ - int Len; - int search_site_count = 0; - - // Generate offsets for 8 search sites per step. - Len = MAX_FIRST_STEP; - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = 0; - search_site_count++; - - while (Len > 0) - { - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = 0; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = -Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = 0; - x->ss[search_site_count].offset = Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = -Len; - x->ss[search_site_count].offset = -Len * stride + Len; - search_site_count++; - - // Compute offsets for search sites. 
- x->ss[search_site_count].mv.col = -Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride - Len; - search_site_count++; - - // Compute offsets for search sites. - x->ss[search_site_count].mv.col = Len; - x->ss[search_site_count].mv.row = Len; - x->ss[search_site_count].offset = Len * stride + Len; - search_site_count++; - - - // Contract. - Len /= 2; - } - - x->ss_count = search_site_count; - x->searches_per_step = 8; -} - - -#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector -#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc -#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. -#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; -#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) - -//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; } - -int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - - int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1; - int br = bestmv->row << 2, bc = bestmv->col << 2; - int tr = br, tc = bc; - unsigned int besterr = INT_MAX; - unsigned int left, right, up, down, diag; 
- unsigned int sse; - unsigned int whichdir; - unsigned int halfiters = 4; - unsigned int quarteriters = 4; - - int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1)); - int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1)); - int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1)); - int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1)); - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - - // calculate central point error - besterr = vf(y, d->pre_stride, z, b->src_stride, &sse); - besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) - while (--halfiters) - { - // 1/2 pel - CHECK_BETTER(left, tr, tc - 2); - CHECK_BETTER(right, tr, tc + 2); - CHECK_BETTER(up, tr - 2, tc); - CHECK_BETTER(down, tr + 2, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - - switch (whichdir) - { - case 0: - CHECK_BETTER(diag, tr - 2, tc - 2); - break; - case 1: - CHECK_BETTER(diag, tr - 2, tc + 2); - break; - case 2: - CHECK_BETTER(diag, tr + 2, tc - 2); - break; - case 3: - CHECK_BETTER(diag, tr + 2, tc + 2); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) - // 1/4 pel - while (--quarteriters) - { - CHECK_BETTER(left, tr, tc - 1); - CHECK_BETTER(right, tr, tc + 1); - CHECK_BETTER(up, tr - 1, tc); - CHECK_BETTER(down, tr + 1, tc); - - whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - - switch (whichdir) - { - case 0: - CHECK_BETTER(diag, tr - 1, tc - 1); - break; - case 1: - CHECK_BETTER(diag, tr - 1, tc + 1); - break; - case 2: - CHECK_BETTER(diag, tr + 1, tc - 1); - break; - case 3: - CHECK_BETTER(diag, tr + 1, tc + 1); - break; - } - - // no reason to check the same one again. - if (tr == br && tc == bc) - break; - - tr = br; - tc = bc; - } - - bestmv->row = br << 1; - bestmv->col = bc << 1; - - if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL)) - return INT_MAX; - - return besterr; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER -#undef MIN -#undef MAX -int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - int bestmse = INT_MAX; - MV startmv; - //MV this_mv; - MV this_mv; - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - int whichdir ; - - - // Trap uncodable vectors - if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL)) - { - bestmv->row <<= 3; - bestmv->col <<= 3; - return INT_MAX; - } - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); - bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // go left then right and check error - this_mv.row = startmv.row; - this_mv.col = ((startmv.col - 8) | 4); - left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 8; - 
right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - this_mv.row = ((startmv.row - 8) | 4); - up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 8; - down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - //for(whichdir =0;whichdir<4;whichdir++) - //{ - this_mv = startmv; - - switch (whichdir) - { - case 0: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - break; - case 1: - this_mv.col += 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - break; - case 2: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row += 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - break; - case 3: - this_mv.col += 4; - this_mv.row += 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -// } - - - // time to check quarter pels. 
- if (bestmv->row < startmv.row) - y -= d->pre_stride; - - if (bestmv->col < startmv.col) - y--; - - startmv = *bestmv; - - - - // go left then right and check error - this_mv.row = startmv.row; - - if (startmv.col & 7) - { - this_mv.col = startmv.col - 2; - left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); - } - - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 4; - right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - - if (startmv.row & 7) - { - this_mv.row = startmv.row - 2; - up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.row = (startmv.row - 8) | 6; - up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 4; - down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - - // now check 1 more diagonal - whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - -// for(whichdir=0;whichdir<4;whichdir++) -// { - this_mv = startmv; - - switch (whichdir) - { - case 0: - - if (startmv.row & 7) - { - this_mv.row -= 2; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; - } - } - else - { - this_mv.row = (startmv.row - 8) | 6; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); - } - } - - break; - case 1: - this_mv.col += 2; - - if (startmv.row & 7) - { - this_mv.row -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.row = (startmv.row - 8) | 6; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); - } - - break; - case 2: - this_mv.row += 2; - - if (startmv.col & 7) - { - this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - } - else - { - this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; - } - - break; - case 3: - this_mv.col += 2; - this_mv.row += 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -// } - - return bestmse; -} - -int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) -{ - int bestmse = INT_MAX; - MV startmv; - //MV 
this_mv; - MV this_mv; - unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; - unsigned char *z = (*(b->base_src) + b->src); - int left, right, up, down, diag; - unsigned int sse; - - // Trap uncodable vectors - if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL)) - { - bestmv->row <<= 3; - bestmv->col <<= 3; - return INT_MAX; - } - - // central mv - bestmv->row <<= 3; - bestmv->col <<= 3; - startmv = *bestmv; - - // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); - bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); - - // go left then right and check error - this_mv.row = startmv.row; - this_mv.col = ((startmv.col - 8) | 4); - left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (left < bestmse) - { - *bestmv = this_mv; - bestmse = left; - } - - this_mv.col += 8; - right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse); - right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (right < bestmse) - { - *bestmv = this_mv; - bestmse = right; - } - - // go up then down and check error - this_mv.col = startmv.col; - this_mv.row = ((startmv.row - 8) | 4); - up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (up < bestmse) - { - *bestmv = this_mv; - bestmse = up; - } - - this_mv.row += 8; - down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (down < bestmse) - { - *bestmv = this_mv; - bestmse = down; - } - - // somewhat strangely not doing all the diagonals for half pel is slower than doing them. 
-#if 0 - // now check 1 more diagonal - - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); - this_mv = startmv; - - switch (whichdir) - { - case 0: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 1: - this_mv.col += 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 2: - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row += 4; - diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - case 3: - this_mv.col += 4; - this_mv.row += 4; - diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); - break; - } - - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -#else - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = (this_mv.row - 8) | 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col += 8; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col = (this_mv.col - 8) | 4; - this_mv.row = startmv.row + 4; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - - this_mv.col += 8; - diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse); - diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - - if (diag < 
bestmse) - { - *bestmv = this_mv; - bestmse = diag; - } - -#endif - return bestmse; -} - -#if 1 - -#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector -#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. -#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best -const MV next_chkpts[6][3] = -{ - {{ -2, 0}, { -1, -2}, {1, -2}}, - {{ -1, -2}, {1, -2}, {2, 0}}, - {{1, -2}, {2, 0}, {1, 2}}, - {{2, 0}, {1, 2}, { -1, 2}}, - {{1, 2}, { -1, 2}, { -2, 0}}, - {{ -1, 2}, { -2, 0}, { -1, -2}} -}; -int vp8_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, - int *mvsadcost[2], - int *mvcost[2] -) -{ - MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; - int i, j; - unsigned char *src = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; - unsigned int besterr, thiserr = 0x7fffffff; - int k = -1, tk; - - if (bc < x->mv_col_min) bc = x->mv_col_min; - - if (bc > x->mv_col_max) bc = x->mv_col_max; - - if (br < x->mv_row_min) br = x->mv_row_min; - - if (br > x->mv_row_max) br = x->mv_row_max; - - rr >>= 1; - rc >>= 1; - - besterr = ERR(br, bc, thiserr); - - // hex search - //j=0 - tr = br; - tc = bc; - - for (i = 0; i < 6; i++) - { - int nr = tr + hex[i].row, nc = tc + hex[i].col; - - if (nc < x->mv_col_min) continue; - - 
if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - //CHECK_BETTER(thiserr,nr,nc); - if ((thiserr = ERR(nr, nc, besterr)) < besterr) - { - besterr = thiserr; - br = nr; - bc = nc; - k = i; - } - } - - if (tr == br && tc == bc) - goto cal_neighbors; - - for (j = 1; j < 127; j++) - { - tr = br; - tc = bc; - tk = k; - - for (i = 0; i < 3; i++) - { - int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - //CHECK_BETTER(thiserr,nr,nc); - if ((thiserr = ERR(nr, nc, besterr)) < besterr) - { - besterr = thiserr; - br = nr; - bc = nc; //k=(tk+5+i)%6;} - k = tk + 5 + i; - - if (k >= 12) k -= 12; - else if (k >= 6) k -= 6; - } - } - - if (tr == br && tc == bc) - break; - } - - // check 8 1 away neighbors -cal_neighbors: - tr = br; - tc = bc; - - for (i = 0; i < 8; i++) - { - int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - best_mv->row = br; - best_mv->col = bc; - - return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER - -#else - -#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) -#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector -#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. 
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best - -int vp8_hex_search -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, - int *mvsadcost[2], - int *mvcost[2] -) -{ - MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ; - MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; - int i, j; - unsigned char *src = (*(b->base_src) + b->src); - int src_stride = b->src_stride; - //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc; - int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; - unsigned int besterr, thiserr = 0x7fffffff; - - /* - if ( rc < x->mv_col_min) bc = x->mv_col_min; - if ( rc > x->mv_col_max) bc = x->mv_col_max; - if ( rr < x->mv_row_min) br = x->mv_row_min; - if ( rr > x->mv_row_max) br = x->mv_row_max; - rr>>=1; - rc>>=1; - br>>=3; - bc>>=3; - */ - if (bc < x->mv_col_min) bc = x->mv_col_min; - - if (bc > x->mv_col_max) bc = x->mv_col_max; - - if (br < x->mv_row_min) br = x->mv_row_min; - - if (br > x->mv_row_max) br = x->mv_row_max; - - rr >>= 1; - rc >>= 1; - - besterr = ERR(br, bc, thiserr); - - // hex search jbb changed to 127 to avoid max 256 problem steping by 2. 
- for (j = 0; j < 127; j++) - { - tr = br; - tc = bc; - - for (i = 0; i < 6; i++) - { - int nr = tr + hex[i].row, nc = tc + hex[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - if (tr == br && tc == bc) - break; - } - - // check 8 1 away neighbors - tr = br; - tc = bc; - - for (i = 0; i < 8; i++) - { - int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col; - - if (nc < x->mv_col_min) continue; - - if (nc > x->mv_col_max) continue; - - if (nr < x->mv_row_min) continue; - - if (nr > x->mv_row_max) continue; - - CHECK_BETTER(thiserr, nr, nc); - } - - best_mv->row = br; - best_mv->col = bc; - - return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; -} -#undef MVC -#undef PRE -#undef SP -#undef DIST -#undef ERR -#undef CHECK_BETTER - -#endif - -int vp8_diamond_search_sad -( - MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], - int *mvcost[2] -) -{ - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - int this_row_offset; - int this_col_offset; - search_site *ss; - - unsigned char *check_here; - int thissad; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) 
&& - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. - ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; - - *num00 = 0; - - for (step = 0; step < tot_steps ; step++) - { - for (j = 0 ; j < x->searches_per_step ; j++) - { - // Trap illegal vectors - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && - (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) - - { - check_here = ss[i].offset + best_address; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - - i++; - } - - if (best_site != last_site) - { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } - else if (best_address == in_what) - (*num00)++; - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); -} - -int vp8_diamond_search_sadx4 -( - 
MACROBLOCK *x, - BLOCK *b, - BLOCKD *d, - MV *ref_mv, - MV *best_mv, - int search_param, - int error_per_bit, - int *num00, - vp8_variance_fn_ptr_t *fn_ptr, - int *mvsadcost[2], - int *mvcost[2] -) -{ - int i, j, step; - - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - unsigned char *best_address; - - int tot_steps; - MV this_mv; - - int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - int this_row_offset; - int this_col_offset; - search_site *ss; - - unsigned char *check_here; - int thissad; - - // Work out the start point for the search - in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col); - best_address = in_what; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // search_param determines the length of the initial step and hence the number of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. 
- ss = &x->ss[search_param * x->searches_per_step]; - tot_steps = (x->ss_count / x->searches_per_step) - search_param; - - i = 1; - best_mv->row = ref_row; - best_mv->col = ref_col; - - *num00 = 0; - - for (step = 0; step < tot_steps ; step++) - { - int check_row_min, check_col_min, check_row_max, check_col_max; - - check_row_min = x->mv_row_min - best_mv->row; - check_row_max = x->mv_row_max - best_mv->row; - check_col_min = x->mv_col_min - best_mv->col; - check_col_max = x->mv_col_max - best_mv->col; - - for (j = 0 ; j < x->searches_per_step ; j += 4) - { - char *block_offset[4]; - unsigned int valid_block[4]; - int all_in = 1, t; - - for (t = 0; t < 4; t++) - { - valid_block [t] = (ss[t+i].mv.col > check_col_min); - valid_block [t] &= (ss[t+i].mv.col < check_col_max); - valid_block [t] &= (ss[t+i].mv.row > check_row_min); - valid_block [t] &= (ss[t+i].mv.row < check_row_max); - - all_in &= valid_block[t]; - block_offset[t] = ss[i+t].offset + best_address; - } - - if (all_in) - { - int sad_array[4]; - - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); - - for (t = 0; t < 4; t++, i++) - { - thissad = sad_array[t]; - - if (thissad < bestsad) - { - this_mv.row = (best_mv->row + ss[i].mv.row) << 3; - this_mv.col = (best_mv->col + ss[i].mv.col) << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - } - else - { - int t; - - for (t = 0; t < 4; i++, t++) - { - // Trap illegal vectors - if (valid_block[t]) - - { - check_here = block_offset[t]; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - 
bestsad = thissad; - best_site = i; - } - } - } - } - } - } - - if (best_site != last_site) - { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; - } - else if (best_address == in_what) - (*num00)++; - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad == INT_MAX) - return INT_MAX; - - return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); -} - - -#if !(CONFIG_REALTIME_ONLY) -int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) -{ - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - MV *best_mv = &d->bmi.mv.as_mv; - MV this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - int thissad; - - int ref_row = ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - - //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14)); - bestsad = fn_ptr->sdf(what, 
what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max ; r++) - { - this_mv.row = r << 3; - check_here = r * mv_stride + in_what + col_min; - - for (c = col_min; c < col_max; c++) - { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - this_mv.col = c << 3; - //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14)); - //thissad += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)]; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - - check_here++; - } - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - else - return INT_MAX; -} - -int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) -{ - unsigned char *what = (*(b->base_src) + b->src); - int what_stride = b->src_stride; - unsigned char *in_what; - int in_what_stride = d->pre_stride; - int mv_stride = d->pre_stride; - unsigned char *bestaddress; - MV *best_mv = &d->bmi.mv.as_mv; - MV this_mv; - int bestsad = INT_MAX; - int r, c; - - unsigned char *check_here; - int thissad; - - int ref_row 
= ref_mv->row >> 3; - int ref_col = ref_mv->col >> 3; - - int row_min = ref_row - distance; - int row_max = ref_row + distance; - int col_min = ref_col - distance; - int col_max = ref_col + distance; - - int sad_array[3]; - - // Work out the mid point for the search - in_what = *(d->base_pre) + d->pre; - bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; - - best_mv->row = ref_row; - best_mv->col = ref_col; - - // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits - if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && - (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) - { - // Baseline value at the centre - bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); - } - - // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border - if (col_min < x->mv_col_min) - col_min = x->mv_col_min; - - if (col_max > x->mv_col_max) - col_max = x->mv_col_max; - - if (row_min < x->mv_row_min) - row_min = x->mv_row_min; - - if (row_max > x->mv_row_max) - row_max = x->mv_row_max; - - for (r = row_min; r < row_max ; r++) - { - this_mv.row = r << 3; - check_here = r * mv_stride + in_what + col_min; - c = col_min; - - while ((c + 3) < col_max) - { - int i; - - fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); - - for (i = 0; i < 3; i++) - { - thissad = sad_array[i]; - - if (thissad < bestsad) - { - this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - } - - check_here++; - c++; - } - } - - while (c < col_max) - { - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_mv.col = c << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, 
mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_mv->row = r; - best_mv->col = c; - bestaddress = check_here; - } - } - - check_here ++; - c ++; - } - - } - - this_mv.row = best_mv->row << 3; - this_mv.col = best_mv->col << 3; - - if (bestsad < INT_MAX) - return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) - + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); - else - return INT_MAX; -} -#endif - -#ifdef ENTROPY_STATS -void print_mode_context(void) -{ - FILE *f = fopen("modecont.c", "w"); - int i, j; - - fprintf(f, "#include \"entropy.h\"\n"); - fprintf(f, "const int vp8_mode_contexts[6][4] =\n"); - fprintf(f, "{\n"); - - for (j = 0; j < 6; j++) - { - fprintf(f, " { // %d \n", j); - fprintf(f, " "); - - for (i = 0; i < 4; i++) - { - int overal_prob; - int this_prob; - int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1]; - - // Overall probs - count = mv_mode_cts[i][0] + mv_mode_cts[i][1]; - - if (count) - overal_prob = 256 * mv_mode_cts[i][0] / count; - else - overal_prob = 128; - - if (overal_prob == 0) - overal_prob = 1; - - // context probs - count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1]; - - if (count) - this_prob = 256 * mv_ref_ct[j][i][0] / count; - else - this_prob = 128; - - if (this_prob == 0) - this_prob = 1; - - fprintf(f, "%5d, ", this_prob); - //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob); - //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob); - } - - fprintf(f, " },\n"); - } - - fprintf(f, "};\n"); - fclose(f); -} - -/* MV ref count ENTROPY_STATS stats code */ -#ifdef ENTROPY_STATS -void init_mv_ref_counts() -{ - vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct)); - vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts)); -} - -void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4]) -{ - if (m == ZEROMV) - { - ++mv_ref_ct [ct[0]] [0] [0]; - ++mv_mode_cts[0][0]; - } - else - { - ++mv_ref_ct [ct[0]] [0] [1]; - ++mv_mode_cts[0][1]; - - if 
(m == NEARESTMV) - { - ++mv_ref_ct [ct[1]] [1] [0]; - ++mv_mode_cts[1][0]; - } - else - { - ++mv_ref_ct [ct[1]] [1] [1]; - ++mv_mode_cts[1][1]; - - if (m == NEARMV) - { - ++mv_ref_ct [ct[2]] [2] [0]; - ++mv_mode_cts[2][0]; - } - else - { - ++mv_ref_ct [ct[2]] [2] [1]; - ++mv_mode_cts[2][1]; - - if (m == NEWMV) - { - ++mv_ref_ct [ct[3]] [3] [0]; - ++mv_mode_cts[3][0]; - } - else - { - ++mv_ref_ct [ct[3]] [3] [1]; - ++mv_mode_cts[3][1]; - } - } - } - } -} - -#endif/* END MV ref count ENTROPY_STATS stats code */ - -#endif diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm index d5dec440d..8c191a753 100644 --- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm +++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm index de1c25469..ca351a1c4 100644 --- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm +++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm index 11070377b..ca1ea9c18 100644 --- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm +++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm index 6169f10da..d7c590e15 100644 --- a/vp8/encoder/arm/neon/sad16_neon.asm +++ b/vp8/encoder/arm/neon/sad16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm index 28604ddeb..23ba6df93 100644 --- a/vp8/encoder/arm/neon/sad8_neon.asm +++ b/vp8/encoder/arm/neon/sad8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm index 26bc0d06c..5af5cb888 100644 --- a/vp8/encoder/arm/neon/shortfdct_neon.asm +++ b/vp8/encoder/arm/neon/shortfdct_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm index 8781ca0cc..3ea00f8b9 100644 --- a/vp8/encoder/arm/neon/subtract_neon.asm +++ b/vp8/encoder/arm/neon/subtract_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm index 64b83ca43..e1a46869a 100644 --- a/vp8/encoder/arm/neon/variance_neon.asm +++ b/vp8/encoder/arm/neon/variance_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm index f26b4d7ae..b0450e523 100644 --- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm +++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm index f53596727..6af4e87ba 100644 --- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm index 5269c0af8..ba3decf6c 100644 --- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm +++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm index aec716e3b..1b09cfe4c 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm index 3d02d7c40..0a2b71c49 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm @@ -1,16 +1,17 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; - EXPORT |vp8_sub_pixel_variance16x16s_4_0_neon| - EXPORT |vp8_sub_pixel_variance16x16s_0_4_neon| - EXPORT |vp8_sub_pixel_variance16x16s_4_4_neon| + EXPORT |vp8_variance_halfpixvar16x16_h_neon| + EXPORT |vp8_variance_halfpixvar16x16_v_neon| + EXPORT |vp8_variance_halfpixvar16x16_hv_neon| EXPORT |vp8_sub_pixel_variance16x16s_neon| ARM REQUIRE8 @@ -19,7 +20,7 @@ AREA ||.text||, CODE, READONLY, ALIGN=2 ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon +;unsigned int vp8_variance_halfpixvar16x16_h_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -28,7 +29,7 @@ ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_4_0_neon| PROC +|vp8_variance_halfpixvar16x16_h_neon| PROC push {lr} mov r12, #4 ;loop counter @@ -119,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon ENDP ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon +;unsigned int vp8_variance_halfpixvar16x16_v_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -128,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_0_4_neon| PROC +|vp8_variance_halfpixvar16x16_v_neon| PROC push {lr} mov r12, #4 ;loop counter @@ -215,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon ENDP ;================================================ -;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon +;unsigned int vp8_variance_halfpixvar16x16_hv_neon ;( ; unsigned char *src_ptr, r0 ; int src_pixels_per_line, r1 @@ -224,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon ; unsigned int *sse ;); ;================================================ -|vp8_sub_pixel_variance16x16s_4_4_neon| PROC +|vp8_variance_halfpixvar16x16_hv_neon| PROC push {lr} vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load src data diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm 
b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm index bd56761fa..cf4da62fa 100644 --- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm +++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c index 0586e55d8..b2d8f2b2c 100644 --- a/vp8/encoder/arm/picklpf_arm.c +++ b/vp8/encoder/arm/picklpf_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c index 46906d3a2..65c616614 100644 --- a/vp8/encoder/arm/quantize_arm.c +++ b/vp8/encoder/arm/quantize_arm.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { - d->eob = vp8_fast_quantize_b_neon_func(b->coeff, &b->zbin[0][0], d->qcoeff, d->dqcoeff, d->dequant[0], vp8_rvsplus1_default_zig_zag1d, &b->round[0][0], &b->quant[0][0]); + d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant); } /* diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h index e93f0fef1..5f9155eb1 100644 --- a/vp8/encoder/arm/quantize_arm.h +++ b/vp8/encoder/arm/quantize_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,8 +15,11 @@ #if HAVE_ARMV7 extern prototype_quantize_block(vp8_fast_quantize_b_neon); -#undef vp8_quantize_fastquantb -#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon +/* The neon quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + */ +//#undef vp8_quantize_fastquantb +//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon #endif diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h index d9fc9b3e0..0e5f62fcf 100644 --- a/vp8/encoder/arm/variance_arm.h +++ b/vp8/encoder/arm/variance_arm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -29,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon); //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c); //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon); //extern prototype_getmbss(vp8_get_mb_ss_c); extern prototype_variance(vp8_mse16x16_neon); @@ -37,6 +41,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon); //extern prototype_variance2(vp8_get16x16var_c); extern prototype_sad(vp8_get4x4sse_cs_neon); +#if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_variance_sad4x4 #define vp8_variance_sad4x4 vp8_sad4x4_neon @@ -82,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon + //#undef vp8_variance_getmbss //#define vp8_variance_getmbss vp8_get_mb_ss_c @@ -99,6 +113,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon); #undef vp8_variance_get4x4sse_cs #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon +#endif #endif diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c index 8cdf0791f..c595ca3c0 100644 --- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c +++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c index 800892007..a4db12cee 100644 --- a/vp8/encoder/bitstream.c +++ b/vp8/encoder/bitstream.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -793,7 +794,8 @@ static void write_mv_ref assert(NEARESTMV <= m && m <= SPLITMV); - vp8_write_token(w, vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m); + vp8_write_token(w, vp8_mv_ref_tree, p, + vp8_mv_ref_encoding_array - NEARESTMV + m); } static void write_sub_mv_ref @@ -803,7 +805,8 @@ static void write_sub_mv_ref { assert(LEFT4X4 <= m && m <= NEW4X4); - vp8_write_token(w, vp8_sub_mv_ref_tree, p, VP8_SUBMVREFENCODINGS + m); + vp8_write_token(w, vp8_sub_mv_ref_tree, p, + vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); } static void write_mv @@ -882,6 +885,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) int prob_skip_false = 0; ms = pc->mi - 1; + cpi->mb.partition_info = cpi->mb.pi; + // Calculate the probabilities to be used to code the reference frame based on actual useage this frame if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) cpi->prob_intra_coded = 1; @@ -1065,7 +1070,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) do { - const B_MODE_INFO *const b = mi->partition_bmi + j; + const B_MODE_INFO *const b = cpi->mb.partition_info->bmi + j; const int *const L = vp8_mbsplits [mi->partitioning]; int k = -1; /* first block in subset j */ int mv_contz; @@ -1087,7 +1092,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) write_mv(w, &b->mv.as_mv, &best_mv, (const MV_CONTEXT *) mvc); } } - while (++j < mi->partition_count); + while (++j < cpi->mb.partition_info->count); } break; default: @@ -1096,9 +1101,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) } ++m; + cpi->mb.partition_info++; } ++m; /* skip L prediction border */ + cpi->mb.partition_info++; } } @@ -1553,9 +1560,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) if (xd->mode_ref_lf_delta_enabled) { // Do the deltas need to be updated - vp8_write_bit(bc, (xd->mode_ref_lf_delta_update) ? 
1 : 0); + int send_update = xd->mode_ref_lf_delta_update + || cpi->oxcf.error_resilient_mode; - if (xd->mode_ref_lf_delta_update) + vp8_write_bit(bc, send_update); + if (send_update) { int Data; @@ -1565,8 +1574,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) Data = xd->ref_lf_deltas[i]; // Frame level data - if (Data) + if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] + || cpi->oxcf.error_resilient_mode) { + xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i]; vp8_write_bit(bc, 1); if (Data > 0) @@ -1590,8 +1601,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size) { Data = xd->mode_lf_deltas[i]; - if (Data) + if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i] + || cpi->oxcf.error_resilient_mode) { + xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i]; vp8_write_bit(bc, 1); if (Data > 0) diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h index ee69f66e4..f5d148ea4 100644 --- a/vp8/encoder/bitstream.h +++ b/vp8/encoder/bitstream.h @@ -1,35 +1,36 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef __INC_BITSTREAM_H #define __INC_BITSTREAM_H -#if HAVE_ARMV7 -void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount, +#if HAVE_ARMV5TE +void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); -void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *, +void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); -void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w, +void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w, vp8_token *, vp8_extra_bit_struct *, const vp8_tree_index *); # define pack_tokens(a,b,c) \ - vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) # define pack_tokens_into_partitions(a,b,c,d) \ - vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) # define pack_mb_row_tokens(a,b) \ - vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) + vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree) #else # define pack_tokens(a,b,c) pack_tokens_c(a,b,c) # define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d) diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index cc4cbe067..e94e54976 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -31,10 +32,11 @@ typedef struct short *coeff; // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries - short(*quant)[4]; - short(*zbin)[4]; - short(*zrun_zbin_boost); - short(*round)[4]; + short *quant; + short *quant_shift; + short *zbin; + short *zrun_zbin_boost; + short *round; // Zbin Over Quant value short zbin_extra; @@ -48,6 +50,12 @@ typedef struct } BLOCK; +typedef struct +{ + int count; + B_MODE_INFO bmi[16]; +} PARTITION_INFO; + typedef struct { DECLARE_ALIGNED(16, short, src_diff[400]); // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y @@ -59,6 +67,9 @@ typedef struct YV12_BUFFER_CONFIG src; MACROBLOCKD e_mbd; + PARTITION_INFO *partition_info; /* work pointer */ + PARTITION_INFO *pi; /* Corresponds to upper left visible macroblock */ + PARTITION_INFO *pip; /* Base of allocated array */ search_site *ss; int ss_count; @@ -91,6 +102,9 @@ typedef struct int encode_breakout; + //char * gf_active_ptr; + signed char *gf_active_ptr; + unsigned char *active_ptr; MV_CONTEXT *mvc; @@ -99,15 +113,8 @@ typedef struct void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); void (*vp8_short_fdct8x4)(short *input, short *output, int pitch); - void (*short_fdct4x4rd)(short *input, short *output, int pitch); - void (*short_fdct8x4rd)(short *input, short *output, int pitch); - void (*vp8_short_fdct4x4_ptr)(short *input, short *output, int pitch); void (*short_walsh4x4)(short *input, short *output, int pitch); - void (*quantize_b)(BLOCK *b, BLOCKD *d); - void (*quantize_brd)(BLOCK *b, BLOCKD *d); - - } MACROBLOCK; diff --git 
a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c index c101384d9..82006b196 100644 --- a/vp8/encoder/boolhuff.c +++ b/vp8/encoder/boolhuff.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h index 0d929f067..f723da3f0 100644 --- a/vp8/encoder/boolhuff.h +++ b/vp8/encoder/boolhuff.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index 5207e39c4..b5a11ae34 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -1,172 +1,64 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #include - -static const short dct_matrix2[4][4] = -{ - { 23170, 30274, 23170, 12540 }, - { 23170, 12540, -23170, -30274 }, - { 23170, -12540, -23170, 30274 }, - { 23170, -30274, 23170, -12540 } -}; - -static const short dct_matrix1[4][4] = -{ - { 23170, 23170, 23170, 23170 }, - { 30274, 12540, -12540, -30274 }, - { 23170, -23170, -23170, 23170 }, - { 12540, -30274, 30274, -12540 } -}; - - -#define _1STSTAGESHIFT 14 -#define _1STSTAGEROUNDING (1<<( _1STSTAGESHIFT-1)) -#define _2NDSTAGESHIFT 16 -#define _2NDSTAGEROUNDING (1<<( _2NDSTAGESHIFT-1)) - -// using matrix multiply void vp8_short_fdct4x4_c(short *input, short *output, int pitch) -{ - int i, j, k; - short temp[4][4]; - int sumtemp; - pitch >>= 1; - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - sumtemp = 0; - - for (k = 0; k < 4; k++) - { - sumtemp += input[i*pitch+k] * dct_matrix2[k][j]; - - } - - temp[i][j] = (short)((sumtemp + _1STSTAGEROUNDING) >> _1STSTAGESHIFT); - } - } - - - for (i = 0; i < 4; i++) - { - for (j = 0; j < 4; j++) - { - sumtemp = 0; - - for (k = 0; k < 4; k++) - { - sumtemp += dct_matrix1[i][ k] * 
temp[k][ j]; - } - - output[i*4+j] = (short)((sumtemp + _2NDSTAGEROUNDING) >> _2NDSTAGESHIFT); - } - } - -} - - -void vp8_short_fdct8x4_c(short *input, short *output, int pitch) -{ - vp8_short_fdct4x4_c(input, output, pitch); - vp8_short_fdct4x4_c(input + 4, output + 16, pitch); -} - - -static const signed short x_c1 = 60547; -static const signed short x_c2 = 46341; -static const signed short x_c3 = 25080; - -void vp8_fast_fdct4x4_c(short *input, short *output, int pitch) { int i; int a1, b1, c1, d1; - int a2, b2, c2, d2; short *ip = input; - short *op = output; - int temp1, temp2; for (i = 0; i < 4; i++) { - a1 = (ip[0] + ip[3]) * 2; - b1 = (ip[1] + ip[2]) * 2; - c1 = (ip[1] - ip[2]) * 2; - d1 = (ip[0] - ip[3]) * 2; + a1 = ((ip[0] + ip[3])<<3); + b1 = ((ip[1] + ip[2])<<3); + c1 = ((ip[1] - ip[2])<<3); + d1 = ((ip[0] - ip[3])<<3); - temp1 = a1 + b1; - temp2 = a1 - b1; + op[0] = a1 + b1; + op[2] = a1 - b1; - op[0] = ((temp1 * x_c2) >> 16) + temp1; - op[2] = ((temp2 * x_c2) >> 16) + temp2; - - temp1 = (c1 * x_c3) >> 16; - temp2 = ((d1 * x_c1) >> 16) + d1; - - op[1] = temp1 + temp2; - - temp1 = (d1 * x_c3) >> 16; - temp2 = ((c1 * x_c1) >> 16) + c1; - - op[3] = temp1 - temp2; + op[1] = (c1 * 2217 + d1 * 5352 + 14500)>>12; + op[3] = (d1 * 2217 - c1 * 5352 + 7500)>>12; ip += pitch / 2; op += 4; - } + } ip = output; op = output; - for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[12]; b1 = ip[4] + ip[8]; c1 = ip[4] - ip[8]; d1 = ip[0] - ip[12]; + op[0] = ( a1 + b1 + 7)>>4; + op[8] = ( a1 - b1 + 7)>>4; - temp1 = a1 + b1; - temp2 = a1 - b1; - - a2 = ((temp1 * x_c2) >> 16) + temp1; - c2 = ((temp2 * x_c2) >> 16) + temp2; - - temp1 = (c1 * x_c3) >> 16; - temp2 = ((d1 * x_c1) >> 16) + d1; - - b2 = temp1 + temp2; - - temp1 = (d1 * x_c3) >> 16; - temp2 = ((c1 * x_c1) >> 16) + c1; - - d2 = temp1 - temp2; - - - op[0] = (a2 + 1) >> 1; - op[4] = (b2 + 1) >> 1; - op[8] = (c2 + 1) >> 1; - op[12] = (d2 + 1) >> 1; + op[4] =((c1 * 2217 + d1 * 5352 + 12000)>>16) + (d1!=0); + op[12] = (d1 * 2217 
- c1 * 5352 + 51000)>>16; ip++; op++; } } -void vp8_fast_fdct8x4_c(short *input, short *output, int pitch) +void vp8_short_fdct8x4_c(short *input, short *output, int pitch) { - vp8_fast_fdct4x4_c(input, output, pitch); - vp8_fast_fdct4x4_c(input + 4, output + 16, pitch); + vp8_short_fdct4x4_c(input, output, pitch); + vp8_short_fdct4x4_c(input + 4, output + 16, pitch); } void vp8_short_walsh4x4_c(short *input, short *output, int pitch) @@ -177,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) short *ip = input; short *op = output; + for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[3]; - b1 = ip[1] + ip[2]; - c1 = ip[1] - ip[2]; - d1 = ip[0] - ip[3]; + a1 = ((ip[0] + ip[2])<<2); + d1 = ((ip[1] + ip[3])<<2); + c1 = ((ip[1] - ip[3])<<2); + b1 = ((ip[0] - ip[2])<<2); - op[0] = a1 + b1; - op[1] = c1 + d1; - op[2] = a1 - b1; - op[3] = d1 - c1; + op[0] = a1 + d1 + (a1!=0); + op[1] = b1 + c1; + op[2] = b1 - c1; + op[3] = a1 - d1; ip += pitch / 2; op += 4; } @@ -197,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ip[0] + ip[12]; - b1 = ip[4] + ip[8]; - c1 = ip[4] - ip[8]; - d1 = ip[0] - ip[12]; + a1 = ip[0] + ip[8]; + d1 = ip[4] + ip[12]; + c1 = ip[4] - ip[12]; + b1 = ip[0] - ip[8]; - a2 = a1 + b1; - b2 = c1 + d1; - c2 = a1 - b1; - d2 = d1 - c1; + a2 = a1 + d1; + b2 = b1 + c1; + c2 = b1 - c1; + d2 = a1 - d1; - a2 += (a2 > 0); - b2 += (b2 > 0); - c2 += (c2 > 0); - d2 += (d2 > 0); + a2 += a2<0; + b2 += b2<0; + c2 += c2<0; + d2 += d2<0; - op[0] = (a2) >> 1; - op[4] = (b2) >> 1; - op[8] = (c2) >> 1; - op[12] = (d2) >> 1; + op[0] = (a2+3) >> 3; + op[4] = (b2+3) >> 3; + op[8] = (c2+3) >> 3; + op[12]= (d2+3) >> 3; ip++; op++; diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index fb307cfb3..fec3b4c37 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -31,15 +32,14 @@ extern prototype_fdct(vp8_fdct_short4x4); #endif extern prototype_fdct(vp8_fdct_short8x4); +// There is no fast4x4 (for now) #ifndef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_c +#define vp8_fdct_fast4x4 vp8_short_fdct4x4_c #endif -extern prototype_fdct(vp8_fdct_fast4x4); #ifndef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_c +#define vp8_fdct_fast8x4 vp8_short_fdct8x4_c #endif -extern prototype_fdct(vp8_fdct_fast8x4); #ifndef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_c diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index faf8e2bd4..b7d231b8c 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -16,7 +17,7 @@ #include "extend.h" #include "entropymode.h" #include "quant_common.h" -#include "segmentation_common.h" +#include "segmentation.h" #include "setupintrarecon.h" #include "encodeintra.h" #include "reconinter.h" @@ -67,10 +68,8 @@ unsigned int uv_modes[4] = {0, 0, 0, 0}; unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; #endif -// The first four entries are dummy values static const int qrounding_factors[129] = { - 56, 56, 56, 56, 56, 56, 56, 56, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, @@ -86,12 +85,18 @@ static const int qrounding_factors[129] = 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48 }; static const int qzbin_factors[129] = { - 64, 64, 64, 64, 80, 80, 80, 80, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, @@ -102,17 +107,76 @@ static const int qzbin_factors[129] = 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, 80, 80, 80, 80, 80, 80, 80, - 80, + 80 }; +static const int qrounding_factors_y2[129] = +{ + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 
48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48, 48, 48, 48, 48, 48, 48, 48, + 48 +}; + +static const int qzbin_factors_y2[129] = +{ + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 84, 84, 84, 84, 84, 84, 84, 84, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80, 80, 80, 80, 80, 80, 80, 80, + 80 +}; + +#define EXACT_QUANT +#ifdef EXACT_QUANT +static void vp8cx_invert_quant(int improved_quant, short *quant, + short *shift, short d) +{ + if(improved_quant) + { + unsigned t; + int l; + t = d; + for(l = 0; t > 1; l++) + t>>=1; + t = 1 + (1<<(16+l))/d; + *quant = (short)(t - (1<<16)); + *shift = l; + } + else + { + *quant = (1 << 16) / d; + *shift = 0; + } +} + void vp8cx_init_quantizer(VP8_COMP *cpi) { - int r, c; int i; int quant_val; int Q; @@ -123,63 +187,127 @@ void vp8cx_init_quantizer(VP8_COMP *cpi) { // dc values quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); - cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val; - cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.Y1dequant[Q][0][0] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, + cpi->Y1quant_shift[Q] + 0, quant_val); + cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.Y1dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc2quant(Q, 
cpi->common.y2dc_delta_q); - cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val; - cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.Y2dequant[Q][0][0] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0, + cpi->Y2quant_shift[Q] + 0, quant_val); + cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; + cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7; + cpi->common.Y2dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); - cpi->UVquant[Q][0][0] = (1 << 16) / quant_val; - cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;; - cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.UVdequant[Q][0][0] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0, + cpi->UVquant_shift[Q] + 0, quant_val); + cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;; + cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.UVdequant[Q][0] = quant_val; cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; // all the ac values = ; for (i = 1; i < 16; i++) { int rc = vp8_default_zig_zag1d[i]; - r = (rc >> 2); - c = (rc & 3); quant_val = vp8_ac_yquant(Q); - cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val; - cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.Y1dequant[Q][r][c] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc, + cpi->Y1quant_shift[Q] + rc, quant_val); + cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.Y1dequant[Q][rc] = quant_val; cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = 
vp8_ac2quant(Q, cpi->common.y2ac_delta_q); - cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val; - cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.Y2dequant[Q][r][c] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc, + cpi->Y2quant_shift[Q] + rc, quant_val); + cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; + cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7; + cpi->common.Y2dequant[Q][rc] = quant_val; cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7; quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); - cpi->UVquant[Q][r][c] = (1 << 16) / quant_val; - cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; - cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7; - cpi->common.UVdequant[Q][r][c] = quant_val; + vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc, + cpi->UVquant_shift[Q] + rc, quant_val); + cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.UVdequant[Q][rc] = quant_val; cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7; } } } +#else +void vp8cx_init_quantizer(VP8_COMP *cpi) +{ + int i; + int quant_val; + int Q; + int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44}; + + for (Q = 0; Q < QINDEX_RANGE; Q++) + { + // dc values + quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); + cpi->Y1quant[Q][0] = (1 << 16) / quant_val; + cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.Y1dequant[Q][0] = quant_val; + cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; + + quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q); + cpi->Y2quant[Q][0] = (1 << 16) / quant_val; + cpi->Y2zbin[Q][0] = 
((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; + cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7; + cpi->common.Y2dequant[Q][0] = quant_val; + cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; + + quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); + cpi->UVquant[Q][0] = (1 << 16) / quant_val; + cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;; + cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.UVdequant[Q][0] = quant_val; + cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; + + // all the ac values = ; + for (i = 1; i < 16; i++) + { + int rc = vp8_default_zig_zag1d[i]; + + quant_val = vp8_ac_yquant(Q); + cpi->Y1quant[Q][rc] = (1 << 16) / quant_val; + cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.Y1dequant[Q][rc] = quant_val; + cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7; + + quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); + cpi->Y2quant[Q][rc] = (1 << 16) / quant_val; + cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; + cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7; + cpi->common.Y2dequant[Q][rc] = quant_val; + cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7; + + quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); + cpi->UVquant[Q][rc] = (1 << 16) / quant_val; + cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; + cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7; + cpi->common.UVdequant[Q][rc] = quant_val; + cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7; + } + } +} +#endif void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { int i; int QIndex; MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = &xd->mbmi; int zbin_extra; // Select the baseline MB Q index. 
@@ -187,12 +315,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) { // Abs Value if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) - QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; // Delta Value else { - QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; + QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id]; QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; // Clamp to valid range } } @@ -200,11 +328,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) QIndex = cpi->common.base_qindex; // Y - zbin_extra = (cpi->common.Y1dequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; + zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; for (i = 0; i < 16; i++) { x->block[i].quant = cpi->Y1quant[QIndex]; + x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].round = cpi->Y1round[QIndex]; x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex]; @@ -213,11 +342,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) } // UV - zbin_extra = (cpi->common.UVdequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; + zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7; for (i = 16; i < 24; i++) { x->block[i].quant = cpi->UVquant[QIndex]; + x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].round = cpi->UVround[QIndex]; x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex]; @@ -226,8 +356,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) } // Y2 - zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7; + 
zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7; x->block[24].quant = cpi->Y2quant[QIndex]; + x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; x->block[24].round = cpi->Y2round[QIndex]; x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex]; @@ -237,6 +368,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x) void vp8cx_frame_init_quantizer(VP8_COMP *cpi) { + // Clear Zbin mode boost for default case + cpi->zbin_mode_boost = 0; + // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called // when these values are not all zero. if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q) @@ -263,18 +397,17 @@ void encode_mb_row(VP8_COMP *cpi, int i; int recon_yoffset, recon_uvoffset; int mb_col; - int recon_y_stride = cm->last_frame.y_stride; - int recon_uv_stride = cm->last_frame.uv_stride; + int ref_fb_idx = cm->lst_fb_idx; + int dst_fb_idx = cm->new_fb_idx; + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int seg_map_index = (mb_row * cpi->common.mb_cols); #if CONFIG_SEGMENTATION int left_id, above_id; int sum; #endif // reset above block coeffs - xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT]; - xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ]; - xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ]; - xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT]; + xd->above_context = cm->above_context; xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); @@ -283,25 +416,35 @@ void encode_mb_row(VP8_COMP *cpi, cpi->tplist[mb_row].start = *tp; //printf("Main mb_row = %d\n", mb_row); + // Distance of Mb to the top & bottom edges, specified in 1/8th pel + // units as they 
are always compared to values that are in 1/8th pel units + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; + + // Set up limit values for vertical motion vector components + // to prevent them extending beyond the UMV borders + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + + (VP8BORDERINPIXELS - 16); + // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - // Distance of Mb to the various image edges. - // These specified to 8th pel as they are always compared to values that are in 1/8th pel units + // Distance of Mb to the left & right edges, specified in + // 1/8th pel units as they are always compared to values + // that are in 1/8th pel units xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - // Set up limit values for motion vectors used to prevent them extending outside the UMV borders + // Set up limit values for horizontal motion vector components + // to prevent them extending beyond the UMV borders x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); + x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + + (VP8BORDERINPIXELS - 16); - xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset; + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = 
cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); // Is segmentation enabled @@ -310,15 +453,15 @@ void encode_mb_row(VP8_COMP *cpi, { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) if (cpi->segmentation_map[seg_map_index+mb_col] <= 3) - xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col]; + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col]; else - xd->mbmi.segment_id = 0; + xd->mode_info_context->mbmi.segment_id = 0; vp8cx_mb_init_quantizer(cpi, x); } else - xd->mbmi.segment_id = 0; // Set to Segment 0 by default + xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default x->active_ptr = cpi->active_map + seg_map_index + mb_col; @@ -342,14 +485,14 @@ void encode_mb_row(VP8_COMP *cpi, for (b = 0; b < xd->mbmi.partition_count; b++) { - inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++; + inter_b_modes[x->partition->bmi[b].mode] ++; } } #endif // Count of last ref frame 0,0 useage - if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME)) + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) cpi->inter_zz_count ++; // Special case code for cyclic refresh @@ -357,14 +500,14 @@ void encode_mb_row(VP8_COMP *cpi, // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled) { - cpi->segmentation_map[seg_map_index+mb_col] = xd->mbmi.segment_id; + cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id; // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh): // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0) // else mark it as dirty (1). 
- if (xd->mbmi.segment_id) + if (xd->mode_info_context->mbmi.segment_id) cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1; - else if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME)) + else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) { if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1) cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0; @@ -377,15 +520,12 @@ void encode_mb_row(VP8_COMP *cpi, cpi->tplist[mb_row].stop = *tp; - xd->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb + x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb - if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME)) - xd->mbmi.segment_id = 0; + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) + xd->mode_info_context->mbmi.segment_id = 0; else - xd->mbmi.segment_id = 1; - - // store macroblock mode info into context array - vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi)); + xd->mode_info_context->mbmi.segment_id = 1; for (i = 0; i < 16; i++) vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi)); @@ -412,7 +552,7 @@ void encode_mb_row(VP8_COMP *cpi, if (mb_row != 0) sum += (xd->mode_info_context-cm->mb_cols)->mbmi.segment_flag; - if (xd->mbmi.segment_id == cpi->segmentation_map[(mb_row*cm->mb_cols) + mb_col]) + if (xd->mode_info_context->mbmi.segment_id == cpi->segmentation_map[(mb_row*cm->mb_cols) + mb_col]) xd->mode_info_context->mbmi.segment_flag = 0; else xd->mode_info_context->mbmi.segment_flag = 1; @@ -430,29 +570,28 @@ void encode_mb_row(VP8_COMP *cpi, segment_counts[xd->mode_info_context->mbmi.segment_id] ++; } } - segment_counts[SEEK_SEGID + xd->mbmi.segment_id] ++; + segment_counts[SEEK_SEGID + xd->mode_info_context->mbmi.segment_id] ++; #else segment_counts[xd->mode_info_context->mbmi.segment_id] ++; #endif // skip 
to next mb xd->mode_info_context++; + x->partition_info++; - xd->above_context[Y1CONTEXT] += 4; - xd->above_context[UCONTEXT ] += 2; - xd->above_context[VCONTEXT ] += 2; - xd->above_context[Y2CONTEXT] ++; + xd->above_context++; cpi->current_mb_col_main = mb_col; } //extend the recon for intra prediction vp8_extend_mb_row( - &cm->new_frame, + &cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); // this is to account for the border xd->mode_info_context++; + x->partition_info++; } void vp8_encode_frame(VP8_COMP *cpi) @@ -473,32 +612,31 @@ void vp8_encode_frame(VP8_COMP *cpi) #endif int totalrate; - if (cm->frame_type != KEY_FRAME) + // Functions setup for all frame types so we can use MC in AltRef + if (cm->mcomp_filter_type == SIXTAP) { - if (cm->mcomp_filter_type == SIXTAP) - { - xd->subpixel_predict = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap4x4); - xd->subpixel_predict8x4 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x4); - xd->subpixel_predict8x8 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x8); - xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap16x16); - } - else - { - xd->subpixel_predict = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear4x4); - xd->subpixel_predict8x4 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x4); - xd->subpixel_predict8x8 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x8); - xd->subpixel_predict16x16 = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear16x16); - } + xd->subpixel_predict = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap4x4); + xd->subpixel_predict8x4 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap8x4); + xd->subpixel_predict8x8 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap8x8); + xd->subpixel_predict16x16 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, sixtap16x16); + } + else + { + xd->subpixel_predict = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, bilinear4x4); + xd->subpixel_predict8x4 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, 
bilinear8x4); + xd->subpixel_predict8x8 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, bilinear8x8); + xd->subpixel_predict16x16 = SUBPIX_INVOKE( + &cpi->common.rtcd.subpix, bilinear16x16); } - //else // Key Frame - //{ - // For key frames make sure the intra ref frame probability value - // is set to "all intra" - //cpi->prob_intra_coded = 255; - //} - - xd->gf_active_ptr = (signed char *)cm->gf_active_flags; // Point to base of GF active flags data structure + x->gf_active_ptr = (signed char *)cpi->gf_active_flags; // Point to base of GF active flags data structure x->vector_range = 32; @@ -523,7 +661,7 @@ void vp8_encode_frame(VP8_COMP *cpi) totalrate = 0; - xd->mode_info = cm->mi - 1; + x->partition_info = x->pi; xd->mode_info_context = cm->mi; xd->mode_info_stride = cm->mode_info_stride; @@ -559,12 +697,12 @@ void vp8_encode_frame(VP8_COMP *cpi) // Copy data over into macro block data sturctures. x->src = * cpi->Source; - xd->pre = cm->last_frame; - xd->dst = cm->new_frame; + xd->pre = cm->yv12_fb[cm->lst_fb_idx]; + xd->dst = cm->yv12_fb[cm->new_fb_idx]; // set up frame new frame for intra coded blocks - vp8_setup_intra_recon(&cm->new_frame); + vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); vp8_build_block_offsets(x); @@ -589,10 +727,10 @@ void vp8_encode_frame(VP8_COMP *cpi) //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 )); #endif - xd->mbmi.mode = DC_PRED; - xd->mbmi.uv_mode = DC_PRED; + xd->mode_info_context->mbmi.mode = DC_PRED; + xd->mode_info_context->mbmi.uv_mode = DC_PRED; - xd->left_context = cm->left_context; + xd->left_context = &cm->left_context; vp8_zero(cpi->count_mb_ref_frame_usage) vp8_zero(cpi->ymode_count) @@ -600,17 +738,7 @@ void vp8_encode_frame(VP8_COMP *cpi) x->mvc = cm->fc.mvc; - // vp8_zero( entropy_stats) - { - ENTROPY_CONTEXT **p = cm->above_context; - const size_t L = cm->mb_cols; - - vp8_zero_array(p [Y1CONTEXT], L * 4) - vp8_zero_array(p [ UCONTEXT], L * 2) - vp8_zero_array(p [ VCONTEXT], L * 
2) - vp8_zero_array(p [Y2CONTEXT], L) - } - + vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); { struct vpx_usec_timer emr_timer; @@ -669,6 +797,7 @@ void vp8_encode_frame(VP8_COMP *cpi) x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; + x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; if (mb_row < cm->mb_rows - 1) //WaitForSingleObject(cpi->h_event_main, INFINITE); @@ -1029,8 +1158,8 @@ void vp8_build_block_offsets(MACROBLOCK *x) static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) { const MACROBLOCKD *xd = & x->e_mbd; - const MB_PREDICTION_MODE m = xd->mbmi.mode; - const MB_PREDICTION_MODE uvm = xd->mbmi.uv_mode; + const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; + const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; #ifdef MODE_STATS const int is_key = cpi->common.frame_type == KEY_FRAME; @@ -1068,7 +1197,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) int rateuv_tokenonly = 0; int i; - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; #if !(CONFIG_REALTIME_ONLY) @@ -1084,15 +1213,13 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv); - x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 
1 : 0; - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); rate += rateuv; if (Error4x4 < Error16x16) { rate += rate4x4; - x->e_mbd.mbmi.mode = B_PRED; + x->e_mbd.mode_info_context->mbmi.mode = B_PRED; // get back the intra block modes for (i = 0; i < 16; i++) @@ -1132,7 +1259,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) for (mode = DC_PRED; mode <= TM_PRED; mode ++) { - x->e_mbd.mbmi.mode = mode; + x->e_mbd.mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby_ptr(&x->e_mbd); distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode]; @@ -1152,17 +1279,15 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) else Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); - x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0; - if (Error4x4 < Error16x16) { - x->e_mbd.mbmi.mode = B_PRED; + x->e_mbd.mode_info_context->mbmi.mode = B_PRED; vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); cpi->prediction_error += Error4x4; } else { - x->e_mbd.mbmi.mode = best_mode; + x->e_mbd.mode_info_context->mbmi.mode = best_mode; vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); cpi->prediction_error += Error16x16; } @@ -1179,7 +1304,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) extern int cnt_pm; #endif -extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x); +extern void vp8_fix_contexts(MACROBLOCKD *x); int vp8cx_encode_inter_macroblock ( @@ -1196,7 +1321,7 @@ int vp8cx_encode_inter_macroblock x->skip = 0; if (xd->segmentation_enabled) - x->encode_breakout = cpi->segment_encode_breakout[xd->mbmi.segment_id]; + x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id]; else x->encode_breakout = cpi->oxcf.encode_breakout; @@ -1227,34 +1352,46 @@ int 
vp8cx_encode_inter_macroblock if (cpi->cyclic_refresh_mode_enabled) { // Clear segment_id back to 0 if not coded (last frame 0,0) - if ((xd->mbmi.segment_id == 1) && - ((xd->mbmi.ref_frame != LAST_FRAME) || (xd->mbmi.mode != ZEROMV))) + if ((xd->mode_info_context->mbmi.segment_id == 1) && + ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV))) { - xd->mbmi.segment_id = 0; + xd->mode_info_context->mbmi.segment_id = 0; } } // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise if (cpi->zbin_mode_boost_enabled) { - if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame != LAST_FRAME)) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME ) + cpi->zbin_mode_boost = 0; else - cpi->zbin_mode_boost = 0; + { + if (xd->mode_info_context->mbmi.mode == ZEROMV) + { + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (xd->mode_info_context->mbmi.mode == SPLITMV) + cpi->zbin_mode_boost = 0; + else + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } } + else + cpi->zbin_mode_boost = 0; vp8cx_mb_init_quantizer(cpi, x); } - cpi->count_mb_ref_frame_usage[xd->mbmi.ref_frame] ++; + cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++; - if (xd->mbmi.ref_frame == INTRA_FRAME) + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { - x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 
1 : 0; - vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x); - if (xd->mbmi.mode == B_PRED) + if (xd->mode_info_context->mbmi.mode == B_PRED) { vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x); } @@ -1270,36 +1407,25 @@ int vp8cx_encode_inter_macroblock MV best_ref_mv; MV nearest, nearby; int mdcounts[4]; + int ref_fb_idx; vp8_find_near_mvs(xd, xd->mode_info_context, - &nearest, &nearby, &best_ref_mv, mdcounts, xd->mbmi.ref_frame, cpi->common.ref_frame_sign_bias); + &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias); vp8_build_uvmvs(xd, cpi->common.full_pixel); - // store motion vectors in our motion vector list - if (xd->mbmi.ref_frame == LAST_FRAME) - { - // Set up pointers for this macro block into the previous frame recon buffer - xd->pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset; - } - else if (xd->mbmi.ref_frame == GOLDEN_FRAME) - { - // Set up pointers for this macro block into the golden frame recon buffer - xd->pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset; - } + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = cpi->common.gld_fb_idx; else - { - // Set up pointers for this macro block into the alternate reference frame recon buffer - xd->pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset; - } + ref_fb_idx = cpi->common.alt_fb_idx; - if (xd->mbmi.mode == SPLITMV) + xd->pre.y_buffer = 
cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + + if (xd->mode_info_context->mbmi.mode == SPLITMV) { int i; @@ -1312,19 +1438,19 @@ int vp8cx_encode_inter_macroblock } } } - else if (xd->mbmi.mode == NEWMV) + else if (xd->mode_info_context->mbmi.mode == NEWMV) { cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++; cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++; } - if (!x->skip && !x->e_mbd.mbmi.force_no_skip) + if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip) { vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x); // Clear mb_skip_coeff if mb_no_coeff_skip is not set if (!cpi->common.mb_no_coeff_skip) - xd->mbmi.mb_skip_coeff = 0; + xd->mode_info_context->mbmi.mb_skip_coeff = 0; } else @@ -1337,19 +1463,19 @@ int vp8cx_encode_inter_macroblock { if (cpi->common.mb_no_coeff_skip) { - if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV) - xd->mbmi.dc_diff = 0; + if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) + xd->mode_info_context->mbmi.dc_diff = 0; else - xd->mbmi.dc_diff = 1; + xd->mode_info_context->mbmi.dc_diff = 1; - xd->mbmi.mb_skip_coeff = 1; + xd->mode_info_context->mbmi.mb_skip_coeff = 1; cpi->skip_true_count ++; - vp8_fix_contexts(cpi, xd); + vp8_fix_contexts(xd); } else { vp8_stuff_mb(cpi, xd, t); - xd->mbmi.mb_skip_coeff = 0; + xd->mode_info_context->mbmi.mb_skip_coeff = 0; cpi->skip_false_count ++; } } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 403d0204a..1c72b90f1 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -52,8 +53,6 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK x->quantize_b(be, b); - x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob); - vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32); RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -65,11 +64,9 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16); - x->short_fdct4x4rd(be->src_diff, be->coeff, 32); + x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); - x->quantize_brd(be, b); - - x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob); + x->quantize_b(be, b); IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32); @@ -108,8 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) #if !(CONFIG_REALTIME_ONLY) #if 1 - - if (x->optimize && x->rddiv > 1) + if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) vp8_optimize_mby(x, rtcd); #endif @@ -117,14 +113,15 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, recon_mby) + (IF_RTCD(&rtcd->common->recon), &x->e_mbd); // make sure block modes are set the way we want them for context updates 
for (b = 0; b < 16; b++) { BLOCKD *d = &x->e_mbd.block[b]; - switch (x->e_mbd.mbmi.mode) + switch (x->e_mbd.mode_info_context->mbmi.mode) { case DC_PRED: @@ -155,23 +152,21 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride); - vp8_transform_intra_mbyrd(x); - - x->e_mbd.mbmi.mb_skip_coeff = 1; - - vp8_quantize_mbyrd(x); + vp8_transform_intra_mby(x); + vp8_quantize_mby(x); vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, recon_mby) + (IF_RTCD(&rtcd->common->recon), &x->e_mbd); // make sure block modes are set the way we want them for context updates for (b = 0; b < 16; b++) { BLOCKD *d = &x->e_mbd.block[b]; - switch (x->e_mbd.mbmi.mode) + switch (x->e_mbd.mode_info_context->mbmi.mode) { case DC_PRED: @@ -207,7 +202,7 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) #if !(CONFIG_REALTIME_ONLY) #if 1 - if (x->optimize && x->rddiv > 1) + if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) vp8_optimize_mbuv(x, rtcd); #endif @@ -224,11 +219,9 @@ void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - vp8_transform_mbuvrd(x); - - vp8_quantize_mbuvrd(x); - + vp8_transform_mbuv(x); + vp8_quantize_mbuv(x); vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd); diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h index 4a43ab275..5be23d12b 100644 --- a/vp8/encoder/encodeintra.h +++ b/vp8/encoder/encodeintra.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c index bb43d3d5b..e9753ac48 100644 --- a/vp8/encoder/encodemb.c +++ b/vp8/encoder/encodemb.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -12,6 +13,7 @@ #include "encodemb.h" #include "reconinter.h" #include "quantize.h" +#include "tokenize.h" #include "invtrans.h" #include "recon.h" #include "reconintra.h" @@ -119,19 +121,11 @@ void vp8_transform_mbuv(MACROBLOCK *x) for (i = 16; i < 24; i += 2) { - x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); + x->vp8_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); } } -void vp8_transform_mbuvrd(MACROBLOCK *x) -{ - int i; - - for (i = 16; i < 24; i += 2) - { - x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); - } -} void vp8_transform_intra_mby(MACROBLOCK *x) { @@ -139,32 +133,19 @@ void vp8_transform_intra_mby(MACROBLOCK *x) for (i = 0; i < 16; i += 2) { - x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); + x->vp8_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); } // build dc block from 16 y dc values vp8_build_dcblock(x); // do 2nd order transform on the dc block - x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); + x->short_walsh4x4(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); } -void vp8_transform_intra_mbyrd(MACROBLOCK *x) -{ - int i; - - for (i = 0; i < 16; i += 2) - { - x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); - } - - // build dc block from 16 y dc values - vp8_build_dcblock(x); - - // do 2nd order transform on the dc block - x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); -} void vp8_transform_mb(MACROBLOCK *x) { @@ -172,21 +153,24 @@ void vp8_transform_mb(MACROBLOCK *x) for (i = 0; i < 16; i += 2) { - x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); + x->vp8_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); } // build dc block from 16 y dc values - if (x->e_mbd.mbmi.mode != SPLITMV) + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) vp8_build_dcblock(x); for (i = 16; i < 24; i += 2) { - 
x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); + x->vp8_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 16); } // do 2nd order transform on the dc block - if (x->e_mbd.mbmi.mode != SPLITMV) - x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) + x->short_walsh4x4(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); } @@ -196,39 +180,19 @@ void vp8_transform_mby(MACROBLOCK *x) for (i = 0; i < 16; i += 2) { - x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); + x->vp8_short_fdct8x4(&x->block[i].src_diff[0], + &x->block[i].coeff[0], 32); } // build dc block from 16 y dc values - if (x->e_mbd.mbmi.mode != SPLITMV) + if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { vp8_build_dcblock(x); - x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); + x->short_walsh4x4(&x->block[24].src_diff[0], + &x->block[24].coeff[0], 8); } } -void vp8_transform_mbrd(MACROBLOCK *x) -{ - int i; - - for (i = 0; i < 16; i += 2) - { - x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); - } - - // build dc block from 16 y dc values - if (x->e_mbd.mbmi.mode != SPLITMV) - vp8_build_dcblock(x); - - for (i = 16; i < 24; i += 2) - { - x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); - } - - // do 2nd order transform on the dc block - if (x->e_mbd.mbmi.mode != SPLITMV) - x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); -} void vp8_stuff_inter16x16(MACROBLOCK *x) { @@ -265,727 +229,322 @@ void vp8_stuff_inter16x16(MACROBLOCK *x) } #if !(CONFIG_REALTIME_ONLY) -extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2]; -extern const TOKENEXTRA *vp8_dct_value_tokens_ptr; -extern int vp8_dct_value_cost[DCT_MAX_VALUE*2]; -extern int *vp8_dct_value_cost_ptr; +#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) +#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) 
& 0xFF ) -static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) +typedef struct vp8_token_state vp8_token_state; + +struct vp8_token_state{ + int rate; + int error; + signed char next; + signed char token; + short qc; +}; + +// TODO: experiments to find optimal multiple numbers +#define Y1_RD_MULT 1 +#define UV_RD_MULT 1 +#define Y2_RD_MULT 4 + +static const int plane_rd_mult[4]= { - int c = !type; /* start at coef 0, unless Y with Y2 */ - int eob = b->eob; - int pt ; /* surrounding block/prev coef predictor */ - int cost = 0; - short *qcoeff_ptr = b->qcoeff; + Y1_RD_MULT, + Y2_RD_MULT, + UV_RD_MULT, + Y1_RD_MULT +}; +void vp8_optimize_b(MACROBLOCK *mb, int ib, int type, + ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, + const VP8_ENCODER_RTCD *rtcd) +{ + BLOCK *b; + BLOCKD *d; + vp8_token_state tokens[17][2]; + unsigned best_mask[2]; + const short *dequant_ptr; + const short *coeff_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + int eob; + int i0; + int rc; + int x; + int sz; + int next; + int path; + int rdmult; + int rddiv; + int final_eob; + int rd_cost0; + int rd_cost1; + int rate0; + int rate1; + int error0; + int error1; + int t0; + int t1; + int best; + int band; + int pt; + int i; + int err_mult = plane_rd_mult[type]; + + b = &mb->block[ib]; + d = &mb->e_mbd.block[ib]; + + /* Enable this to test the effect of RDO as a replacement for the dynamic + * zero bin instead of an augmentation of it. + */ +#if 0 + vp8_strict_quantize_b(b, d); +#endif + + dequant_ptr = d->dequant; + coeff_ptr = b->coeff; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + i0 = !type; + eob = d->eob; + + /* Now set up a Viterbi trellis to evaluate alternative roundings. */ + rdmult = (mb->rdmult << 2)*err_mult; + if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME) + rdmult = (rdmult * 9)>>4; + + rddiv = mb->rddiv; + best_mask[0] = best_mask[1] = 0; + /* Initialize the sentinel node of the trellis. 
*/ + tokens[eob][0].rate = 0; + tokens[eob][0].error = 0; + tokens[eob][0].next = 16; + tokens[eob][0].token = DCT_EOB_TOKEN; + tokens[eob][0].qc = 0; + *(tokens[eob] + 1) = *(tokens[eob] + 0); + next = eob; + for (i = eob; i-- > i0;) + { + int base_bits; + int d2; + int dx; + + rc = vp8_default_zig_zag1d[i]; + x = qcoeff_ptr[rc]; + /* Only add a trellis state for non-zero coefficients. */ + if (x) + { + int shortcut=0; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + /* Evaluate the first possibility for this state. */ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + t0 = (vp8_dct_value_tokens_ptr + x)->Token; + /* Consider both possible successor states. */ + if (next < 16) + { + band = vp8_coef_bands[i + 1]; + pt = vp8_prev_token_class[t0]; + rate0 += + mb->token_costs[type][band][pt][tokens[next][0].token]; + rate1 += + mb->token_costs[type][band][pt][tokens[next][1].token]; + } + rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; + d2 = dx*dx; + tokens[i][0].rate = base_bits + (best ? rate1 : rate0); + tokens[i][0].error = d2 + (best ? error1 : error0); + tokens[i][0].next = next; + tokens[i][0].token = t0; + tokens[i][0].qc = x; + best_mask[0] |= best << i; + /* Evaluate the second possibility for this state. 
*/ + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + + if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) && + (abs(x)*dequant_ptr[rc]Token; + } + if (next < 16) + { + band = vp8_coef_bands[i + 1]; + if(t0!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t0]; + rate0 += mb->token_costs[type][band][pt][ + tokens[next][0].token]; + } + if(t1!=DCT_EOB_TOKEN) + { + pt = vp8_prev_token_class[t1]; + rate1 += mb->token_costs[type][band][pt][ + tokens[next][1].token]; + } + } + + rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) + { + rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); + } + /* And pick the best. */ + best = rd_cost1 < rd_cost0; + base_bits = *(vp8_dct_value_cost_ptr + x); + + if(shortcut) + { + dx -= (dequant_ptr[rc] + sz) ^ sz; + d2 = dx*dx; + } + tokens[i][1].rate = base_bits + (best ? rate1 : rate0); + tokens[i][1].error = d2 + (best ? error1 : error0); + tokens[i][1].next = next; + tokens[i][1].token =best?t1:t0; + tokens[i][1].qc = x; + best_mask[1] |= best << i; + /* Finally, make this the new head of the trellis. */ + next = i; + } + /* There's no choice to make for a zero coefficient, so we don't + * add a new trellis node, but we do need to update the costs. + */ + else + { + band = vp8_coef_bands[i + 1]; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + /* Update the cost of each path if we're past the EOB token. */ + if (t0 != DCT_EOB_TOKEN) + { + tokens[next][0].rate += mb->token_costs[type][band][0][t0]; + tokens[next][0].token = ZERO_TOKEN; + } + if (t1 != DCT_EOB_TOKEN) + { + tokens[next][1].rate += mb->token_costs[type][band][0][t1]; + tokens[next][1].token = ZERO_TOKEN; + } + /* Don't update next, because we didn't add a new node. */ + } + } + + /* Now pick the best path through the whole trellis. 
*/ + band = vp8_coef_bands[i + 1]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - -# define QC( I) ( qcoeff_ptr [vp8_default_zig_zag1d[I]] ) - - for (; c < eob; c++) + rate0 = tokens[next][0].rate; + rate1 = tokens[next][1].rate; + error0 = tokens[next][0].error; + error1 = tokens[next][1].error; + t0 = tokens[next][0].token; + t1 = tokens[next][1].token; + rate0 += mb->token_costs[type][band][pt][t0]; + rate1 += mb->token_costs[type][band][pt][t1]; + rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); + rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); + if (rd_cost0 == rd_cost1) { - int v = QC(c); - int t = vp8_dct_value_tokens_ptr[v].Token; - cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t]; - cost += vp8_dct_value_cost_ptr[v]; - pt = vp8_prev_token_class[t]; + rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); + rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); } + best = rd_cost1 < rd_cost0; + final_eob = i0 - 1; + for (i = next; i < eob; i = next) + { + x = tokens[i][best].qc; + if (x) + final_eob = i; + rc = vp8_default_zig_zag1d[i]; + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; + next = tokens[i][best].next; + best = (best_mask[best] >> i) & 1; + } + final_eob++; -# undef QC - - if (c < 16) - cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN]; - - return cost; + d->eob = final_eob; + *a = *l = (d->eob != !type); } -static int mbycost_coeffs(MACROBLOCK *mb) -{ - int cost = 0; - int b; - TEMP_CONTEXT t; - int type = 0; - - MACROBLOCKD *x = &mb->e_mbd; - - vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4); - - if (x->mbmi.mode == SPLITMV) - type = 3; - - for (b = 0; b < 16; b++) - cost += cost_coeffs(mb, x->block + b, type, - t.a + vp8_block2above[b], t.l + vp8_block2left[b]); - - return cost; -} - -#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) ) - -void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const 
VP8_ENCODER_RTCD *rtcd) -{ - BLOCK *b = &x->block[i]; - BLOCKD *bd = &x->e_mbd.block[i]; - short *dequant_ptr = &bd->dequant[0][0]; - int nzpos[16] = {0}; - short saved_qcoefs[16]; - short saved_dqcoefs[16]; - int baserate, baseerror, baserd; - int rate, error, thisrd; - int k; - int nzcoefcount = 0; - int nc, bestnc = 0; - int besteob; - - // count potential coefficient to be optimized - for (k = !type; k < 16; k++) - { - int qcoef = abs(bd->qcoeff[k]); - int coef = abs(b->coeff[k]); - int dq = dequant_ptr[k]; - - if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq)) - { - nzpos[nzcoefcount] = k; - nzcoefcount++; - } - } - - // if nothing here, do nothing for this block. - if (!nzcoefcount) - { - *a = *l = (bd->eob != !type); - return; - } - - // save a copy of quantized coefficients - vpx_memcpy(saved_qcoefs, bd->qcoeff, 32); - vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32); - - besteob = bd->eob; - baserate = cost_coeffs(x, bd, type, a, l); - baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2; - baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100); - - for (nc = 1; nc < (1 << nzcoefcount); nc++) - { - //reset coefficients - vpx_memcpy(bd->qcoeff, saved_qcoefs, 32); - vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32); - - for (k = 0; k < nzcoefcount; k++) - { - int pos = nzpos[k]; - - if ((nc & (1 << k))) - { - int cur_qcoef = bd->qcoeff[pos]; - - if (cur_qcoef < 0) - { - bd->qcoeff[pos]++; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - else - { - bd->qcoeff[pos]--; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - } - } - - { - int eob = -1; - int rc; - int m; - - for (m = 0; m < 16; m++) - { - rc = vp8_default_zig_zag1d[m]; - - if (bd->qcoeff[rc]) - eob = m; - } - - bd->eob = eob + 1; - } - - rate = cost_coeffs(x, bd, type, a, l); - error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2; - thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100); - - if (thisrd < baserd) - { - 
baserd = thisrd; - bestnc = nc; - besteob = bd->eob; - } - } - - //reset coefficients - vpx_memcpy(bd->qcoeff, saved_qcoefs, 32); - vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32); - - if (bestnc) - { - for (k = 0; k < nzcoefcount; k++) - { - int pos = nzpos[k]; - - if (bestnc & (1 << k)) - { - int cur_qcoef = bd->qcoeff[pos]; - - if (cur_qcoef < 0) - { - bd->qcoeff[pos]++; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - else - { - bd->qcoeff[pos]--; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - } - } - -#if 0 - { - int eob = -1; - int rc; - int m; - - for (m = 0; m < 16; m++) - { - rc = vp8_default_zig_zag1d[m]; - - if (bd->qcoeff[rc]) - eob = m; - } - - bd->eob = eob + 1; - } -#endif - } - -#if 1 - bd->eob = besteob; -#endif -#if 0 - { - int eob = -1; - int rc; - int m; - - for (m = 0; m < 16; m++) - { - rc = vp8_default_zig_zag1d[m]; - - if (bd->qcoeff[rc]) - eob = m; - } - - bd->eob = eob + 1; - } - -#endif - *a = *l = (bd->eob != !type); - return; -} - -void vp8_optimize_bplus(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd) -{ - BLOCK *b = &x->block[i]; - BLOCKD *bd = &x->e_mbd.block[i]; - short *dequant_ptr = &bd->dequant[0][0]; - int nzpos[16] = {0}; - short saved_qcoefs[16]; - short saved_dqcoefs[16]; - int baserate, baseerror, baserd; - int rate, error, thisrd; - int k; - int nzcoefcount = 0; - int nc, bestnc = 0; - int besteob; - - // count potential coefficient to be optimized - for (k = !type; k < 16; k++) - { - int qcoef = abs(bd->qcoeff[k]); - int coef = abs(b->coeff[k]); - int dq = dequant_ptr[k]; - - if (qcoef && (qcoef * dq < coef) && (coef < (qcoef * dq + dq))) - { - nzpos[nzcoefcount] = k; - nzcoefcount++; - } - } - - // if nothing here, do nothing for this block. - if (!nzcoefcount) - { - //do not update context, we need do the other half. 
- //*a = *l = (bd->eob != !type); - return; - } - - // save a copy of quantized coefficients - vpx_memcpy(saved_qcoefs, bd->qcoeff, 32); - vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32); - - besteob = bd->eob; - baserate = cost_coeffs(x, bd, type, a, l); - baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2; - baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100); - - for (nc = 1; nc < (1 << nzcoefcount); nc++) - { - //reset coefficients - vpx_memcpy(bd->qcoeff, saved_qcoefs, 32); - vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32); - - for (k = 0; k < nzcoefcount; k++) - { - int pos = nzpos[k]; - - if ((nc & (1 << k))) - { - int cur_qcoef = bd->qcoeff[pos]; - - if (cur_qcoef < 0) - { - bd->qcoeff[pos]--; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - else - { - bd->qcoeff[pos]++; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - } - } - - { - int eob = -1; - int rc; - int m; - - for (m = 0; m < 16; m++) - { - rc = vp8_default_zig_zag1d[m]; - - if (bd->qcoeff[rc]) - eob = m; - } - - bd->eob = eob + 1; - } - - rate = cost_coeffs(x, bd, type, a, l); - error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2; - thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100); - - if (thisrd < baserd) - { - baserd = thisrd; - bestnc = nc; - besteob = bd->eob; - } - } - - //reset coefficients - vpx_memcpy(bd->qcoeff, saved_qcoefs, 32); - vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32); - - if (bestnc) - { - for (k = 0; k < nzcoefcount; k++) - { - int pos = nzpos[k]; - - if (bestnc & (1 << k)) - { - int cur_qcoef = bd->qcoeff[pos]; - - if (cur_qcoef < 0) - { - bd->qcoeff[pos]++; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - else - { - bd->qcoeff[pos]--; - bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos]; - } - } - } - } - - bd->eob = besteob; - //do not update context, we need do the other half. 
- //*a = *l = (bd->eob != !type); - return; -} - -void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd) -{ - - BLOCK *b = &x->block[i]; - BLOCKD *bd = &x->e_mbd.block[i]; - short *dequant_ptr = &bd->dequant[0][0]; - - int baserate, baseerror, baserd; - int rate, error, thisrd; - int k; - - if (bd->eob == 0) - return; - - baserate = cost_coeffs(x, bd, type, a, l); - baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4; - baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100); - - for (k = 0; k < 16; k++) - { - int cur_qcoef = bd->qcoeff[k]; - - if (!cur_qcoef) - continue; - - if (cur_qcoef < 0) - { - bd->qcoeff[k]++; - bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k]; - } - else - { - bd->qcoeff[k]--; - bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k]; - } - - if (bd->qcoeff[k] == 0) - { - int eob = -1; - int rc; - int l; - - for (l = 0; l < 16; l++) - { - rc = vp8_default_zig_zag1d[l]; - - if (bd->qcoeff[rc]) - eob = l; - } - - bd->eob = eob + 1; - } - - rate = cost_coeffs(x, bd, type, a, l); - error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4; - thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100); - - if (thisrd > baserd) - { - bd->qcoeff[k] = cur_qcoef; - bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k]; - } - else - { - baserd = thisrd; - } - - } - - { - int eob = -1; - int rc; - - for (k = 0; k < 16; k++) - { - rc = vp8_default_zig_zag1d[k]; - - if (bd->qcoeff[rc]) - eob = k; - } - - bd->eob = eob + 1; - } - - return; -} - - void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { int b; - TEMP_CONTEXT t, t2; - int type = 0; + int type; + int has_2nd_order; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; - vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4); + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + 
vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED) - type = 3; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 0 : 3; for (b = 0; b < 16; b++) { - //vp8_optimize_bplus(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]); - vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); + vp8_optimize_b(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } - vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2); - vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2); - for (b = 16; b < 20; b++) { - //vp8_optimize_bplus(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b]); - vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); + vp8_optimize_b(x, b, vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } for (b = 20; b < 24; b++) { - //vp8_optimize_bplus(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]); - vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd); - } -} - - - -void vp8_super_slow_yquant_optimization(MACROBLOCK *x, int type, const VP8_ENCODER_RTCD *rtcd) -{ - BLOCK *b = &x->block[0]; - BLOCKD *bd = &x->e_mbd.block[0]; - short *dequant_ptr = &bd->dequant[0][0]; - struct - { - int block; - int pos; - } nzpos[256]; - short saved_qcoefs[256]; - short saved_dqcoefs[256]; - short *coef_ptr = x->coeff; - short *qcoef_ptr = x->e_mbd.qcoeff; - short *dqcoef_ptr = x->e_mbd.dqcoeff; - - int baserate, baseerror, baserd; - int rate, error, thisrd; - int i, k; - int nzcoefcount = 0; - int nc, bestnc = 0; - int 
besteob; - - //this code has assumption in macroblock coeff buffer layout - for (i = 0; i < 16; i++) - { - // count potential coefficient to be optimized - for (k = !type; k < 16; k++) - { - int qcoef = abs(qcoef_ptr[i*16 + k]); - int coef = abs(coef_ptr[i*16 + k]); - int dq = dequant_ptr[k]; - - if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq)) - { - nzpos[nzcoefcount].block = i; - nzpos[nzcoefcount].pos = k; - nzcoefcount++; - } - } + vp8_optimize_b(x, b, vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } - // if nothing here, do nothing for this macro_block. - if (!nzcoefcount || nzcoefcount > 15) + + if (has_2nd_order) { - return; - } - - /****************************************************************************** - looking from each coeffient's perspective, each identifed coefficent above could - have 2 values:roundeddown(x) and roundedup(x). Therefore the total number of - different states is less than 2**nzcoefcount. - ******************************************************************************/ - // save the qunatized coefficents and dequantized coefficicents - vpx_memcpy(saved_qcoefs, x->e_mbd.qcoeff, 256); - vpx_memcpy(saved_dqcoefs, x->e_mbd.dqcoeff, 256); - - baserate = mbycost_coeffs(x); - baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type); - baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100); - - for (nc = 1; nc < (1 << nzcoefcount); nc++) - { - //reset coefficients - vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256); - vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256); - - for (k = 0; k < nzcoefcount; k++) - { - int bk = nzpos[k].block; - int pos = nzpos[k].pos; - int mbkpos = bk * 16 + pos; - - if ((nc & (1 << k))) - { - int cur_qcoef = x->e_mbd.qcoeff[mbkpos]; - - if (cur_qcoef < 0) - { - x->e_mbd.qcoeff[mbkpos]++; - x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos]; - } - else - { - x->e_mbd.qcoeff[mbkpos]--; - x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * 
dequant_ptr[pos]; - } - } - } - - for (i = 0; i < 16; i++) - { - BLOCKD *bd = &x->e_mbd.block[i]; - { - int eob = -1; - int rc; - int l; - - for (l = 0; l < 16; l++) - { - rc = vp8_default_zig_zag1d[l]; - - if (bd->qcoeff[rc]) - eob = l; - } - - bd->eob = eob + 1; - } - } - - rate = mbycost_coeffs(x); - error = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);; - thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100); - - if (thisrd < baserd) - { - baserd = thisrd; - bestnc = nc; - besteob = bd->eob; - } - } - - //reset coefficients - vpx_memcpy(x->e_mbd.qcoeff, saved_qcoefs, 256); - vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256); - - if (bestnc) - { - for (k = 0; k < nzcoefcount; k++) - { - int bk = nzpos[k].block; - int pos = nzpos[k].pos; - int mbkpos = bk * 16 + pos; - - if ((nc & (1 << k))) - { - int cur_qcoef = x->e_mbd.qcoeff[mbkpos]; - - if (cur_qcoef < 0) - { - x->e_mbd.qcoeff[mbkpos]++; - x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos]; - } - else - { - x->e_mbd.qcoeff[mbkpos]--; - x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos]; - } - } - } - } - - for (i = 0; i < 16; i++) - { - BLOCKD *bd = &x->e_mbd.block[i]; - { - int eob = -1; - int rc; - int l; - - for (l = 0; l < 16; l++) - { - rc = vp8_default_zig_zag1d[l]; - - if (bd->qcoeff[rc]) - eob = l; - } - - bd->eob = eob + 1; - } - } - - return; -} - -static void vp8_find_mb_skip_coef(MACROBLOCK *x) -{ - int i; - - x->e_mbd.mbmi.mb_skip_coeff = 1; - - if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV) - { - for (i = 0; i < 16; i++) - { - x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2); - } - - for (i = 16; i < 25; i++) - { - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } - else - { - for (i = 0; i < 24; i++) - { - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } -} - - -void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) -{ - int b; - TEMP_CONTEXT t, t2; - int type = 0; - - - 
vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4); - - if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED) - type = 3; - - vp8_super_slow_yquant_optimization(x, type, rtcd); - /* - for(b=0;b<16;b++) - { - vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]); - } - */ - - vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2); - - for (b = 16; b < 20; b++) - { - vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); - } - - vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2); - - for (b = 20; b < 24; b++) - { - vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd); + b=24; + vp8_optimize_b(x, b, vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } } @@ -993,59 +552,73 @@ void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { int b; - TEMP_CONTEXT t; - int type = 0; + int type; + int has_2nd_order; - if (!x->e_mbd.above_context[Y1CONTEXT]) + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + if (!x->e_mbd.above_context) return; - if (!x->e_mbd.left_context[Y1CONTEXT]) + if (!x->e_mbd.left_context) return; - vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4); + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED) - type = 3; + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); + type = has_2nd_order ? 
0 : 3; for (b = 0; b < 16; b++) { - vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); + vp8_optimize_b(x, b, type, + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } + + if (has_2nd_order) + { + b=24; + vp8_optimize_b(x, b, vp8_block2type[b], + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); + } } void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd) { int b; - TEMP_CONTEXT t, t2; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; - if (!x->e_mbd.above_context[UCONTEXT]) + if (!x->e_mbd.above_context) return; - if (!x->e_mbd.left_context[UCONTEXT]) + if (!x->e_mbd.left_context) return; - if (!x->e_mbd.above_context[VCONTEXT]) - return; + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - if (!x->e_mbd.left_context[VCONTEXT]) - return; - - - vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2); - vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2); + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 20; b++) { vp8_optimize_b(x, b, vp8_block2type[b], - t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd); - + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } for (b = 20; b < 24; b++) { vp8_optimize_b(x, b, vp8_block2type[b], - t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd); + ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd); } } @@ -1062,20 +635,14 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_quantize_mb(x); #if !(CONFIG_REALTIME_ONLY) -#if 1 - - if (x->optimize && x->rddiv > 1) - { + if (x->optimize==2 ||(x->optimize && x->rddiv > 1)) vp8_optimize_mb(x, rtcd); - vp8_find_mb_skip_coef(x); - } - -#endif #endif vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd); 
- vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, recon_mb) + (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } @@ -1092,7 +659,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd); - vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd); + RECON_INVOKE(&rtcd->common->recon, recon_mby) + (IF_RTCD(&rtcd->common->recon), &x->e_mbd); } @@ -1117,8 +685,8 @@ void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x) vp8_build_inter_predictors_mbuv(&x->e_mbd); ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride); - vp8_transform_mbuvrd(x); + vp8_transform_mbuv(x); - vp8_quantize_mbuvrd(x); + vp8_quantize_mbuv(x); } diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h index 91ca8f552..08f75c3b1 100644 --- a/vp8/encoder/encodemb.h +++ b/vp8/encoder/encodemb.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -99,9 +100,7 @@ extern void vp8_stuff_inter16x16(MACROBLOCK *x); void vp8_build_dcblock(MACROBLOCK *b); void vp8_transform_mb(MACROBLOCK *mb); void vp8_transform_mbuv(MACROBLOCK *x); -void vp8_transform_mbuvrd(MACROBLOCK *x); void vp8_transform_intra_mby(MACROBLOCK *x); -void vp8_transform_intra_mbyrd(MACROBLOCK *x); void Encode16x16Y(MACROBLOCK *x); void Encode16x16UV(MACROBLOCK *x); void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x); diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c index 2320b413a..cce753013 100644 --- a/vp8/encoder/encodemv.c +++ b/vp8/encoder/encodemv.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -252,7 +253,7 @@ static void write_component_probs( vp8_writer *const w, struct mv_context *cur_mvc, const struct mv_context *default_mvc_, - const struct mv_context *update_mvc, + const struct mv_context *update_mvc, const unsigned int events [MVvals], unsigned int rc, int *updated diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h index 1c1f450a0..e4481bff0 100644 --- a/vp8/encoder/encodemv.h +++ b/vp8/encoder/encodemv.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index 116c5cd20..c606d3e33 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -27,7 +28,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); - ENTROPY_CONTEXT mb_row_left_context[4][4]; + ENTROPY_CONTEXT_PLANES mb_row_left_context; //printf("Started thread %d\n", ithread); @@ -55,8 +56,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) int i; int recon_yoffset, recon_uvoffset; int mb_col; - int recon_y_stride = cm->last_frame.y_stride; - int recon_uv_stride = cm->last_frame.uv_stride; + int ref_fb_idx = cm->lst_fb_idx; + int dst_fb_idx = cm->new_fb_idx; + int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; volatile int *last_row_current_mb_col; if (ithread > 0) @@ -65,11 +68,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) last_row_current_mb_col = &cpi->current_mb_col_main; // reset above block coeffs - xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT]; - xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ]; - xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ]; - xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT]; - xd->left_context = mb_row_left_context; + xd->above_context = cm->above_context; + xd->left_context = &mb_row_left_context; vp8_zero(mb_row_left_context); @@ -106,9 +106,9 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); - xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset; + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = 
cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); // Is segmentation enabled @@ -117,14 +117,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) { // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking) if (cpi->segmentation_map[seg_map_index+mb_col] <= 3) - xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col]; + xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col]; else - xd->mbmi.segment_id = 0; + xd->mode_info_context->mbmi.segment_id = 0; vp8cx_mb_init_quantizer(cpi, x); } else - xd->mbmi.segment_id = 0; // Set to Segment 0 by default + xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default if (cm->frame_type == KEY_FRAME) @@ -147,24 +147,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) for (b = 0; b < xd->mbmi.partition_count; b++) { - inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++; + inter_b_modes[x->partition->bmi[b].mode] ++; } } #endif // Count of last ref frame 0,0 useage - if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME)) + if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) cpi->inter_zz_count ++; } cpi->tplist[mb_row].stop = *tp; - xd->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb - - // store macroblock mode info into context array - vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi)); + x->gf_active_ptr++; // Increment pointer into gf useage flags structure for next mb for (i = 0; i < 16; i++) vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi)); @@ -178,15 +175,13 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) recon_uvoffset += 8; // Keep track of segment useage - segment_counts[xd->mbmi.segment_id] ++; + segment_counts[xd->mode_info_context->mbmi.segment_id] ++; // skip to next mb xd->mode_info_context++; + x->partition_info++; - 
xd->above_context[Y1CONTEXT] += 4; - xd->above_context[UCONTEXT ] += 2; - xd->above_context[VCONTEXT ] += 2; - xd->above_context[Y2CONTEXT] ++; + xd->above_context++; cpi->mb_row_ei[ithread].current_mb_col = mb_col; @@ -194,19 +189,21 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data) //extend the recon for intra prediction vp8_extend_mb_row( - &cm->new_frame, + &cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); // this is to account for the border xd->mode_info_context++; + x->partition_info++; x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; + x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1) { @@ -256,13 +253,8 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->vp8_short_fdct4x4 = x->vp8_short_fdct4x4; z->vp8_short_fdct8x4 = x->vp8_short_fdct8x4; - z->short_fdct4x4rd = x->short_fdct4x4rd; - z->short_fdct8x4rd = x->short_fdct8x4rd; - z->short_fdct8x4rd = x->short_fdct8x4rd; - z->vp8_short_fdct4x4_ptr = x->vp8_short_fdct4x4_ptr; z->short_walsh4x4 = x->short_walsh4x4; z->quantize_b = x->quantize_b; - z->quantize_brd = x->quantize_brd; /* z->mvc = x->mvc; @@ -290,6 +282,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) for (i = 0; i < 25; i++) { z->block[i].quant = x->block[i].quant; + z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; z->block[i].round = x->block[i].round; @@ -334,11 +327,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) 
zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - /* - memcpy(zd->above_context, xd->above_context, sizeof(xd->above_context)); - memcpy(zd->mb_segment_tree_probs, xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs)); - memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); - */ for (i = 0; i < 25; i++) { zd->block[i].dequant = xd->block[i].dequant; @@ -372,14 +360,15 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, #if CONFIG_RUNTIME_CPU_DETECT mbd->rtcd = xd->rtcd; #endif - mbd->gf_active_ptr = xd->gf_active_ptr; + mb->gf_active_ptr = x->gf_active_ptr; mb->vector_range = 32; vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); mbr_ei[i].totalrate = 0; - mbd->mode_info = cm->mi - 1; + mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); + mbd->mode_info_context = cm->mi + x->e_mbd.mode_info_stride * (i + 1); mbd->mode_info_stride = cm->mode_info_stride; @@ -389,8 +378,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mbd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame; mb->src = * cpi->Source; - mbd->pre = cm->last_frame; - mbd->dst = cm->new_frame; + mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; + mbd->dst = cm->yv12_fb[cm->new_fb_idx]; mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); @@ -406,10 +395,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi, mb->rddiv = cpi->RDDIV; mb->rdmult = cpi->RDMULT; - mbd->mbmi.mode = DC_PRED; - mbd->mbmi.uv_mode = DC_PRED; - - mbd->left_context = cm->left_context; + mbd->left_context = &cm->left_context; mb->mvc = cm->fc.mvc; setup_mbby_copy(&mbr_ei[i].mb, x); diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index d5d430906..a7f5ce44c 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -29,7 +30,6 @@ #include "encodemv.h" //#define OUTPUT_FPF 1 -//#define FIRSTPASS_MM 1 #if CONFIG_RUNTIME_CPU_DETECT #define IF_RTCD(x) (x) @@ -77,9 +77,9 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) if (use_dc_pred) { - x->e_mbd.mbmi.mode = DC_PRED; - x->e_mbd.mbmi.uv_mode = DC_PRED; - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x); } @@ -107,15 +107,6 @@ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) { - /*FIRSTPASS_STATS * start_pos; - int ret_val; - - start_pos = cpi->stats_in; - ret_val = vp8_input_stats(cpi, next_frame); - reset_fpf_position(cpi, start_pos); - - return ret_val;*/ - if (cpi->stats_in >= cpi->stats_in_end) return EOF; @@ -126,7 +117,7 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) // Calculate a modified Error used in distributing bits between easier and harder frames static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { - double av_err = 
cpi->total_stats.ssim_weighted_pred_err; + double av_err = cpi->total_stats->ssim_weighted_pred_err; double this_err = this_frame->ssim_weighted_pred_err; double modified_err; @@ -216,7 +207,7 @@ int frame_max_bits(VP8_COMP *cpi) // If we are running below the optimal level then we need to gradually tighten up on max_bits. if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - double buffer_fullness_ratio = (double)DOUBLE_DIVIDE_CHECK(cpi->buffer_level) / (double)cpi->oxcf.optimal_buffer_level; + double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); @@ -237,7 +228,7 @@ int frame_max_bits(VP8_COMP *cpi) else { // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user - max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); + max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); } // Trap case where we are out of bits @@ -247,17 +238,35 @@ int frame_max_bits(VP8_COMP *cpi) return max_bits; } -void vp8_output_stats(struct vpx_codec_pkt_list *pktlist, + +extern size_t vp8_firstpass_stats_sz(unsigned int mb_count) +{ + /* Calculate the size of a stats packet, which is dependent on the frame + * resolution. The FIRSTPASS_STATS struct has a single element array, + * motion_map, which is virtually expanded to have one element per + * macroblock. 
+ */ + size_t stats_sz; + FIRSTPASS_STATS stats; + + stats_sz = sizeof(FIRSTPASS_STATS) + mb_count; + stats_sz = (stats_sz + 7) & ~7; + return stats_sz; +} + + +void vp8_output_stats(const VP8_COMP *cpi, + struct vpx_codec_pkt_list *pktlist, FIRSTPASS_STATS *stats) { struct vpx_codec_cx_pkt pkt; pkt.kind = VPX_CODEC_STATS_PKT; pkt.data.twopass_stats.buf = stats; - pkt.data.twopass_stats.sz = sizeof(*stats); + pkt.data.twopass_stats.sz = vp8_firstpass_stats_sz(cpi->common.MBs); vpx_codec_pkt_list_add(pktlist, &pkt); // TEMP debug code -#ifdef OUTPUT_FPF +#if OUTPUT_FPF { FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); @@ -279,16 +288,24 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist, stats->mv_in_out_count, stats->count); fclose(fpfile); + + + fpfile = fopen("fpmotionmap.stt", "a"); + if(fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile)); + fclose(fpfile); } #endif } int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps) { + size_t stats_sz = vp8_firstpass_stats_sz(cpi->common.MBs); + if (cpi->stats_in >= cpi->stats_in_end) return EOF; - *fps = *cpi->stats_in++; + *fps = *cpi->stats_in; + cpi->stats_in = (void*)((char *)cpi->stats_in + stats_sz); return 1; } @@ -351,76 +368,47 @@ void vp8_avg_stats(FIRSTPASS_STATS *section) section->duration /= section->count; } -int vp8_fpmm_get_pos(VP8_COMP *cpi) +unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi) { - return ftell(cpi->fp_motion_mapfile); + return cpi->fp_motion_map_stats; } -void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos) +void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos) { int Offset; - if (cpi->fp_motion_mapfile) - { - Offset = ftell(cpi->fp_motion_mapfile) - target_pos; - fseek(cpi->fp_motion_mapfile, (int) - Offset, SEEK_CUR); - } + cpi->fp_motion_map_stats = target_pos; } void vp8_advance_fpmm(VP8_COMP *cpi, int count) { -#ifdef FIRSTPASS_MM - fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR); -#endif + cpi->fp_motion_map_stats = 
(void*)((char*)cpi->fp_motion_map_stats + + count * vp8_firstpass_stats_sz(cpi->common.MBs)); } -void vp8_input_fpmm(VP8_COMP *cpi, int count) +void vp8_input_fpmm(VP8_COMP *cpi) { -#ifdef FIRSTPASS_MM + unsigned char *fpmm = cpi->fp_motion_map; + int MBs = cpi->common.MBs; + int max_frames = cpi->active_arnr_frames; + int i; - unsigned char *tmp_motion_map; - int i, j; - - if (!cpi->fp_motion_mapfile) - return; // Error - - // Create the first pass motion map structure and set to 0 - CHECK_MEM_ERROR(tmp_motion_map, vpx_calloc(cpi->common.MBs, 1)); - - // Reset the state of the global map - vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs); - - // Read the specified number of frame maps and set the global map to the highest value seen for each mb. - for (i = 0; i < count; i++) + for (i=0; i<max_frames; i++) { - if (fread(tmp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile) == cpi->common.MBs) - { - for (j = 0; j < cpi->common.MBs; j++) - { - if (tmp_motion_map[j] > 1) - cpi->fp_motion_map[j] += 5; // Intra is flagged - else - cpi->fp_motion_map[j] += tmp_motion_map[j]; - } - } - else - break; // Read error + char *motion_map = (char*)cpi->fp_motion_map_stats + + sizeof(FIRSTPASS_STATS); + memcpy(fpmm, motion_map, MBs); + fpmm += MBs; + vp8_advance_fpmm(cpi, 1); } - if (tmp_motion_map != 0) - vpx_free(tmp_motion_map); - -#endif - + // Flag the use of weights in the temporal filter + cpi->use_weighted_temporal_filter = 1; } void vp8_init_first_pass(VP8_COMP *cpi) { - vp8_zero_stats(&cpi->total_stats); - -#ifdef FIRSTPASS_MM - cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "wb"); -#endif + vp8_zero_stats(cpi->total_stats); // TEMP debug code #ifdef OUTPUT_FPF @@ -428,6 +416,8 @@ void vp8_init_first_pass(VP8_COMP *cpi) FILE *fpfile; fpfile = fopen("firstpass.stt", "w"); fclose(fpfile); + fpfile = fopen("fpmotionmap.stt", "wb"); + fclose(fpfile); } #endif @@ -435,16 +425,10 @@ void vp8_init_first_pass(VP8_COMP *cpi) void vp8_end_first_pass(VP8_COMP *cpi) { - vp8_output_stats(cpi->output_pkt_list, 
&cpi->total_stats); - -#ifdef FIRSTPASS_MM - - if (cpi->fp_motion_mapfile) - fclose(cpi->fp_motion_mapfile); - -#endif - + vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats); } + + void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset ) { MACROBLOCKD * const xd = & x->e_mbd; @@ -478,12 +462,11 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV * int step_param = 3; //3; // Dont search over full range for first pass int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3; int n; - vp8_variance_fn_ptr_t v_fn_ptr; + vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; int new_mv_mode_penalty = 256; + // override the default variance function to use MSE v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16); - v_fn_ptr.sdf = cpi->fn_ptr.sdf; - v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df; // Set up pointers for this macro block recon buffer xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; @@ -535,8 +518,11 @@ void vp8_first_pass(VP8_COMP *cpi) int col_blocks = 4 * cm->mb_cols; int recon_yoffset, recon_uvoffset; - int recon_y_stride = cm->last_frame.y_stride; - int recon_uv_stride = cm->last_frame.uv_stride; + YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; + YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; + YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; + int recon_y_stride = lst_yv12->y_stride; + int recon_uv_stride = lst_yv12->uv_stride; int intra_error = 0; int coded_error = 0; @@ -558,8 +544,12 @@ void vp8_first_pass(VP8_COMP *cpi) vp8_clear_system_state(); //__asm emms; x->src = * cpi->Source; - xd->pre = cm->last_frame; - xd->dst = cm->new_frame; + xd->pre = *lst_yv12; + xd->dst = *new_yv12; + + x->partition_info = x->pi; + + xd->mode_info_context = cm->mi; vp8_build_block_offsets(x); @@ -568,7 +558,7 @@ void vp8_first_pass(VP8_COMP *cpi) vp8_setup_block_ptrs(x); // set up 
frame new frame for intra coded blocks - vp8_setup_intra_recon(&cm->new_frame); + vp8_setup_intra_recon(new_yv12); vp8cx_frame_init_quantizer(cpi); // Initialise the MV cost table to the defaults @@ -595,12 +585,14 @@ void vp8_first_pass(VP8_COMP *cpi) for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { int this_error; + int zero_error; + int zz_to_best_ratio; int gf_motion_error = INT_MAX; int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset; + xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; + xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset; + xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); // do intra 16x16 prediction @@ -616,7 +608,7 @@ void vp8_first_pass(VP8_COMP *cpi) intra_error += this_error; // Indicate default assumption of intra in the motion map - *fp_motion_map_ptr = 2; + *fp_motion_map_ptr = 0; // Set up limit values for motion vectors to prevent them extending outside the UMV borders x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); @@ -634,18 +626,25 @@ void vp8_first_pass(VP8_COMP *cpi) int motion_error = INT_MAX; // Simple 0,0 motion with no mv overhead - vp8_zz_motion_search( cpi, x, &cm->last_frame, &motion_error, recon_yoffset ); + vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset ); d->bmi.mv.as_mv.row = 0; d->bmi.mv.as_mv.col = 0; - // Test last reference frame using the previous best mv as the starting point (best reference) for the search - vp8_first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, &cm->last_frame, &motion_error, recon_yoffset); + // Save (0,0) error for later use + zero_error = motion_error; + + // Test last reference frame using the previous best mv as the + // starting point (best reference) for the search + 
vp8_first_pass_motion_search(cpi, x, &best_ref_mv, + &d->bmi.mv.as_mv, lst_yv12, + &motion_error, recon_yoffset); // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0)) { tmp_err = INT_MAX; - vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->last_frame, &motion_error, recon_yoffset); + vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, + lst_yv12, &tmp_err, recon_yoffset); if ( tmp_err < motion_error ) { @@ -659,7 +658,7 @@ void vp8_first_pass(VP8_COMP *cpi) // Experimental search in a second reference frame ((0,0) based only) if (cm->current_video_frame > 1) { - vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->golden_frame, &gf_motion_error, recon_yoffset); + vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) { @@ -677,9 +676,9 @@ void vp8_first_pass(VP8_COMP *cpi) // Reset to last frame as reference buffer - xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset; - xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset; - xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset; + xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; + xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; + xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } if (motion_error <= this_error) @@ -707,8 +706,6 @@ void vp8_first_pass(VP8_COMP *cpi) { mvcount++; - *fp_motion_map_ptr = 1; - // Does the Row vector point inwards or outwards if (mb_row < cm->mb_rows / 2) { @@ -740,12 +737,30 @@ void vp8_first_pass(VP8_COMP *cpi) else if (d->bmi.mv.as_mv.col < 0) sum_in_vectors--; } + + // Compute how close (0,0) predictor is to best + // predictor in terms of their prediction error + zz_to_best_ratio = (10*zero_error + this_error/2) + / (this_error+!this_error); + + if ((zero_error < 50000) && + (zz_to_best_ratio <= 
11) ) + *fp_motion_map_ptr = 1; + else + *fp_motion_map_ptr = 0; } else - *fp_motion_map_ptr = 0; // 0,0 mv was best + { + // 0,0 mv was best + if( zero_error<50000 ) + *fp_motion_map_ptr = 2; + else + *fp_motion_map_ptr = 1; + } } else { + // Intra was best best_ref_mv.row = 0; best_ref_mv.col = 0; } @@ -771,7 +786,7 @@ void vp8_first_pass(VP8_COMP *cpi) x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; //extend the recon for intra prediction - vp8_extend_mb_row(&cm->new_frame, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); + vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); vp8_clear_system_state(); //__asm emms; } @@ -823,31 +838,32 @@ void vp8_first_pass(VP8_COMP *cpi) fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp; // don't want to do outputstats with a stack variable! - cpi->this_frame_stats = fps; - vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats); - vp8_accumulate_stats(&cpi->total_stats, &fps); - -#ifdef FIRSTPASS_MM - fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile); -#endif + memcpy(cpi->this_frame_stats, + &fps, + sizeof(FIRSTPASS_STATS)); + memcpy((char*)cpi->this_frame_stats + sizeof(FIRSTPASS_STATS), + cpi->fp_motion_map, + sizeof(cpi->fp_motion_map[0]) * cpi->common.MBs); + vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats); + vp8_accumulate_stats(cpi->total_stats, &fps); } // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met if ((cm->current_video_frame > 0) && - (cpi->this_frame_stats.pcnt_inter > 0.20) && - ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0)) + (cpi->this_frame_stats->pcnt_inter > 0.20) && + ((cpi->this_frame_stats->intra_error / cpi->this_frame_stats->coded_error) > 2.0)) { - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame); + vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); } // swap frame pointers so last 
frame refers to the frame we just compressed - vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame); - vp8_yv12_extend_frame_borders(&cm->last_frame); + vp8_swap_yv12_buffer(lst_yv12, new_yv12); + vp8_yv12_extend_frame_borders(lst_yv12); // Special case for the first frame. Copy into the GF buffer as a second reference. if (cm->current_video_frame == 0) { - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame); + vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); } @@ -863,7 +879,7 @@ void vp8_first_pass(VP8_COMP *cpi) else recon_file = fopen(filename, "ab"); - fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file); + if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file)); fclose(recon_file); } @@ -1104,33 +1120,33 @@ void vp8_init_second_pass(VP8_COMP *cpi) double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); - vp8_zero_stats(&cpi->total_stats); + vp8_zero_stats(cpi->total_stats); if (!cpi->stats_in_end) return; - cpi->total_stats = *cpi->stats_in_end; + *cpi->total_stats = *cpi->stats_in_end; - cpi->total_error_left = cpi->total_stats.ssim_weighted_pred_err; - cpi->total_intra_error_left = cpi->total_stats.intra_error; - cpi->total_coded_error_left = cpi->total_stats.coded_error; + cpi->total_error_left = cpi->total_stats->ssim_weighted_pred_err; + cpi->total_intra_error_left = cpi->total_stats->intra_error; + cpi->total_coded_error_left = cpi->total_stats->coded_error; cpi->start_tot_err_left = cpi->total_error_left; - //cpi->bits_left = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); - //cpi->bits_left -= (long long)(cpi->total_stats.count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); + //cpi->bits_left = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); + //cpi->bits_left -= (long 
long)(cpi->total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); // each frame can have a different duration, as the frame rate in the source // isn't guaranteed to be constant. The frame rate prior to the first frame // encoded in the second pass is a guess. However the sum duration is not. // Its calculated based on the actual durations of all frames from the first // pass. - vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats.count / cpi->total_stats.duration); + vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats->count / cpi->total_stats->duration); cpi->output_frame_rate = cpi->oxcf.frame_rate; - cpi->bits_left = (long long)(cpi->total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ; - cpi->bits_left -= (long long)(cpi->total_stats.duration * two_pass_min_rate / 10000000.0); + cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ; + cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0); - vp8_avg_stats(&cpi->total_stats); + vp8_avg_stats(cpi->total_stats); // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence { @@ -1146,7 +1162,7 @@ void vp8_init_second_pass(VP8_COMP *cpi) sum_iiratio += IIRatio; } - cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats.count); + cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats->count); // Reset file position reset_fpf_position(cpi, start_pos); @@ -1168,21 +1184,11 @@ void vp8_init_second_pass(VP8_COMP *cpi) } -#ifdef FIRSTPASS_MM - cpi->fp_motion_mapfile = 0; - cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb"); -#endif - + cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in; } void vp8_end_second_pass(VP8_COMP *cpi) { -#ifdef FIRSTPASS_MM - - if (cpi->fp_motion_mapfile) - fclose(cpi->fp_motion_mapfile); - -#endif } // Analyse and define a gf/arf group . 
@@ -1191,7 +1197,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) FIRSTPASS_STATS next_frame; FIRSTPASS_STATS *start_pos; int i; - int image_size = cpi->common.last_frame.y_width * cpi->common.last_frame.y_height; + int y_width = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_width; + int y_height = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_height; + int image_size = y_width * y_height; double boost_score = 0.0; double old_boost_score = 0.0; double gf_group_err = 0.0; @@ -1200,10 +1208,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double mv_accumulator_rabs = 0.0; double mv_accumulator_cabs = 0.0; - double this_mv_rabs; - double this_mv_cabs; double mv_ratio_accumulator = 0.0; - double distance_factor = 0.0; double decay_accumulator = 1.0; double boost_factor = IIFACTOR; @@ -1216,21 +1221,19 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int max_bits = frame_max_bits(cpi); // Max for a single frame -#ifdef FIRSTPASS_MM - int fpmm_pos; -#endif + unsigned char *fpmm_pos; cpi->gf_group_bits = 0; cpi->gf_decay_rate = 0; vp8_clear_system_state(); //__asm emms; -#ifdef FIRSTPASS_MM fpmm_pos = vp8_fpmm_get_pos(cpi); -#endif start_pos = cpi->stats_in; + vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean + // Preload the stats for the next frame. mod_frame_err = calculate_modified_err(cpi, this_frame); @@ -1250,9 +1253,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key)) { double r; - double motion_factor; double this_frame_mvr_ratio; double this_frame_mvc_ratio; + double motion_decay; + double motion_pct = next_frame.pcnt_motion; i++; // Increment the loop counter @@ -1267,12 +1271,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) break; // Accumulate motion stats. 
- motion_factor = next_frame.pcnt_motion; - this_mv_rabs = fabs(next_frame.mvr_abs * motion_factor); - this_mv_cabs = fabs(next_frame.mvc_abs * motion_factor); - - mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_factor); - mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_factor); + mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct); + mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct); //Accumulate Motion In/Out of frame stats this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion; @@ -1280,13 +1280,23 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion); // If there is a significant amount of motion - if (motion_factor > 0.05) + if (motion_pct > 0.05) { - this_frame_mvr_ratio = fabs(next_frame.mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr)); - this_frame_mvc_ratio = fabs(next_frame.mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc)); + this_frame_mvr_ratio = fabs(next_frame.mvr_abs) / + DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr)); - mv_ratio_accumulator += (this_frame_mvr_ratio < next_frame.mvr_abs) ? (this_frame_mvr_ratio * motion_factor) : next_frame.mvr_abs * motion_factor; - mv_ratio_accumulator += (this_frame_mvc_ratio < next_frame.mvc_abs) ? (this_frame_mvc_ratio * motion_factor) : next_frame.mvc_abs * motion_factor; + this_frame_mvc_ratio = fabs(next_frame.mvc_abs) / + DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc)); + + mv_ratio_accumulator += + (this_frame_mvr_ratio < next_frame.mvr_abs) + ? (this_frame_mvr_ratio * motion_pct) + : next_frame.mvr_abs * motion_pct; + + mv_ratio_accumulator += + (this_frame_mvc_ratio < next_frame.mvc_abs) + ? 
(this_frame_mvc_ratio * motion_pct) + : next_frame.mvc_abs * motion_pct; } else { @@ -1314,14 +1324,26 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) loop_decay_rate = next_frame.pcnt_inter; // High % motion -> somewhat higher decay rate - if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate) - loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0)); + motion_decay = (1.0 - (motion_pct / 20.0)); + if (motion_decay < loop_decay_rate) + loop_decay_rate = motion_decay; - distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + (this_mv_cabs * this_mv_cabs)) / 300.0; - distance_factor = ((distance_factor > 1.0) ? 0.0 : (1.0 - distance_factor)); + // Adjustment to decay rate based on speed of motion + { + double this_mv_rabs; + double this_mv_cabs; + double distance_factor; - if (distance_factor < loop_decay_rate) - loop_decay_rate = distance_factor; + this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct); + this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct); + + distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + + (this_mv_cabs * this_mv_cabs)) / 250.0; + distance_factor = ((distance_factor > 1.0) + ? 
0.0 : (1.0 - distance_factor)); + if (distance_factor < loop_decay_rate) + loop_decay_rate = distance_factor; + } // Cumulative effect of decay decay_accumulator = decay_accumulator * loop_decay_rate; @@ -1387,6 +1409,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Should we use the alternate refernce frame if (cpi->oxcf.play_alternate && + cpi->oxcf.lag_in_frames && (i >= MIN_GF_INTERVAL) && (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) && // dont use ARF very near next kf (((next_frame.pcnt_inter > 0.75) && @@ -1416,7 +1439,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Boost for arf frame Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); - Boost += (cpi->baseline_gf_interval * 50); + Boost += (i * 50); allocation_chunks = (i * 100) + Boost; // Normalize Altboost and allocations chunck down to prevent overflow @@ -1435,6 +1458,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames. if (tmp_q < cpi->worst_quality) { + int half_gf_int; + int frames_after_arf; + int frames_bwd = cpi->oxcf.arnr_max_frames - 1; + int frames_fwd = cpi->oxcf.arnr_max_frames - 1; + cpi->source_alt_ref_pending = TRUE; // For alt ref frames the error score for the end frame of the group (the alt ref frame) should not contribute to the group total and hence @@ -1445,22 +1473,63 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // The future frame itself is part of the next group cpi->baseline_gf_interval = i - 1; -#ifdef FIRSTPASS_MM - // Read through the motion map to load up the entry for the ARF + // Define the arnr filter width for this group of frames: + // We only filter frames that lie within a distance of half + // the GF interval from the ARF frame. We also have to trap + // cases where the filter extends beyond the end of clip. 
+ // Note: this_frame->frame has been updated in the loop + // so it now points at the ARF frame. + half_gf_int = cpi->baseline_gf_interval >> 1; + frames_after_arf = cpi->total_stats->count - this_frame->frame - 1; + + switch (cpi->oxcf.arnr_type) { - int j; + case 1: // Backward filter + frames_fwd = 0; + if (frames_bwd > half_gf_int) + frames_bwd = half_gf_int; + break; - // Advance to the region of interest - // Current default 2 frames before to 2 frames after the ARF frame itsef - vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos); + case 2: // Forward filter + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + if (frames_fwd > frames_after_arf) + frames_fwd = frames_after_arf; + frames_bwd = 0; + break; - for (j = 0; j < cpi->baseline_gf_interval - 2; j++) - vp8_advance_fpmm(cpi, 1); + case 3: // Centered filter + default: + frames_fwd >>= 1; + if (frames_fwd > frames_after_arf) + frames_fwd = frames_after_arf; + if (frames_fwd > half_gf_int) + frames_fwd = half_gf_int; + + frames_bwd = frames_fwd; + + // For even length filter there is one more frame backward + // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. + if (frames_bwd < half_gf_int) + frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1; + break; + } + + cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; + + { + // Advance to & read in the motion map for those frames + // to be considered for filtering based on the position + // of the ARF + vp8_fpmm_reset_pos(cpi, cpi->fp_motion_map_stats_save); + + // Position at the 'earliest' frame to be filtered + vp8_advance_fpmm(cpi, + cpi->baseline_gf_interval - frames_bwd); // Read / create a motion map for the region of interest - vp8_input_fpmm(cpi, 5); + vp8_input_fpmm(cpi); } -#endif } else { @@ -1496,7 +1565,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Now decide how many bits should be allocated to the GF group as a proportion of those remaining in the kf group. 
// The final key frame group in the clip is treated as a special case where cpi->kf_group_bits is tied to cpi->bits_left. // This is also important for short clips where there may only be one key frame. - if (cpi->frames_to_key >= (int)(cpi->total_stats.count - cpi->common.current_video_frame)) + if (cpi->frames_to_key >= (int)(cpi->total_stats->count - cpi->common.current_video_frame)) { cpi->kf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0; } @@ -1565,26 +1634,36 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Calculate the number of bits to be spent on the gf or arf based on the boost number cpi->gf_bits = (int)((double)Boost * (cpi->gf_group_bits / (double)allocation_chunks)); - // If the frame that is to be boosted is simpler than the average for the gf/arf group then use an alternative calculation + // If the frame that is to be boosted is simpler than the average for + // the gf/arf group then use an alternative calculation // based on the error score of the frame itself if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { double alt_gf_grp_bits; int alt_gf_bits; - alt_gf_grp_bits = ((double)cpi->kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / (double)cpi->kf_group_error_left) ; - alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks)); + alt_gf_grp_bits = + (double)cpi->kf_group_bits * + (mod_frame_err * (double)cpi->baseline_gf_interval) / + DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left); + + alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / + (double)allocation_chunks)); if (cpi->gf_bits > alt_gf_bits) { cpi->gf_bits = alt_gf_bits; } } - // Else if it is harder than other frames in the group make sure it at least receives an allocation in keeping with - // its relative error score, otherwise it may be worse off than an "un-boosted" frame + // Else if it is harder than other frames in the group make sure it at + // least receives an 
allocation in keeping with its relative error + // score, otherwise it may be worse off than an "un-boosted" frame else { - int alt_gf_bits = (int)((double)cpi->kf_group_bits * (mod_frame_err / (double)cpi->kf_group_error_left)); + int alt_gf_bits = + (int)((double)cpi->kf_group_bits * + mod_frame_err / + DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left)); if (alt_gf_bits > cpi->gf_bits) { @@ -1659,16 +1738,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) vp8_avg_stats(&sectionstats); - if (sectionstats.pcnt_motion < .17) - cpi->section_is_low_motion = 1; - else - cpi->section_is_low_motion = 0; - - if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45) - cpi->section_is_fast_motion = 1; - else - cpi->section_is_fast_motion = 0; - cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); @@ -1686,10 +1755,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) reset_fpf_position(cpi, start_pos); } -#ifdef FIRSTPASS_MM // Reset the First pass motion map file position vp8_fpmm_reset_pos(cpi, fpmm_pos); -#endif } // Allocate bits to a normal frame that is neither a gf an arf or a key frame. @@ -1703,7 +1770,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int max_bits = frame_max_bits(cpi); // Max for a single frame // The final few frames have special treatment - if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats.count - cpi->common.current_video_frame)) + if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats->count - cpi->common.current_video_frame)) { cpi->gf_group_bits = (cpi->bits_left > 0) ? 
cpi->bits_left : 0;; } @@ -1748,7 +1815,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) void vp8_second_pass(VP8_COMP *cpi) { int tmp_q; - int frames_left = (int)(cpi->total_stats.count - cpi->common.current_video_frame); + int frames_left = (int)(cpi->total_stats->count - cpi->common.current_video_frame); FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; @@ -1771,11 +1838,12 @@ void vp8_second_pass(VP8_COMP *cpi) if (EOF == vp8_input_stats(cpi, &this_frame)) return; -#ifdef FIRSTPASS_MM - vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs); - cpi->fpmm_pos = vp8_fpmm_get_pos(cpi); - vp8_advance_fpmm(cpi, 1); // Read this frame's first pass motion map -#endif + vpx_memset(cpi->fp_motion_map, 0, + cpi->oxcf.arnr_max_frames*cpi->common.MBs); + cpi->fp_motion_map_stats_save = vp8_fpmm_get_pos(cpi); + + // Step over this frame's first pass motion map + vp8_advance_fpmm(cpi, 1); this_frame_error = this_frame.ssim_weighted_pred_err; this_frame_intra_error = this_frame.intra_error; @@ -1868,6 +1936,18 @@ void vp8_second_pass(VP8_COMP *cpi) } } + // Keep a globally available copy of this and the next frame's iiratio. 
+ cpi->this_iiratio = this_frame_intra_error / + DOUBLE_DIVIDE_CHECK(this_frame_coded_error); + { + FIRSTPASS_STATS next_frame; + if ( lookup_next_frame_stats(cpi, &next_frame) != EOF ) + { + cpi->next_iiratio = next_frame.intra_error / + DOUBLE_DIVIDE_CHECK(next_frame.coded_error); + } + } + // Set nominal per second bandwidth for this frame cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate; if (cpi->target_bandwidth < 0) @@ -1890,7 +1970,14 @@ void vp8_second_pass(VP8_COMP *cpi) cpi->ni_av_qi = cpi->worst_quality; } } - else + // The last few frames of a clip almost always have to few or too many + // bits and for the sake of over exact rate control we dont want to make + // radical adjustments to the allowed quantizer range just to use up a + // few surplus bits or get beneath the target rate. + else if ( (cpi->common.current_video_frame < + (((unsigned int)cpi->total_stats->count * 255)>>8)) && + ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < + (unsigned int)cpi->total_stats->count) ) { if (frames_left < 1) frames_left = 1; @@ -2025,6 +2112,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) double kf_group_coded_err = 0.0; double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); + vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean + vp8_clear_system_state(); //__asm emms; start_position = cpi->stats_in; @@ -2041,7 +2130,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Take a copy of the initial frame details vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame)); - cpi->kf_group_bits = 0; // Estimate of total bits avaialable to kf group + cpi->kf_group_bits = 0; // Total bits avaialable to kf group cpi->kf_group_error_left = 0; // Group modified error score. 
kf_mod_err = calculate_modified_err(cpi, this_frame); @@ -2057,33 +2146,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) kf_group_intra_err += this_frame->intra_error; kf_group_coded_err += this_frame->coded_error; + // load a the next frame's stats vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame)); + vp8_input_stats(cpi, this_frame); // Provided that we are not at the end of the file... - if (EOF != vp8_input_stats(cpi, this_frame)) + if (cpi->oxcf.auto_key + && lookup_next_frame_stats(cpi, &next_frame) != EOF) { - if (lookup_next_frame_stats(cpi, &next_frame) != EOF) - { - if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) - break; - } - } + if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) + break; - // Step on to the next frame - cpi->frames_to_key ++; - - // If we don't have a real key frame within the next two - // forcekeyframeevery intervals then break out of the loop. - if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency) - break; + // Step on to the next frame + cpi->frames_to_key ++; + // If we don't have a real key frame within the next two + // forcekeyframeevery intervals then break out of the loop. + if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency) + break; + } else + cpi->frames_to_key ++; } // If there is a max kf interval set by the user we must obey it. // We already breakout of the loop above at 2x max. // This code centers the extra kf if the actual natural // interval is between 1x and 2x - if ( cpi->frames_to_key > (int)cpi->key_frame_frequency ) + if (cpi->oxcf.auto_key + && cpi->frames_to_key > (int)cpi->key_frame_frequency ) { cpi->frames_to_key /= 2; @@ -2108,39 +2198,64 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) // Calculate the number of bits that should be assigned to the kf group. 
if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0)) { - int max_bits = frame_max_bits(cpi); // Max for a single normal frame (not key frame) + // Max for a single normal frame (not key frame) + int max_bits = frame_max_bits(cpi); - // Default allocation based on bits left and relative complexity of the section - cpi->kf_group_bits = (int)(cpi->bits_left * (kf_group_err / cpi->modified_total_error_left)); + // Maximum bits for the kf group + long long max_grp_bits; + + // Default allocation based on bits left and relative + // complexity of the section + cpi->kf_group_bits = (long long)( cpi->bits_left * + ( kf_group_err / + cpi->modified_total_error_left )); // Clip based on maximum per frame rate defined by the user. - if (cpi->kf_group_bits > max_bits * cpi->frames_to_key) - cpi->kf_group_bits = max_bits * cpi->frames_to_key; + max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key; + if (cpi->kf_group_bits > max_grp_bits) + cpi->kf_group_bits = max_grp_bits; // Additional special case for CBR if buffer is getting full. if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { - // If the buffer is near or above the optimal and this kf group is not being allocated much - // then increase the allocation a bit. - if (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) + int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; + int buffer_lvl = cpi->buffer_level; + + // If the buffer is near or above the optimal and this kf group is + // not being allocated much then increase the allocation a bit. + if (buffer_lvl >= opt_buffer_lvl) { - int high_water_mark = (cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1; - int min_group_bits; + int high_water_mark = (opt_buffer_lvl + + cpi->oxcf.maximum_buffer_size) >> 1; + + long long av_group_bits; + + // Av bits per frame * number of frames + av_group_bits = (long long)cpi->av_per_frame_bandwidth * + (long long)cpi->frames_to_key; // We are at or above the maximum. 
if (cpi->buffer_level >= high_water_mark) { - min_group_bits = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) + (cpi->buffer_level - high_water_mark); + long long min_group_bits; + + min_group_bits = av_group_bits + + (long long)(buffer_lvl - + high_water_mark); if (cpi->kf_group_bits < min_group_bits) cpi->kf_group_bits = min_group_bits; } // We are above optimal but below the maximum - else if (cpi->kf_group_bits < (cpi->av_per_frame_bandwidth * cpi->frames_to_key)) + else if (cpi->kf_group_bits < av_group_bits) { - int bits_below_av = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) - cpi->kf_group_bits; - cpi->kf_group_bits += (int)((double)bits_below_av * (double)(cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / - (double)(high_water_mark - cpi->oxcf.optimal_buffer_level)); + long long bits_below_av = av_group_bits - + cpi->kf_group_bits; + + cpi->kf_group_bits += + (long long)((double)bits_below_av * + (double)(buffer_lvl - opt_buffer_lvl) / + (double)(high_water_mark - opt_buffer_lvl)); } } } @@ -2159,6 +2274,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) for (i = 0 ; i < cpi->frames_to_key ; i++) { double r; + double motion_decay; + double motion_pct = next_frame.pcnt_motion; if (EOF == vp8_input_stats(cpi, &next_frame)) break; @@ -2172,10 +2289,30 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) //if ( next_frame.pcnt_inter < loop_decay_rate ) loop_decay_rate = next_frame.pcnt_inter; - if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate) - loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0)); + // High % motion -> somewhat higher decay rate + motion_decay = (1.0 - (motion_pct / 20.0)); + if (motion_decay < loop_decay_rate) + loop_decay_rate = motion_decay; + + // Adjustment to decay rate based on speed of motion + { + double this_mv_rabs; + double this_mv_cabs; + double distance_factor; + + this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct); + this_mv_cabs = 
fabs(next_frame.mvc_abs * motion_pct); + + distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + + (this_mv_cabs * this_mv_cabs)) / 250.0; + distance_factor = ((distance_factor > 1.0) + ? 0.0 : (1.0 - distance_factor)); + if (distance_factor < loop_decay_rate) + loop_decay_rate = distance_factor; + } decay_accumulator = decay_accumulator * loop_decay_rate; + decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; boost_score += (decay_accumulator * r); @@ -2204,17 +2341,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) vp8_avg_stats(&sectionstats); - if (sectionstats.pcnt_motion < .17) - cpi->section_is_low_motion = 1; - else - cpi->section_is_low_motion = 0; - - if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45) - cpi->section_is_fast_motion = 1; - else - cpi->section_is_fast_motion = 0; - - cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); + cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) ) @@ -2266,7 +2393,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) int allocation_chunks; int Counter = cpi->frames_to_key; int alt_kf_bits; - + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; // Min boost based on kf interval #if 0 @@ -2286,10 +2413,10 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } // bigger frame sizes need larger kf boosts, smaller frames smaller boosts... 
- if ((cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) > (320 * 240)) - kf_boost += 2 * (cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) / (320 * 240); - else if ((cpi->common.last_frame.y_width * cpi->common.last_frame.y_height) < (320 * 240)) - kf_boost -= 4 * (320 * 240) / (cpi->common.last_frame.y_width * cpi->common.last_frame.y_height); + if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240)) + kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240); + else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240)) + kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height); kf_boost = (int)((double)kf_boost * 100.0) >> 4; // Scale 16 to 100 @@ -2325,23 +2452,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->kf_bits = (3 * cpi->buffer_level) >> 2; } - // If the key frame is actually easier than the average for the kf group (which does sometimes happen... eg a blank intro frame) - // Then use an alternate calculation based on the kf error score which should give a smaller key frame. + // If the key frame is actually easier than the average for the + // kf group (which does sometimes happen... eg a blank intro frame) + // Then use an alternate calculation based on the kf error score + // which should give a smaller key frame. 
if (kf_mod_err < kf_group_err / cpi->frames_to_key) { - double alt_kf_grp_bits = ((double)cpi->bits_left * (kf_mod_err * (double)cpi->frames_to_key) / cpi->modified_total_error_left) ; + double alt_kf_grp_bits = + ((double)cpi->bits_left * + (kf_mod_err * (double)cpi->frames_to_key) / + DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)); - alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); + alt_kf_bits = (int)((double)kf_boost * + (alt_kf_grp_bits / (double)allocation_chunks)); if (cpi->kf_bits > alt_kf_bits) { cpi->kf_bits = alt_kf_bits; } } - // Else if it is much harder than other frames in the group make sure it at least receives an allocation in keeping with its relative error score + // Else if it is much harder than other frames in the group make sure + // it at least receives an allocation in keeping with its relative + // error score else { - alt_kf_bits = (int)((double)cpi->bits_left * (kf_mod_err / cpi->modified_total_error_left)); + alt_kf_bits = + (int)((double)cpi->bits_left * + (kf_mod_err / + DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left))); if (alt_kf_bits > cpi->kf_bits) { @@ -2391,7 +2529,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) cpi->common.vert_scale = NORMAL; // Calculate Average bits per frame. 
- //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats.count - cpi->common.current_video_frame); + //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats->count - cpi->common.current_video_frame); av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate); //if ( av_bits_per_frame < 0.0 ) // av_bits_per_frame = 0.0 @@ -2435,7 +2573,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (0) { FILE *f = fopen("Subsamle.stt", "a"); - fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n", cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width); + fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n", cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width); fclose(f); } @@ -2454,7 +2592,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) } else { - long long clip_bits = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); + long long clip_bits = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate)); long long over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; long long over_spend2 = cpi->oxcf.starting_buffer_level - projected_buffer_level; @@ -2493,7 +2631,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) if (0) { FILE *f = fopen("Subsamle.stt", "a"); - fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n", kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width); + fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n", kf_q, 
cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width); fclose(f); } } diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h index d7b52f3f3..95e1e5463 100644 --- a/vp8/encoder/firstpass.h +++ b/vp8/encoder/firstpass.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -19,4 +20,5 @@ extern void vp8_init_second_pass(VP8_COMP *cpi); extern void vp8_second_pass(VP8_COMP *cpi); extern void vp8_end_second_pass(VP8_COMP *cpi); +extern size_t vp8_firstpass_stats_sz(unsigned int mb_count); #endif diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 52aab6642..824af5e46 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,6 +15,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi); +void vp8_arch_arm_encoder_init(VP8_COMP *cpi); void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d); @@ -38,6 +40,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c; cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c; + cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c; + cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c; + cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c; + cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c; + cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c; + cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c; cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c; cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c; @@ -55,6 +63,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; @@ -67,8 +78,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_c; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_c; + cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; + cpi->rtcd.fdct.fast8x4 = 
vp8_short_fdct8x4_c; cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; cpi->rtcd.encodemb.berr = vp8_block_error_c; @@ -93,4 +104,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) vp8_arch_x86_encoder_init(cpi); #endif +#if ARCH_ARM + vp8_arch_arm_encoder_init(cpi); +#endif + } diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index 2a2de3d0a..bb85afa6f 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -185,7 +186,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc -#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. +#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. 
#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best @@ -194,7 +195,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; } -int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) +int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) { unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; unsigned char *z = (*(b->base_src) + b->src); @@ -219,7 +220,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, bestmv->col <<= 3; // calculate central point error - besterr = vf(y, d->pre_stride, z, b->src_stride, &sse); + besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected) @@ -308,7 +309,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #undef CHECK_BETTER #undef MIN #undef MAX -int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) +int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) { int bestmse = INT_MAX; MV 
startmv; @@ -335,13 +336,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, startmv = *bestmv; // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); + bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // go left then right and check error this_mv.row = startmv.row; this_mv.col = ((startmv.col - 8) | 4); - left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse); + left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) @@ -351,7 +352,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, } this_mv.col += 8; - right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse); + right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) @@ -363,7 +364,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, // go up then down and check error this_mv.col = startmv.col; this_mv.row = ((startmv.row - 8) | 4); - up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse); + up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) @@ -373,7 +374,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, } this_mv.row += 8; - down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse); + down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse); down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) @@ -385,10 +386,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, // now check 1 more diagonal whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); - // whichdir must be 0-4. Therefore, one of the cases below - // must run through. However, because there is no default - // and diag is not set elsewhere, we get a compile warning - diag = 0; //for(whichdir =0;whichdir<4;whichdir++) //{ this_mv = startmv; @@ -398,22 +395,22 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, case 0: this_mv.col = (this_mv.col - 8) | 4; this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); break; case 1: this_mv.col += 4; this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); break; case 2: this_mv.col = (this_mv.col - 8) | 4; this_mv.row += 4; - diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); break; case 3: this_mv.col += 4; this_mv.row += 4; - diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); break; } @@ -445,12 +442,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col = startmv.col - 2; - left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); + left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse); } left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); @@ -462,7 +459,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, } 
this_mv.col += 4; - right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) @@ -477,12 +474,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.row & 7) { this_mv.row = startmv.row - 2; - up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.row = (startmv.row - 8) | 6; - up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); @@ -494,7 +491,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, } this_mv.row += 4; - down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) @@ -522,12 +519,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; + diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; } } else @@ -537,12 +534,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { 
this_mv.col -= 2; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); + diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse); } } @@ -553,12 +550,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.row & 7) { this_mv.row -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.row = (startmv.row - 8) | 6; - diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); + diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse); } break; @@ -568,19 +565,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, if (startmv.col & 7) { this_mv.col -= 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.col = (startmv.col - 8) | 6; - diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; + diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);; } break; case 3: this_mv.col += 2; this_mv.row += 2; - diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); + diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse); break; } @@ -597,7 +594,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, return bestmse; } -int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV 
*ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) +int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) { int bestmse = INT_MAX; MV startmv; @@ -622,13 +619,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm startmv = *bestmv; // calculate central point error - bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse); + bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); // go left then right and check error this_mv.row = startmv.row; this_mv.col = ((startmv.col - 8) | 4); - left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse); + left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse); left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) @@ -638,7 +635,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm } this_mv.col += 8; - right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse); + right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse); right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) @@ -650,7 +647,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm // go up then down and check error this_mv.col = startmv.col; this_mv.row = ((startmv.row - 8) | 4); - up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse); + up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) @@ -660,7 +657,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm } this_mv.row += 8; - down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse); + down = vfp->svf_halfpix_v(y, d->pre_stride, z, 
b->src_stride, &sse); down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) @@ -680,22 +677,22 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm case 0: this_mv.col = (this_mv.col - 8) | 4; this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); break; case 1: this_mv.col += 4; this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); break; case 2: this_mv.col = (this_mv.col - 8) | 4; this_mv.row += 4; - diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); break; case 3: this_mv.col += 4; this_mv.row += 4; - diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); break; } @@ -710,7 +707,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm #else this_mv.col = (this_mv.col - 8) | 4; this_mv.row = (this_mv.row - 8) | 4; - diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) @@ -720,7 +717,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm } this_mv.col += 8; - diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse); diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) @@ -731,7 +728,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV 
*bestm this_mv.col = (this_mv.col - 8) | 4; this_mv.row = startmv.row + 4; - diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse); diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) @@ -741,7 +738,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm } this_mv.col += 8; - diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse); + diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse); diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) @@ -757,10 +754,18 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c) #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector -#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. +#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score. 
#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best - +static const MV next_chkpts[6][3] = +{ + {{ -2, 0}, { -1, -2}, {1, -2}}, + {{ -1, -2}, {1, -2}, {2, 0}}, + {{1, -2}, {2, 0}, {1, 2}}, + {{2, 0}, {1, 2}, { -1, 2}}, + {{1, 2}, { -1, 2}, { -2, 0}}, + {{ -1, 2}, { -2, 0}, { -1, -2}} +}; int vp8_hex_search ( MACROBLOCK *x, @@ -771,44 +776,72 @@ int vp8_hex_search int search_param, int error_per_bit, int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, + const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], int *mvcost[2] ) { - MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ; + MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ; MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ; int i, j; unsigned char *src = (*(b->base_src) + b->src); int src_stride = b->src_stride; - int rr = ref_mv->row, rc = ref_mv->col, br = rr, bc = rc, tr, tc; + int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc; unsigned int besterr, thiserr = 0x7fffffff; + int k = -1, tk; - if (rc < x->mv_col_min) bc = x->mv_col_min; + if (bc < x->mv_col_min) bc = x->mv_col_min; - if (rc > x->mv_col_max) bc = x->mv_col_max; + if (bc > x->mv_col_max) bc = x->mv_col_max; - if (rr < x->mv_row_min) br = x->mv_row_min; + if (br < x->mv_row_min) br = x->mv_row_min; - if (rr > x->mv_row_max) br = x->mv_row_max; + if (br > x->mv_row_max) br = x->mv_row_max; rr >>= 1; rc >>= 1; - br >>= 3; - bc >>= 3; besterr = ERR(br, bc, thiserr); - // hex search jbb changed to 127 to avoid max 256 problem steping by 2. 
- for (j = 0; j < 127; j++) + // hex search + //j=0 + tr = br; + tc = bc; + + for (i = 0; i < 6; i++) + { + int nr = tr + hex[i].row, nc = tc + hex[i].col; + + if (nc < x->mv_col_min) continue; + + if (nc > x->mv_col_max) continue; + + if (nr < x->mv_row_min) continue; + + if (nr > x->mv_row_max) continue; + + //CHECK_BETTER(thiserr,nr,nc); + if ((thiserr = ERR(nr, nc, besterr)) < besterr) + { + besterr = thiserr; + br = nr; + bc = nc; + k = i; + } + } + + if (tr == br && tc == bc) + goto cal_neighbors; + + for (j = 1; j < 127; j++) { tr = br; tc = bc; + tk = k; - for (i = 0; i < 6; i++) + for (i = 0; i < 3; i++) { - int nr = tr + hex[i].row, nc = tc + hex[i].col; + int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col; if (nc < x->mv_col_min) continue; @@ -818,7 +851,17 @@ int vp8_hex_search if (nr > x->mv_row_max) continue; - CHECK_BETTER(thiserr, nr, nc); + //CHECK_BETTER(thiserr,nr,nc); + if ((thiserr = ERR(nr, nc, besterr)) < besterr) + { + besterr = thiserr; + br = nr; + bc = nc; //k=(tk+5+i)%6;} + k = tk + 5 + i; + + if (k >= 12) k -= 12; + else if (k >= 6) k -= 6; + } } if (tr == br && tc == bc) @@ -826,6 +869,7 @@ int vp8_hex_search } // check 8 1 away neighbors +cal_neighbors: tr = br; tc = bc; @@ -847,7 +891,7 @@ int vp8_hex_search best_mv->row = br; best_mv->col = bc; - return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; + return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ; } #undef MVC #undef PRE @@ -855,6 +899,8 @@ int vp8_hex_search #undef DIST #undef ERR #undef CHECK_BETTER + + int vp8_diamond_search_sad ( MACROBLOCK *x, @@ -996,7 +1042,7 @@ int vp8_diamond_search_sadx4 int tot_steps; MV this_mv; - unsigned int bestsad = UINT_MAX; + int bestsad = INT_MAX; int best_site = 0; int last_site = 0; @@ -1034,44 +1080,63 @@ int vp8_diamond_search_sadx4 for (step = 0; step < tot_steps ; step++) { - int check_row_min, check_col_min, check_row_max, check_col_max; + int all_in = 1, 
t; - check_row_min = x->mv_row_min - best_mv->row; - check_row_max = x->mv_row_max - best_mv->row; - check_col_min = x->mv_col_min - best_mv->col; - check_col_max = x->mv_col_max - best_mv->col; + // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of + // checking 4 bounds for each points. + all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min); + all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max); + all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min); + all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max); - for (j = 0 ; j < x->searches_per_step ; j += 4) + if (all_in) { - unsigned char *block_offset[4]; - unsigned int valid_block[4]; - int all_in = 1, t; + unsigned int sad_array[4]; - for (t = 0; t < 4; t++) + for (j = 0 ; j < x->searches_per_step ; j += 4) { - valid_block [t] = (ss[t+i].mv.col > check_col_min); - valid_block [t] &= (ss[t+i].mv.col < check_col_max); - valid_block [t] &= (ss[t+i].mv.row > check_row_min); - valid_block [t] &= (ss[t+i].mv.row < check_row_max); + unsigned char *block_offset[4]; - all_in &= valid_block[t]; - block_offset[t] = ss[i+t].offset + best_address; - } - - if (all_in) - { - unsigned int sad_array[4]; + for (t = 0; t < 4; t++) + block_offset[t] = ss[i+t].offset + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { - thissad = sad_array[t]; - - if (thissad < bestsad) + if (sad_array[t] < bestsad) { this_mv.row = (best_mv->row + ss[i].mv.row) << 3; this_mv.col = (best_mv->col + ss[i].mv.col) << 3; + sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + + if (sad_array[t] < bestsad) + { + bestsad = sad_array[t]; + best_site = i; + } + } + } + } + } + else + { + for (j = 0 ; j < x->searches_per_step ; j++) + { + // Trap illegal vectors + this_row_offset = best_mv->row + ss[i].mv.row; + this_col_offset = best_mv->col + ss[i].mv.col; + + if ((this_col_offset > 
x->mv_col_min) && (this_col_offset < x->mv_col_max) && + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) + { + check_here = ss[i].offset + best_address; + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.row = this_row_offset << 3; + this_mv.col = this_col_offset << 3; thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); if (thissad < bestsad) @@ -1081,37 +1146,7 @@ int vp8_diamond_search_sadx4 } } } - } - else - { - int t; - - for (t = 0; t < 4; i++, t++) - { - // Trap illegal vectors - if (valid_block[t]) - - { - check_here = block_offset[t]; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); - - if (thissad < bestsad) - { - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } - } - } - } + i++; } } @@ -1137,6 +1172,7 @@ int vp8_diamond_search_sadx4 } +#if !(CONFIG_REALTIME_ONLY) int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) { unsigned char *what = (*(b->base_src) + b->src); @@ -1237,7 +1273,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er unsigned char *bestaddress; MV *best_mv = &d->bmi.mv.as_mv; MV this_mv; - unsigned int bestsad = UINT_MAX; + int bestsad = INT_MAX; int r, c; unsigned char *check_here; @@ -1287,7 +1323,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er check_here = r * mv_stride + in_what + col_min; c = col_min; - while ((c + 3) < col_max) + while ((c + 2) < col_max) { int i; @@ -1349,6 +1385,160 @@ int vp8_full_search_sadx3(MACROBLOCK *x, 
BLOCK *b, BLOCKD *d, MV *ref_mv, int er else return INT_MAX; } +#endif + + +int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2]) +{ + unsigned char *what = (*(b->base_src) + b->src); + int what_stride = b->src_stride; + unsigned char *in_what; + int in_what_stride = d->pre_stride; + int mv_stride = d->pre_stride; + unsigned char *bestaddress; + MV *best_mv = &d->bmi.mv.as_mv; + MV this_mv; + int bestsad = INT_MAX; + int r, c; + + unsigned char *check_here; + unsigned int thissad; + + int ref_row = ref_mv->row >> 3; + int ref_col = ref_mv->col >> 3; + + int row_min = ref_row - distance; + int row_max = ref_row + distance; + int col_min = ref_col - distance; + int col_max = ref_col + distance; + + unsigned short sad_array8[8]; + unsigned int sad_array[3]; + + // Work out the mid point for the search + in_what = *(d->base_pre) + d->pre; + bestaddress = in_what + (ref_row * d->pre_stride) + ref_col; + + best_mv->row = ref_row; + best_mv->col = ref_col; + + // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits + if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) && + (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max)) + { + // Baseline value at the centre + bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit); + } + + // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border + if (col_min < x->mv_col_min) + col_min = x->mv_col_min; + + if (col_max > x->mv_col_max) + col_max = x->mv_col_max; + + if (row_min < x->mv_row_min) + row_min = x->mv_row_min; + + if (row_max > x->mv_row_max) + row_max = x->mv_row_max; + + for (r = row_min; r < row_max ; r++) + { + this_mv.row = r << 3; + check_here = r * mv_stride + in_what + col_min; + c = col_min; + + while ((c + 7) 
< col_max) + { + int i; + + fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8); + + for (i = 0; i < 8; i++) + { + thissad = (unsigned int)sad_array8[i]; + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here++; + c++; + } + } + + while ((c + 2) < col_max) + { + int i; + + fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array); + + for (i = 0; i < 3; i++) + { + thissad = sad_array[i]; + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here++; + c++; + } + } + + while (c < col_max) + { + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + + if (thissad < bestsad) + { + this_mv.col = c << 3; + thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + + if (thissad < bestsad) + { + bestsad = thissad; + best_mv->row = r; + best_mv->col = c; + bestaddress = check_here; + } + } + + check_here ++; + c ++; + } + } + + this_mv.row = best_mv->row << 3; + this_mv.col = best_mv->col << 3; + + if (bestsad < INT_MAX) + return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad)) + + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); + else + return INT_MAX; +} #ifdef ENTROPY_STATS void print_mode_context(void) diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 921206fec..7d6036248 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -41,14 +42,15 @@ extern int vp8_hex_search int search_param, int error_per_bit, int *num00, - vp8_variance_fn_t vf, - vp8_sad_fn_t sf, + const vp8_variance_fn_ptr_t *vf, int *mvsadcost[2], int *mvcost[2] ); -typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]); +typedef int (fractional_mv_step_fp) + (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, + int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]); extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; extern fractional_mv_step_fp vp8_find_best_sub_pixel_step; extern fractional_mv_step_fp vp8_find_best_half_pixel_step; @@ -91,6 +93,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step; typedef prototype_full_search_sad(*vp8_full_search_fn_t); extern prototype_full_search_sad(vp8_full_search_sad); extern prototype_full_search_sad(vp8_full_search_sadx3); +extern prototype_full_search_sad(vp8_full_search_sadx8); typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t); extern prototype_diamond_search_sad(vp8_diamond_search_sad); diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c index 73170cf52..d23c97e6e 100644 --- a/vp8/encoder/modecosts.c +++ b/vp8/encoder/modecosts.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project 
authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h index 5ade26566..99ef119d5 100644 --- a/vp8/encoder/modecosts.h +++ b/vp8/encoder/modecosts.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 9bc7f523a..9dbeeb26f 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -20,7 +21,7 @@ #include "extend.h" #include "ratectrl.h" #include "quant_common.h" -#include "segmentation_common.h" +#include "segmentation.h" #include "g_common.h" #include "vpx_scale/yv12extend.h" #include "postproc.h" @@ -28,6 +29,12 @@ #include "swapyv12buffer.h" #include "threading.h" #include "vpx_ports/vpx_timer.h" +#include "vpxerrors.h" +#include "temporal_filter.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif + #include #include #include @@ -67,7 +74,7 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); -static void mode_ref_lf_test_function(VP8_COMP *cpi); +static void set_default_lf_deltas(VP8_COMP *cpi); extern const int vp8_gf_interval_table[101]; @@ -136,8 +143,6 @@ extern unsigned int inter_b_modes[15]; extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch); -extern void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); -extern void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; @@ -146,6 +151,95 @@ extern const int qzbin_factors[129]; extern void vp8cx_init_quantizer(VP8_COMP *cpi); extern const int vp8cx_base_skip_false_prob[128]; +// Tables 
relating active max Q to active min Q +static const int kf_low_motion_minq[QINDEX_RANGE] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, + 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10,10, + 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, + 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, + 27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38, +}; +static const int kf_high_motion_minq[QINDEX_RANGE] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10, + 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, + 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, + 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34, + 35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48, +}; +/*static const int kf_minq[QINDEX_RANGE] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14, + 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22, + 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30, + 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38 +};*/ +static const int gf_low_motion_minq[QINDEX_RANGE] = +{ + 0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2, + 3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6, + 7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10, + 11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18, + 19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26, + 27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34, + 35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42, + 43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58 +}; +static const int gf_mid_motion_minq[QINDEX_RANGE] = +{ + 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4, + 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, + 
9,10,10,10,10,11,11,11,12,12,12,12,13,13,13,14, + 14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21, + 22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29, + 30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37, + 38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48, + 49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64, +}; +static const int gf_high_motion_minq[QINDEX_RANGE] = +{ + 0,0,0,0,1,1,1,1,1,2,2,2,3,3,3,4, + 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, + 9,10,10,10,11,11,12,12,13,13,14,14,15,15,16,16, + 17,17,18,18,19,19,20,20,21,21,22,22,23,23,24,24, + 25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32, + 33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40, + 41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54, + 55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80, +}; +/*static const int gf_arf_minq[QINDEX_RANGE] = +{ + 0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4, + 4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9, + 9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14, + 15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22, + 23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30, + 31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39, + 39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50, + 51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66 +};*/ +static const int inter_minq[QINDEX_RANGE] = +{ + 0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7, + 8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16, + 16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23, + 23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34, + 35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47, + 47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58, + 59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69, + 69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81, +}; void vp8_initialize() { @@ -179,9 +273,10 @@ static void setup_features(VP8_COMP *cpi) cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); + vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); + 
vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); - // jbb trial ! - mode_ref_lf_test_function(cpi); + set_default_lf_deltas(cpi); } @@ -225,6 +320,19 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi) vpx_free(cpi->tok); cpi->tok = 0; + // Structure used to minitor GF useage + if (cpi->gf_active_flags != 0) + vpx_free(cpi->gf_active_flags); + + cpi->gf_active_flags = 0; + + if(cpi->mb.pip) + vpx_free(cpi->mb.pip); + + cpi->mb.pip = 0; + + vpx_free(cpi->total_stats); + vpx_free(cpi->this_frame_stats); } static void enable_segmentation(VP8_PTR ptr) @@ -425,7 +533,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) } -static void mode_ref_lf_test_function(VP8_COMP *cpi) +static void set_default_lf_deltas(VP8_COMP *cpi) { cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1; cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; @@ -452,6 +560,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) int Speed = cpi->Speed; int i; VP8_COMMON *cm = &cpi->common; + int last_improved_quant = sf->improved_quant; // Initialise default mode frequency sampling variables for (i = 0; i < MAX_MODES; i ++) @@ -541,7 +650,8 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEWG ] = INT_MAX; sf->thresh_mult[THR_SPLITG ] = INT_MAX; } - else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) + + if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) { sf->thresh_mult[THR_NEARESTA ] = INT_MAX; sf->thresh_mult[THR_ZEROA ] = INT_MAX; @@ -553,7 +663,6 @@ void vp8_set_speed_features(VP8_COMP *cpi) break; case 1: case 3: - sf->optimize_coefficients = 0; sf->thresh_mult[THR_NEARESTMV] = 0; sf->thresh_mult[THR_ZEROMV ] = 0; sf->thresh_mult[THR_DC ] = 0; @@ -593,7 +702,8 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEARMV ] = INT_MAX; sf->thresh_mult[THR_SPLITMV ] = INT_MAX; } - else if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) + + if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) { sf->thresh_mult[THR_NEARESTG ] = INT_MAX; 
sf->thresh_mult[THR_ZEROG ] = INT_MAX; @@ -601,7 +711,8 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEWG ] = INT_MAX; sf->thresh_mult[THR_SPLITG ] = INT_MAX; } - else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) + + if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) { sf->thresh_mult[THR_NEARESTA ] = INT_MAX; sf->thresh_mult[THR_ZEROA ] = INT_MAX; @@ -612,6 +723,9 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (Speed > 0) { + // Disable coefficient optimization above speed 0 + sf->optimize_coefficients = 0; + cpi->mode_check_freq[THR_SPLITG] = 4; cpi->mode_check_freq[THR_SPLITA] = 4; cpi->mode_check_freq[THR_SPLITMV] = 2; @@ -759,7 +873,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) cpi->mode_check_freq[THR_NEWA] = 4; } - if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_GOLD_FLAG) + if (cpi->ref_frame_flags & VP8_GOLD_FLAG) { sf->thresh_mult[THR_NEARESTG ] = 2000; sf->thresh_mult[THR_ZEROG ] = 2000; @@ -767,7 +881,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_NEWG ] = 4000; } - if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_ALT_FLAG) + if (cpi->ref_frame_flags & VP8_ALT_FLAG) { sf->thresh_mult[THR_NEARESTA ] = 2000; sf->thresh_mult[THR_ZEROA ] = 2000; @@ -807,7 +921,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->full_freq[1] = 31; sf->search_method = NSTEP; - if (!cpi->ref_frame_flags & VP8_LAST_FLAG) + if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) { sf->thresh_mult[THR_NEWMV ] = INT_MAX; sf->thresh_mult[THR_NEARESTMV] = INT_MAX; @@ -816,7 +930,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITMV ] = INT_MAX; } - if (!cpi->ref_frame_flags & VP8_GOLD_FLAG) + if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) { sf->thresh_mult[THR_NEARESTG ] = INT_MAX; sf->thresh_mult[THR_ZEROG ] = INT_MAX; @@ -825,7 +939,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) sf->thresh_mult[THR_SPLITG ] = INT_MAX; } - if (!cpi->ref_frame_flags & VP8_ALT_FLAG) + if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) { 
sf->thresh_mult[THR_NEARESTA ] = INT_MAX; sf->thresh_mult[THR_ZEROA ] = INT_MAX; @@ -1118,41 +1232,36 @@ void vp8_set_speed_features(VP8_COMP *cpi) if (cpi->sf.search_method == NSTEP) { - vp8_init3smotion_compensation(&cpi->mb, cm->last_frame.y_stride); + vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride); } else if (cpi->sf.search_method == DIAMOND) { - vp8_init_dsmotion_compensation(&cpi->mb, cm->last_frame.y_stride); + vp8_init_dsmotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride); } if (cpi->sf.improved_dct) { cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); - cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4); - cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); } else { cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); - cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4); - cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4); } - cpi->mb.vp8_short_fdct4x4_ptr = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4); cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4); if (cpi->sf.improved_quant) { cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb); - cpi->mb.quantize_brd = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb); } else { cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb); - cpi->mb.quantize_brd = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb); } + if (cpi->sf.improved_quant != last_improved_quant) + vp8cx_init_quantizer(cpi); #if CONFIG_RUNTIME_CPU_DETECT cpi->mb.e_mbd.rtcd = &cpi->common.rtcd; @@ -1176,7 +1285,7 @@ void vp8_set_speed_features(VP8_COMP *cpi) } if (cpi->sf.optimize_coefficients == 1) - cpi->mb.optimize = 1; + cpi->mb.optimize = 1 + cpi->is_next_src_alt_ref; else cpi->mb.optimize = 0; @@ -1217,6 +1326,20 @@ static void 
alloc_raw_frame_buffers(VP8_COMP *cpi) cpi->source_buffer_count = 0; } + +static int vp8_alloc_partition_data(VP8_COMP *cpi) +{ + cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) * + (cpi->common.mb_rows + 1), + sizeof(PARTITION_INFO)); + if(!cpi->mb.pip) + return ALLOC_FAILURE; + + cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1; + + return 0; +} + void vp8_alloc_compressor_data(VP8_COMP *cpi) { VP8_COMMON *cm = & cpi->common; @@ -1228,6 +1351,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); + if (vp8_alloc_partition_data(cpi)) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate partition data"); + + if ((width & 0xf) != 0) width += 16 - (width & 0xf); @@ -1258,6 +1386,21 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) cpi->inter_zz_count = 0; cpi->gf_bad_count = 0; cpi->gf_update_recommended = 0; + + + // Structures used to minitor GF usage + if (cpi->gf_active_flags != 0) + vpx_free(cpi->gf_active_flags); + + CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols)); + + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; + + cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs)); + cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs)); + if(!cpi->total_stats || !cpi->this_frame_stats) + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate firstpass stats"); } @@ -1286,16 +1429,14 @@ int vp8_reverse_trans(int x) }; void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) { + if(framerate < .1) + framerate = 30; + cpi->oxcf.frame_rate = framerate; cpi->output_frame_rate = cpi->oxcf.frame_rate; cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->av_per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate); cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth 
* cpi->oxcf.two_pass_vbrmin_section / 100); - cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; - cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; - - cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; - cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2; //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1; @@ -1305,14 +1446,26 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate) cpi->max_gf_interval = 12; - // Special conditions when altr ref frame enabled - if (cpi->oxcf.play_alternate) + // Special conditions when altr ref frame enabled in lagged compress mode + if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1; } } + +static int +rescale(int val, int num, int denom) +{ + int64_t llnum = num; + int64_t llden = denom; + int64_t llval = val; + + return llval * llnum / llden; +} + + void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) { VP8_COMP *cpi = (VP8_COMP *)(ptr); @@ -1340,9 +1493,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->oxcf.worst_allowed_q = MAXQ; cpi->oxcf.end_usage = USAGE_STREAM_FROM_SERVER; - cpi->oxcf.starting_buffer_level = 4; - cpi->oxcf.optimal_buffer_level = 5; - cpi->oxcf.maximum_buffer_size = 6; + cpi->oxcf.starting_buffer_level = 4000; + cpi->oxcf.optimal_buffer_level = 5000; + cpi->oxcf.maximum_buffer_size = 6000; cpi->oxcf.under_shoot_pct = 90; cpi->oxcf.allow_df = 0; cpi->oxcf.drop_frames_water_mark = 20; @@ -1491,26 +1644,32 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) // local file playback mode == really big buffer if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { - cpi->oxcf.starting_buffer_level = 60; - cpi->oxcf.optimal_buffer_level = 60; - cpi->oxcf.maximum_buffer_size = 240; + cpi->oxcf.starting_buffer_level = 60000; + cpi->oxcf.optimal_buffer_level = 60000; + 
cpi->oxcf.maximum_buffer_size = 240000; } // Convert target bandwidth from Kbit/s to Bit/s cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level *= cpi->oxcf.target_bandwidth; + cpi->oxcf.starting_buffer_level = + rescale(cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth; + cpi->oxcf.optimal_buffer_level = + rescale(cpi->oxcf.optimal_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.maximum_buffer_size == 0) cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size *= cpi->oxcf.target_bandwidth; + cpi->oxcf.maximum_buffer_size = + rescale(cpi->oxcf.maximum_buffer_size, + cpi->oxcf.target_bandwidth, 1000); cpi->buffer_level = cpi->oxcf.starting_buffer_level; cpi->bits_off_target = cpi->oxcf.starting_buffer_level; @@ -1523,6 +1682,10 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? 
TRUE : FALSE; + cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; + cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; + cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; + cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; @@ -1566,9 +1729,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; } - if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width || - ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height || - cm->last_frame.y_width == 0) + if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width || + ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height || + cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { alloc_raw_frame_buffers(cpi); vp8_alloc_compressor_data(cpi); @@ -1595,16 +1758,10 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled. 
- if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS)) - { - cpi->oxcf.play_alternate = 0; - cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG; - } - // YX Temp cpi->last_alt_ref_sei = -1; cpi->is_src_frame_alt_ref = 0; + cpi->is_next_src_alt_ref = 0; #if 0 // Experimental RD Code @@ -1613,13 +1770,16 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf) #endif #if VP8_TEMPORAL_ALT_REF + + cpi->use_weighted_temporal_filter = 0; + { int i; cpi->fixed_divide[0] = 0; - for (i = 1; i < 255; i++) - cpi->fixed_divide[i] = 0x10000 / i; + for (i = 1; i < 512; i++) + cpi->fixed_divide[i] = 0x80000 / i; } #endif } @@ -1770,26 +1930,32 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) // local file playback mode == really big buffer if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { - cpi->oxcf.starting_buffer_level = 60; - cpi->oxcf.optimal_buffer_level = 60; - cpi->oxcf.maximum_buffer_size = 240; + cpi->oxcf.starting_buffer_level = 60000; + cpi->oxcf.optimal_buffer_level = 60000; + cpi->oxcf.maximum_buffer_size = 240000; } // Convert target bandwidth from Kbit/s to Bit/s cpi->oxcf.target_bandwidth *= 1000; - cpi->oxcf.starting_buffer_level *= cpi->oxcf.target_bandwidth; + cpi->oxcf.starting_buffer_level = + rescale(cpi->oxcf.starting_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.optimal_buffer_level == 0) cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth; + cpi->oxcf.optimal_buffer_level = + rescale(cpi->oxcf.optimal_buffer_level, + cpi->oxcf.target_bandwidth, 1000); if (cpi->oxcf.maximum_buffer_size == 0) cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; else - cpi->oxcf.maximum_buffer_size *= cpi->oxcf.target_bandwidth; + cpi->oxcf.maximum_buffer_size = + rescale(cpi->oxcf.maximum_buffer_size, + cpi->oxcf.target_bandwidth, 1000); cpi->buffer_level = cpi->oxcf.starting_buffer_level; 
cpi->bits_off_target = cpi->oxcf.starting_buffer_level; @@ -1802,6 +1968,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE; + cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; + cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; + cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; + cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; @@ -1845,9 +2015,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; } - if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width || - ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height || - cm->last_frame.y_width == 0) + if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width || + ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height || + cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { alloc_raw_frame_buffers(cpi); vp8_alloc_compressor_data(cpi); @@ -1874,16 +2044,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf) else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; - // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled. 
- if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS)) - { - cpi->oxcf.play_alternate = 0; - cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG; - } - // YX Temp cpi->last_alt_ref_sei = -1; cpi->is_src_frame_alt_ref = 0; + cpi->is_next_src_alt_ref = 0; #if 0 // Experimental RD Code @@ -1948,8 +2112,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA))); CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1)); - vp8_cmachine_specific_config(cpi); vp8_create_common(&cpi->common); + vp8_cmachine_specific_config(cpi); vp8_init_config((VP8_PTR)cpi, oxcf); @@ -1990,7 +2154,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) cpi->active_map_enabled = 0; // Create the first pass motion map structure and set to 0 - CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(cpi->common.MBs, 1)); + // Allocate space for maximum of 15 buffers + CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1)); #if 0 // Experimental code for lagged and one pass @@ -2031,19 +2196,11 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) //segmentation_test_function((VP8_PTR) cpi); - // Loop filter mode / ref deltas test function - //mode_ref_lf_test_function(cpi); - #ifdef ENTROPY_STATS init_context_counters(); #endif -#ifdef INTRARDOPT - cpi->intra_rd_opt = 1; - -#endif - cpi->frames_since_key = 8; // Give a sensible default for the first frame. 
cpi->key_frame_frequency = cpi->oxcf.key_freq; @@ -2144,10 +2301,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) } else if (cpi->pass == 2) { + size_t packet_sz = vp8_firstpass_stats_sz(cpi->common.MBs); + int packets = oxcf->two_pass_stats_in.sz / packet_sz; + cpi->stats_in = oxcf->two_pass_stats_in.buf; - cpi->stats_in_end = cpi->stats_in - + oxcf->two_pass_stats_in.sz / sizeof(FIRSTPASS_STATS) - - 1; + cpi->stats_in_end = (void*)((char *)cpi->stats_in + + (packets - 1) * packet_sz); vp8_init_second_pass(cpi); } @@ -2174,11 +2333,55 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) vp8cx_create_encoder_threads(cpi); - cpi->fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); - cpi->fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); - cpi->fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16); - cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); - cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); + cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); + cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); + cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16); + cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h); + cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v); + cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv); + cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); + cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8); + cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); + + cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); + cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8); + cpi->fn_ptr[BLOCK_16X8].svf = 
VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8); + cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL; + cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; + cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; + cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); + cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8); + cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); + + cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); + cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16); + cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16); + cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL; + cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; + cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; + cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); + cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8); + cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); + + cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); + cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8); + cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8); + cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL; + cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; + cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; + cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); + cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8); + cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d); + + cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); + cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4); + cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4); + cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL; 
+ cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; + cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; + cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); + cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); + cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); #if !(CONFIG_REALTIME_ONLY) cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); @@ -2242,7 +2445,8 @@ void vp8_remove_compressor(VP8_PTR *ptr) if (cpi->b_calculate_psnr) { - double samples = 3.0 / 2 * cpi->count * cpi->common.last_frame.y_width * cpi->common.last_frame.y_height; + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; + double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height; double total_psnr = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error); double total_psnr2 = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error2); double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); @@ -2371,6 +2575,7 @@ void vp8_remove_compressor(VP8_PTR *ptr) } fprintf(fmode, "};\n"); + fclose(fmode); } #endif @@ -2581,19 +2786,19 @@ int vp8_get_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF { VP8_COMP *cpi = (VP8_COMP *)(ptr); VP8_COMMON *cm = &cpi->common; + int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(&cm->last_frame, sd); - + ref_fb_idx = cm->lst_fb_idx; else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd); - + ref_fb_idx = cm->gld_fb_idx; else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd); - + ref_fb_idx = cm->alt_fb_idx; else return -1; + vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd); + return 0; } int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) @@ -2601,18 +2806,19 @@ int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF VP8_COMP *cpi = 
(VP8_COMP *)(ptr); VP8_COMMON *cm = &cpi->common; + int ref_fb_idx; + if (ref_frame_flag == VP8_LAST_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->last_frame); - + ref_fb_idx = cm->lst_fb_idx; else if (ref_frame_flag == VP8_GOLD_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame); - + ref_fb_idx = cm->gld_fb_idx; else if (ref_frame_flag == VP8_ALT_FLAG) - vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame); - + ref_fb_idx = cm->alt_fb_idx; else return -1; + vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]); + return 0; } int vp8_update_entropy(VP8_PTR comp, int update) @@ -2624,6 +2830,8 @@ int vp8_update_entropy(VP8_PTR comp, int update) return 0; } + +#if OUTPUT_YUV_SRC void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) { FILE *yuv_file = fopen(name, "ab"); @@ -2659,6 +2867,8 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s) fclose(yuv_file); } +#endif + static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { @@ -2687,14 +2897,25 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) #endif } // we may need to copy to a buffer so we can extend the image... 
- else if (cm->Width != cm->last_frame.y_width || - cm->Height != cm->last_frame.y_height) + else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width || + cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height) { //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); #if HAVE_ARMV7 - vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source); -#else - vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source); + } #endif cpi->Source = &cpi->scaled_source; @@ -2778,24 +2999,18 @@ static int pick_frame_size(VP8_COMP *cpi) cm->frame_type = KEY_FRAME; } - // Auto key frames (Only two pass will enter here) + // Special case for forced key frames + // The frame sizing here is still far from ideal for 2 pass. + else if (cm->frame_flags & FRAMEFLAGS_KEY) + { + cm->frame_type = KEY_FRAME; + resize_key_frame(cpi); + vp8_calc_iframe_target_size(cpi); + } else if (cm->frame_type == KEY_FRAME) { vp8_calc_auto_iframe_target_size(cpi); } - // Forced key frames (by interval or an external signal) - else if ((cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) - { - // Key frame from VFW/auto-keyframe/first frame - cm->frame_type = KEY_FRAME; - - resize_key_frame(cpi); - - // Compute target frame size - if (cpi->pass != 2) - vp8_calc_iframe_target_size(cpi); - } else { // INTER frame: compute target frame size @@ -2841,7 +3056,7 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi) VP8_COMMON *cm = &cpi->common; // Update the golden frame buffer - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); // Select an interval before next GF or altref 
if (!cpi->auto_gold) @@ -2861,8 +3076,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi) } // Update data structure that monitors level of reference to last GF - vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cm->gf_active_count = cm->mb_rows * cm->mb_cols; + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; // this frame refreshes means next frames don't unless specified by user cpi->common.frames_since_golden = 0; @@ -2883,7 +3098,7 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi) if (cm->refresh_golden_frame) { // Update the golden frame buffer - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); // Select an interval before next GF if (!cpi->auto_gold) @@ -2909,8 +3124,8 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi) } // Update data structure that monitors level of reference to last GF - vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cm->gf_active_count = cm->mb_rows * cm->mb_cols; + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; // this frame refreshes means next frames don't unless specified by user cm->refresh_golden_frame = 0; @@ -3216,290 +3431,13 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) #endif // return of 0 means drop frame -#if VP8_TEMPORAL_ALT_REF -static void vp8cx_temp_blur1_c +static void encode_frame_to_data_rate ( - unsigned char **frames, - int frame_count, - unsigned char *src, - unsigned char *dst, - int width, - int stride, - int height, - int strength, - int *fixed_divide, - unsigned char *motion_map_ptr, - unsigned char block_size + VP8_COMP *cpi, + unsigned long *size, + unsigned char *dest, + unsigned int *frame_flags ) -{ - int byte = 0; // Buffer offset for the current pixel value being filtered - int frame 
= 0; - int modifier = 0; - int i, j, k; - int block_ofset; - int Cols, Rows; - unsigned char Shift = (block_size == 16) ? 4 : 3; - - Cols = width / block_size; - Rows = height / block_size; - - for (i = 0; i < height; i++) - { - block_ofset = (i >> Shift) * Cols; - - for (j = 0; j < Cols; j ++) - { - if (motion_map_ptr[block_ofset] > 2) - { - vpx_memcpy(&dst[byte], &src[byte], block_size); - byte += block_size; - } - else - { - for (k = 0; k < block_size; k++) - { - int accumulator = 0; - int count = 0; - int src_byte = src[byte]; - - for (frame = 0; frame < frame_count; frame++) - { - // get current frame pixel value - int pixel_value = frames[frame][byte]; // int pixel_value = *frameptr; - - modifier = src_byte; // modifier = s[byte]; - modifier -= pixel_value; - modifier *= modifier; - modifier >>= strength; - modifier *= 3; - - if (modifier > 16) - modifier = 16; - - modifier = 16 - modifier; - - accumulator += modifier * pixel_value; - - count += modifier; - } - - accumulator += (count >> 1); - accumulator *= fixed_divide[count]; // accumulator *= ppi->fixed_divide[count]; - accumulator >>= 16; - - dst[byte] = accumulator; // d[byte] = accumulator; - - // move to next pixel - byte++; - } - } - - block_ofset++; - } - - // Step byte on over the UMV border to the start of the next line - byte += stride - width; - } -} - -static void vp8cx_temp_filter_c -( - VP8_COMP *cpi -) -{ - YV12_BUFFER_CONFIG *temp_source_buffer; - int *fixed_divide = cpi->fixed_divide; - - int frame = 0; - int max_frames = 11; - - int num_frames_backward = 0; - int num_frames_forward = 0; - int frames_to_blur_backward = 0; - int frames_to_blur_forward = 0; - int frames_to_blur = 0; - int start_frame = 0; - - int strength = cpi->oxcf.arnr_strength; - - int blur_type = cpi->oxcf.arnr_type; - - int new_max_frames = cpi->oxcf.arnr_max_frames; - - if (new_max_frames > 0) - max_frames = new_max_frames; - - num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index; - - if 
(num_frames_backward < 0) - num_frames_backward += cpi->oxcf.lag_in_frames; - - num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1); - - switch (blur_type) - { - case 1: - ///////////////////////////////////////// - // Backward Blur - - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_backward >= max_frames) - frames_to_blur_backward = max_frames - 1; - - frames_to_blur = frames_to_blur_backward + 1; - break; - - case 2: - ///////////////////////////////////////// - // Forward Blur - - frames_to_blur_forward = num_frames_forward; - - if (frames_to_blur_forward >= max_frames) - frames_to_blur_forward = max_frames - 1; - - frames_to_blur = frames_to_blur_forward + 1; - break; - - case 3: - ///////////////////////////////////////// - // Center Blur - frames_to_blur_forward = num_frames_forward; - frames_to_blur_backward = num_frames_backward; - - if (frames_to_blur_forward > frames_to_blur_backward) - frames_to_blur_forward = frames_to_blur_backward; - - if (frames_to_blur_backward > frames_to_blur_forward) - frames_to_blur_backward = frames_to_blur_forward; - - if (frames_to_blur_forward > (max_frames / 2)) - frames_to_blur_forward = (max_frames / 2); - - if (frames_to_blur_backward > (max_frames / 2)) - frames_to_blur_backward = (max_frames / 2); - - frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; - break; - - default: - ///////////////////////////////////////// - // At most 4 frames forward Blur - frames_to_blur_forward = 4; - frames_to_blur_backward = num_frames_backward; - - if (max_frames > 5) - { - if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames) - { - frames_to_blur_backward = max_frames - frames_to_blur_forward - 1; - } - } - else - { - frames_to_blur_forward = max_frames - 1; - frames_to_blur_backward = 0; - } - - frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; - break; - } - - start_frame = (cpi->last_alt_ref_sei + frames_to_blur_forward) % 
cpi->oxcf.lag_in_frames; - -#ifdef DEBUGFWG - // DEBUG FWG - printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" - , max_frames - , num_frames_backward - , num_frames_forward - , frames_to_blur - , frames_to_blur_backward - , frames_to_blur_forward - , cpi->source_encode_index - , cpi->last_alt_ref_sei - , start_frame); -#endif - - for (frame = 0; frame < frames_to_blur; frame++) - { - int which_buffer = start_frame - frame; - - if (which_buffer < 0) - which_buffer += cpi->oxcf.lag_in_frames; - - cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.y_buffer; - } - - temp_source_buffer = &cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer; - - // Blur Y - vp8cx_temp_blur1_c( - cpi->frames, - frames_to_blur, - temp_source_buffer->y_buffer, // cpi->Source->y_buffer, - cpi->alt_ref_buffer.source_buffer.y_buffer, // cpi->Source->y_buffer, - temp_source_buffer->y_width, - temp_source_buffer->y_stride, - temp_source_buffer->y_height, - //temp_source_buffer->y_height * temp_source_buffer->y_stride, - strength, - fixed_divide, - cpi->fp_motion_map, 16); - - for (frame = 0; frame < frames_to_blur; frame++) - { - int which_buffer = cpi->last_alt_ref_sei - frame; - - if (which_buffer < 0) - which_buffer += cpi->oxcf.lag_in_frames; - - cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.u_buffer; - } - - // Blur U - vp8cx_temp_blur1_c( - cpi->frames, - frames_to_blur, - temp_source_buffer->u_buffer, - cpi->alt_ref_buffer.source_buffer.u_buffer, // cpi->Source->u_buffer, - temp_source_buffer->uv_width, - temp_source_buffer->uv_stride, - temp_source_buffer->uv_height, - //temp_source_buffer->uv_height * temp_source_buffer->uv_stride, - strength, - fixed_divide, - cpi->fp_motion_map, 8); - - for (frame = 0; frame < frames_to_blur; frame++) - { - int which_buffer = cpi->last_alt_ref_sei - frame; - - if (which_buffer < 0) - which_buffer += cpi->oxcf.lag_in_frames; - - cpi->frames[frame] = 
cpi->src_buffer[which_buffer].source_buffer.v_buffer; - } - - // Blur V - vp8cx_temp_blur1_c( - cpi->frames, - frames_to_blur, - temp_source_buffer->v_buffer, - cpi->alt_ref_buffer.source_buffer.v_buffer, // cpi->Source->v_buffer, - temp_source_buffer->uv_width, - temp_source_buffer->uv_stride, - //temp_source_buffer->uv_height * temp_source_buffer->uv_stride, - temp_source_buffer->uv_height, - strength, - fixed_divide, - cpi->fp_motion_map, 8); -} -#endif - - -static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags) { int Q; int frame_over_shoot_limit; @@ -3559,8 +3497,18 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign cpi->zbin_over_quant = 0; cpi->zbin_mode_boost = 0; - // Enable mode based tweaking of the zbin + // Enable or disable mode based tweaking of the zbin + // For 2 Pass Only used where GF/ARF prediction quality + // is above a threshold + cpi->zbin_mode_boost = 0; cpi->zbin_mode_boost_enabled = TRUE; + if (cpi->pass == 2) + { + if ( cpi->gfu_boost <= 400 ) + { + cpi->zbin_mode_boost_enabled = FALSE; + } + } // Current default encoder behaviour for the altref sign bias if (cpi->source_alt_ref_active) @@ -3588,6 +3536,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign { int i; + // Reset the loop filter deltas and segmentation map + setup_features(cpi); + // If segmentation is enabled force a map update for key frames if (cpi->mb.e_mbd.segmentation_enabled) { @@ -3595,12 +3546,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign cpi->mb.e_mbd.update_mb_segmentation_data = 1; } - // If mode or reference frame based loop filter deltas are enabled then force an update for key frames. 
- if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled) - { - cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; - } - // The alternate reference frame cannot be active for a key frame cpi->source_alt_ref_active = FALSE; @@ -3753,87 +3698,49 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign // Set an active best quality and if necessary active worst quality if (cpi->pass == 2 || (cm->current_video_frame > 150)) { - //if ( (cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame ) int Q; int i; int bpm_target; + //int tmp; + + vp8_clear_system_state(); Q = cpi->active_worst_quality; if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame) { - vp8_clear_system_state(); - if (cm->frame_type != KEY_FRAME) { - // Where a gf overlays an existing arf then allow active max Q to drift to highest allowed value. - //if ( cpi->common.refresh_golden_frame && cpi->source_alt_ref_active ) - //cpi->active_worst_quality = cpi->worst_quality; - if (cpi->avg_frame_qindex < cpi->active_worst_quality) Q = cpi->avg_frame_qindex; - if (cpi->section_is_low_motion) - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 3 / 2) + 128)) / 64; - else if (cpi->section_is_fast_motion) - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 128)) / 64; + if ( cpi->gfu_boost > 1000 ) + cpi->active_best_quality = gf_low_motion_minq[Q]; + else if ( cpi->gfu_boost < 400 ) + cpi->active_best_quality = gf_high_motion_minq[Q]; else - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 5 / 4) + 128)) / 64; - } - // KEY FRAMES - else - { - if (cpi->section_is_low_motion) - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 240)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127 - else - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64; - } + cpi->active_best_quality = gf_mid_motion_minq[Q]; - for (i = Q; i > 0; i--) - { - if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i]) - break; - } - - 
cpi->active_best_quality = i; - - // this entire section could be replaced by a look up table -#if 0 - { - int Q, best_q[128]; - - for (Q = 0; Q < 128; Q++) - { - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127 - - for (i = Q; i > 0; i--) - { - if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i]) - break; - } - - best_q[Q] = i; - } - - Q += 0; - } -#endif + /*cpi->active_best_quality = gf_arf_minq[Q]; + tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400; + //tmp = (cpi->gfu_boost > 1000) ? 600 : + //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400; + tmp = 128 - (tmp >> 4); + cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/ + } + // KEY FRAMES + else + { + if (cpi->gfu_boost > 600) + cpi->active_best_quality = kf_low_motion_minq[Q]; + else + cpi->active_best_quality = kf_high_motion_minq[Q]; + } } else { - vp8_clear_system_state(); - - //bpm_target = (vp8_bits_per_mb[cm->frame_type][Q]*(Q+128))/64; // Approx 2 to 4 where Q has the range 0-127 - bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 192)) / 128; // Approx * 1.5 to 2.5 where Q has range 0-127 - - for (i = Q; i > 0; i--) - { - if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i]) - break; - } - - cpi->active_best_quality = i; + cpi->active_best_quality = inter_minq[Q]; } // If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames @@ -4059,6 +3966,9 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign // Clear the Alt reference frame active flag when we have a key frame cpi->source_alt_ref_active = FALSE; + // Reset the loop filter deltas and segmentation map + setup_features(cpi); + // If segmentation is enabled force a map update for key frames if (cpi->mb.e_mbd.segmentation_enabled) { @@ -4066,12 +3976,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign cpi->mb.e_mbd.update_mb_segmentation_data = 1; } - 
// If mode or reference frame based loop filter deltas are enabled then force an update for key frames. - if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled) - { - cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; - } - vp8_restore_coding_context(cpi); Q = vp8_regulate_q(cpi, cpi->this_frame_target); @@ -4275,17 +4179,18 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign // Update the GF useage maps. // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter - vp8_update_gf_useage_maps(cm, &cpi->mb.e_mbd); + vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); if (cm->frame_type == KEY_FRAME) cm->refresh_last_frame = 1; - if (0) +#if 0 { FILE *f = fopen("gfactive.stt", "a"); - fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame); + fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame); fclose(f); } +#endif // For inter frames the current default behaviour is that when cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer // This is purely an encoder descision at present. 
@@ -4296,11 +4201,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign if (cm->refresh_last_frame) { - vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame); - cm->frame_to_show = &cm->last_frame; + vp8_swap_yv12_buffer(&cm->yv12_fb[cm->lst_fb_idx], &cm->yv12_fb[cm->new_fb_idx]); + cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; } else - cm->frame_to_show = &cm->new_frame; + cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; @@ -4350,43 +4255,48 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign } } - - // At this point the new frame has been encoded coded. - // If any buffer copy / swaping is signalled it should be done here. - if (cm->frame_type == KEY_FRAME) { - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame); - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame); - } - else // For non key frames - { - // Code to copy between reference buffers - if (cm->copy_buffer_to_arf) + YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; + YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; + YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; + YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx]; + // At this point the new frame has been encoded coded. + // If any buffer copy / swaping is signalled it should be done here. + if (cm->frame_type == KEY_FRAME) { - if (cm->copy_buffer_to_arf == 1) - { - if (cm->refresh_last_frame) - // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. 
- vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame); - } - else if (cm->copy_buffer_to_arf == 2) - vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12); } - - if (cm->copy_buffer_to_gf) + else // For non key frames { - if (cm->copy_buffer_to_gf == 1) + // Code to copy between reference buffers + if (cm->copy_buffer_to_arf) { - if (cm->refresh_last_frame) - // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. - vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame); - else - vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame); + if (cm->copy_buffer_to_arf == 1) + { + if (cm->refresh_last_frame) + // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. + vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12); + else + vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12); + } + else if (cm->copy_buffer_to_arf == 2) + vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12); + } + + if (cm->copy_buffer_to_gf) + { + if (cm->copy_buffer_to_gf == 1) + { + if (cm->refresh_last_frame) + // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set. 
+ vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12); + else + vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12); + } + else if (cm->copy_buffer_to_gf == 2) + vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12); } - else if (cm->copy_buffer_to_gf == 2) - vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame); } } @@ -4524,18 +4434,46 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign } } -#if CONFIG_PSNR - - if (0) +#if 0 && CONFIG_PSNR { FILE *f = fopen("tmp.stt", "a"); vp8_clear_system_state(); //__asm emms; if (cpi->total_coded_error_left != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left, (double)cpi->bits_left / cpi->total_coded_error_left, cpi->tot_recode_hits); + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" + "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + "%10.3f %8ld\n", + cpi->common.current_video_frame, cpi->this_frame_target, + cpi->projected_frame_size, + (cpi->projected_frame_size - cpi->this_frame_target), + (int)cpi->total_target_vs_actual, + (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), + (int)cpi->total_actual_bits, cm->base_qindex, + cpi->active_best_quality, cpi->active_worst_quality, + cpi->avg_frame_qindex, cpi->zbin_over_quant, + cm->refresh_golden_frame, cm->refresh_alt_ref_frame, + cm->frame_type, cpi->gfu_boost, + cpi->est_max_qcorrection_factor, (int)cpi->bits_left, + 
cpi->total_coded_error_left, + (double)cpi->bits_left / cpi->total_coded_error_left, + cpi->tot_recode_hits); else - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left, cpi->tot_recode_hits); + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld" + "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f" + "%8ld\n", + cpi->common.current_video_frame, + cpi->this_frame_target, cpi->projected_frame_size, + (cpi->projected_frame_size - cpi->this_frame_target), + (int)cpi->total_target_vs_actual, + (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), + (int)cpi->total_actual_bits, cm->base_qindex, + cpi->active_best_quality, cpi->active_worst_quality, + cpi->avg_frame_qindex, cpi->zbin_over_quant, + cm->refresh_golden_frame, cm->refresh_alt_ref_frame, + cm->frame_type, cpi->gfu_boost, + cpi->est_max_qcorrection_factor, (int)cpi->bits_left, + cpi->total_coded_error_left, cpi->tot_recode_hits); fclose(f); @@ -4543,7 +4481,10 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign FILE *fmodes = fopen("Modes.stt", "a"); int i; - fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame, cm->frame_type, cm->refresh_golden_frame, cm->refresh_alt_ref_frame); + fprintf(fmodes, "%6d:%1d:%1d:%1d ", + cpi->common.current_video_frame, + cm->frame_type, cm->refresh_golden_frame, + cm->refresh_alt_ref_frame); for (i = 0; i < MAX_MODES; i++) 
fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]); @@ -4589,23 +4530,23 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; if (cpi->gold_is_last) - cpi->ref_frame_flags &= !VP8_GOLD_FLAG; + cpi->ref_frame_flags &= ~VP8_GOLD_FLAG; if (cpi->alt_is_last) - cpi->ref_frame_flags &= !VP8_ALT_FLAG; + cpi->ref_frame_flags &= ~VP8_ALT_FLAG; if (cpi->gold_is_alt) - cpi->ref_frame_flags &= !VP8_ALT_FLAG; + cpi->ref_frame_flags &= ~VP8_ALT_FLAG; if (cpi->oxcf.error_resilient_mode) { // Is this an alternate reference update if (cpi->common.refresh_alt_ref_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]); if (cpi->common.refresh_golden_frame) - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame); + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]); } else { @@ -4651,15 +4592,17 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign - if (0) +#if 0 { char filename[512]; FILE *recon_file; sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); recon_file = fopen(filename, "wb"); - fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file); + fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc, + cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file); fclose(recon_file); } +#endif // DEBUG //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); @@ -4681,7 +4624,7 @@ int vp8_is_gf_update_needed(VP8_PTR ptr) void vp8_check_gf_quality(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; - int gf_active_pct = (100 * cm->gf_active_count) / (cm->mb_rows * cm->mb_cols); + int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols); int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols); int last_ref_zz_useage = (cpi->inter_zz_count * 
100) / (cm->mb_rows * cm->mb_cols); @@ -4719,8 +4662,6 @@ void vp8_check_gf_quality(VP8_COMP *cpi) } #if 0 - - if (0) { FILE *f = fopen("gfneeded.stt", "a"); fprintf(f, "%10d %10d %10d %10d %10ld \n", @@ -4757,10 +4698,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, #if HAVE_ARMV7 extern void vp8_push_neon(INT64 *store); extern void vp8_pop_neon(INT64 *store); -static INT64 store_reg[8]; #endif int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time) { + INT64 store_reg[8]; VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer timer; @@ -4769,7 +4710,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON return -1; #if HAVE_ARMV7 - vp8_push_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(store_reg); + } #endif vpx_usec_timer_start(&timer); @@ -4778,7 +4724,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames) { #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return -1; } @@ -4819,9 +4770,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON s->source_time_stamp = time_stamp; s->source_frame_flags = frame_flags; #if HAVE_ARMV7 - vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer); -#else - vp8_yv12_copy_frame_ptr(sd, &s->source_buffer); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(sd, &s->source_buffer); + } #endif cpi->source_buffer_count = 1; } @@ 
-4830,14 +4792,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; } int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush) { - + INT64 store_reg[8]; VP8_COMP *cpi = (VP8_COMP *) ptr; VP8_COMMON *cm = &cpi->common; struct vpx_usec_timer tsctimer; @@ -4848,7 +4815,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon return -1; #if HAVE_ARMV7 - vp8_push_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_push_neon(store_reg); + } #endif vpx_usec_timer_start(&cmptimer); @@ -4949,6 +4921,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cm->show_frame = 0; cpi->source_alt_ref_pending = FALSE; // Clear Pending altf Ref flag. 
cpi->is_src_frame_alt_ref = 0; + cpi->is_next_src_alt_ref = 0; } else #endif @@ -4960,26 +4933,18 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon { if (cpi->source_encode_index == cpi->last_alt_ref_sei) { -#if VP8_TEMPORAL_ALT_REF - - if (cpi->oxcf.arnr_max_frames == 0) - { - cpi->is_src_frame_alt_ref = 1; // copy alt ref - } - else - { - cpi->is_src_frame_alt_ref = 0; - } - -#else cpi->is_src_frame_alt_ref = 1; -#endif cpi->last_alt_ref_sei = -1; } else cpi->is_src_frame_alt_ref = 0; cpi->source_encode_index = (cpi->source_encode_index + 1) % cpi->oxcf.lag_in_frames; + + if(cpi->source_encode_index == cpi->last_alt_ref_sei) + cpi->is_next_src_alt_ref = 1; + else + cpi->is_next_src_alt_ref = 0; } #endif @@ -5007,24 +4972,28 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon #endif #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return -1; } *frame_flags = cpi->source_frame_flags; -#if CONFIG_PSNR - if (cpi->source_time_stamp < cpi->first_time_stamp_ever) + { cpi->first_time_stamp_ever = cpi->source_time_stamp; - -#endif + cpi->last_end_time_stamp_seen = cpi->source_time_stamp; + } // adjust frame rates based on timestamps given if (!cm->refresh_alt_ref_frame) { - if (cpi->last_time_stamp_seen == 0) + if (cpi->source_time_stamp == cpi->first_time_stamp_ever) { double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp); @@ -5032,7 +5001,8 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon } else { - long long nanosecs = cpi->source_time_stamp - cpi->last_time_stamp_seen; + long long nanosecs = cpi->source_end_time_stamp + - cpi->last_end_time_stamp_seen; double this_fps = 10000000.000 / nanosecs; vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8); @@ -5040,6 +5010,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned 
int *frame_flags, unsigned lon } cpi->last_time_stamp_seen = cpi->source_time_stamp; + cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp; } if (cpi->compressor_speed == 2) @@ -5050,7 +5021,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (!cpi) { #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; } @@ -5141,8 +5117,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon { // return to normal state - cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG; - cm->refresh_entropy_probs = 1; cm->refresh_alt_ref_frame = 0; cm->refresh_golden_frame = 0; @@ -5241,13 +5215,18 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon #endif #if HAVE_ARMV7 - vp8_pop_neon(store_reg); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_pop_neon(store_reg); + } #endif return 0; } -int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags) +int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags) { VP8_COMP *cpi = (VP8_COMP *) comp; @@ -5257,7 +5236,7 @@ int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int debloc { int ret; #if CONFIG_POSTPROC - ret = vp8_post_proc_frame(&cpi->common, dest, deblock_level, noise_level, flags); + ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else if (cpi->common.frame_to_show) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index ec1774c81..ea794f5ce 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,6 @@ #include "vpx/internal/vpx_codec_internal.h" #include "mcomp.h" -#define INTRARDOPT //#define SPEEDSTATS 1 #define MIN_GF_INTERVAL 4 #define DEFAULT_GF_INTERVAL 7 @@ -46,6 +46,8 @@ #define MAX_THRESHMULT 512 #define GF_ZEROMV_ZBIN_BOOST 24 +#define LF_ZEROMV_ZBIN_BOOST 12 +#define MV_ZBIN_BOOST 4 #define ZBIN_OQ_MAX 192 #define VP8_TEMPORAL_ALT_REF 1 @@ -233,20 +235,33 @@ typedef struct VP8_ENCODER_RTCD vp8_search_rtcd_vtable_t search; } VP8_ENCODER_RTCD; +enum +{ + BLOCK_16X8, + BLOCK_8X16, + BLOCK_8X8, + BLOCK_4X4, + BLOCK_16X16, + BLOCK_MAX_SEGMENTS +}; + typedef struct { - DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, 
Y2round[QINDEX_RANGE][16]); - DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]); - DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]); + DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); + DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]); @@ -274,6 +289,7 @@ typedef struct int last_alt_ref_sei; int is_src_frame_alt_ref; + int is_next_src_alt_ref; int gold_is_last; // golden frame same as last frame ( short circuit gold searches) int alt_is_last; // Alt reference frame same as last ( short circuit altref search) @@ -310,15 +326,12 @@ typedef struct int subseqblockweight; int errthresh; -#ifdef INTRARDOPT int RDMULT; int RDDIV ; TOKENEXTRA *rdtok; - int intra_rd_opt; vp8_writer rdbc; int intra_mode_costs[10]; -#endif CODING_CONTEXT coding_context; @@ -359,9 +372,14 @@ typedef struct int gf_bits; // Bits for the golden frame or ARF - 2 pass only int mid_gf_extra_bits; // A few extra bits for the frame half way between two gfs. 
- int kf_group_bits; // Projected total bits available for a key frame group of frames - int kf_group_error_left; // Error score of frames still to be coded in kf group - int kf_bits; // Bits for the key frame in a key frame group - 2 pass only + // Projected total bits available for a key frame group of frames + long long kf_group_bits; + + // Error score of frames still to be coded in kf group + long long kf_group_error_left; + + // Bits for the key frame in a key frame group - 2 pass only + int kf_bits; int non_gf_bitrate_adjustment; // Used in the few frames following a GF to recover the extra bits spent in that GF int initial_gf_use; // percentage use of gf 2 frames after gf @@ -373,6 +391,7 @@ typedef struct int max_gf_interval; int baseline_gf_interval; int gf_decay_rate; + int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames INT64 key_frame_count; INT64 tot_key_frame_bits; @@ -386,6 +405,7 @@ typedef struct int inter_frame_target; double output_frame_rate; long long last_time_stamp_seen; + long long last_end_time_stamp_seen; long long first_time_stamp_ever; int ni_av_qi; @@ -458,14 +478,14 @@ typedef struct int target_bandwidth; long long bits_left; - FIRSTPASS_STATS total_stats; - FIRSTPASS_STATS this_frame_stats; + FIRSTPASS_STATS *total_stats; + FIRSTPASS_STATS *this_frame_stats; FIRSTPASS_STATS *stats_in, *stats_in_end; struct vpx_codec_pkt_list *output_pkt_list; int first_pass_done; unsigned char *fp_motion_map; - FILE *fp_motion_mapfile; - int fpmm_pos; + + unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save; #if 0 // Experimental code for lagged and one pass @@ -526,8 +546,8 @@ typedef struct int motion_lvl; int motion_speed; int motion_var; - int next_iiratio; - int this_iiratio; + unsigned int next_iiratio; + unsigned int this_iiratio; int this_frame_modified_error; double norm_intra_err_per_mb; @@ -588,7 +608,7 @@ typedef struct fractional_mv_step_fp *find_fractional_mv_step; vp8_full_search_fn_t full_search_sad; 
vp8_diamond_search_fn_t diamond_search_sad; - vp8_variance_fn_ptr_t fn_ptr; + vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; unsigned int time_receive_data; unsigned int time_compress_data; unsigned int time_pick_lpf; @@ -598,9 +618,6 @@ typedef struct unsigned int tempdata2; int base_skip_false_prob[128]; - unsigned int section_is_low_motion; - unsigned int section_benefits_from_aggresive_q; - unsigned int section_is_fast_motion; unsigned int section_intra_rating; double section_max_qfactor; @@ -611,9 +628,11 @@ typedef struct #endif #if VP8_TEMPORAL_ALT_REF SOURCE_SAMPLE alt_ref_buffer; - unsigned char *frames[MAX_LAG_BUFFERS]; - int fixed_divide[255]; + YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; + int fixed_divide[512]; #endif + // Flag to indicate temporal filter method + int use_weighted_temporal_filter; #if CONFIG_PSNR int count; @@ -641,6 +660,12 @@ typedef struct int b_calculate_ssimg; #endif int b_calculate_psnr; + + + unsigned char *gf_active_flags; // Record of which MBs still refer to last golden frame either directly or through 0,0 + int gf_active_count; + + } VP8_COMP; void control_data_rate(VP8_COMP *cpi); diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp index 66fdafb1a..6cc450121 100644 --- a/vp8/encoder/parms.cpp +++ b/vp8/encoder/parms.cpp @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index d61e2ceda..2f7dd9c7c 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -49,14 +50,13 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv); -int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]) +int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]) { (void) b; (void) d; (void) ref_mv; (void) error_per_bit; - (void) svf; - (void) vf; + (void) vfp; (void) mvcost; bestmv->row <<= 3; bestmv->col <<= 3; @@ -64,7 +64,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, } -static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse) +static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse) { BLOCK *b = &mb->block[0]; @@ -80,20 +80,20 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v if (xoffset | yoffset) { - return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse); + return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse); } else { - return vf(what, what_stride, in_what, in_what_stride, sse); + return vfp->vf(what, what_stride, in_what, in_what_stride, sse); } } unsigned int vp8_get16x16pred_error_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad ) @@ -124,9 +124,9 @@ unsigned int vp8_get16x16pred_error_c unsigned int vp8_get4x4sse_cs_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, int max_sad ) @@ -219,13 +219,20 @@ int vp8_pick_intra4x4mby_modes(const 
VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int { MACROBLOCKD *const xd = &mb->e_mbd; int i; - TEMP_CONTEXT t; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode int distortion = 0; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; vp8_intra_prediction_down_copy(xd); - vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4); for (i = 0; i < 16; i++) { @@ -238,8 +245,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int error += pick_intra4x4block(rtcd, mb, mb->block + i, xd->block + i, &best_mode, A, L, - t.a + vp8_block2above[i], - t.l + vp8_block2left[i], &r, &d); + ta + vp8_block2above[i], + tl + vp8_block2left[i], &r, &d); cost += r; distortion += d; @@ -409,7 +416,7 @@ int vp8_pick_intra_mbuv_mode(MACROBLOCK *mb) } - mb->e_mbd.mbmi.uv_mode = best_mode; + mb->e_mbd.mode_info_context->mbmi.uv_mode = best_mode; return best_error; } @@ -422,6 +429,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec MACROBLOCKD *xd = &x->e_mbd; B_MODE_INFO best_bmodes[16]; MB_MODE_INFO best_mbmode; + PARTITION_INFO best_partition; MV best_ref_mv1; MV mode_mv[MB_MODE_COUNT]; MB_PREDICTION_MODE this_mode; @@ -453,41 +461,48 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec vpx_memset(mode_mv, 0, sizeof(mode_mv)); vpx_memset(nearest_mv, 0, sizeof(nearest_mv)); vpx_memset(near_mv, 0, sizeof(near_mv)); + vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // set up all the refframe dependent pointers. 
if (cpi->ref_frame_flags & VP8_LAST_FLAG) { + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME], &best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias); - y_buffer[LAST_FRAME] = cpi->common.last_frame.y_buffer + recon_yoffset; - u_buffer[LAST_FRAME] = cpi->common.last_frame.u_buffer + recon_uvoffset; - v_buffer[LAST_FRAME] = cpi->common.last_frame.v_buffer + recon_uvoffset; + y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset; + u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset; + v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset; } else skip_mode[LAST_FRAME] = 1; if (cpi->ref_frame_flags & VP8_GOLD_FLAG) { + YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME], &best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias); - y_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.y_buffer + recon_yoffset; - u_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.u_buffer + recon_uvoffset; - v_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.v_buffer + recon_uvoffset; + y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset; + u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset; + v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset; } else skip_mode[GOLDEN_FRAME] = 1; if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active) { + YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; + vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME], &best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias); - y_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.y_buffer + 
recon_yoffset; - u_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset; - v_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset; + y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset; + u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset; + v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset; } else skip_mode[ALTREF_FRAME] = 1; @@ -527,7 +542,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec best_rd = INT_MAX; - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; // if we encode a new mv this is important // find the best new motion vector @@ -539,9 +554,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec if (best_rd <= cpi->rd_threshes[mode_index]) continue; - x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index]; + x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; - if (skip_mode[x->e_mbd.mbmi.ref_frame]) + if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame]) continue; // Check to see if the testing frequency for this mode is at its max @@ -570,33 +585,33 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec distortion2 = 0; this_mode = vp8_mode_order[mode_index]; - + // Experimental debug code. 
//all_rds[mode_index] = -1; - x->e_mbd.mbmi.mode = this_mode; - x->e_mbd.mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; // Work out the cost assosciated with selecting the reference frame - frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame]; + frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; rate2 += frame_cost; // everything but intra - if (x->e_mbd.mbmi.ref_frame) + if (x->e_mbd.mode_info_context->mbmi.ref_frame) { - x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mbmi.ref_frame]; - x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mbmi.ref_frame]; - x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mbmi.ref_frame]; - mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mbmi.ref_frame]; - mode_mv[NEARMV] = near_mv[x->e_mbd.mbmi.ref_frame]; - best_ref_mv1 = best_ref_mv[x->e_mbd.mbmi.ref_frame]; - memcpy(mdcounts, MDCounts[x->e_mbd.mbmi.ref_frame], sizeof(mdcounts)); + x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame]; + memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts)); } //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. 
if (cpi->is_src_frame_alt_ref) { - if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME) + if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; } @@ -636,7 +651,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec case TM_PRED: vp8_build_intra_predictors_mby_ptr(&x->e_mbd); distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff); - rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode]; + rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) @@ -703,13 +718,13 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } else { - bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -728,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb 
/ 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9 if (thissme < bestsme) { @@ -749,7 +764,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec } if (bestsme < INT_MAX) - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost); + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -774,12 +789,12 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec continue; rate2 += vp8_cost_mv_ref(this_mode, mdcounts); - x->e_mbd.mbmi.mode = this_mode; - x->e_mbd.mbmi.mv.as_mv = mode_mv[this_mode]; + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + x->e_mbd.mode_info_context->mbmi.mv.as_mv = mode_mv[this_mode]; x->e_mbd.block[0].bmi.mode = this_mode; - x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mbmi.mv.as_int; + x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int; - distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse)); + distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse)); this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2); @@ -816,7 +831,8 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec *returnrate = rate2; *returndistortion = distortion2; best_rd = this_rd; - vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); if (this_mode 
== B_PRED || this_mode == SPLITMV) for (i = 0; i < 16; i++) @@ -862,9 +878,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec if (best_mbmode.mode <= B_PRED) { - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_pick_intra_mbuv_mode(x); - best_mbmode.uv_mode = x->e_mbd.mbmi.uv_mode; + best_mbmode.uv_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; } @@ -890,23 +906,25 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec best_mbmode.partitioning = 0; best_mbmode.dc_diff = 0; - vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); for (i = 0; i < 16; i++) { vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO)); } - x->e_mbd.mbmi.mv.as_int = 0; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; return best_rd; } // macroblock modes - vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); - if (x->e_mbd.mbmi.mode == B_PRED || x->e_mbd.mbmi.mode == SPLITMV) + if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) for (i = 0; i < 16; i++) { vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO)); @@ -914,10 +932,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec } else { - vp8_set_mbmode_and_mvs(x, x->e_mbd.mbmi.mode, &best_bmodes[0].mv.as_mv); + vp8_set_mbmode_and_mvs(x, x->e_mbd.mode_info_context->mbmi.mode, &best_bmodes[0].mv.as_mv); } - x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv; + x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv; return best_rd; } diff --git 
a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h index fb28837ed..b80e4c86f 100644 --- a/vp8/encoder/pickinter.h +++ b/vp8/encoder/pickinter.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index bbd7840b8..09e8b5412 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -15,6 +16,9 @@ #include "vpx_scale/yv12extend.h" #include "vpx_scale/vpxscale.h" #include "alloccommon.h" +#if ARCH_ARM +#include "vpx_ports/arm.h" +#endif extern void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val); extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int filt_val, int sharpness_lvl); @@ -305,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Make a copy of the unfiltered / processed recon buffer #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); -#else - vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf); + } #endif if (cm->frame_type == KEY_FRAME) @@ -342,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif while (filter_step > 0) @@ -371,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - 
vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif // If value is close to the best so far then bias towards a lower loop filter value. @@ -400,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) // Re-instate the unfiltered frame #if HAVE_ARMV7 - vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); -#else - vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); +#if CONFIG_RUNTIME_CPU_DETECT + if (cm->rtcd.flags & HAS_NEON) +#endif + { + vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show); + } +#if CONFIG_RUNTIME_CPU_DETECT + else +#endif +#endif +#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT + { + vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show); + } #endif // Was it better than the previous best? diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c index f99277f99..588656b97 100644 --- a/vp8/encoder/ppc/csystemdependent.c +++ b/vp8/encoder/ppc/csystemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm index e0e976d71..6e0099ddc 100644 --- a/vp8/encoder/ppc/encodemb_altivec.asm +++ b/vp8/encoder/ppc/encodemb_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm index eaab14c79..935d0cb09 100644 --- a/vp8/encoder/ppc/fdct_altivec.asm +++ b/vp8/encoder/ppc/fdct_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm index 917bfe036..ba4823009 100644 --- a/vp8/encoder/ppc/rdopt_altivec.asm +++ b/vp8/encoder/ppc/rdopt_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm index 1102ccf17..e5f26380f 100644 --- a/vp8/encoder/ppc/sad_altivec.asm +++ b/vp8/encoder/ppc/sad_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm index 952bf7286..a1ebf663a 100644 --- a/vp8/encoder/ppc/variance_altivec.asm +++ b/vp8/encoder/ppc/variance_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm index 148a8d25b..301360b1d 100644 --- a/vp8/encoder/ppc/variance_subpixel_altivec.asm +++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c index d2a13dced..bd918fa3c 100644 --- a/vp8/encoder/preproc.c +++ b/vp8/encoder/preproc.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c index 0e34cecb1..dc2a03b69 100644 --- a/vp8/encoder/psnr.c +++ b/vp8/encoder/psnr.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h index 9f6ca0bbf..8ae444823 100644 --- a/vp8/encoder/psnr.h +++ b/vp8/encoder/psnr.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c index 6028ebf56..a1be6614b 100644 --- a/vp8/encoder/quantize.c +++ b/vp8/encoder/quantize.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -15,18 +16,22 @@ #include "entropy.h" #include "predictdc.h" +#define EXACT_QUANT + +#ifdef EXACT_FASTQUANT void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; - short *coeff_ptr = &b->coeff[0]; - short *zbin_ptr = &b->zbin[0][0]; - short *round_ptr = &b->round[0][0]; - short *quant_ptr = &b->quant[0][0]; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = &d->dequant[0][0]; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -44,7 +49,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) if (x >= zbin) { - y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) x = (y ^ sz) - sz; // get the sign back qcoeff_ptr[rc] = x; // write to destination dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value @@ -55,25 +62,177 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) } } } + d->eob = eob + 1; +} + +#else + +void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *coeff_ptr = b->coeff; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + + eob = -1; + for (i = 0; i < 16; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { 
+ eob = i; // last nonzero coeffs + } + } + d->eob = eob + 1; +} + +#endif + +#ifdef EXACT_QUANT +void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) +{ + int i, rc, eob; + int zbin; + int x, y, z, sz; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *quant_shift_ptr = b->quant_shift; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + + eob = -1; + + for (i = 0; i < 16; i++) + { + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + + //if ( i == 0 ) + // zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2; + //else + zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; + + zbin_boost_ptr ++; + sz = (z >> 31); // sign of z + x = (z ^ sz) - sz; // x = abs(z) + + if (x >= zbin) + { + x += round_ptr[rc]; + y = (((x * quant_ptr[rc]) >> 16) + x) + >> quant_shift_ptr[rc]; // quantize (x) + x = (y ^ sz) - sz; // get the sign back + qcoeff_ptr[rc] = x; // write to destination + dqcoeff_ptr[rc] = x * dequant_ptr[rc]; // dequantized value + + if (y) + { + eob = i; // last nonzero coeffs + zbin_boost_ptr = &b->zrun_zbin_boost[0]; // reset zero runlength + } + } + } d->eob = eob + 1; - } +/* Perform regular quantization, with unbiased rounding and no zero bin. 
*/ +void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d) +{ + int i; + int rc; + int eob; + int x; + int y; + int z; + int sz; + short *coeff_ptr; + short *quant_ptr; + short *quant_shift_ptr; + short *qcoeff_ptr; + short *dqcoeff_ptr; + short *dequant_ptr; + + coeff_ptr = b->coeff; + quant_ptr = b->quant; + quant_shift_ptr = b->quant_shift; + qcoeff_ptr = d->qcoeff; + dqcoeff_ptr = d->dqcoeff; + dequant_ptr = d->dequant; + eob = - 1; + vpx_memset(qcoeff_ptr, 0, 32); + vpx_memset(dqcoeff_ptr, 0, 32); + for (i = 0; i < 16; i++) + { + int dq; + int round; + + /*TODO: These arrays should be stored in zig-zag order.*/ + rc = vp8_default_zig_zag1d[i]; + z = coeff_ptr[rc]; + dq = dequant_ptr[rc]; + round = dq >> 1; + /* Sign of z. */ + sz = -(z < 0); + x = (z + sz) ^ sz; + x += round; + if (x >= dq) + { + /* Quantize x. */ + y = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc]; + /* Put the sign back. */ + x = (y + sz) ^ sz; + /* Save the coefficient and its dequantized value. */ + qcoeff_ptr[rc] = x; + dqcoeff_ptr[rc] = x * dq; + /* Remember the last non-zero coefficient. 
*/ + if (y) + eob = i; + } + } + + d->eob = eob + 1; +} + +#else + void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; - short *zbin_boost_ptr = &b->zrun_zbin_boost[0]; - short *coeff_ptr = &b->coeff[0]; - short *zbin_ptr = &b->zbin[0][0]; - short *round_ptr = &b->round[0][0]; - short *quant_ptr = &b->quant[0][0]; - short *qcoeff_ptr = d->qcoeff; - short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = &d->dequant[0][0]; - short zbin_oq_value = b->zbin_extra; + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; vpx_memset(qcoeff_ptr, 0, 32); vpx_memset(dqcoeff_ptr, 0, 32); @@ -111,61 +270,30 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d) d->eob = eob + 1; } + +#endif + void vp8_quantize_mby(MACROBLOCK *x) { int i; + int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV) - { - for (i = 0; i < 16; i++) - { - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2); - } + for (i = 0; i < 16; i++) + x->quantize_b(&x->block[i], &x->e_mbd.block[i]); + if(has_2nd_order) x->quantize_b(&x->block[24], &x->e_mbd.block[24]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob); - - } - else - { - for (i = 0; i < 16; i++) - { - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } } void vp8_quantize_mb(MACROBLOCK *x) { int i; + int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED + && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); - x->e_mbd.mbmi.mb_skip_coeff = 1; - - if (x->e_mbd.mbmi.mode != B_PRED && 
x->e_mbd.mbmi.mode != SPLITMV) - { - for (i = 0; i < 16; i++) - { - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2); - } - - for (i = 16; i < 25; i++) - { - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } - else - { - for (i = 0; i < 24; i++) - { - x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } - + for (i = 0; i < 24+has_2nd_order; i++) + x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } @@ -174,76 +302,5 @@ void vp8_quantize_mbuv(MACROBLOCK *x) int i; for (i = 16; i < 24; i++) - { x->quantize_b(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } -} - -// This function is not currently called -void vp8_quantize_mbrd(MACROBLOCK *x) -{ - int i; - - x->e_mbd.mbmi.mb_skip_coeff = 1; - - if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV) - { - for (i = 0; i < 16; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2); - } - - for (i = 16; i < 25; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } - else - { - for (i = 0; i < 24; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } -} - -void vp8_quantize_mbuvrd(MACROBLOCK *x) -{ - int i; - - for (i = 16; i < 24; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } -} - -void vp8_quantize_mbyrd(MACROBLOCK *x) -{ - int i; - - if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV) - { - for (i = 0; i < 16; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2); - } - - x->quantize_brd(&x->block[24], 
&x->e_mbd.block[24]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob); - - } - else - { - for (i = 0; i < 16; i++) - { - x->quantize_brd(&x->block[i], &x->e_mbd.block[i]); - x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob); - } - } } diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h index 868e8e3a8..b74718bfa 100644 --- a/vp8/encoder/quantize.h +++ b/vp8/encoder/quantize.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -16,6 +17,10 @@ #define prototype_quantize_block(sym) \ void (sym)(BLOCK *b,BLOCKD *d) +#if ARCH_X86 || ARCH_X86_64 +#include "x86/quantize_x86.h" +#endif + #if ARCH_ARM #include "arm/quantize_arm.h" #endif @@ -42,11 +47,10 @@ typedef struct #define QUANTIZE_INVOKE(ctx,fn) vp8_quantize_##fn #endif +extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d); + extern void vp8_quantize_mb(MACROBLOCK *x); extern void vp8_quantize_mbuv(MACROBLOCK *x); extern void vp8_quantize_mby(MACROBLOCK *x); -extern void vp8_quantize_mbyrd(MACROBLOCK *x); -extern void vp8_quantize_mbuvrd(MACROBLOCK *x); -extern void vp8_quantize_mbrd(MACROBLOCK *x); #endif diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c index 23a2d1abd..dd324f435 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -407,7 +408,7 @@ static void calc_gf_params(VP8_COMP *cpi) cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]; - int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); + int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); // Reset the last boost indicator //cpi->last_boost = 100; @@ -1021,7 +1022,7 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]; - int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); + int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); // Reset the last boost indicator //cpi->last_boost = 100; @@ -1119,10 +1120,12 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi) } // If there is an active ARF at this location use the minimum - // bits on this frame unless it was a contructed arf. - else if (cpi->oxcf.arnr_max_frames == 0) + // bits on this frame even if it is a contructed arf. + // The active maximum quantizer insures that an appropriate + // number of bits will be spent if needed for contstructed ARFs. + else { - cpi->this_frame_target = 0; // Minimial spend on gf that is replacing an arf + cpi->this_frame_target = 0; } cpi->current_gf_interval = cpi->frames_till_gf_update_due; @@ -1363,8 +1366,7 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) if (cpi->zbin_over_quant > zbin_oqmax) cpi->zbin_over_quant = zbin_oqmax; - // Each over-run step is assumed to equate to approximately - // 3% reduction in bitrate + // Adjust bits_per_mb_at_this_q estimate bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); Factor += factor_adjustment; @@ -1442,6 +1444,9 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) } else { + int last_kf_interval = + (cpi->frames_since_key > 0) ? 
cpi->frames_since_key : 1; + // reset keyframe context and calculate weighted average of last KEY_FRAME_CONTEXT keyframes for (i = 0; i < KEY_FRAME_CONTEXT; i++) { @@ -1452,8 +1457,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) } else { - cpi->prior_key_frame_size[KEY_FRAME_CONTEXT - 1] = cpi->projected_frame_size; - cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = cpi->frames_since_key; + cpi->prior_key_frame_size[i] = cpi->projected_frame_size; + cpi->prior_key_frame_distance[i] = last_kf_interval; } av_key_frame_bits += prior_key_frame_weight[i] * cpi->prior_key_frame_size[i]; @@ -1476,6 +1481,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi) // allocated than those following other gfs. cpi->kf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 7 / 8; cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 1 / 8; + if(!av_key_frame_frequency) + av_key_frame_frequency = 60; // Work out how much to try and recover per frame. // For one pass we estimate the number of frames to spread it over based upon past history. diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h index 588c7a823..766dfdfce 100644 --- a/vp8/encoder/ratectrl.h +++ b/vp8/encoder/ratectrl.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 601c52978..d291b9e6f 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -63,11 +64,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x); #define MAXF(a,b) (((a) > (b)) ? (a) : (b)) -extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2]; -extern const TOKENEXTRA *vp8_dct_value_tokens_ptr; -extern int vp8_dct_value_cost[DCT_MAX_VALUE*2]; -extern int *vp8_dct_value_cost_ptr; - const int vp8_auto_speed_thresh[17] = { @@ -170,15 +166,13 @@ static void fill_token_costs( } -static int rd_iifactor [ 32 ] = { 16, 16, 16, 12, 8, 4, 2, 0, +static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; - - // The values in this table should be reviewed static int sad_per_bit16lut[128] = { @@ -232,43 +226,41 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue) int i; int *thresh; int threshmult; - - int capped_q = (Qvalue < 160) ? Qvalue : 160; + double capped_q = (Qvalue < 160) ? 
(double)Qvalue : 160.0; + double rdconst = 3.00; vp8_clear_system_state(); //__asm emms; - cpi->RDMULT = (int)((0.00007 * (capped_q * capped_q * capped_q * capped_q)) - (0.0125 * (capped_q * capped_q * capped_q)) + - (2.25 * (capped_q * capped_q)) - (12.5 * capped_q) + 25.0); - - if (cpi->RDMULT < 25) - cpi->RDMULT = 25; - - if (cpi->pass == 2) - { - if (cpi->common.frame_type == KEY_FRAME) - cpi->RDMULT += (cpi->RDMULT * rd_iifactor[0]) / 16; - else if (cpi->next_iiratio > 31) - cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) / 16; - else - cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) / 16; - } - + // Further tests required to see if optimum is different + // for key frames, golden frames and arf frames. + // if (cpi->common.refresh_golden_frame || + // cpi->common.refresh_alt_ref_frame) + cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); // Extend rate multiplier along side quantizer zbin increases if (cpi->zbin_over_quant > 0) { - // Extend rate multiplier along side quantizer zbin increases - if (cpi->zbin_over_quant > 0) - { - double oq_factor = pow(1.006, cpi->zbin_over_quant); + double oq_factor; + double modq; - if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0))) - oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0); - - cpi->RDMULT = (int)(oq_factor * cpi->RDMULT); - } + // Experimental code using the same basic equation as used for Q above + // The units of cpi->zbin_over_quant are 1/128 of Q bin size + oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant); + modq = (int)((double)capped_q * oq_factor); + cpi->RDMULT = (int)(rdconst * (modq * modq)); } + if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) + { + if (cpi->next_iiratio > 31) + cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; + else + cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4; + } + + if (cpi->RDMULT < 125) + cpi->RDMULT = 125; + cpi->mb.errorperbit = (cpi->RDMULT / 100); if (cpi->mb.errorperbit < 1) @@ -494,7 
+486,7 @@ static int macro_block_max_error(MACROBLOCK *mb) int i, j; int berror; - dc = !(mb->e_mbd.mbmi.mode == B_PRED || mb->e_mbd.mbmi.mode == SPLITMV); + dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV); for (i = 0; i < 16; i++) { @@ -622,24 +614,28 @@ int vp8_rdcost_mby(MACROBLOCK *mb) { int cost = 0; int b; - TEMP_CONTEXT t, t2; int type = 0; - MACROBLOCKD *x = &mb->e_mbd; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; - vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4); - vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1); + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - if (x->mbmi.mode == SPLITMV) + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + + if (x->mode_info_context->mbmi.mode == SPLITMV) type = 3; for (b = 0; b < 16; b++) cost += cost_coeffs(mb, x->block + b, type, - t.a + vp8_block2above[b], t.l + vp8_block2left[b]); + ta + vp8_block2above[b], tl + vp8_block2left[b]); - if (x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) cost += cost_coeffs(mb, x->block + 24, 1, - t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]); + ta + vp8_block2above[24], tl + vp8_block2left[24]); return cost; } @@ -718,13 +714,20 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int { MACROBLOCKD *const xd = &mb->e_mbd; int i; - TEMP_CONTEXT t; int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int distortion = 0; int tot_rate_y = 0; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT 
*)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; vp8_intra_prediction_down_copy(xd); - vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4); for (i = 0; i < 16; i++) { @@ -737,8 +740,8 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int rd_pick_intra4x4block( cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L, - t.a + vp8_block2above[i], - t.l + vp8_block2left[i], &r, &ry, &d); + ta + vp8_block2above[i], + tl + vp8_block2left[i], &r, &ry, &d); cost += r; distortion += d; @@ -769,9 +772,9 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int int dummy; rate = 0; - x->e_mbd.mbmi.mode = mode; + x->e_mbd.mode_info_context->mbmi.mode = mode; - rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode]; + rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x); @@ -793,28 +796,33 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int } } - x->e_mbd.mbmi.mode = mode_selected; + x->e_mbd.mode_info_context->mbmi.mode = mode_selected; return best_rd; } static int rd_cost_mbuv(MACROBLOCK *mb) { - TEMP_CONTEXT t, t2; int b; int cost = 0; MACROBLOCKD *x = &mb->e_mbd; + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; - vp8_setup_temp_context(&t, x->above_context[UCONTEXT], x->left_context[UCONTEXT], 2); - vp8_setup_temp_context(&t2, x->above_context[VCONTEXT], x->left_context[VCONTEXT], 2); + vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 20; b++) cost += cost_coeffs(mb, x->block + b, vp8_block2type[b], - t.a + vp8_block2above[b], t.l + vp8_block2left[b]); + ta + vp8_block2above[b], tl + vp8_block2left[b]); for (b = 20; b < 24; b++) cost 
+= cost_coeffs(mb, x->block + b, vp8_block2type[b], - t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]); + ta + vp8_block2above[b], tl + vp8_block2left[b]); return cost; } @@ -855,11 +863,11 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra int distortion; int this_rd; - x->e_mbd.mbmi.uv_mode = mode; + x->e_mbd.mode_info_context->mbmi.uv_mode = mode; vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x); rate_to = rd_cost_mbuv(x); - rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.uv_mode]; + rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode]; distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x); @@ -878,7 +886,7 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra *rate = r; *distortion = d; - x->e_mbd.mbmi.uv_mode = mode_selected; + x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; return best_rd; } #endif @@ -888,16 +896,17 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) vp8_prob p [VP8_MVREFS-1]; assert(NEARESTMV <= m && m <= SPLITMV); vp8_mv_ref_probs(p, near_mv_ref_ct); - return vp8_cost_token(vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m); + return vp8_cost_token(vp8_mv_ref_tree, p, + vp8_mv_ref_encoding_array - NEARESTMV + m); } void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv) { int i; - x->e_mbd.mbmi.mode = mb; - x->e_mbd.mbmi.mv.as_mv.row = mv->row; - x->e_mbd.mbmi.mv.as_mv.col = mv->col; + x->e_mbd.mode_info_context->mbmi.mode = mb; + x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row; + x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col; for (i = 0; i < 16; i++) { @@ -909,21 +918,6 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv) } #if !(CONFIG_REALTIME_ONLY) -int vp8_count_labels(int const *labelings) -{ - int i; - int count = 0; - - for (i = 0; i < 16; i++) - { - if (labelings[i] > count) - count = 
labelings[i]; - } - - return count + 1; -} - - static int labels2mode( MACROBLOCK *x, int const *labelings, int which_label, @@ -1002,18 +996,19 @@ static int labels2mode( return cost; } -static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, TEMP_CONTEXT *t) +static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, + int which_label, ENTROPY_CONTEXT *ta, + ENTROPY_CONTEXT *tl) { int cost = 0; int b; MACROBLOCKD *x = &mb->e_mbd; - for (b = 0; b < 16; b++) if (labels[ b] == which_label) cost += cost_coeffs(mb, x->block + b, 3, - t->a + vp8_block2above[b], - t->l + vp8_block2left[b]); + ta + vp8_block2above[b], + tl + vp8_block2left[b]); return cost; @@ -1033,11 +1028,11 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict); ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16); - x->short_fdct4x4rd(be->src_diff, be->coeff, 32); + x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); // set to 0 no way to account for 2nd order DC so discount //be->coeff[0] = 0; - x->quantize_brd(be, bd); + x->quantize_b(be, bd); distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff); } @@ -1061,13 +1056,13 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp // Fdct and building the 2nd order block for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { - mb->short_fdct8x4rd(beptr->src_diff, beptr->coeff, 32); + mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32); *Y2DCPtr++ = beptr->coeff[0]; *Y2DCPtr++ = beptr->coeff[16]; } // 2nd order fdct - if (x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); } @@ -1075,20 +1070,20 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp // Quantization for (b = 0; b < 16; b++) { - mb->quantize_brd(&mb->block[b], &mb->e_mbd.block[b]); + mb->quantize_b(&mb->block[b], 
&mb->e_mbd.block[b]); } // DC predication and Quantization of 2nd Order block - if (x->mbmi.mode != SPLITMV) + if (x->mode_info_context->mbmi.mode != SPLITMV) { { - mb->quantize_brd(mb_y2, x_y2); + mb->quantize_b(mb_y2, x_y2); } } // Distortion - if (x->mbmi.mode == SPLITMV) + if (x->mode_info_context->mbmi.mode == SPLITMV) d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2; else { @@ -1102,15 +1097,19 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp *Rate = vp8_rdcost_mby(mb); } +unsigned char vp8_mbsplit_offset2[4][16] = { + { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} +}; static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int compressor_speed, int *mvcost[2], int mvthresh, int fullpixel) { int i, segmentation; B_PREDICTION_MODE this_mode; MACROBLOCKD *xc = &x->e_mbd; - BLOCK *b = &x->block[0]; - BLOCKD *d = &x->e_mbd.block[0]; - BLOCK *c = &x->block[0]; - BLOCKD *e = &x->e_mbd.block[0]; + BLOCK *c; + BLOCKD *e; int const *labels; int best_segment_rd = INT_MAX; int best_seg = 0; @@ -1120,6 +1119,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes int bsd = 0; int bestsegmentyrate = 0; + static const int segmentation_to_sseshift[4] = {3, 3, 2, 0}; + // FIX TO Rd error outrange bug PGW 9 june 2004 B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4, @@ -1130,6 +1131,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes MV bmvs[16]; int beobs[16]; + vpx_memset(beobs, 0, sizeof(beobs)); + + for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++) { int label_count; @@ -1138,56 +1142,33 @@ static int 
vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes int rate = 0; int sbr = 0; int sbd = 0; - int UNINITIALIZED_IS_SAFE(sseshift); + int sseshift; int segmentyrate = 0; - vp8_variance_fn_ptr_t v_fn_ptr; + vp8_variance_fn_ptr_t *v_fn_ptr; - TEMP_CONTEXT t; - TEMP_CONTEXT tb; - vp8_setup_temp_context(&t, xc->above_context[Y1CONTEXT], xc->left_context[Y1CONTEXT], 4); + ENTROPY_CONTEXT_PLANES t_above, t_left; + ENTROPY_CONTEXT *ta; + ENTROPY_CONTEXT *tl; + ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; + ENTROPY_CONTEXT *ta_b; + ENTROPY_CONTEXT *tl_b; + + vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta = (ENTROPY_CONTEXT *)&t_above; + tl = (ENTROPY_CONTEXT *)&t_left; + ta_b = (ENTROPY_CONTEXT *)&t_above_b; + tl_b = (ENTROPY_CONTEXT *)&t_left_b; br = 0; bd = 0; - switch (segmentation) - { - case 0: - v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8); - v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8); - v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); - v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); - v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); - sseshift = 3; - break; - case 1: - v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16); - v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16); - v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); - v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); - v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); - sseshift = 3; - break; - case 2: - v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8); - v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8); - v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); - v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); - v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, 
sad8x8x4d); - sseshift = 2; - break; - case 3: - v_fn_ptr.vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4); - v_fn_ptr.svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4); - v_fn_ptr.sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); - v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); - v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); - sseshift = 0; - break; - } - + v_fn_ptr = &cpi->fn_ptr[segmentation]; + sseshift = segmentation_to_sseshift[segmentation]; labels = vp8_mbsplits[segmentation]; - label_count = vp8_count_labels(labels); + label_count = vp8_mbsplit_count[segmentation]; // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on @@ -1211,14 +1192,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes int j; int bestlabelyrate = 0; - b = &x->block[0]; - d = &x->e_mbd.block[0]; - // find first label - for (j = 0; j < 16; j++) - if (labels[j] == i) - break; + j = vp8_mbsplit_offset2[segmentation][i]; c = &x->block[j]; e = &x->e_mbd.block[j]; @@ -1230,9 +1206,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes int this_rd; int num00; int labelyrate; + ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; + ENTROPY_CONTEXT *ta_s; + ENTROPY_CONTEXT *tl_s; - TEMP_CONTEXT ts; - vp8_setup_temp_context(&ts, &t.a[0], &t.l[0], 4); + vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); + + ta_s = (ENTROPY_CONTEXT *)&t_above_s; + tl_s = (ENTROPY_CONTEXT *)&t_left_s; if (this_mode == NEW4X4) { @@ -1251,10 +1233,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes int sadpb = x->sadperbit4; if (cpi->sf.search_method == HEX) - bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost); + bestsme = vp8_hex_search(x, c, e, 
best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); else { - bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost); + bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); n = num00; num00 = 0; @@ -1267,7 +1249,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes num00--; else { - thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost); + thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost); if (thissme < bestsme) { @@ -1282,7 +1264,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes // Should we do a full search (best quality only) if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000) { - thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost); + thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost); if (thissme < bestsme) { @@ -1300,9 +1282,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes if (bestsme < INT_MAX) { if (!fullpixel) - cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost); + cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost); else - vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost); + vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost); } } @@ -1317,7 +1299,7 
@@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4; - labelyrate = rdcost_mbsegment_y(x, labels, i, &ts); + labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s); rate += labelyrate; this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb); @@ -1329,12 +1311,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; - vp8_setup_temp_context(&tb, &ts.a[0], &ts.l[0], 4); + + vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES)); } } - vp8_setup_temp_context(&t, &tb.a[0], &tb.l[0], 4); + vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES)); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost); @@ -1377,49 +1362,23 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes bd->eob = beobs[i]; } - // Trap cases where the best split mode has all vectors coded 0,0 (or all the same) - if (FALSE) - { - int allsame = 1; - - for (i = 1; i < 16; i++) - { - if ((bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row)) - { - allsame = 0; - break; - } - } - - if (allsame) - { - best_segment_rd = INT_MAX; - } - } - *returntotrate = bsr; *returndistortion = bsd; *returnyrate = bestsegmentyrate; - - // save partitions labels = vp8_mbsplits[best_seg]; - x->e_mbd.mbmi.partitioning = best_seg; - x->e_mbd.mbmi.partition_count = vp8_count_labels(labels); + x->e_mbd.mode_info_context->mbmi.partitioning = best_seg; + x->partition_info->count = vp8_mbsplit_count[best_seg]; - for (i = 0; i < x->e_mbd.mbmi.partition_count; i++) + for (i = 0; i < x->partition_info->count; i++) { int j; - for (j = 0; j < 16; j++) - { - if (labels[j] == i) - break; - } + j = 
vp8_mbsplit_offset2[best_seg][i]; - x->e_mbd.mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode; - x->e_mbd.mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv; + x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode; + x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv; } return best_segment_rd; @@ -1433,6 +1392,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int MACROBLOCKD *xd = &x->e_mbd; B_MODE_INFO best_bmodes[16]; MB_MODE_INFO best_mbmode; + PARTITION_INFO best_partition; MV best_ref_mv; MV mode_mv[MB_MODE_COUNT]; MB_PREDICTION_MODE this_mode; @@ -1464,6 +1424,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int *returnintra = INT_MAX; + vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean + cpi->mbs_tested_so_far++; // Count of the number of MBs tested so far this frame x->skip = 0; @@ -1517,9 +1479,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int vpx_memset(mode_mv, 0, sizeof(mode_mv)); - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); - uv_intra_mode = x->e_mbd.mbmi.uv_mode; + uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; { uvintra_eob = 0; @@ -1541,7 +1503,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int //all_rds[mode_index] = -1; //all_rates[mode_index] = -1; //all_dist[mode_index] = -1; - //intermodecost[mode_index] = -1; + //intermodecost[mode_index] = -1; // Test best rd so far against threshold for trying this mode. 
if (best_rd <= cpi->rd_threshes[mode_index]) @@ -1563,31 +1525,34 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int this_mode = vp8_mode_order[mode_index]; - x->e_mbd.mbmi.mode = this_mode; - x->e_mbd.mbmi.uv_mode = DC_PRED; - x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index]; + x->e_mbd.mode_info_context->mbmi.mode = this_mode; + x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; + x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index]; //Only consider ZEROMV/ALTREF_FRAME for alt ref frame. if (cpi->is_src_frame_alt_ref) { - if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME) + if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) continue; } - if (x->e_mbd.mbmi.ref_frame == LAST_FRAME) + if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) { + YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; + if (!(cpi->ref_frame_flags & VP8_LAST_FLAG)) continue; lf_or_gf = 0; // Local last frame vs Golden frame flag // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset; + x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; + x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; + x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } - else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME) + else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) { + YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx]; // not supposed to reference gold frame if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG)) @@ -1596,12 +1561,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int lf_or_gf = 1; // Local last frame vs Golden 
frame flag // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset; + x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset; + x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset; + x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset; } - else if (x->e_mbd.mbmi.ref_frame == ALTREF_FRAME) + else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME) { + YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx]; + // not supposed to reference alt ref frame if (!(cpi->ref_frame_flags & VP8_ALT_FLAG)) continue; @@ -1612,19 +1579,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int lf_or_gf = 1; // Local last frame vs Golden frame flag // Set up pointers for this macro block into the previous frame recon buffer - x->e_mbd.pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset; - x->e_mbd.pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset; - x->e_mbd.pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset; + x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset; + x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset; + x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset; } vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv, - mdcounts, x->e_mbd.mbmi.ref_frame, cpi->common.ref_frame_sign_bias); + mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias); // Estimate the reference frame signaling cost and add it to the rolling cost variable. 
- frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame]; + frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; rate2 += frame_cost; if (this_mode <= B_PRED) @@ -1659,10 +1626,22 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise if (cpi->zbin_mode_boost_enabled) { - if ((vp8_mode_order[mode_index] == ZEROMV) && (vp8_ref_frame_order[mode_index] != LAST_FRAME)) - cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; - else + if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME ) cpi->zbin_mode_boost = 0; + else + { + if (vp8_mode_order[mode_index] == ZEROMV) + { + if (vp8_ref_frame_order[mode_index] != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } + else if (vp8_ref_frame_order[mode_index] == SPLITMV) + cpi->zbin_mode_boost = 0; + else + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } vp8cx_mb_init_quantizer(cpi, x); } @@ -1691,9 +1670,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int int breakout_rd = best_rd - frame_cost_rd; int tmp_rd; - if (x->e_mbd.mbmi.ref_frame == LAST_FRAME) + if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME) tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ; - else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME) + else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ; else tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, 
cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ; @@ -1747,19 +1726,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } // trap cases where the 8x8s can be promoted to 8x16s or 16x8s - if (0)//x->e_mbd.mbmi.partition_count == 4) + if (0)//x->partition_info->count == 4) { - if (x->e_mbd.mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mbmi.partition_bmi[1].mv.as_int - && x->e_mbd.mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mbmi.partition_bmi[3].mv.as_int) + if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int + && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int) { const int *labels = vp8_mbsplits[2]; - x->e_mbd.mbmi.partitioning = 0; + x->e_mbd.mode_info_context->mbmi.partitioning = 0; rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2); rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings); - //rate -= x->inter_bmode_costs[ x->e_mbd.mbmi.partition_bmi[1]]; - //rate -= x->inter_bmode_costs[ x->e_mbd.mbmi.partition_bmi[3]]; - x->e_mbd.mbmi.partition_bmi[1] = x->e_mbd.mbmi.partition_bmi[2]; + //rate -= x->inter_bmode_costs[ x->partition_info->bmi[1]]; + //rate -= x->inter_bmode_costs[ x->partition_info->bmi[3]]; + x->partition_info->bmi[1] = x->partition_info->bmi[2]; } } @@ -1769,14 +1748,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int case V_PRED: case H_PRED: case TM_PRED: - x->e_mbd.mbmi.ref_frame = INTRA_FRAME; + x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_build_intra_predictors_mby_ptr(&x->e_mbd); { macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ; rate2 += rate; rate_y = rate; distortion2 += distortion; - rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode]; + rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode]; rate2 += uv_intra_rate; rate_uv = uv_intra_rate_tokenonly; distortion2 += 
uv_intra_distortion; @@ -1811,13 +1790,13 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (cpi->sf.search_method == HEX) { - bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost); + bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; } else { - bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9 + bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -1836,7 +1815,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int num00--; else { - thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9 + thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9 if (thissme < bestsme) { @@ -1873,7 +1852,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int search_range = (search_range > cpi->sf.max_fs_radius) ? 
cpi->sf.max_fs_radius : search_range; { int sadpb = x->sadperbit16 >> 2; - thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost); + thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost); } // Barrier threshold to initiating full search @@ -1898,7 +1877,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (bestsme < INT_MAX) // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost); // normal mvc=11 - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost); + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -2082,7 +2061,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int //all_rates[mode_index] = rate2; //all_dist[mode_index] = distortion2; - if ((x->e_mbd.mbmi.ref_frame == INTRA_FRAME) && (this_rd < *returnintra)) + if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && (this_rd < *returnintra)) { *returnintra = this_rd ; } @@ -2092,17 +2071,18 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int { // Note index of best mode so far best_mode_index = mode_index; - x->e_mbd.mbmi.force_no_skip = force_no_skip; + x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip; if (this_mode <= B_PRED) { - x->e_mbd.mbmi.uv_mode = uv_intra_mode; + x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode; } *returnrate = rate2; *returndistortion = distortion2; best_rd = this_rd; - vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, 
sizeof(MB_MODE_INFO)); + vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO)); for (i = 0; i < 16; i++) { @@ -2183,28 +2163,30 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int best_mbmode.partitioning = 0; best_mbmode.dc_diff = 0; - vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); for (i = 0; i < 16; i++) { vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO)); } - x->e_mbd.mbmi.mv.as_int = 0; + x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; return best_rd; } // macroblock modes - vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); + vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO)); for (i = 0; i < 16; i++) { vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO)); } - x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv; + x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv; return best_rd; } diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h index c6eae4b92..fb74dd431 100644 --- a/vp8/encoder/rdopt.h +++ b/vp8/encoder/rdopt.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c index 74c6bd76a..5eaca5935 100644 --- a/vp8/encoder/sad_c.c +++ b/vp8/encoder/sad_c.c @@ -1,19 +1,20 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #include unsigned int vp8_sad16x16_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad) { @@ -38,9 +39,9 @@ unsigned int vp8_sad16x16_c( static __inline unsigned int sad_mx_n_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int m, int n) @@ -65,9 +66,9 @@ unsigned int sad_mx_n_c( unsigned int vp8_sad8x8_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad) { @@ -77,9 +78,9 @@ unsigned int vp8_sad8x8_c( unsigned int vp8_sad16x8_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad) { @@ -90,9 +91,9 @@ unsigned int vp8_sad16x8_c( unsigned int vp8_sad8x16_c( - 
unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad) { @@ -102,9 +103,9 @@ unsigned int vp8_sad8x16_c( unsigned int vp8_sad4x4_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, int max_sad) { @@ -113,9 +114,9 @@ unsigned int vp8_sad4x4_c( } void vp8_sad16x16x3_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array ) @@ -125,10 +126,28 @@ void vp8_sad16x16x3_c( sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp8_sad16x8x3_c( - unsigned char *src_ptr, +void vp8_sad16x16x8_c( + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + +void vp8_sad16x8x3_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, int ref_stride, 
unsigned int *sad_array ) @@ -138,10 +157,28 @@ void vp8_sad16x8x3_c( sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp8_sad8x8x3_c( - unsigned char *src_ptr, +void vp8_sad16x8x8_c( + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + +void vp8_sad8x8x3_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array ) @@ -151,10 +188,28 @@ void vp8_sad8x8x3_c( sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp8_sad8x16x3_c( - unsigned char *src_ptr, +void vp8_sad8x8x8_c( + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned 
short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + +void vp8_sad8x16x3_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array ) @@ -164,10 +219,28 @@ void vp8_sad8x16x3_c( sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } -void vp8_sad4x4x3_c( - unsigned char *src_ptr, +void vp8_sad8x16x8_c( + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + +void vp8_sad4x4x3_c( + 
const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array ) @@ -177,8 +250,26 @@ void vp8_sad4x4x3_c( sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); } +void vp8_sad4x4x8_c( + const unsigned char *src_ptr, + int src_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array +) +{ + sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr , ref_stride, 0x7fffffff); + sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff); + sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff); + sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff); + sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff); + sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff); + sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff); + sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff); +} + void vp8_sad16x16x4d_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, @@ -192,7 +283,7 @@ void vp8_sad16x16x4d_c( } void vp8_sad16x8x4d_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, @@ -206,7 +297,7 @@ void vp8_sad16x8x4d_c( } void vp8_sad8x8x4d_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, @@ -220,7 +311,7 @@ void vp8_sad8x8x4d_c( } void vp8_sad8x16x4d_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, @@ -234,7 +325,7 @@ void vp8_sad8x16x4d_c( } void 
vp8_sad4x4x4d_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, diff --git a/vp8/common/segmentation_common.c b/vp8/encoder/segmentation.c similarity index 54% rename from vp8/common/segmentation_common.c rename to vp8/encoder/segmentation.c index 72b8c874b..fc0967db3 100644 --- a/vp8/common/segmentation_common.c +++ b/vp8/encoder/segmentation.c @@ -1,29 +1,30 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "segmentation_common.h" +#include "segmentation.h" #include "vpx_mem/vpx_mem.h" -void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd) +void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) { int mb_row, mb_col; MODE_INFO *this_mb_mode_info = cm->mi; - xd->gf_active_ptr = (signed char *)cm->gf_active_flags; + x->gf_active_ptr = (signed char *)cpi->gf_active_flags; if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) { // Reset Gf useage monitors - vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); - cm->gf_active_count = cm->mb_rows * cm->mb_cols; + vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); + cpi->gf_active_count = cm->mb_rows * cm->mb_cols; } else { @@ -39,19 +40,19 @@ void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd) // else if using non 0,0 motion or intra modes then clear flag if it is currently set if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) { - if (*(xd->gf_active_ptr) == 0) + if (*(x->gf_active_ptr) == 0) { - *(xd->gf_active_ptr) = 1; - cm->gf_active_count ++; + *(x->gf_active_ptr) = 1; + cpi->gf_active_count ++; } } - else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr)) + else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(x->gf_active_ptr)) { - *(xd->gf_active_ptr) = 0; - cm->gf_active_count--; + *(x->gf_active_ptr) = 0; + cpi->gf_active_count--; } - xd->gf_active_ptr++; // Step onto next entry + x->gf_active_ptr++; // Step onto next entry this_mb_mode_info++; // skip to next mb } diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h new file mode 100644 index 000000000..216e194c2 --- /dev/null +++ b/vp8/encoder/segmentation.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#include "string.h" +#include "blockd.h" +#include "onyx_int.h" + +extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x); diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c index df214a89f..4ebcba1a1 100644 --- a/vp8/encoder/ssim.c +++ b/vp8/encoder/ssim.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c new file mode 100644 index 000000000..abc50270c --- /dev/null +++ b/vp8/encoder/temporal_filter.c @@ -0,0 +1,651 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#include "onyxc_int.h" +#include "onyx_int.h" +#include "systemdependent.h" +#include "quantize.h" +#include "alloccommon.h" +#include "mcomp.h" +#include "firstpass.h" +#include "psnr.h" +#include "vpx_scale/vpxscale.h" +#include "extend.h" +#include "ratectrl.h" +#include "quant_common.h" +#include "segmentation.h" +#include "g_common.h" +#include "vpx_scale/yv12extend.h" +#include "postproc.h" +#include "vpx_mem/vpx_mem.h" +#include "swapyv12buffer.h" +#include "threading.h" +#include "vpx_ports/vpx_timer.h" +#include "vpxerrors.h" + +#include +#include + +#define ALT_REF_MC_ENABLED 1 // dis/enable MC in AltRef filtering +#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering + +#define USE_FILTER_LUT 1 +#if VP8_TEMPORAL_ALT_REF + +#if USE_FILTER_LUT +static int modifier_lut[7][19] = +{ + // Strength=0 + {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + // Strength=1 + {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + // Strength=2 + {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + // Strength=3 + {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + // Strength=4 + {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + // Strength=5 + {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0}, + // Strength=6 + {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1} +}; +#endif +static void build_predictors_mb +( + MACROBLOCKD *x, + unsigned char *y_mb_ptr, + unsigned char *u_mb_ptr, + unsigned char *v_mb_ptr, + int stride, + int mv_row, + int mv_col, + unsigned char *pred +) +{ + int offset; + unsigned char *yptr, *uptr, *vptr; + + // Y + yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); + + if ((mv_row | mv_col) & 7) + { +// vp8_sixtap_predict16x16_c(yptr, stride, +// mv_col & 7, mv_row & 7, &pred[0], 16); + x->subpixel_predict16x16(yptr, stride, + mv_col & 7, mv_row & 7, &pred[0], 16); + } + else + { + //vp8_copy_mem16x16_c (yptr, 
stride, &pred[0], 16); + RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16); + } + + // U & V + mv_row >>= 1; + mv_col >>= 1; + stride >>= 1; + offset = (mv_row >> 3) * stride + (mv_col >> 3); + uptr = u_mb_ptr + offset; + vptr = v_mb_ptr + offset; + + if ((mv_row | mv_col) & 7) + { + x->subpixel_predict8x8(uptr, stride, + mv_col & 7, mv_row & 7, &pred[256], 8); + x->subpixel_predict8x8(vptr, stride, + mv_col & 7, mv_row & 7, &pred[320], 8); + } + else + { + RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8); + RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8); + } +} +static void apply_temporal_filter +( + unsigned char *frame1, + unsigned int stride, + unsigned char *frame2, + unsigned int block_size, + int strength, + int filter_weight, + unsigned int *accumulator, + unsigned int *count +) +{ + int i, j, k; + int modifier; + int byte = 0; + +#if USE_FILTER_LUT + int *lut = modifier_lut[strength]; +#endif + + for (i = 0,k = 0; i < block_size; i++) + { + for (j = 0; j < block_size; j++, k++) + { + + int src_byte = frame1[byte]; + int pixel_value = *frame2++; + +#if USE_FILTER_LUT + // LUT implementation -- + // improves precision of filter + modifier = abs(src_byte-pixel_value); + modifier = modifier>18 ? 
0 : lut[modifier]; +#else + modifier = src_byte; + modifier -= pixel_value; + modifier *= modifier; + modifier >>= strength; + modifier *= 3; + + if (modifier > 16) + modifier = 16; + + modifier = 16 - modifier; +#endif + modifier *= filter_weight; + + count[k] += modifier; + accumulator[k] += modifier * pixel_value; + + byte++; + } + + byte += stride - block_size; + } +} + +#if ALT_REF_MC_ENABLED +static int dummy_cost[2*mv_max+1]; + +static int find_matching_mb +( + VP8_COMP *cpi, + YV12_BUFFER_CONFIG *arf_frame, + YV12_BUFFER_CONFIG *frame_ptr, + int mb_offset, + int error_thresh +) +{ + MACROBLOCK *x = &cpi->mb; + int thissme; + int step_param; + int further_steps; + int n = 0; + int sadpb = x->sadperbit16; + int bestsme = INT_MAX; + int num00 = 0; + + BLOCK *b = &x->block[0]; + BLOCKD *d = &x->e_mbd.block[0]; + MV best_ref_mv1 = {0,0}; + + int *mvcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; + int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] }; + + // Save input state + unsigned char **base_src = b->base_src; + int src = b->src; + int src_stride = b->src_stride; + unsigned char **base_pre = d->base_pre; + int pre = d->pre; + int pre_stride = d->pre_stride; + + // Setup frame pointers + b->base_src = &arf_frame->y_buffer; + b->src_stride = arf_frame->y_stride; + b->src = mb_offset; + + d->base_pre = &frame_ptr->y_buffer; + d->pre_stride = frame_ptr->y_stride; + d->pre = mb_offset; + + // Further step/diamond searches as necessary + if (cpi->Speed < 8) + { + step_param = cpi->sf.first_step + + ((cpi->Speed > 5) ? 
1 : 0); + further_steps = + (cpi->sf.max_step_search_steps - 1)-step_param; + } + else + { + step_param = cpi->sf.first_step + 2; + further_steps = 0; + } + + if (1/*cpi->sf.search_method == HEX*/) + { + // TODO Check that the 16x16 vf & sdf are selected here + bestsme = vp8_hex_search(x, b, d, + &best_ref_mv1, &d->bmi.mv.as_mv, + step_param, + sadpb/*x->errorperbit*/, + &num00, &cpi->fn_ptr[BLOCK_16X16], + mvsadcost, mvcost); + } + else + { + int mv_x, mv_y; + + bestsme = cpi->diamond_search_sad(x, b, d, + &best_ref_mv1, &d->bmi.mv.as_mv, + step_param, + sadpb / 2/*x->errorperbit*/, + &num00, &cpi->fn_ptr[BLOCK_16X16], + mvsadcost, mvcost); //sadpb < 9 + + // Further step/diamond searches as necessary + n = 0; + //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; + + n = num00; + num00 = 0; + + while (n < further_steps) + { + n++; + + if (num00) + num00--; + else + { + thissme = cpi->diamond_search_sad(x, b, d, + &best_ref_mv1, &d->bmi.mv.as_mv, + step_param + n, + sadpb / 4/*x->errorperbit*/, + &num00, &cpi->fn_ptr[BLOCK_16X16], + mvsadcost, mvcost); //sadpb = 9 + + if (thissme < bestsme) + { + bestsme = thissme; + mv_y = d->bmi.mv.as_mv.row; + mv_x = d->bmi.mv.as_mv.col; + } + else + { + d->bmi.mv.as_mv.row = mv_y; + d->bmi.mv.as_mv.col = mv_x; + } + } + } + } + +#if ALT_REF_SUBPEL_ENABLED + // Try sub-pixel MC? 
+ //if (bestsme > error_thresh && bestsme < INT_MAX) + { + bestsme = cpi->find_fractional_mv_step(x, b, d, + &d->bmi.mv.as_mv, &best_ref_mv1, + x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], + mvcost); + } +#endif + + // Save input state + b->base_src = base_src; + b->src = src; + b->src_stride = src_stride; + d->base_pre = base_pre; + d->pre = pre; + d->pre_stride = pre_stride; + + return bestsme; +} +#endif + +static void vp8cx_temp_blur1_c +( + VP8_COMP *cpi, + int frame_count, + int alt_ref_index, + int strength +) +{ + int byte; + int frame; + int mb_col, mb_row; + unsigned int filter_weight[MAX_LAG_BUFFERS]; + unsigned char *mm_ptr = cpi->fp_motion_map; + int cols = cpi->common.mb_cols; + int rows = cpi->common.mb_rows; + int MBs = cpi->common.MBs; + int mb_y_offset = 0; + int mb_uv_offset = 0; + unsigned int accumulator[384]; + unsigned int count[384]; + MACROBLOCKD *mbd = &cpi->mb.e_mbd; + YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index]; + unsigned char *dst1, *dst2; + DECLARE_ALIGNED(16, unsigned char, predictor[384]); + + // Save input state + unsigned char *y_buffer = mbd->pre.y_buffer; + unsigned char *u_buffer = mbd->pre.u_buffer; + unsigned char *v_buffer = mbd->pre.v_buffer; + + if (!cpi->use_weighted_temporal_filter) + { + // Temporal filtering is unweighted + for (frame = 0; frame < frame_count; frame++) + filter_weight[frame] = 1; + } + + for (mb_row = 0; mb_row < rows; mb_row++) + { +#if ALT_REF_MC_ENABLED + // Reduced search extent by 3 for 6-tap filter & smaller UMV border + cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19)); + cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + + (VP8BORDERINPIXELS - 19); +#endif + + for (mb_col = 0; mb_col < cols; mb_col++) + { + int i, j, k, w; + int weight_cap; + int stride; + + vpx_memset(accumulator, 0, 384*sizeof(unsigned int)); + vpx_memset(count, 0, 384*sizeof(unsigned int)); + +#if ALT_REF_MC_ENABLED + // Reduced search extent by 3 for 6-tap filter & smaller UMV border + 
cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19)); + cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) + + (VP8BORDERINPIXELS - 19); +#endif + + // Read & process macroblock weights from motion map + if (cpi->use_weighted_temporal_filter) + { + weight_cap = 2; + + for (frame = alt_ref_index-1; frame >= 0; frame--) + { + w = *(mm_ptr + (frame+1)*MBs); + filter_weight[frame] = w < weight_cap ? w : weight_cap; + weight_cap = w; + } + + filter_weight[alt_ref_index] = 2; + + weight_cap = 2; + + for (frame = alt_ref_index+1; frame < frame_count; frame++) + { + w = *(mm_ptr + frame*MBs); + filter_weight[frame] = w < weight_cap ? w : weight_cap; + weight_cap = w; + } + + } + + for (frame = 0; frame < frame_count; frame++) + { + int err; + + if (cpi->frames[frame] == NULL) + continue; + + mbd->block[0].bmi.mv.as_mv.row = 0; + mbd->block[0].bmi.mv.as_mv.col = 0; + +#if ALT_REF_MC_ENABLED + //if (filter_weight[frame] == 0) + { +#define THRESH_LOW 10000 +#define THRESH_HIGH 20000 + + // Correlation has been lost try MC + err = find_matching_mb ( cpi, + cpi->frames[alt_ref_index], + cpi->frames[frame], + mb_y_offset, + THRESH_LOW ); + + if (filter_weight[frame] < 2) + { + // Set weight depending on error + filter_weight[frame] = errframes[frame]->y_buffer + mb_y_offset, + cpi->frames[frame]->u_buffer + mb_uv_offset, + cpi->frames[frame]->v_buffer + mb_uv_offset, + cpi->frames[frame]->y_stride, + mbd->block[0].bmi.mv.as_mv.row, + mbd->block[0].bmi.mv.as_mv.col, + predictor ); + + // Apply the filter (YUV) + apply_temporal_filter ( f->y_buffer + mb_y_offset, + f->y_stride, + predictor, + 16, + strength, + filter_weight[frame], + accumulator, + count ); + + apply_temporal_filter ( f->u_buffer + mb_uv_offset, + f->uv_stride, + predictor + 256, + 8, + strength, + filter_weight[frame], + accumulator + 256, + count + 256 ); + + apply_temporal_filter ( f->v_buffer + mb_uv_offset, + f->uv_stride, + predictor + 320, + 8, + strength, + filter_weight[frame], + 
accumulator + 320, + count + 320 ); + } + } + + // Normalize filter output to produce AltRef frame + dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer; + stride = cpi->alt_ref_buffer.source_buffer.y_stride; + byte = mb_y_offset; + for (i = 0,k = 0; i < 16; i++) + { + for (j = 0; j < 16; j++, k++) + { + unsigned int pval = accumulator[k] + (count[k] >> 1); + pval *= cpi->fixed_divide[count[k]]; + pval >>= 19; + + dst1[byte] = (unsigned char)pval; + + // move to next pixel + byte++; + } + + byte += stride - 16; + } + + dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer; + dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer; + stride = cpi->alt_ref_buffer.source_buffer.uv_stride; + byte = mb_uv_offset; + for (i = 0,k = 256; i < 8; i++) + { + for (j = 0; j < 8; j++, k++) + { + int m=k+64; + + // U + unsigned int pval = accumulator[k] + (count[k] >> 1); + pval *= cpi->fixed_divide[count[k]]; + pval >>= 19; + dst1[byte] = (unsigned char)pval; + + // V + pval = accumulator[m] + (count[m] >> 1); + pval *= cpi->fixed_divide[count[m]]; + pval >>= 19; + dst2[byte] = (unsigned char)pval; + + // move to next pixel + byte++; + } + + byte += stride - 8; + } + + mm_ptr++; + mb_y_offset += 16; + mb_uv_offset += 8; + } + + mb_y_offset += 16*f->y_stride-f->y_width; + mb_uv_offset += 8*f->uv_stride-f->uv_width; + } + + // Restore input state + mbd->pre.y_buffer = y_buffer; + mbd->pre.u_buffer = u_buffer; + mbd->pre.v_buffer = v_buffer; +} + +void vp8cx_temp_filter_c +( + VP8_COMP *cpi +) +{ + int frame = 0; + + int num_frames_backward = 0; + int num_frames_forward = 0; + int frames_to_blur_backward = 0; + int frames_to_blur_forward = 0; + int frames_to_blur = 0; + int start_frame = 0; + unsigned int filtered = 0; + + int strength = cpi->oxcf.arnr_strength; + + int blur_type = cpi->oxcf.arnr_type; + + int max_frames = cpi->active_arnr_frames; + + num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index; + + if (num_frames_backward < 0) + num_frames_backward += 
cpi->oxcf.lag_in_frames; + + num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1); + + switch (blur_type) + { + case 1: + ///////////////////////////////////////// + // Backward Blur + + frames_to_blur_backward = num_frames_backward; + + if (frames_to_blur_backward >= max_frames) + frames_to_blur_backward = max_frames - 1; + + frames_to_blur = frames_to_blur_backward + 1; + break; + + case 2: + ///////////////////////////////////////// + // Forward Blur + + frames_to_blur_forward = num_frames_forward; + + if (frames_to_blur_forward >= max_frames) + frames_to_blur_forward = max_frames - 1; + + frames_to_blur = frames_to_blur_forward + 1; + break; + + case 3: + default: + ///////////////////////////////////////// + // Center Blur + frames_to_blur_forward = num_frames_forward; + frames_to_blur_backward = num_frames_backward; + + if (frames_to_blur_forward > frames_to_blur_backward) + frames_to_blur_forward = frames_to_blur_backward; + + if (frames_to_blur_backward > frames_to_blur_forward) + frames_to_blur_backward = frames_to_blur_forward; + + // When max_frames is even we have 1 more frame backward than forward + if (frames_to_blur_forward > (max_frames - 1) / 2) + frames_to_blur_forward = ((max_frames - 1) / 2); + + if (frames_to_blur_backward > (max_frames / 2)) + frames_to_blur_backward = (max_frames / 2); + + frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1; + break; + } + + start_frame = (cpi->last_alt_ref_sei + + frames_to_blur_forward) % cpi->oxcf.lag_in_frames; + +#ifdef DEBUGFWG + // DEBUG FWG + printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d" + , max_frames + , num_frames_backward + , num_frames_forward + , frames_to_blur + , frames_to_blur_backward + , frames_to_blur_forward + , cpi->source_encode_index + , cpi->last_alt_ref_sei + , start_frame); +#endif + + // Setup frame pointers, NULL indicates frame not included in filter + vpx_memset(cpi->frames, 0, 
max_frames*sizeof(YV12_BUFFER_CONFIG *)); + for (frame = 0; frame < frames_to_blur; frame++) + { + int which_buffer = start_frame - frame; + + if (which_buffer < 0) + which_buffer += cpi->oxcf.lag_in_frames; + + cpi->frames[frames_to_blur-1-frame] + = &cpi->src_buffer[which_buffer].source_buffer; + } + + vp8cx_temp_blur1_c ( + cpi, + frames_to_blur, + frames_to_blur_backward, + strength ); +} +#endif diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h new file mode 100644 index 000000000..f70e8c01e --- /dev/null +++ b/vp8/encoder/temporal_filter.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef __INC_VP8_TEMPORAL_FILTER_H +#define __INC_VP8_TEMPORAL_FILTER_H + +#include "onyx_int.h" + +void vp8cx_temp_filter_c(VP8_COMP *cpi); + +#endif // __INC_VP8_TEMPORAL_FILTER_H diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 33ddd64e7..e4da83379 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -23,12 +24,12 @@ _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; #endif void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ; -void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x); +void vp8_fix_contexts(MACROBLOCKD *x); -TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2]; -TOKENEXTRA *vp8_dct_value_tokens_ptr; +TOKENVALUE vp8_dct_value_tokens[DCT_MAX_VALUE*2]; +const TOKENVALUE *vp8_dct_value_tokens_ptr; int vp8_dct_value_cost[DCT_MAX_VALUE*2]; -int *vp8_dct_value_cost_ptr; +const int *vp8_dct_value_cost_ptr; #if 0 int skip_true_count = 0; int skip_false_count = 0; @@ -36,7 +37,7 @@ int skip_false_count = 0; static void fill_value_tokens() { - TOKENEXTRA *const t = vp8_dct_value_tokens + DCT_MAX_VALUE; + TOKENVALUE *const t = vp8_dct_value_tokens + DCT_MAX_VALUE; vp8_extra_bit_struct *const e = vp8_extra_bits; int i = -DCT_MAX_VALUE; @@ -196,86 +197,40 @@ static void tokenize1st_order_b *a = *l = pt; } -#if 0 -void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) + + +static int mb_is_skippable(MACROBLOCKD *x) { - //int i; - ENTROPY_CONTEXT **const A = x->above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; - int plane_type; - int b; + int has_y2_block; + int skip = 1; + int i = 0; - TOKENEXTRA *start = *t; - TOKENEXTRA *tp = *t; - - x->mbmi.dc_diff = 1; - - vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts)); - - if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV) + has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED + && x->mode_info_context->mbmi.mode != SPLITMV); + if (has_y2_block) { - plane_type = 3; - } - else - { - tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type, - A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi); - plane_type = 0; - + for (i = 0; i < 16; i++) + skip &= (x->block[i].eob < 2); } - for 
(b = 0; b < 16; b++) - tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); + for (; i < 24 + has_y2_block; i++) + skip &= (!x->block[i].eob); - for (b = 16; b < 24; b++) - tokenize1st_order_b(x->block + b, t, 2, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); - - if (cpi->common.mb_no_coeff_skip) - { - x->mbmi.mb_skip_coeff = 1; - - while ((tp != *t) && x->mbmi.mb_skip_coeff) - { - x->mbmi.mb_skip_coeff = (x->mbmi.mb_skip_coeff && (tp->Token == DCT_EOB_TOKEN)); - tp ++; - } - - if (x->mbmi.mb_skip_coeff == 1) - { - x->mbmi.dc_diff = 0; - //redo the coutnts - vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts)); - - *t = start; - cpi->skip_true_count++; - - //skip_true_count++; - } - else - { - - cpi->skip_false_count++; - //skip_false_count++; - } - } + return skip; } -#else + + void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { - //int i; - ENTROPY_CONTEXT **const A = x->above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; int plane_type; int b; TOKENEXTRA *start = *t; TOKENEXTRA *tp = *t; - x->mbmi.dc_diff = 1; + x->mode_info_context->mbmi.dc_diff = 1; #if 0 @@ -290,7 +245,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) #if 1 - if (x->mbmi.mb_skip_coeff) + x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x); + if (x->mode_info_context->mbmi.mb_skip_coeff) { cpi->skip_true_count++; @@ -299,13 +255,13 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) vp8_stuff_mb(cpi, x, t) ; else { - vp8_fix_contexts(cpi, x); + vp8_fix_contexts(x); } - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) - x->mbmi.dc_diff = 0; + if (x->mode_info_context->mbmi.mode != 
B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + x->mode_info_context->mbmi.dc_diff = 0; else - x->mbmi.dc_diff = 1; + x->mode_info_context->mbmi.dc_diff = 1; return; @@ -314,59 +270,30 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) cpi->skip_false_count++; #endif #if 0 - - if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV) - { - int i, skip = 1; - - for (i = 0; i < 24; i++) - skip &= (!x->block[i].eob); - - if (skip != x->mbmi.mb_skip_coeff) - skip += 0; - - x->mbmi.mb_skip_coeff = skip; - } - else - { - int i, skip = 1; - - for (i = 0; i < 16; i++) - skip &= (x->block[i].eob < 2); - - for (i = 16; i < 25; i++) - skip &= (!x->block[i].eob); - - if (skip != x->mbmi.mb_skip_coeff) - skip += 0; - - x->mbmi.mb_skip_coeff = skip; - } - vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts)); #endif - if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV) + if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV) { plane_type = 3; } else { tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type, - A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi); + A + vp8_block2above[24], L + vp8_block2left[24], cpi); plane_type = 0; } for (b = 0; b < 16; b++) tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); for (b = 16; b < 24; b++) tokenize1st_order_b(x->block + b, t, 2, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); #if 0 @@ -405,7 +332,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) #endif } -#endif + #ifdef ENTROPY_STATS @@ -580,57 +507,45 @@ void stuff1st_order_buv void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { - //int i; - 
ENTROPY_CONTEXT **const A = x->above_context; - ENTROPY_CONTEXT(* const L)[4] = x->left_context; + ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; int plane_type; int b; stuff2nd_order_b(x->block + 24, t, 1, x->frame_type, - A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi); + A + vp8_block2above[24], L + vp8_block2left[24], cpi); plane_type = 0; - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) - x->mbmi.dc_diff = 0; + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) + x->mode_info_context->mbmi.dc_diff = 0; else - x->mbmi.dc_diff = 1; + x->mode_info_context->mbmi.dc_diff = 1; for (b = 0; b < 16; b++) stuff1st_order_b(x->block + b, t, plane_type, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); for (b = 16; b < 24; b++) stuff1st_order_buv(x->block + b, t, 2, x->frame_type, - A[vp8_block2context[b]] + vp8_block2above[b], - L[vp8_block2context[b]] + vp8_block2left[b], cpi); + A + vp8_block2above[b], + L + vp8_block2left[b], cpi); } -void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x) +void vp8_fix_contexts(MACROBLOCKD *x) { - x->left_context[Y1CONTEXT][0] = 0; - x->left_context[Y1CONTEXT][1] = 0; - x->left_context[Y1CONTEXT][2] = 0; - x->left_context[Y1CONTEXT][3] = 0; - x->left_context[UCONTEXT][0] = 0; - x->left_context[VCONTEXT][0] = 0; - x->left_context[UCONTEXT][1] = 0; - x->left_context[VCONTEXT][1] = 0; - - x->above_context[Y1CONTEXT][0] = 0; - x->above_context[Y1CONTEXT][1] = 0; - x->above_context[Y1CONTEXT][2] = 0; - x->above_context[Y1CONTEXT][3] = 0; - x->above_context[UCONTEXT][0] = 0; - x->above_context[VCONTEXT][0] = 0; - x->above_context[UCONTEXT][1] = 0; - x->above_context[VCONTEXT][1] = 0; - - if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV) + /* Clear entropy contexts for Y2 
blocks */ + if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { - x->left_context[Y2CONTEXT][0] = 0; - x->above_context[Y2CONTEXT][0] = 0; + vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } + else + { + vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1); + } + } diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h index 02aacc222..01e8ec6d7 100644 --- a/vp8/encoder/tokenize.h +++ b/vp8/encoder/tokenize.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -16,6 +17,12 @@ void vp8_tokenize_initialize(); +typedef struct +{ + short Token; + short Extra; +} TOKENVALUE; + typedef struct { int Token; @@ -34,5 +41,11 @@ void print_context_counters(); extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens]; #endif +extern const int *vp8_dct_value_cost_ptr; +/* TODO: The Token field should be broken out into a separate char array to + * improve cache locality, since it's needed for costing when the rest of the + * fields are not. 
+ */ +extern const TOKENVALUE *vp8_dct_value_tokens_ptr; #endif /* tokenize_h */ diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c index e398044db..03967c835 100644 --- a/vp8/encoder/treewriter.c +++ b/vp8/encoder/treewriter.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h index 05ac74cb7..88096d875 100644 --- a/vp8/encoder/treewriter.h +++ b/vp8/encoder/treewriter.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index b3b55c319..5befd3b86 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,9 +15,9 @@ #define prototype_sad(sym)\ unsigned int (sym)\ (\ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ - unsigned char *ref_ptr, \ + const unsigned char *ref_ptr, \ int ref_stride, \ int max_sad\ ) @@ -24,17 +25,27 @@ #define prototype_sad_multi_same_address(sym)\ void (sym)\ (\ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ - unsigned char *ref_ptr, \ + const unsigned char *ref_ptr, \ int ref_stride, \ unsigned int *sad_array\ ) +#define prototype_sad_multi_same_address_1(sym)\ + void (sym)\ + (\ + const unsigned char *src_ptr, \ + int source_stride, \ + const unsigned char *ref_ptr, \ + int ref_stride, \ + unsigned short *sad_array\ + ) + #define prototype_sad_multi_dif_address(sym)\ void (sym)\ (\ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ unsigned char *ref_ptr[4], \ int ref_stride, \ @@ -44,9 +55,9 @@ #define prototype_variance(sym) \ unsigned int (sym) \ (\ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ - 
unsigned char *ref_ptr, \ + const unsigned char *ref_ptr, \ int ref_stride, \ unsigned int *sse\ ) @@ -54,9 +65,9 @@ #define prototype_variance2(sym) \ unsigned int (sym) \ (\ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ - unsigned char *ref_ptr, \ + const unsigned char *ref_ptr, \ int ref_stride, \ unsigned int *sse,\ int *sum\ @@ -65,17 +76,17 @@ #define prototype_subpixvariance(sym) \ unsigned int (sym) \ ( \ - unsigned char *src_ptr, \ + const unsigned char *src_ptr, \ int source_stride, \ int xoffset, \ int yoffset, \ - unsigned char *ref_ptr, \ + const unsigned char *ref_ptr, \ int Refstride, \ unsigned int *sse \ ); -#define prototype_getmbss(sym) unsigned int (sym)(short *) +#define prototype_getmbss(sym) unsigned int (sym)(const short *) #if ARCH_X86 || ARCH_X86_64 #include "x86/variance_x86.h" @@ -137,6 +148,31 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3); #endif extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3); +#ifndef vp8_variance_sad16x16x8 +#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8); + +#ifndef vp8_variance_sad16x8x8 +#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8); + +#ifndef vp8_variance_sad8x8x8 +#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8); + +#ifndef vp8_variance_sad8x16x8 +#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8); + +#ifndef vp8_variance_sad4x4x8 +#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c +#endif +extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8); + //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #ifndef vp8_variance_sad16x16x4d @@ -218,6 +254,21 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8); #endif extern 
prototype_subpixvariance(vp8_variance_subpixvar16x16); +#ifndef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c +#endif +extern prototype_variance(vp8_variance_halfpixvar16x16_h); + +#ifndef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c +#endif +extern prototype_variance(vp8_variance_halfpixvar16x16_v); + +#ifndef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c +#endif +extern prototype_variance(vp8_variance_halfpixvar16x16_hv); + #ifndef vp8_variance_subpixmse16x16 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c #endif @@ -258,6 +309,7 @@ extern prototype_sad(vp8_variance_get4x4sse_cs); typedef prototype_sad(*vp8_sad_fn_t); typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); +typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t); typedef prototype_variance(*vp8_variance_fn_t); typedef prototype_variance2(*vp8_variance2_fn_t); @@ -282,6 +334,9 @@ typedef struct vp8_subpixvariance_fn_t subpixvar8x16; vp8_subpixvariance_fn_t subpixvar16x8; vp8_subpixvariance_fn_t subpixvar16x16; + vp8_variance_fn_t halfpixvar16x16_h; + vp8_variance_fn_t halfpixvar16x16_v; + vp8_variance_fn_t halfpixvar16x16_hv; vp8_subpixvariance_fn_t subpixmse16x16; vp8_getmbss_fn_t getmbss; @@ -298,6 +353,12 @@ typedef struct vp8_sad_multi_fn_t sad8x8x3; vp8_sad_multi_fn_t sad4x4x3; + vp8_sad_multi1_fn_t sad16x16x8; + vp8_sad_multi1_fn_t sad16x8x8; + vp8_sad_multi1_fn_t sad8x16x8; + vp8_sad_multi1_fn_t sad8x8x8; + vp8_sad_multi1_fn_t sad4x4x8; + vp8_sad_multi_d_fn_t sad16x16x4d; vp8_sad_multi_d_fn_t sad16x8x4d; vp8_sad_multi_d_fn_t sad8x16x4d; @@ -308,11 +369,15 @@ typedef struct typedef struct { - vp8_sad_fn_t sdf; - vp8_sad_multi_fn_t sdx3f; - vp8_sad_multi_d_fn_t sdx4df; - vp8_variance_fn_t vf; + vp8_sad_fn_t sdf; + 
vp8_variance_fn_t vf; vp8_subpixvariance_fn_t svf; + vp8_variance_fn_t svf_halfpix_h; + vp8_variance_fn_t svf_halfpix_v; + vp8_variance_fn_t svf_halfpix_hv; + vp8_sad_multi_fn_t sdx3f; + vp8_sad_multi1_fn_t sdx8f; + vp8_sad_multi_d_fn_t sdx4df; } vp8_variance_fn_ptr_t; #if CONFIG_RUNTIME_CPU_DETECT @@ -321,7 +386,4 @@ typedef struct #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn #endif -/* TODO: Determine if this USEBILINEAR flag is necessary. */ -#define USEBILINEAR - #endif diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c index 85269b9d3..95ec96cec 100644 --- a/vp8/encoder/variance_c.c +++ b/vp8/encoder/variance_c.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -23,7 +24,6 @@ const int vp8_six_tap[8][6] = }; -#ifdef USEBILINEAR const int VP8_FILTER_WEIGHT = 128; const int VP8_FILTER_SHIFT = 7; const int vp8_bilinear_taps[8][2] = @@ -40,7 +40,7 @@ const int vp8_bilinear_taps[8][2] = unsigned int vp8_get_mb_ss_c ( - short *src_ptr + const short *src_ptr ) { unsigned int i = 0, sum = 0; @@ -57,9 +57,9 @@ unsigned int vp8_get_mb_ss_c void vp8_variance( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, int w, int h, @@ -89,9 +89,9 @@ void vp8_variance( unsigned int vp8_get8x8var_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum @@ -105,9 +105,9 @@ vp8_get8x8var_c unsigned int vp8_get16x16var_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum @@ -122,9 +122,9 @@ vp8_get16x16var_c unsigned int vp8_variance16x16_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -138,9 +138,9 @@ unsigned int vp8_variance16x16_c( } unsigned int vp8_variance8x16_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -154,9 +154,9 @@ unsigned int vp8_variance8x16_c( } unsigned int vp8_variance16x8_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -171,9 +171,9 @@ unsigned int vp8_variance16x8_c( unsigned int vp8_variance8x8_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, 
- unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -187,9 +187,9 @@ unsigned int vp8_variance8x8_c( } unsigned int vp8_variance4x4_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -204,9 +204,9 @@ unsigned int vp8_variance4x4_c( unsigned int vp8_mse16x16_c( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -249,7 +249,7 @@ unsigned int vp8_mse16x16_c( ****************************************************************************/ void vp8e_filter_block2d_bil_first_pass ( - unsigned char *src_ptr, + const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, int pixel_step, @@ -307,7 +307,7 @@ void vp8e_filter_block2d_bil_first_pass ****************************************************************************/ void vp8e_filter_block2d_bil_second_pass ( - unsigned short *src_ptr, + const unsigned short *src_ptr, unsigned char *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, @@ -365,7 +365,7 @@ void vp8e_filter_block2d_bil_second_pass ****************************************************************************/ void vp8e_filter_block2d_bil ( - unsigned char *src_ptr, + const unsigned char *src_ptr, unsigned char *output_ptr, unsigned int src_pixels_per_line, int *HFilter, @@ -386,11 +386,11 @@ void vp8e_filter_block2d_bil unsigned int vp8_sub_pixel_variance4x4_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -414,11 +414,11 @@ unsigned int vp8_sub_pixel_variance4x4_c unsigned int vp8_sub_pixel_variance8x8_c ( - unsigned char *src_ptr, + const 
unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -438,11 +438,11 @@ unsigned int vp8_sub_pixel_variance8x8_c unsigned int vp8_sub_pixel_variance16x16_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -460,13 +460,50 @@ unsigned int vp8_sub_pixel_variance16x16_c return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse); } + +unsigned int vp8_variance_halfpixvar16x16_h_c( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_c( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_c( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4, + ref_ptr, recon_stride, sse); +} + + unsigned int vp8_sub_pixel_mse16x16_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -477,11 +514,11 @@ unsigned int vp8_sub_pixel_mse16x16_c unsigned int vp8_sub_pixel_variance16x8_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const 
unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -501,11 +538,11 @@ unsigned int vp8_sub_pixel_variance16x8_c unsigned int vp8_sub_pixel_variance8x16_c ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -524,4 +561,3 @@ unsigned int vp8_sub_pixel_variance8x16_c return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse); } -#endif diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c deleted file mode 100644 index 186ee6856..000000000 --- a/vp8/encoder/x86/csystemdependent.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
- */ - - -#include "variance.h" -#include "onyx_int.h" - -SADFunction *vp8_sad16x16; -SADFunction *vp8_sad16x8; -SADFunction *vp8_sad8x16; -SADFunction *vp8_sad8x8; -SADFunction *vp8_sad4x4; - -variance_function *vp8_variance4x4; -variance_function *vp8_variance8x8; -variance_function *vp8_variance8x16; -variance_function *vp8_variance16x8; -variance_function *vp8_variance16x16; - - -variance_function *vp8_mse16x16; - -sub_pixel_variance_function *vp8_sub_pixel_variance4x4; -sub_pixel_variance_function *vp8_sub_pixel_variance8x8; -sub_pixel_variance_function *vp8_sub_pixel_variance8x16; -sub_pixel_variance_function *vp8_sub_pixel_variance16x8; -sub_pixel_variance_function *vp8_sub_pixel_variance16x16; - -int (*vp8_block_error)(short *, short *); -int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc); -void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride); - -extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride); -extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride); - -extern int vp8_block_error_c(short *, short *); -extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc); - -extern int vp8_block_error_mmx(short *, short *); -extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc); - -extern int vp8_block_error_xmm(short *, short *); -extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc); - - - -int (*vp8_mbuverror)(MACROBLOCK *mb); -unsigned int (*vp8_get_mb_ss)(short *); -void (*vp8_short_fdct4x4)(short *input, short *output, int pitch); -void (*vp8_short_fdct8x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch); -void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch); - -void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch); -void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -void (*vp8_fast_quantize_b)(BLOCK 
*b, BLOCKD *d); -unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); - -// c imports -extern int vp8_mbuverror_c(MACROBLOCK *mb); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch); -extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch); -extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch); -extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch); - - -extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch); -extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d); - -extern SADFunction vp8_sad16x16_c; -extern SADFunction vp8_sad16x8_c; -extern SADFunction vp8_sad8x16_c; -extern SADFunction vp8_sad8x8_c; -extern SADFunction vp8_sad4x4_c; - -extern SADFunction vp8_sad16x16_wmt; -extern SADFunction vp8_sad16x8_wmt; -extern SADFunction vp8_sad8x16_wmt; -extern SADFunction vp8_sad8x8_wmt; -extern SADFunction vp8_sad4x4_wmt; - -extern SADFunction vp8_sad16x16_mmx; -extern SADFunction vp8_sad16x8_mmx; -extern SADFunction vp8_sad8x16_mmx; -extern SADFunction vp8_sad8x8_mmx; -extern SADFunction vp8_sad4x4_mmx; - -extern variance_function vp8_variance16x16_c; -extern variance_function vp8_variance8x16_c; -extern variance_function vp8_variance16x8_c; 
-extern variance_function vp8_variance8x8_c; -extern variance_function vp8_variance4x4_c; -extern variance_function vp8_mse16x16_c; - -extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c; - -extern unsigned int vp8_get_mb_ss_c(short *); -extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); - -// mmx imports -extern int vp8_mbuverror_mmx(MACROBLOCK *mb); -extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d); -extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch); -extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride); -extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch); -extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch); -extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch); -extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch); -extern variance_function vp8_variance4x4_mmx; -extern variance_function vp8_variance8x8_mmx; -extern variance_function vp8_variance8x16_mmx; -extern variance_function vp8_variance16x8_mmx; -extern variance_function vp8_variance16x16_mmx; - -extern variance_function vp8_mse16x16_mmx; -extern 
sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx; - -extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -extern unsigned int vp8_get_mb_ss_mmx(short *); -extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride); - - -// wmt imports -extern int vp8_mbuverror_xmm(MACROBLOCK *mb); -extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d); -extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch); -extern variance_function vp8_variance4x4_wmt; -extern variance_function vp8_variance8x8_wmt; -extern variance_function vp8_variance8x16_wmt; -extern variance_function vp8_variance16x8_wmt; -extern variance_function vp8_variance16x16_wmt; - -extern variance_function vp8_mse16x16_wmt; -extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt; -extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt; -extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt; -extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride); -extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr); -extern unsigned int 
vp8_get8x8var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); -extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int source_stride, unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum); - -extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled); - -void vp8_cmachine_specific_config(void) -{ - int mmx_enabled; - int xmm_enabled; - int wmt_enabled; - - vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - - if (wmt_enabled) // Willamette - { - // Willamette instruction set available: - vp8_mbuverror = vp8_mbuverror_xmm; - vp8_fast_quantize_b = vp8_fast_quantize_b_sse; - vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; - vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; - vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx; - vp8_fast_fdct8x4 = vp8_fast_fdct8x4_wmt; - vp8_subtract_b = vp8_subtract_b_mmx; - vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; - vp8_variance4x4 = vp8_variance4x4_mmx; - vp8_variance8x8 = vp8_variance8x8_mmx; - vp8_variance8x16 = vp8_variance8x16_wmt; - vp8_variance16x8 = vp8_variance16x8_wmt; - vp8_variance16x16 = vp8_variance16x16_wmt; - vp8_mse16x16 = vp8_mse16x16_wmt; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_wmt; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_wmt; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_wmt; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_wmt; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_wmt; - vp8_get_mb_ss = vp8_get_mb_ss_sse2; - vp8_get16x16pred_error = vp8_get16x16pred_error_sse2; - vp8_get8x8var = vp8_get8x8var_sse2; - vp8_get16x16var = vp8_get16x16var_sse2; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_sad16x16 = vp8_sad16x16_wmt; - vp8_sad16x8 = vp8_sad16x8_wmt; - vp8_sad8x16 = vp8_sad8x16_wmt; - vp8_sad8x8 = vp8_sad8x8_wmt; - vp8_sad4x4 = vp8_sad4x4_wmt; - vp8_block_error = vp8_block_error_xmm; - vp8_mbblock_error = 
vp8_mbblock_error_xmm; - vp8_subtract_mby = vp8_subtract_mby_mmx; - - } - else if (mmx_enabled) - { - // MMX instruction set available: - vp8_mbuverror = vp8_mbuverror_mmx; - vp8_fast_quantize_b = vp8_fast_quantize_b_mmx; - vp8_short_fdct4x4 = vp8_short_fdct4x4_mmx; - vp8_short_fdct8x4 = vp8_short_fdct8x4_mmx; - vp8_fast_fdct4x4 = vp8_fast_fdct4x4_mmx; - vp8_fast_fdct8x4 = vp8_fast_fdct8x4_mmx; - vp8_subtract_b = vp8_subtract_b_mmx; - vp8_subtract_mbuv = vp8_subtract_mbuv_mmx; - vp8_variance4x4 = vp8_variance4x4_mmx; - vp8_variance8x8 = vp8_variance8x8_mmx; - vp8_variance8x16 = vp8_variance8x16_mmx; - vp8_variance16x8 = vp8_variance16x8_mmx; - vp8_variance16x16 = vp8_variance16x16_mmx; - vp8_mse16x16 = vp8_mse16x16_mmx; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_mmx; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_mmx; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_mmx; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_mmx; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_mmx; - vp8_get_mb_ss = vp8_get_mb_ss_mmx; - vp8_get16x16pred_error = vp8_get16x16pred_error_mmx; - vp8_get8x8var = vp8_get8x8var_mmx; - vp8_get16x16var = vp8_get16x16var_mmx; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_mmx; - vp8_sad16x16 = vp8_sad16x16_mmx; - vp8_sad16x8 = vp8_sad16x8_mmx; - vp8_sad8x16 = vp8_sad8x16_mmx; - vp8_sad8x8 = vp8_sad8x8_mmx; - vp8_sad4x4 = vp8_sad4x4_mmx; - vp8_block_error = vp8_block_error_mmx; - vp8_mbblock_error = vp8_mbblock_error_mmx; - vp8_subtract_mby = vp8_subtract_mby_mmx; - - } - else - { - // Pure C: - vp8_mbuverror = vp8_mbuverror_c; - vp8_fast_quantize_b = vp8_fast_quantize_b_c; - vp8_short_fdct4x4 = vp8_short_fdct4x4_c; - vp8_short_fdct8x4 = vp8_short_fdct8x4_c; - vp8_fast_fdct4x4 = vp8_fast_fdct4x4_c; - vp8_fast_fdct8x4 = vp8_fast_fdct8x4_c; - vp8_subtract_b = vp8_subtract_b_c; - vp8_subtract_mbuv = vp8_subtract_mbuv_c; - vp8_variance4x4 = vp8_variance4x4_c; - vp8_variance8x8 = vp8_variance8x8_c; - 
vp8_variance8x16 = vp8_variance8x16_c; - vp8_variance16x8 = vp8_variance16x8_c; - vp8_variance16x16 = vp8_variance16x16_c; - vp8_mse16x16 = vp8_mse16x16_c; - vp8_sub_pixel_variance4x4 = vp8_sub_pixel_variance4x4_c; - vp8_sub_pixel_variance8x8 = vp8_sub_pixel_variance8x8_c; - vp8_sub_pixel_variance8x16 = vp8_sub_pixel_variance8x16_c; - vp8_sub_pixel_variance16x8 = vp8_sub_pixel_variance16x8_c; - vp8_sub_pixel_variance16x16 = vp8_sub_pixel_variance16x16_c; - vp8_get_mb_ss = vp8_get_mb_ss_c; - vp8_get16x16pred_error = vp8_get16x16pred_error_c; - vp8_get8x8var = vp8_get8x8var_c; - vp8_get16x16var = vp8_get16x16var_c; - vp8_get4x4sse_cs = vp8_get4x4sse_cs_c; - vp8_sad16x16 = vp8_sad16x16_c; - vp8_sad16x8 = vp8_sad16x8_c; - vp8_sad8x16 = vp8_sad8x16_c; - vp8_sad8x8 = vp8_sad8x8_c; - vp8_sad4x4 = vp8_sad4x4_c; - vp8_block_error = vp8_block_error_c; - vp8_mbblock_error = vp8_mbblock_error_c; - vp8_subtract_mby = vp8_subtract_mby_c; - } - -} diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm index e13423796..f07b030bd 100644 --- a/vp8/encoder/x86/dct_mmx.asm +++ b/vp8/encoder/x86/dct_mmx.asm @@ -1,846 +1,241 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; %include "vpx_ports/x86_abi_support.asm" -section .text - global sym(vp8_short_fdct4x4_mmx) - global sym(vp8_fast_fdct4x4_mmx) - global sym(vp8_fast_fdct8x4_wmt) - - -%define DCTCONSTANTSBITS (16) -%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1)) -%define x_c1 (60547) ; cos(pi /8) * (1<<15) -%define x_c2 (46341) ; cos(pi*2/8) * (1<<15) -%define x_c3 (25080) ; cos(pi*3/8) * (1<<15) - - -%define _1STSTAGESHIFT 14 -%define _2NDSTAGESHIFT 16 - -; using matrix multiply with source and destbuffer has a pitch ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch) +global sym(vp8_short_fdct4x4_mmx) sym(vp8_short_fdct4x4_mmx): push rbp - mov rbp, rsp + mov rbp, rsp SHADOW_ARGS_TO_STACK 3 GET_GOT rbx - push rsi - push rdi + push rsi + push rdi ; end prolog - mov rsi, arg(0) ;input - mov rdi, arg(1) ;output + mov rsi, arg(0) ; input + mov rdi, arg(1) ; output - movsxd rax, dword ptr arg(2) ;pitch - lea rdx, [dct_matrix GLOBAL] + movsxd rax, dword ptr arg(2) ;pitch - movq mm0, [rsi ] - movq mm1, [rsi + rax] - - movq mm2, [rsi + rax*2] - lea rsi, [rsi + rax*2] - - movq mm3, [rsi + rax] - - ; first column - movq mm4, mm0 - movq mm7, [rdx] - - pmaddwd mm4, mm7 - movq mm5, mm1 - - pmaddwd mm5, mm7 - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - - pmaddwd mm5, mm7 - movq mm6, mm3 - - pmaddwd mm6, mm7 - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct1st_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _1STSTAGESHIFT - psrad mm5, _1STSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi], mm4 - - ;second column - movq mm4, mm0 - - pmaddwd mm4, [rdx+8] - movq mm5, mm1 - - pmaddwd mm5, [rdx+8] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx+8] - movq mm6, mm3 - - pmaddwd mm6, [rdx+8] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, 
[dct1st_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _1STSTAGESHIFT - psrad mm5, _1STSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi+8], mm4 - - - ;third column - movq mm4, mm0 - - pmaddwd mm4, [rdx+16] - movq mm5, mm1 - - pmaddwd mm5, [rdx+16] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx+16] - movq mm6, mm3 - - pmaddwd mm6, [rdx+16] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct1st_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _1STSTAGESHIFT - psrad mm5, _1STSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi+16], mm4 - - ;fourth column (this is the last column, so we do not have save the source any more) - - pmaddwd mm0, [rdx+24] - - pmaddwd mm1, [rdx+24] - movq mm6, mm0 - - punpckldq mm0, mm1 - punpckhdq mm6, mm1 - - paddd mm0, mm6 - - pmaddwd mm2, [rdx+24] - - pmaddwd mm3, [rdx+24] - movq mm7, mm2 - - punpckldq mm2, mm3 - punpckhdq mm7, mm3 - - paddd mm2, mm7 - movq mm6, [dct1st_stage_rounding_mmx GLOBAL] - - paddd mm0, mm6 - paddd mm2, mm6 - - psrad mm0, _1STSTAGESHIFT - psrad mm2, _1STSTAGESHIFT - - packssdw mm0, mm2 - - movq mm3, mm0 - - ; done with one pass - ; now start second pass - movq mm0, [rdi ] - movq mm1, [rdi+ 8] - movq mm2, [rdi+ 16] - - movq mm4, mm0 - - pmaddwd mm4, [rdx] - movq mm5, mm1 - - pmaddwd mm5, [rdx] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx] - movq mm6, mm3 - - pmaddwd mm6, [rdx] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct2nd_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _2NDSTAGESHIFT - psrad mm5, _2NDSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi], mm4 - - ;second column - movq mm4, mm0 - - pmaddwd mm4, [rdx+8] - movq mm5, mm1 - - pmaddwd mm5, [rdx+8] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, 
mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx+8] - movq mm6, mm3 - - pmaddwd mm6, [rdx+8] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct2nd_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _2NDSTAGESHIFT - psrad mm5, _2NDSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi+8], mm4 - - - ;third column - movq mm4, mm0 - - pmaddwd mm4, [rdx+16] - movq mm5, mm1 - - pmaddwd mm5, [rdx+16] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx+16] - movq mm6, mm3 - - pmaddwd mm6, [rdx+16] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct2nd_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _2NDSTAGESHIFT - psrad mm5, _2NDSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi+16], mm4 - - ;fourth column - movq mm4, mm0 - - pmaddwd mm4, [rdx+24] - movq mm5, mm1 - - pmaddwd mm5, [rdx+24] - movq mm6, mm4 - - punpckldq mm4, mm5 - punpckhdq mm6, mm5 - - paddd mm4, mm6 - movq mm5, mm2 - - pmaddwd mm5, [rdx+24] - movq mm6, mm3 - - pmaddwd mm6, [rdx+24] - movq mm7, mm5 - - punpckldq mm5, mm6 - punpckhdq mm7, mm6 - - paddd mm5, mm7 - movq mm6, [dct2nd_stage_rounding_mmx GLOBAL] - - paddd mm4, mm6 - paddd mm5, mm6 - - psrad mm4, _2NDSTAGESHIFT - psrad mm5, _2NDSTAGESHIFT - - packssdw mm4, mm5 - movq [rdi+24], mm4 - - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - - -;void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch) -sym(vp8_fast_fdct4x4_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;input - mov rdi, arg(1) ;output - - lea rdx, [dct_const_mmx GLOBAL] - movsxd rax, dword ptr arg(2) ;pitch - - lea rcx, [rsi + rax*2] + lea rcx, [rsi + rax*2] ; read the input data - movq mm0, [rsi] - movq mm1, [rsi + rax ] + movq mm0, [rsi] + movq mm1, [rsi + rax] - 
movq mm2, [rcx] - movq mm3, [rcx + rax] - ; get the constants - ;shift to left by 1 for prescision - paddw mm0, mm0 - paddw mm1, mm1 + movq mm2, [rcx] + movq mm4, [rcx + rax] - psllw mm2, 1 - psllw mm3, 1 + ; transpose for the first stage + movq mm3, mm0 ; 00 01 02 03 + movq mm5, mm2 ; 20 21 22 23 - ; transpose for the second stage - movq mm4, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 10 11 12 03 + punpcklwd mm0, mm1 ; 00 10 01 11 + punpckhwd mm3, mm1 ; 02 12 03 13 - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm4, mm1 ; 02 12 03 13 + punpcklwd mm2, mm4 ; 20 30 21 31 + punpckhwd mm5, mm4 ; 22 32 23 33 - punpcklwd mm2, mm3 ; 20 30 21 31 - punpckhwd mm5, mm3 ; 22 32 23 33 + movq mm1, mm0 ; 00 10 01 11 + punpckldq mm0, mm2 ; 00 10 20 30 + punpckhdq mm1, mm2 ; 01 11 21 31 - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 + movq mm2, mm3 ; 02 12 03 13 + punpckldq mm2, mm5 ; 02 12 22 32 - punpckhdq mm1, mm2 ; 01 11 21 31 - - movq mm2, mm4 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 - - punpckhdq mm4, mm5 ; 03 13 23 33 - movq mm3, mm4 + punpckhdq mm3, mm5 ; 03 13 23 33 + ; mm0 0 + ; mm1 1 + ; mm2 2 + ; mm3 3 ; first stage - movq mm5, mm0 - movq mm4, mm1 + movq mm5, mm0 + movq mm4, mm1 - paddw mm0, mm3 ; a = 0 + 3 - paddw mm1, mm2 ; b = 1 + 2 + paddw mm0, mm3 ; a1 = 0 + 3 + paddw mm1, mm2 ; b1 = 1 + 2 - psubw mm4, mm2 ; c = 1 - 2 - psubw mm5, mm3 ; d = 0 - 3 + psubw mm4, mm2 ; c1 = 1 - 2 + psubw mm5, mm3 ; d1 = 0 - 3 + psllw mm5, 3 + psllw mm4, 3 + + psllw mm0, 3 + psllw mm1, 3 ; output 0 and 2 - movq mm6, [rdx + 16] ; c2 - movq mm2, mm0 ; a + movq mm2, mm0 ; a1 - paddw mm0, mm1 ; a + b - psubw mm2, mm1 ; a - b - - movq mm1, mm0 ; a + b - pmulhw mm0, mm6 ; 00 01 02 03 - - paddw mm0, mm1 ; output 00 01 02 03 - pmulhw mm6, mm2 ; 20 21 22 23 - - paddw mm2, mm6 ; output 20 21 22 23 + paddw mm0, mm1 ; op[0] = a1 + b1 + psubw mm2, mm1 ; op[2] = a1 - b1 ; output 1 and 3 - movq mm6, [rdx + 8] ; c1 - movq mm7, [rdx + 24] ; c3 + ; interleave c1, d1 + movq mm1, mm5 ; 
d1 + punpcklwd mm1, mm4 ; c1 d1 + punpckhwd mm5, mm4 ; c1 d1 - movq mm1, mm4 ; c - movq mm3, mm5 ; d + movq mm3, mm1 + movq mm4, mm5 - pmulhw mm1, mm7 ; c * c3 - pmulhw mm3, mm6 ; d * c1 + pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - paddw mm3, mm5 ; d * c1 rounded - paddw mm1, mm3 ; output 10 11 12 13 + pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - movq mm3, mm4 ; c - pmulhw mm5, mm7 ; d * c3 + paddd mm1, MMWORD PTR[GLOBAL(_14500)] + paddd mm4, MMWORD PTR[GLOBAL(_14500)] + paddd mm3, MMWORD PTR[GLOBAL(_7500)] + paddd mm5, MMWORD PTR[GLOBAL(_7500)] - pmulhw mm4, mm6 ; c * c1 - paddw mm3, mm4 ; round c* c1 - - psubw mm5, mm3 ; output 30 31 32 33 - movq mm3, mm5 + psrad mm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad mm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad mm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + psrad mm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + packssdw mm1, mm4 ; op[1] + packssdw mm3, mm5 ; op[3] ; done with vertical ; transpose for the second stage - movq mm4, mm0 ; 00 01 02 03 - movq mm5, mm2 ; 10 11 12 03 + movq mm4, mm0 ; 00 10 20 30 + movq mm5, mm2 ; 02 12 22 32 - punpcklwd mm0, mm1 ; 00 10 01 11 - punpckhwd mm4, mm1 ; 02 12 03 13 + punpcklwd mm0, mm1 ; 00 01 10 11 + punpckhwd mm4, mm1 ; 20 21 30 31 - punpcklwd mm2, mm3 ; 20 30 21 31 - punpckhwd mm5, mm3 ; 22 32 23 33 + punpcklwd mm2, mm3 ; 02 03 12 13 + punpckhwd mm5, mm3 ; 22 23 32 33 + movq mm1, mm0 ; 00 01 10 11 + punpckldq mm0, mm2 ; 00 01 02 03 - movq mm1, mm0 ; 00 10 01 11 - punpckldq mm0, mm2 ; 00 10 20 30 + punpckhdq mm1, mm2 ; 01 22 12 13 - punpckhdq mm1, mm2 ; 01 11 21 31 + movq mm2, mm4 ; 20 31 30 31 + punpckldq mm2, mm5 ; 20 21 22 23 - movq mm2, mm4 ; 02 12 03 13 - punpckldq mm2, mm5 ; 02 12 22 32 + punpckhdq mm4, mm5 ; 30 31 32 33 - punpckhdq mm4, mm5 ; 03 13 23 33 - movq mm3, mm4 + ; mm0 0 
+ ; mm1 1 + ; mm2 2 + ; mm3 4 + movq mm5, mm0 + movq mm3, mm1 - ; first stage - movq mm5, mm0 - movq mm4, mm1 + paddw mm0, mm4 ; a1 = 0 + 3 + paddw mm1, mm2 ; b1 = 1 + 2 - paddw mm0, mm3 ; a = 0 + 3 - paddw mm1, mm2 ; b = 1 + 2 + psubw mm3, mm2 ; c1 = 1 - 2 + psubw mm5, mm4 ; d1 = 0 - 3 - psubw mm4, mm2 ; c = 1 - 2 - psubw mm5, mm3 ; d = 0 - 3 + pxor mm6, mm6 ; zero out for compare + pcmpeqw mm6, mm5 ; d1 != 0 + + pandn mm6, MMWORD PTR[GLOBAL(_cmp_mask)] ; clear upper, + ; and keep bit 0 of lower ; output 0 and 2 - movq mm6, [rdx + 16] ; c2 - movq mm2, mm0 ; a - paddw mm0, mm1 ; a + b + movq mm2, mm0 ; a1 - psubw mm2, mm1 ; a - b + paddw mm0, mm1 ; a1 + b1 + psubw mm2, mm1 ; a1 - b1 - movq mm1, mm0 ; a + b - pmulhw mm0, mm6 ; 00 01 02 03 + paddw mm0, MMWORD PTR[GLOBAL(_7w)] + paddw mm2, MMWORD PTR[GLOBAL(_7w)] - paddw mm0, mm1 ; output 00 01 02 03 - pmulhw mm6, mm2 ; 20 21 22 23 - - paddw mm2, mm6 ; output 20 21 22 23 + psraw mm0, 4 ; op[0] = (a1 + b1 + 7)>>4 + psraw mm2, 4 ; op[8] = (a1 - b1 + 7)>>4 + movq MMWORD PTR[rdi + 0 ], mm0 + movq MMWORD PTR[rdi + 16], mm2 ; output 1 and 3 - movq mm6, [rdx + 8] ; c1 - movq mm7, [rdx + 24] ; c3 + ; interleave c1, d1 + movq mm1, mm5 ; d1 + punpcklwd mm1, mm3 ; c1 d1 + punpckhwd mm5, mm3 ; c1 d1 - movq mm1, mm4 ; c - movq mm3, mm5 ; d + movq mm3, mm1 + movq mm4, mm5 - pmulhw mm1, mm7 ; c * c3 - pmulhw mm3, mm6 ; d * c1 + pmaddwd mm1, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd mm4, MMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 - paddw mm3, mm5 ; d * c1 rounded - paddw mm1, mm3 ; output 10 11 12 13 + pmaddwd mm3, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd mm5, MMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 - movq mm3, mm4 ; c - pmulhw mm5, mm7 ; d * c3 + paddd mm1, MMWORD PTR[GLOBAL(_12000)] + paddd mm4, MMWORD PTR[GLOBAL(_12000)] + paddd mm3, MMWORD PTR[GLOBAL(_51000)] + paddd mm5, MMWORD PTR[GLOBAL(_51000)] - pmulhw mm4, mm6 ; c * c1 - paddw mm3, mm4 ; round c* c1 + psrad 
mm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad mm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad mm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + psrad mm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 - psubw mm5, mm3 ; output 30 31 32 33 - movq mm3, mm5 - ; done with vertical + packssdw mm1, mm4 ; op[4] + packssdw mm3, mm5 ; op[12] - pcmpeqw mm4, mm4 - pcmpeqw mm5, mm5 - psrlw mm4, 15 - psrlw mm5, 15 + paddw mm1, mm6 ; op[4] += (d1!=0) - paddw mm0, mm4 - paddw mm1, mm5 - paddw mm2, mm4 - paddw mm3, mm5 + movq MMWORD PTR[rdi + 8 ], mm1 + movq MMWORD PTR[rdi + 24], mm3 - psraw mm0, 1 - psraw mm1, 1 - psraw mm2, 1 - psraw mm3, 1 - - movq [rdi ], mm0 - movq [rdi+ 8], mm1 - movq [rdi+16], mm2 - movq [rdi+24], mm3 - - ; begin epilog - pop rdi - pop rsi + ; begin epilog + pop rdi + pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret - -;void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch) -sym(vp8_fast_fdct8x4_wmt): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 3 - GET_GOT rbx - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;input - mov rdi, arg(1) ;output - - lea rdx, [dct_const_xmm GLOBAL] - movsxd rax, dword ptr arg(2) ;pitch - - lea rcx, [rsi + rax*2] - ; read the input data - movdqa xmm0, [rsi] - movdqa xmm2, [rsi + rax] - - movdqa xmm4, [rcx] - movdqa xmm3, [rcx + rax] - ; get the constants - ;shift to left by 1 for prescision - psllw xmm0, 1 - psllw xmm2, 1 - - psllw xmm4, 1 - psllw xmm3, 1 - - ; transpose for the second stage - movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 - movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 - - punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 - punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 - - punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 - punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 - - movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 - punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 - - punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 - - - movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 - punpckldq xmm4, xmm5 ; 04 14 24 
34 05 15 25 35 - - punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 - movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 - - punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 - punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 - - movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 - punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 - - punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 - - ; xmm0 0 - ; xmm1 1 - ; xmm2 2 - ; xmm3 3 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a = 0 + 3 - paddw xmm1, xmm2 ; b = 1 + 2 - - psubw xmm4, xmm2 ; c = 1 - 2 - psubw xmm5, xmm3 ; d = 0 - 3 - - - ; output 0 and 2 - movdqa xmm6, [rdx + 32] ; c2 - movdqa xmm2, xmm0 ; a - - paddw xmm0, xmm1 ; a + b - psubw xmm2, xmm1 ; a - b - - movdqa xmm1, xmm0 ; a + b - pmulhw xmm0, xmm6 ; 00 01 02 03 - - paddw xmm0, xmm1 ; output 00 01 02 03 - pmulhw xmm6, xmm2 ; 20 21 22 23 - - paddw xmm2, xmm6 ; output 20 21 22 23 - - ; output 1 and 3 - movdqa xmm6, [rdx + 16] ; c1 - movdqa xmm7, [rdx + 48] ; c3 - - movdqa xmm1, xmm4 ; c - movdqa xmm3, xmm5 ; d - - pmulhw xmm1, xmm7 ; c * c3 - pmulhw xmm3, xmm6 ; d * c1 - - paddw xmm3, xmm5 ; d * c1 rounded - paddw xmm1, xmm3 ; output 10 11 12 13 - - movdqa xmm3, xmm4 ; c - pmulhw xmm5, xmm7 ; d * c3 - - pmulhw xmm4, xmm6 ; c * c1 - paddw xmm3, xmm4 ; round c* c1 - - psubw xmm5, xmm3 ; output 30 31 32 33 - movdqa xmm3, xmm5 - - - ; done with vertical - ; transpose for the second stage - movdqa xmm4, xmm2 ; 02 12 22 32 06 16 26 36 - movdqa xmm2, xmm1 ; 01 11 21 31 05 15 25 35 - - movdqa xmm1, xmm0 ; 00 10 20 30 04 14 24 34 - movdqa xmm5, xmm4 ; 02 12 22 32 06 16 26 36 - - punpcklwd xmm0, xmm2 ; 00 01 10 11 20 21 30 31 - punpckhwd xmm1, xmm2 ; 04 05 14 15 24 25 34 35 - - punpcklwd xmm4, xmm3 ; 02 03 12 13 22 23 32 33 - punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 - - movdqa xmm2, xmm0 ; 00 01 10 11 20 21 30 31 - punpckldq xmm0, xmm4 ; 00 01 02 03 10 11 12 13 - - punpckhdq xmm2, xmm4 ; 20 21 22 23 30 31 32 33 - - - movdqa xmm4, xmm1 ; 04 05 14 15 
24 25 34 35 - punpckldq xmm4, xmm5 ; 04 05 06 07 14 15 16 17 - - punpckhdq xmm1, xmm5 ; 24 25 26 27 34 35 36 37 - movdqa xmm3, xmm2 ; 20 21 22 23 30 31 32 33 - - punpckhqdq xmm3, xmm1 ; 30 31 32 33 34 35 36 37 - punpcklqdq xmm2, xmm1 ; 20 21 22 23 24 25 26 27 - - movdqa xmm1, xmm0 ; 00 01 02 03 10 11 12 13 - punpcklqdq xmm0, xmm4 ; 00 01 02 03 04 05 06 07 - - punpckhqdq xmm1, xmm4 ; 10 11 12 13 14 15 16 17 - - ; first stage - movdqa xmm5, xmm0 - movdqa xmm4, xmm1 - - paddw xmm0, xmm3 ; a = 0 + 3 - paddw xmm1, xmm2 ; b = 1 + 2 - - psubw xmm4, xmm2 ; c = 1 - 2 - psubw xmm5, xmm3 ; d = 0 - 3 - - - ; output 0 and 2 - movdqa xmm6, [rdx + 32] ; c2 - movdqa xmm2, xmm0 ; a - - paddw xmm0, xmm1 ; a + b - psubw xmm2, xmm1 ; a - b - - movdqa xmm1, xmm0 ; a + b - pmulhw xmm0, xmm6 ; 00 01 02 03 - - paddw xmm0, xmm1 ; output 00 01 02 03 - pmulhw xmm6, xmm2 ; 20 21 22 23 - - paddw xmm2, xmm6 ; output 20 21 22 23 - - ; output 1 and 3 - movdqa xmm6, [rdx + 16] ; c1 - movdqa xmm7, [rdx + 48] ; c3 - - movdqa xmm1, xmm4 ; c - movdqa xmm3, xmm5 ; d - - pmulhw xmm1, xmm7 ; c * c3 - pmulhw xmm3, xmm6 ; d * c1 - - paddw xmm3, xmm5 ; d * c1 rounded - paddw xmm1, xmm3 ; output 10 11 12 13 - - movdqa xmm3, xmm4 ; c - pmulhw xmm5, xmm7 ; d * c3 - - pmulhw xmm4, xmm6 ; c * c1 - paddw xmm3, xmm4 ; round c* c1 - - psubw xmm5, xmm3 ; output 30 31 32 33 - movdqa xmm3, xmm5 - ; done with vertical - - - pcmpeqw xmm4, xmm4 - pcmpeqw xmm5, xmm5; - psrlw xmm4, 15 - psrlw xmm5, 15 - - paddw xmm0, xmm4 - paddw xmm1, xmm5 - paddw xmm2, xmm4 - paddw xmm3, xmm5 - - psraw xmm0, 1 - psraw xmm1, 1 - psraw xmm2, 1 - psraw xmm3, 1 - - movq QWORD PTR[rdi ], xmm0 - movq QWORD PTR[rdi+ 8], xmm1 - movq QWORD PTR[rdi+16], xmm2 - movq QWORD PTR[rdi+24], xmm3 - - psrldq xmm0, 8 - psrldq xmm1, 8 - psrldq xmm2, 8 - psrldq xmm3, 8 - - movq QWORD PTR[rdi+32], xmm0 - movq QWORD PTR[rdi+40], xmm1 - movq QWORD PTR[rdi+48], xmm2 - movq QWORD PTR[rdi+56], xmm3 - ; begin epilog - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS 
- pop rbp - ret - - SECTION_RODATA -;static const unsigned int dct1st_stage_rounding_mmx[2] = -align 16 -dct1st_stage_rounding_mmx: - times 2 dd 8192 - - -;static const unsigned int dct2nd_stage_rounding_mmx[2] = -align 16 -dct2nd_stage_rounding_mmx: - times 2 dd 32768 - - -;static const short dct_matrix[4][4]= -align 16 -dct_matrix: - times 4 dw 23170 - - dw 30274 - dw 12540 - dw -12540 - dw -30274 - - dw 23170 - times 2 dw -23170 - dw 23170 - - dw 12540 - dw -30274 - dw 30274 - dw -12540 - - -;static const unsigned short dct_const_mmx[4 * 4]= -align 16 -dct_const_mmx: - times 4 dw 0 - times 4 dw 60547 - times 4 dw 46341 - times 4 dw 25080 - - -;static const unsigned short dct_const_xmm[8 * 4]= -align 16 -dct_const_xmm: - times 8 dw 0 - times 8 dw 60547 - times 8 dw 46341 - times 8 dw 25080 +align 8 +_5352_2217: + dw 5352 + dw 2217 + dw 5352 + dw 2217 +align 8 +_2217_neg5352: + dw 2217 + dw -5352 + dw 2217 + dw -5352 +align 8 +_cmp_mask: + times 4 dw 1 +align 8 +_7w: + times 4 dw 7 +align 8 +_14500: + times 2 dd 14500 +align 8 +_7500: + times 2 dd 7500 +align 8 +_12000: + times 2 dd 12000 +align 8 +_51000: + times 2 dd 51000 diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm index 3e5e9a70c..652dd9804 100644 --- a/vp8/encoder/x86/dct_sse2.asm +++ b/vp8/encoder/x86/dct_sse2.asm @@ -1,260 +1,430 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. 
All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" -global sym(vp8_short_fdct4x4_wmt) - -%define DCTCONSTANTSBITS (16) -%define DCTROUNDINGVALUE (1<< (DCTCONSTANTSBITS-1)) -%define x_c1 (60547) ; cos(pi /8) * (1<<15) -%define x_c2 (46341) ; cos(pi*2/8) * (1<<15) -%define x_c3 (25080) ; cos(pi*3/8) * (1<<15) - -%define _1STSTAGESHIFT 14 -%define _2NDSTAGESHIFT 16 - - -;; using matrix multiply -;void vp8_short_fdct4x4_wmt(short *input, short *output) -sym(vp8_short_fdct4x4_wmt): +%macro STACK_FRAME_CREATE 0 +%if ABI_IS_32BIT + %define input rsi + %define output rdi + %define pitch rax push rbp mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 GET_GOT rbx + push rsi + push rdi ; end prolog - mov rax, arg(0) ;input - mov rcx, arg(1) ;output + mov rsi, arg(0) + mov rdi, arg(1) - lea rdx, [dct_matrix_sse2 GLOBAL] + movsxd rax, dword ptr arg(2) + lea rcx, [rsi + rax*2] +%else + %ifidn __OUTPUT_FORMAT__,x64 + %define input rcx + %define output rdx + %define pitch r8 + %else + %define input rdi + %define output rsi + %define pitch rdx + %endif +%endif +%endmacro - movdqu xmm0, [rax ] - movdqu xmm1, [rax+16] +%macro STACK_FRAME_DESTROY 0 + %define input + %define output + %define pitch - ; first column - movdqa xmm2, xmm0 - movdqa xmm7, [rdx] - - pmaddwd xmm2, xmm7 - movdqa xmm3, xmm1 - - pmaddwd xmm3, xmm7 - movdqa xmm4, xmm2 - - punpckldq xmm2, xmm3 - punpckhdq xmm4, xmm3 - - movdqa xmm3, xmm2 - punpckldq xmm2, xmm4 - - punpckhdq xmm3, xmm4 - paddd xmm2, xmm3 - - - paddd xmm2, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL] - psrad xmm2, _1STSTAGESHIFT - ;second column - movdqa xmm3, xmm0 - pmaddwd xmm3, [rdx+16] - - movdqa xmm4, xmm1 - pmaddwd xmm4, [rdx+16] - - movdqa xmm5, xmm3 - punpckldq xmm3, xmm4 - - punpckhdq xmm5, xmm4 - movdqa xmm4, xmm3 - - punpckldq xmm3, xmm5 - punpckhdq xmm4, xmm5 - - paddd xmm3, xmm4 - paddd xmm3, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL] - - - psrad xmm3, 
_1STSTAGESHIFT - packssdw xmm2, xmm3 - - ;third column - movdqa xmm3, xmm0 - pmaddwd xmm3, [rdx+32] - - movdqa xmm4, xmm1 - pmaddwd xmm4, [rdx+32] - - movdqa xmm5, xmm3 - punpckldq xmm3, xmm4 - - punpckhdq xmm5, xmm4 - movdqa xmm4, xmm3 - - punpckldq xmm3, xmm5 - punpckhdq xmm4, xmm5 - - paddd xmm3, xmm4 - paddd xmm3, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL] - - psrad xmm3, _1STSTAGESHIFT - - ;fourth column (this is the last column, so we do not have save the source any more) - pmaddwd xmm0, [rdx+48] - pmaddwd xmm1, [rdx+48] - - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - - punpckhdq xmm4, xmm1 - movdqa xmm1, xmm0 - - punpckldq xmm0, xmm4 - punpckhdq xmm1, xmm4 - - paddd xmm0, xmm1 - paddd xmm0, XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL] - - - psrad xmm0, _1STSTAGESHIFT - packssdw xmm3, xmm0 - ; done with one pass - ; now start second pass - movdqa xmm0, xmm2 - movdqa xmm1, xmm3 - - pmaddwd xmm2, xmm7 - pmaddwd xmm3, xmm7 - - movdqa xmm4, xmm2 - punpckldq xmm2, xmm3 - - punpckhdq xmm4, xmm3 - movdqa xmm3, xmm2 - - punpckldq xmm2, xmm4 - punpckhdq xmm3, xmm4 - - paddd xmm2, xmm3 - paddd xmm2, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL] - - psrad xmm2, _2NDSTAGESHIFT - - ;second column - movdqa xmm3, xmm0 - pmaddwd xmm3, [rdx+16] - - movdqa xmm4, xmm1 - pmaddwd xmm4, [rdx+16] - - movdqa xmm5, xmm3 - punpckldq xmm3, xmm4 - - punpckhdq xmm5, xmm4 - movdqa xmm4, xmm3 - - punpckldq xmm3, xmm5 - punpckhdq xmm4, xmm5 - - paddd xmm3, xmm4 - paddd xmm3, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL] - - psrad xmm3, _2NDSTAGESHIFT - packssdw xmm2, xmm3 - - movdqu [rcx], xmm2 - ;third column - movdqa xmm3, xmm0 - pmaddwd xmm3, [rdx+32] - - movdqa xmm4, xmm1 - pmaddwd xmm4, [rdx+32] - - movdqa xmm5, xmm3 - punpckldq xmm3, xmm4 - - punpckhdq xmm5, xmm4 - movdqa xmm4, xmm3 - - punpckldq xmm3, xmm5 - punpckhdq xmm4, xmm5 - - paddd xmm3, xmm4 - paddd xmm3, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL] - - psrad xmm3, _2NDSTAGESHIFT - ;fourth column - pmaddwd xmm0, 
[rdx+48] - pmaddwd xmm1, [rdx+48] - - movdqa xmm4, xmm0 - punpckldq xmm0, xmm1 - - punpckhdq xmm4, xmm1 - movdqa xmm1, xmm0 - - punpckldq xmm0, xmm4 - punpckhdq xmm1, xmm4 - - paddd xmm0, xmm1 - paddd xmm0, XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL] - - psrad xmm0, _2NDSTAGESHIFT - packssdw xmm3, xmm0 - - movdqu [rcx+16], xmm3 - - mov rsp, rbp - ; begin epilog +%if ABI_IS_32BIT + pop rdi + pop rsi RESTORE_GOT - UNSHADOW_ARGS pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + %endif +%endif ret +%endmacro +;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) +global sym(vp8_short_fdct4x4_sse2) +sym(vp8_short_fdct4x4_sse2): + + STACK_FRAME_CREATE + + movq xmm0, MMWORD PTR[input ] ;03 02 01 00 + movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 + lea input, [input+2*pitch] + movq xmm1, MMWORD PTR[input ] ;23 22 21 20 + movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 + + punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 + punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 + + movdqa xmm2, xmm0 + punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 + punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 + movdqa xmm1, xmm0 + punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00 + pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx + pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx + + punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03 + movdqa xmm3, xmm0 + paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1 + psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 + psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 + psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 + + movdqa xmm1, xmm0 + pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 + pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 + movdqa xmm4, xmm3 + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 + + paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] + psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 + + 
packssdw xmm0, xmm1 ;op[2] op[0] + packssdw xmm3, xmm4 ;op[3] op[1] + ; 23 22 21 20 03 02 01 00 + ; + ; 33 32 31 30 13 12 11 10 + ; + movdqa xmm2, xmm0 + punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00 + punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30 + + movdqa xmm3, xmm0 + punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00 + punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01 + movdqa xmm2, xmm0 + punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 + punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 + + movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] + pshufd xmm2, xmm2, 04eh + movdqa xmm3, xmm0 + paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 + psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1 + + pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1 + movdqa xmm2, xmm3 ;save d1 for compare + pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1 + pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1 + pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1 + pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 + pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 + movdqa xmm1, xmm0 + pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 + pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 + + pxor xmm4, xmm4 ;zero out for compare + paddd xmm0, xmm5 + paddd xmm1, xmm5 + pcmpeqw xmm2, xmm4 + psrad xmm0, 4 ;(a1 + b1 + 7)>>4 + psrad xmm1, 4 ;(a1 - b1 + 7)>>4 + pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, + ;and keep bit 0 of lower + + movdqa xmm4, xmm3 + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352 + paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] + packssdw xmm0, xmm1 ;op[8] op[0] + psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 + psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 + + packssdw xmm3, xmm4 ;op[12] op[4] + movdqa xmm1, xmm0 + paddw xmm3, xmm2 ;op[4] += (d1!=0) + punpcklqdq xmm0, xmm3 ;op[4] op[0] + punpckhqdq xmm1, xmm3 ;op[12] op[8] + + movdqa XMMWORD PTR[output + 0], xmm0 + 
movdqa XMMWORD PTR[output + 16], xmm1 + + STACK_FRAME_DESTROY + +;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) +global sym(vp8_short_fdct8x4_sse2) +sym(vp8_short_fdct8x4_sse2): + + STACK_FRAME_CREATE + + ; read the input data + movdqa xmm0, [input ] + movdqa xmm2, [input+ pitch] + lea input, [input+2*pitch] + movdqa xmm4, [input ] + movdqa xmm3, [input+ pitch] + + ; transpose for the first stage + movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 + movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 + + punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 + punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 + + punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 + punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 + + movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 + punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 + + punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 + + movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 + punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 + + punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 + movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 + + punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 + punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 + + movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 + punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 + + punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 + + ; xmm0 0 + ; xmm1 1 + ; xmm2 2 + ; xmm3 3 + + ; first stage + movdqa xmm5, xmm0 + movdqa xmm4, xmm1 + + paddw xmm0, xmm3 ; a1 = 0 + 3 + paddw xmm1, xmm2 ; b1 = 1 + 2 + + psubw xmm4, xmm2 ; c1 = 1 - 2 + psubw xmm5, xmm3 ; d1 = 0 - 3 + + psllw xmm5, 3 + psllw xmm4, 3 + + psllw xmm0, 3 + psllw xmm1, 3 + + ; output 0 and 2 + movdqa xmm2, xmm0 ; a1 + + paddw xmm0, xmm1 ; op[0] = a1 + b1 + psubw xmm2, xmm1 ; op[2] = a1 - b1 + + ; output 1 and 3 + ; interleave c1, d1 + movdqa xmm1, xmm5 ; d1 + punpcklwd xmm1, xmm4 ; c1 d1 + punpckhwd xmm5, xmm4 ; c1 d1 + + movdqa xmm3, xmm1 + movdqa xmm4, xmm5 + + pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd xmm4, 
XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + + paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] + paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] + paddd xmm5, XMMWORD PTR[GLOBAL(_7500)] + + psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 + psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 + + packssdw xmm1, xmm4 ; op[1] + packssdw xmm3, xmm5 ; op[3] + + ; done with vertical + ; transpose for the second stage + movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 + movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36 + + punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 + punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 + + punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 + punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 + + movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 + punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 + + punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 + + movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 + punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 + + punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 + movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 + + punpckhqdq xmm3, xmm4 ; 30 31 32 33 34 35 36 37 + punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 + + movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 + punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 + + punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 + + ; xmm0 0 + ; xmm1 4 + ; xmm2 1 + ; xmm3 3 + + movdqa xmm5, xmm0 + movdqa xmm2, xmm1 + + paddw xmm0, xmm3 ; a1 = 0 + 3 + paddw xmm1, xmm4 ; b1 = 1 + 2 + + psubw xmm4, xmm2 ; c1 = 1 - 2 + psubw xmm5, xmm3 ; d1 = 0 - 3 + + pxor xmm6, xmm6 ; zero out for compare + + pcmpeqw xmm6, xmm5 ; d1 != 0 + + pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, + ; and keep bit 0 of lower + + ; output 0 
and 2 + movdqa xmm2, xmm0 ; a1 + + paddw xmm0, xmm1 ; a1 + b1 + psubw xmm2, xmm1 ; a1 - b1 + + paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] + paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] + + psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 + psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 + + ; output 1 and 3 + ; interleave c1, d1 + movdqa xmm1, xmm5 ; d1 + punpcklwd xmm1, xmm4 ; c1 d1 + punpckhwd xmm5, xmm4 ; c1 d1 + + movdqa xmm3, xmm1 + movdqa xmm4, xmm5 + + pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 + + pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 + + paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm4, XMMWORD PTR[GLOBAL(_12000)] + paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] + paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] + + psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 + psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 + + packssdw xmm1, xmm4 ; op[4] + packssdw xmm3, xmm5 ; op[12] + + paddw xmm1, xmm6 ; op[4] += (d1!=0) + + movdqa xmm4, xmm0 + movdqa xmm5, xmm2 + + punpcklqdq xmm0, xmm1 + punpckhqdq xmm4, xmm1 + + punpcklqdq xmm2, xmm3 + punpckhqdq xmm5, xmm3 + + movdqa XMMWORD PTR[output + 0 ], xmm0 + movdqa XMMWORD PTR[output + 16], xmm2 + movdqa XMMWORD PTR[output + 32], xmm4 + movdqa XMMWORD PTR[output + 48], xmm5 + + STACK_FRAME_DESTROY SECTION_RODATA -;static unsigned int dct1st_stage_rounding_sse2[4] = align 16 -dct1st_stage_rounding_sse2: - times 4 dd 8192 - - -;static unsigned int dct2nd_stage_rounding_sse2[4] = +_5352_2217: + dw 5352 + dw 2217 + dw 5352 + dw 2217 + dw 5352 + dw 2217 + dw 5352 + dw 2217 align 16 -dct2nd_stage_rounding_sse2: - times 4 dd 32768 - -;static short dct_matrix_sse2[4][8]= +_2217_neg5352: + dw 2217 + dw -5352 + dw 2217 + dw -5352 + dw 2217 + dw -5352 + dw 2217 + 
dw -5352 align 16 -dct_matrix_sse2: - times 8 dw 23170 - - dw 30274 - dw 12540 - dw -12540 - dw -30274 - dw 30274 - dw 12540 - dw -12540 - dw -30274 - - dw 23170 - times 2 dw -23170 - times 2 dw 23170 - times 2 dw -23170 - dw 23170 - - dw 12540 - dw -30274 - dw 30274 - dw -12540 - dw 12540 - dw -30274 - dw 30274 - dw -12540 +_mult_add: + times 8 dw 1 +align 16 +_cmp_mask: + times 4 dw 1 + times 4 dw 0 +align 16 +_cmp_mask8x4: + times 8 dw 1 +align 16 +_mult_sub: + dw 1 + dw -1 + dw 1 + dw -1 + dw 1 + dw -1 + dw 1 + dw -1 +align 16 +_7: + times 4 dd 7 +align 16 +_7w: + times 8 dw 7 +align 16 +_14500: + times 4 dd 14500 +align 16 +_7500: + times 4 dd 7500 +align 16 +_12000: + times 4 dd 12000 +align 16 +_51000: + times 4 dd 51000 diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h index bc80e64ef..59a5cb1d7 100644 --- a/vp8/encoder/x86/dct_x86.h +++ b/vp8/encoder/x86/dct_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -21,48 +22,41 @@ #if HAVE_MMX extern prototype_fdct(vp8_short_fdct4x4_mmx); extern prototype_fdct(vp8_short_fdct8x4_mmx); -extern prototype_fdct(vp8_fast_fdct4x4_mmx); -extern prototype_fdct(vp8_fast_fdct8x4_mmx); #if !CONFIG_RUNTIME_CPU_DETECT + #undef vp8_fdct_short4x4 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx #undef vp8_fdct_short8x4 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx -#undef vp8_fdct_fast4x4 -#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_mmx - -#undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_mmx - #endif + #endif #if HAVE_SSE2 -extern prototype_fdct(vp8_short_fdct4x4_wmt); -extern prototype_fdct(vp8_short_fdct8x4_wmt); -extern prototype_fdct(vp8_fast_fdct8x4_wmt); - +extern prototype_fdct(vp8_short_fdct8x4_sse2); extern prototype_fdct(vp8_short_walsh4x4_sse2); +extern prototype_fdct(vp8_short_fdct4x4_sse2); + #if !CONFIG_RUNTIME_CPU_DETECT -#if 0 -/* short SSE2 DCT currently disabled, does not match the MMX version */ #undef vp8_fdct_short4x4 -#define vp8_fdct_short4x4 vp8_short_fdct4x4_wmt +#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2 #undef vp8_fdct_short8x4 -#define vp8_fdct_short8x4 vp8_short_fdct8x4_wmt -#endif +#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2 + +#undef vp8_fdct_fast4x4 +#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2 #undef vp8_fdct_fast8x4 -#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_wmt +#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2 -#undef vp8_fdct_walsh_short4x4 +#undef vp8_fdct_walsh_short4x4 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_sse2 #endif diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h index 9397a6cca..69b3edd66 100644 --- a/vp8/encoder/x86/encodemb_x86.h +++ b/vp8/encoder/x86/encodemb_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -54,7 +55,9 @@ extern prototype_submbuv(vp8_subtract_mbuv_mmx); extern prototype_berr(vp8_block_error_xmm); extern prototype_mberr(vp8_mbblock_error_xmm); extern prototype_mbuverr(vp8_mbuverror_xmm); - +extern prototype_subb(vp8_subtract_b_sse2); +extern prototype_submby(vp8_subtract_mby_sse2); +extern prototype_submbuv(vp8_subtract_mbuv_sse2); #if !CONFIG_RUNTIME_CPU_DETECT #undef vp8_encodemb_berr @@ -66,6 +69,15 @@ extern prototype_mbuverr(vp8_mbuverror_xmm); #undef vp8_encodemb_mbuverr #define vp8_encodemb_mbuverr vp8_mbuverror_xmm +#undef vp8_encodemb_subb +#define vp8_encodemb_subb vp8_subtract_b_sse2 + +#undef vp8_encodemb_submby +#define vp8_encodemb_submby vp8_subtract_mby_sse2 + +#undef vp8_encodemb_submbuv +#define vp8_encodemb_submbuv vp8_subtract_mbuv_sse2 + #endif #endif diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm index 194047155..c0f06bbbb 100644 --- a/vp8/encoder/x86/encodeopt.asm +++ b/vp8/encoder/x86/encodeopt.asm @@ -1,16 +1,16 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. 
+; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" - ;int vp8_block_error_xmm(short *coeff_ptr, short *dcoef_ptr) global sym(vp8_block_error_xmm) sym(vp8_block_error_xmm): @@ -19,11 +19,9 @@ sym(vp8_block_error_xmm): SHADOW_ARGS_TO_STACK 2 push rsi push rdi - ; end prolog - + ; end prologue mov rsi, arg(0) ;coeff_ptr - pxor xmm7, xmm7 mov rdi, arg(1) ;dcoef_ptr movdqa xmm3, [rsi] @@ -32,33 +30,27 @@ sym(vp8_block_error_xmm): movdqa xmm5, [rsi+16] movdqa xmm6, [rdi+16] - pxor xmm1, xmm1 ; from movd xmm1, dc; dc=0 - - movdqa xmm2, xmm7 - psubw xmm5, xmm6 - - por xmm1, xmm2 - pmaddwd xmm5, xmm5 - - pcmpeqw xmm1, xmm7 psubw xmm3, xmm4 - pand xmm1, xmm3 - pmaddwd xmm1, xmm1 + psubw xmm5, xmm6 + pmaddwd xmm3, xmm3 + pmaddwd xmm5, xmm5 - paddd xmm1, xmm5 - movdqa xmm0, xmm1 + paddd xmm3, xmm5 + + pxor xmm7, xmm7 + movdqa xmm0, xmm3 punpckldq xmm0, xmm7 - punpckhdq xmm1, xmm7 + punpckhdq xmm3, xmm7 - paddd xmm0, xmm1 - movdqa xmm1, xmm0 + paddd xmm0, xmm3 + movdqa xmm3, xmm0 psrldq xmm0, 8 - paddd xmm0, xmm1 + paddd xmm0, xmm3 - movd rax, xmm0 + movq rax, xmm0 pop rdi pop rsi @@ -67,7 +59,6 @@ sym(vp8_block_error_xmm): pop rbp ret - ;int vp8_block_error_mmx(short *coeff_ptr, short *dcoef_ptr) global sym(vp8_block_error_mmx) sym(vp8_block_error_mmx): @@ -124,7 +115,7 @@ sym(vp8_block_error_mmx): psrlq mm1, 32 paddd mm0, mm1 - movd rax, mm0 + movq rax, mm0 pop rdi pop rsi @@ -201,7 +192,7 @@ mberror_loop_mmx: psrlq mm2, 32 paddd mm0, mm2 - movd rax, mm0 + movq rax, mm0 pop rdi pop rsi @@ -269,7 +260,7 @@ mberror_loop: psrldq xmm0, 8 paddd xmm0, xmm1 - movd rax, xmm0 + movq rax, xmm0 pop rdi pop rsi @@ -326,7 +317,7 @@ mbuverror_loop_mmx: psrlq mm7, 32 paddd mm0, 
mm7 - movd rax, mm0 + movq rax, mm0 pop rdi pop rsi @@ -383,7 +374,7 @@ mbuverror_loop: psrldq xmm1, 8 paddd xmm1, xmm2 - movd rax, xmm1 + movq rax, xmm1 pop rdi pop rsi diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm index 7d8620178..39439f0d8 100644 --- a/vp8/encoder/x86/fwalsh_sse2.asm +++ b/vp8/encoder/x86/fwalsh_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -16,102 +17,148 @@ sym(vp8_short_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 + SAVE_XMM + GET_GOT rbx push rsi push rdi ; end prolog - mov rsi, arg(0) - mov rdi, arg(1) + mov rsi, arg(0) ; input + mov rdi, arg(1) ; output + movsxd rdx, dword ptr arg(2) ; pitch - movdqu xmm4, [rsi + 0] ;ip[4] ip[0] - movdqu xmm0, [rsi + 16] ;ip[12] ip[8] + ; first for loop + movq xmm0, MMWORD PTR [rsi] ; load input + movq xmm1, MMWORD PTR [rsi + rdx] + lea rsi, [rsi + rdx*2] + movq xmm2, MMWORD PTR [rsi] + movq xmm3, MMWORD PTR [rsi + rdx] - pxor xmm7, xmm7 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm4 ;ip[4] ip[0] + punpcklwd xmm0, xmm1 + punpcklwd xmm2, xmm3 - paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 + movdqa xmm1, xmm0 + punpckldq xmm0, xmm2 ; ip[1] ip[0] + punpckhdq xmm1, xmm2 ; ip[3] ip[2] + movdqa xmm2, xmm0 + paddw xmm0, xmm1 + psubw xmm2, xmm1 + + psllw xmm0, 2 ; d1 a1 + psllw xmm2, 2 ; c1 b1 + + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 ; b1 a1 + punpckhqdq xmm1, xmm2 ; c1 d1 + + pxor xmm6, xmm6 + movq xmm6, xmm0 + pxor xmm7, xmm7 + pcmpeqw xmm7, xmm6 + paddw xmm7, [GLOBAL(c1)] + + movdqa xmm2, xmm0 + paddw xmm0, xmm1 ; b1+c1 a1+d1 + psubw xmm2, xmm1 ; b1-c1 a1-d1 + paddw xmm0, xmm7 ; b1+c1 a1+d1+(a1!=0) + + ; second for loop + ; input: 13 9 5 1 12 8 4 0 (xmm0) + ; 14 10 6 2 15 11 7 3 (xmm2) + ; after shuffle: + ; 13 5 9 1 12 4 8 0 (xmm0) + ; 14 6 10 2 15 7 11 3 (xmm1) + pshuflw xmm3, xmm0, 0xd8 + pshufhw xmm0, xmm3, 0xd8 + pshuflw xmm3, xmm2, 0xd8 + pshufhw 
xmm1, xmm3, 0xd8 + + movdqa xmm2, xmm0 + pmaddwd xmm0, [GLOBAL(c1)] ; d11 a11 d10 a10 + pmaddwd xmm2, [GLOBAL(cn1)] ; c11 b11 c10 b10 + movdqa xmm3, xmm1 + pmaddwd xmm1, [GLOBAL(c1)] ; d12 a12 d13 a13 + pmaddwd xmm3, [GLOBAL(cn1)] ; c12 b12 c13 b13 + + pshufd xmm4, xmm0, 0xd8 ; d11 d10 a11 a10 + pshufd xmm5, xmm2, 0xd8 ; c11 c10 b11 b10 + pshufd xmm6, xmm1, 0x72 ; d13 d12 a13 a12 + pshufd xmm7, xmm3, 0x72 ; c13 c12 b13 b12 + + movdqa xmm0, xmm4 + punpcklqdq xmm0, xmm5 ; b11 b10 a11 a10 + punpckhqdq xmm4, xmm5 ; c11 c10 d11 d10 + movdqa xmm1, xmm6 + punpcklqdq xmm1, xmm7 ; b13 b12 a13 a12 + punpckhqdq xmm6, xmm7 ; c13 c12 d13 d12 + + movdqa xmm2, xmm0 + paddd xmm0, xmm4 ; b21 b20 a21 a20 + psubd xmm2, xmm4 ; c21 c20 d21 d20 + movdqa xmm3, xmm1 + paddd xmm1, xmm6 ; b23 b22 a23 a22 + psubd xmm3, xmm6 ; c23 c22 d23 d22 + + pxor xmm4, xmm4 movdqa xmm5, xmm4 - punpcklqdq xmm4, xmm3 ;d1 a1 - punpckhqdq xmm5, xmm3 ;c1 b1 + pcmpgtd xmm4, xmm0 + pcmpgtd xmm5, xmm2 + pand xmm4, [GLOBAL(cd1)] + pand xmm5, [GLOBAL(cd1)] - movdqa xmm1, xmm5 ;c1 b1 - paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - ; 13 12 11 10 03 02 01 00 - ; - ; 33 32 31 30 23 22 21 20 - ; - movdqa xmm0, xmm5 ; 13 12 11 10 03 02 01 00 - punpcklwd xmm5, xmm4 ; 23 03 22 02 21 01 20 00 - punpckhwd xmm0, xmm4 ; 33 13 32 12 31 11 30 10 - movdqa xmm1, xmm5 ; 23 03 22 02 21 01 20 00 - punpcklwd xmm5, xmm0 ; 31 21 11 01 30 20 10 00 - punpckhwd xmm1, xmm0 ; 33 23 13 03 32 22 12 02 - ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] - movdqa xmm3, xmm5 ;ip[4] ip[0] + pxor xmm6, xmm6 + movdqa xmm7, xmm6 + pcmpgtd xmm6, xmm1 + pcmpgtd xmm7, xmm3 + pand xmm6, [GLOBAL(cd1)] + pand xmm7, [GLOBAL(cd1)] - paddw xmm5, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 - psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 + paddd xmm0, xmm4 + paddd xmm2, xmm5 + paddd xmm0, [GLOBAL(cd3)] + paddd xmm2, [GLOBAL(cd3)] + paddd xmm1, xmm6 
+ paddd xmm3, xmm7 + paddd xmm1, [GLOBAL(cd3)] + paddd xmm3, [GLOBAL(cd3)] - movdqa xmm6, xmm5 - punpcklqdq xmm5, xmm3 ;d1 a1 - punpckhqdq xmm6, xmm3 ;c1 b1 + psrad xmm0, 3 + psrad xmm1, 3 + psrad xmm2, 3 + psrad xmm3, 3 + movdqa xmm4, xmm0 + punpcklqdq xmm0, xmm1 ; a23 a22 a21 a20 + punpckhqdq xmm4, xmm1 ; b23 b22 b21 b20 + movdqa xmm5, xmm2 + punpckhqdq xmm2, xmm3 ; c23 c22 c21 c20 + punpcklqdq xmm5, xmm3 ; d23 d22 d21 d20 - movdqa xmm1, xmm6 ;c1 b1 - paddw xmm6, xmm5 ;dl+cl a1+b1 aka op[4] op[0] - psubw xmm5, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] + packssdw xmm0, xmm4 ; b23 b22 b21 b20 a23 a22 a21 a20 + packssdw xmm2, xmm5 ; d23 d22 d21 d20 c23 c22 c21 c20 - movdqa xmm0, xmm6 ;aka b2 a2 - movdqa xmm1, xmm5 ;aka d2 c2 - - pcmpgtw xmm0, xmm7 - pcmpgtw xmm1, xmm7 - - psrlw xmm0, 15 - psrlw xmm1, 15 - - paddw xmm6, xmm0 - paddw xmm5, xmm1 - - psraw xmm6, 1 - psraw xmm5, 1 - - ; a2 = a1 + b1; - ; b2 = c1 + d1; - ; c2 = a1 - b1; - ; d2 = d1 - c1; - ; a2 += (a2>0); - ; b2 += (b2>0); - ; c2 += (c2>0); - ; d2 += (d2>0); - ; op[0] = (a2)>>1; - ; op[4] = (b2)>>1; - ; op[8] = (c2)>>1; - ; op[12]= (d2)>>1; - - movdqu [rdi + 0], xmm6 - movdqu [rdi + 16], xmm5 + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi + 16], xmm2 ; begin epilog pop rdi pop rsi + RESTORE_GOT + RESTORE_XMM UNSHADOW_ARGS pop rbp ret + +SECTION_RODATA +align 16 +c1: + dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 +align 16 +cn1: + dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff +align 16 +cd1: + dd 0x00000001, 0x00000001, 0x00000001, 0x00000001 +align 16 +cd3: + dd 0x00000003, 0x00000003, 0x00000003, 0x00000003 diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h index 5661491ad..3b7b29c21 100644 --- a/vp8/encoder/x86/mcomp_x86.h +++ b/vp8/encoder/x86/mcomp_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -23,5 +24,14 @@ #endif #endif +#if HAVE_SSE4_1 +#if !CONFIG_RUNTIME_CPU_DETECT + +#undef vp8_search_full_search +#define vp8_search_full_search vp8_full_search_sadx8 + +#endif +#endif + #endif diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c index 69617ca47..a182c8856 100644 --- a/vp8/encoder/x86/preproc_mmx.c +++ b/vp8/encoder/x86/preproc_mmx.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm index 847fc6e37..f29a54ecd 100644 --- a/vp8/encoder/x86/quantize_mmx.asm +++ b/vp8/encoder/x86/quantize_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -248,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx): paddd mm0, mm5 ; eob adjustment begins here - movd rcx, mm0 + movq rcx, mm0 and rcx, 0xffff xor rdx, rdx @@ -261,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx): and rax, rdx ; Substitute the sse assembly for the old mmx mixed assembly/C. The ; following is kept as reference - ; movd rcx, mm0 + ; movq rcx, mm0 ; bsr rax, rcx ; ; mov eob, rax @@ -283,156 +284,3 @@ sym(vp8_fast_quantize_b_impl_mmx): UNSHADOW_ARGS pop rbp ret - - -;int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr, -; short *qcoeff_ptr,short *dequant_ptr, -; short *scan_mask, short *round_ptr, -; short *quant_ptr, short *dqcoeff_ptr); -global sym(vp8_fast_quantize_b_impl_sse) -sym(vp8_fast_quantize_b_impl_sse): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 8 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;coeff_ptr - movdqa xmm0, [rsi] - - mov rax, arg(1) ;zbin_ptr - movdqa xmm1, [rax] - - movdqa xmm3, xmm0 - psraw xmm0, 15 - - pxor xmm3, xmm0 - psubw xmm3, xmm0 ; abs - - movdqa xmm2, xmm3 - pcmpgtw xmm1, xmm2 - - pandn xmm1, xmm2 - movdqa xmm3, xmm1 - - mov rdx, arg(6) ; quant_ptr - movdqa xmm1, [rdx] - - mov rcx, arg(5) ; round_ptr - movdqa xmm2, [rcx] - - paddw xmm3, xmm2 - pmulhuw xmm3, xmm1 - - pxor xmm3, xmm0 - psubw xmm3, xmm0 ;gain the sign back - 
- mov rdi, arg(2) ;qcoeff_ptr - movdqa xmm0, xmm3 - - movdqa [rdi], xmm3 - - mov rax, arg(3) ;dequant_ptr - movdqa xmm2, [rax] - - pmullw xmm3, xmm2 - mov rax, arg(7) ;dqcoeff_ptr - - movdqa [rax], xmm3 - - ; next 8 - movdqa xmm4, [rsi+16] - - mov rax, arg(1) ;zbin_ptr - movdqa xmm5, [rax+16] - - movdqa xmm7, xmm4 - psraw xmm4, 15 - - pxor xmm7, xmm4 - psubw xmm7, xmm4 ; abs - - movdqa xmm6, xmm7 - pcmpgtw xmm5, xmm6 - - pandn xmm5, xmm6 - movdqa xmm7, xmm5 - - movdqa xmm5, [rdx+16] - movdqa xmm6, [rcx+16] - - - paddw xmm7, xmm6 - pmulhuw xmm7, xmm5 - - pxor xmm7, xmm4 - psubw xmm7, xmm4;gain the sign back - - mov rdi, arg(2) ;qcoeff_ptr - - movdqa xmm1, xmm7 - movdqa [rdi+16], xmm7 - - mov rax, arg(3) ;dequant_ptr - movdqa xmm6, [rax+16] - - pmullw xmm7, xmm6 - mov rax, arg(7) ;dqcoeff_ptr - - movdqa [rax+16], xmm7 - mov rdi, arg(4) ;scan_mask - - pxor xmm7, xmm7 - movdqa xmm2, [rdi] - - movdqa xmm3, [rdi+16]; - pcmpeqw xmm0, xmm7 - - pcmpeqw xmm1, xmm7 - pcmpeqw xmm6, xmm6 - - pxor xmm0, xmm6 - pxor xmm1, xmm6 - - psrlw xmm0, 15 - psrlw xmm1, 15 - - pmaddwd xmm0, xmm2 - pmaddwd xmm1, xmm3 - - movq xmm2, xmm0 - movq xmm3, xmm1 - - psrldq xmm0, 8 - psrldq xmm1, 8 - - paddd xmm0, xmm1 - paddd xmm2, xmm3 - - paddd xmm0, xmm2 - movq xmm1, xmm0 - - psrldq xmm0, 4 - paddd xmm1, xmm0 - - movd rcx, xmm1 - and rcx, 0xffff - - xor rdx, rdx - sub rdx, rcx - - bsr rax, rcx - inc rax - - sar rdx, 31 - and rax, rdx - - - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm new file mode 100644 index 000000000..1e0bd5c48 --- /dev/null +++ b/vp8/encoder/x86/quantize_sse2.asm @@ -0,0 +1,388 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. 
All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, +; short *qcoeff_ptr,short *dequant_ptr, +; const int *default_zig_zag, short *round_ptr, +; short *quant_ptr, short *dqcoeff_ptr, +; unsigned short zbin_oq_value, +; short *zbin_boost_ptr); +; +global sym(vp8_regular_quantize_b_impl_sse2) +sym(vp8_regular_quantize_b_impl_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 10 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + + %define abs_minus_zbin_lo 0 + %define abs_minus_zbin_hi 16 + %define temp_qcoeff_lo 32 + %define temp_qcoeff_hi 48 + %define save_xmm6 64 + %define save_xmm7 80 + %define eob 96 + + %define vp8_regularquantizeb_stack_size eob + 16 + + sub rsp, vp8_regularquantizeb_stack_size + + movdqa OWORD PTR[rsp + save_xmm6], xmm6 + movdqa OWORD PTR[rsp + save_xmm7], xmm7 + + mov rdx, arg(0) ;coeff_ptr + mov eax, arg(8) ;zbin_oq_value + + mov rcx, arg(1) ;zbin_ptr + movd xmm7, eax + + movdqa xmm0, OWORD PTR[rdx] + movdqa xmm4, OWORD PTR[rdx + 16] + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + psraw xmm0, 15 ;sign of z (aka sz) + psraw xmm4, 15 ;sign of z (aka sz) + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + + movdqa xmm2, OWORD PTR[rcx] ;load zbin_ptr + movdqa xmm3, OWORD PTR[rcx + 16] ;load zbin_ptr + + pshuflw xmm7, xmm7, 0 + psubw xmm1, xmm0 ;x = abs(z) + + punpcklwd xmm7, xmm7 ;duplicated zbin_oq_value + psubw xmm5, xmm4 ;x = abs(z) + + paddw xmm2, xmm7 + paddw xmm3, xmm7 + + psubw xmm1, xmm2 ;sub (zbin_ptr + zbin_oq_value) + psubw xmm5, xmm3 ;sub (zbin_ptr + zbin_oq_value) + + mov rdi, arg(5) ;round_ptr + mov rsi, arg(6) ;quant_ptr + + movdqa OWORD PTR[rsp + abs_minus_zbin_lo], xmm1 + movdqa OWORD PTR[rsp + abs_minus_zbin_hi], xmm5 + + paddw xmm1, xmm2 ;add (zbin_ptr + zbin_oq_value) back + paddw xmm5, xmm3 ;add (zbin_ptr + zbin_oq_value) back + + movdqa 
xmm2, OWORD PTR[rdi] + movdqa xmm3, OWORD PTR[rsi] + + movdqa xmm6, OWORD PTR[rdi + 16] + movdqa xmm7, OWORD PTR[rsi + 16] + + paddw xmm1, xmm2 + paddw xmm5, xmm6 + + pmulhw xmm1, xmm3 + pmulhw xmm5, xmm7 + + mov rsi, arg(2) ;qcoeff_ptr + pxor xmm6, xmm6 + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa OWORD PTR[rsp + temp_qcoeff_lo], xmm1 + movdqa OWORD PTR[rsp + temp_qcoeff_hi], xmm5 + + movdqa OWORD PTR[rsi], xmm6 ;zero qcoeff + movdqa OWORD PTR[rsi + 16], xmm6 ;zero qcoeff + + xor rax, rax + mov rcx, -1 + + mov [rsp + eob], rcx + mov rsi, arg(9) ;zbin_boost_ptr + + mov rbx, arg(4) ;default_zig_zag + +rq_zigzag_loop: + movsxd rcx, DWORD PTR[rbx + rax*4] ;now we have rc + movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin + lea rsi, [rsi + 2] ;zbin_boost_ptr++ + + movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] + + sub edx, edi ;x - zbin + jl rq_zigzag_1 + + mov rdi, arg(2) ;qcoeff_ptr + + movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] + + cmp edx, 0 + je rq_zigzag_1 + + mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ;zbin_boost_ptr + mov [rsp + eob], rax ;eob = i + +rq_zigzag_1: + movsxd rcx, DWORD PTR[rbx + rax*4 + 4] + movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin + lea rsi, [rsi + 2] ;zbin_boost_ptr++ + + movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] + lea rax, [rax + 1] + + sub edx, edi ;x - zbin + jl rq_zigzag_1a + + mov rdi, arg(2) ;qcoeff_ptr + + movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] + + cmp edx, 0 + je rq_zigzag_1a + + mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ;zbin_boost_ptr + mov [rsp + eob], rax ;eob = i + +rq_zigzag_1a: + movsxd rcx, DWORD PTR[rbx + rax*4 + 4] + movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin + lea rsi, [rsi + 2] ;zbin_boost_ptr++ + + movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] + lea rax, [rax + 1] + + sub edx, edi ;x - zbin + jl rq_zigzag_1b + + mov rdi, arg(2) 
;qcoeff_ptr + + movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] + + cmp edx, 0 + je rq_zigzag_1b + + mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ;zbin_boost_ptr + mov [rsp + eob], rax ;eob = i + +rq_zigzag_1b: + movsxd rcx, DWORD PTR[rbx + rax*4 + 4] + movsx edi, WORD PTR [rsi] ;*zbin_boost_ptr aka zbin + lea rsi, [rsi + 2] ;zbin_boost_ptr++ + + movsx edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2] + lea rax, [rax + 1] + + sub edx, edi ;x - zbin + jl rq_zigzag_1c + + mov rdi, arg(2) ;qcoeff_ptr + + movsx edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2] + + cmp edx, 0 + je rq_zigzag_1c + + mov WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc] + + mov rsi, arg(9) ;zbin_boost_ptr + mov [rsp + eob], rax ;eob = i + +rq_zigzag_1c: + lea rax, [rax + 1] + + cmp rax, 16 + jl rq_zigzag_loop + + mov rdi, arg(2) ;qcoeff_ptr + mov rcx, arg(3) ;dequant_ptr + mov rsi, arg(7) ;dqcoeff_ptr + + movdqa xmm2, OWORD PTR[rdi] + movdqa xmm3, OWORD PTR[rdi + 16] + + movdqa xmm0, OWORD PTR[rcx] + movdqa xmm1, OWORD PTR[rcx + 16] + + pmullw xmm0, xmm2 + pmullw xmm1, xmm3 + + movdqa OWORD PTR[rsi], xmm0 ;store dqcoeff + movdqa OWORD PTR[rsi + 16], xmm1 ;store dqcoeff + + mov rax, [rsp + eob] + + movdqa xmm6, OWORD PTR[rsp + save_xmm6] + movdqa xmm7, OWORD PTR[rsp + save_xmm7] + + add rax, 1 + + add rsp, vp8_regularquantizeb_stack_size + pop rsp + + ; begin epilog + pop rbx + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, +; short *qcoeff_ptr,short *dequant_ptr, +; short *scan_mask, short *round_ptr, +; short *quant_ptr, short *dqcoeff_ptr); +global sym(vp8_fast_quantize_b_impl_sse2) +sym(vp8_fast_quantize_b_impl_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 7 + push rsi + push rdi + push rbx + ; end prolog + + ALIGN_STACK 16, rax + + %define save_xmm6 0 + %define save_xmm7 16 + + %define vp8_fastquantizeb_stack_size save_xmm7 + 16 + + sub rsp, 
vp8_fastquantizeb_stack_size + + movdqa XMMWORD PTR[rsp + save_xmm6], xmm6 + movdqa XMMWORD PTR[rsp + save_xmm7], xmm7 + + mov rdx, arg(0) ;coeff_ptr + mov rcx, arg(2) ;dequant_ptr + mov rax, arg(3) ;scan_mask + mov rdi, arg(4) ;round_ptr + mov rsi, arg(5) ;quant_ptr + + movdqa xmm0, XMMWORD PTR[rdx] + movdqa xmm4, XMMWORD PTR[rdx + 16] + + movdqa xmm6, XMMWORD PTR[rdi] ;round lo + movdqa xmm7, XMMWORD PTR[rdi + 16] ;round hi + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + psraw xmm0, 15 ;sign of z (aka sz) + psraw xmm4, 15 ;sign of z (aka sz) + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 ;x = abs(z) + psubw xmm5, xmm4 ;x = abs(z) + + paddw xmm1, xmm6 + paddw xmm5, xmm7 + + pmulhw xmm1, XMMWORD PTR[rsi] + pmulhw xmm5, XMMWORD PTR[rsi + 16] + + mov rdi, arg(1) ;qcoeff_ptr + mov rsi, arg(6) ;dqcoeff_ptr + + movdqa xmm6, XMMWORD PTR[rcx] + movdqa xmm7, XMMWORD PTR[rcx + 16] + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa XMMWORD PTR[rdi], xmm1 + movdqa XMMWORD PTR[rdi + 16], xmm5 + + pmullw xmm6, xmm1 + pmullw xmm7, xmm5 + + movdqa xmm2, XMMWORD PTR[rax] + movdqa xmm3, XMMWORD PTR[rax+16]; + + pxor xmm4, xmm4 ;clear all bits + pcmpeqw xmm1, xmm4 + pcmpeqw xmm5, xmm4 + + pcmpeqw xmm4, xmm4 ;set all bits + pxor xmm1, xmm4 + pxor xmm5, xmm4 + + psrlw xmm1, 15 + psrlw xmm5, 15 + + pmaddwd xmm1, xmm2 + pmaddwd xmm5, xmm3 + + movq xmm2, xmm1 + movq xmm3, xmm5 + + psrldq xmm1, 8 + psrldq xmm5, 8 + + paddd xmm1, xmm5 + paddd xmm2, xmm3 + + paddd xmm1, xmm2 + movq xmm5, xmm1 + + psrldq xmm1, 4 + paddd xmm5, xmm1 + + movq rcx, xmm5 + and rcx, 0xffff + + xor rdx, rdx + sub rdx, rcx + + bsr rax, rcx + inc rax + + sar rdx, 31 + and rax, rdx + + movdqa XMMWORD PTR[rsi], xmm6 ;store dqcoeff + movdqa XMMWORD PTR[rsi + 16], xmm7 ;store dqcoeff + + movdqa xmm6, XMMWORD PTR[rsp + save_xmm6] + movdqa xmm7, XMMWORD PTR[rsp + save_xmm7] + + add rsp, vp8_fastquantizeb_stack_size + pop rsp + + ; begin epilog + pop rbx + pop rdi + pop rsi + 
UNSHADOW_ARGS + pop rbp + ret diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm new file mode 100755 index 000000000..2f33199e5 --- /dev/null +++ b/vp8/encoder/x86/quantize_ssse3.asm @@ -0,0 +1,114 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license and patent +; grant that can be found in the LICENSE file in the root of the source +; tree. All contributing project authors may be found in the AUTHORS +; file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + + +;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr +; short *qcoeff_ptr,short *dequant_ptr, +; short *round_ptr, +; short *quant_ptr, short *dqcoeff_ptr); +; +global sym(vp8_fast_quantize_b_impl_ssse3) +sym(vp8_fast_quantize_b_impl_ssse3): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 6 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdx, arg(0) ;coeff_ptr + mov rdi, arg(3) ;round_ptr + mov rsi, arg(4) ;quant_ptr + + movdqa xmm0, [rdx] + movdqa xmm4, [rdx + 16] + + movdqa xmm2, [rdi] ;round lo + movdqa xmm3, [rdi + 16] ;round hi + + movdqa xmm1, xmm0 + movdqa xmm5, xmm4 + + psraw xmm0, 15 ;sign of z (aka sz) + psraw xmm4, 15 ;sign of z (aka sz) + + pabsw xmm1, xmm1 + pabsw xmm5, xmm5 + + paddw xmm1, xmm2 + paddw xmm5, xmm3 + + pmulhw xmm1, [rsi] + pmulhw xmm5, [rsi + 16] + + mov rdi, arg(1) ;qcoeff_ptr + mov rcx, arg(2) ;dequant_ptr + mov rsi, arg(5) ;dqcoeff_ptr + + pxor xmm1, xmm0 + pxor xmm5, xmm4 + psubw xmm1, xmm0 + psubw xmm5, xmm4 + + movdqa [rdi], xmm1 + movdqa [rdi + 16], xmm5 + + movdqa xmm2, [rcx] + movdqa xmm3, [rcx + 16] + + pxor xmm4, xmm4 + pmullw xmm2, xmm1 + pmullw xmm3, xmm5 + + pcmpeqw xmm1, xmm4 ;non zero mask + pcmpeqw xmm5, xmm4 ;non zero mask + packsswb xmm1, xmm5 + pshufb xmm1, [ GLOBAL(zz_shuf)] + + pmovmskb edx, xmm1 + +; xor ecx, ecx +; mov eax, -1 +;find_eob_loop: +; shr edx, 1 +; jc fq_skip +; mov eax, ecx 
+;fq_skip: +; inc ecx +; cmp ecx, 16 +; jne find_eob_loop + xor rdi, rdi + mov eax, -1 + xor dx, ax ;flip the bits for bsr + bsr eax, edx + + movdqa [rsi], xmm2 ;store dqcoeff + movdqa [rsi + 16], xmm3 ;store dqcoeff + + sub edi, edx ;check for all zeros in bit mask + sar edi, 31 ;0 or -1 + add eax, 1 + and eax, edi ;if the bit mask was all zero, + ;then eob = 0 + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +zz_shuf: + db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h new file mode 100644 index 000000000..b5b22c022 --- /dev/null +++ b/vp8/encoder/x86/quantize_x86.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license and patent + * grant that can be found in the LICENSE file in the root of the source + * tree. All contributing project authors may be found in the AUTHORS + * file in the root of the source tree. + */ + +#ifndef QUANTIZE_X86_H +#define QUANTIZE_X86_H + + +/* Note: + * + * This platform is commonly built for runtime CPU detection. If you modify + * any of the function mappings present in this file, be sure to also update + * them in the function pointer initialization code + */ +#if HAVE_MMX + +#endif + + +#if HAVE_SSE2 +extern prototype_quantize_block(vp8_regular_quantize_b_sse2); + +#if !CONFIG_RUNTIME_CPU_DETECT + +/* The sse2 quantizer has not been updated to match the new exact + * quantizer introduced in commit e04e2935 + *#undef vp8_quantize_quantb + *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2 + */ + +#endif + +#endif + + +#endif diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm index a825698e7..85cb023a4 100644 --- a/vp8/encoder/x86/sad_mmx.asm +++ b/vp8/encoder/x86/sad_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -16,8 +17,6 @@ global sym(vp8_sad8x8_mmx) global sym(vp8_sad4x4_mmx) global sym(vp8_sad16x8_mmx) -%idefine QWORD - ;unsigned int vp8_sad16x16_mmx( ; unsigned char *src_ptr, ; int src_stride, @@ -99,7 +98,7 @@ x16x16sad_mmx_loop: psrlq mm0, 32 paddw mm7, mm0 - movd rax, mm7 + movq rax, mm7 pop rdi pop rsi @@ -171,7 +170,7 @@ x8x16sad_mmx_loop: psrlq mm0, 32 paddw mm7, mm0 - movd rax, mm7 + movq rax, mm7 pop rdi pop rsi @@ -241,7 +240,7 @@ x8x8sad_mmx_loop: psrlq mm0, 32 paddw mm7, mm0 - movd rax, mm7 + movq rax, mm7 pop rdi pop rsi @@ -271,11 +270,11 @@ sym(vp8_sad4x4_mmx): movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride - movd mm0, QWORD PTR [rsi] - movd mm1, QWORD PTR [rdi] + movd mm0, DWORD PTR [rsi] + movd mm1, DWORD PTR [rdi] - movd mm2, QWORD PTR [rsi+rax] - movd mm3, QWORD PTR [rdi+rdx] + movd mm2, DWORD PTR [rsi+rax] + movd mm3, DWORD PTR [rdi+rdx] punpcklbw mm0, mm2 punpcklbw mm1, mm3 @@ -297,11 +296,11 @@ sym(vp8_sad4x4_mmx): lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] - movd mm4, QWORD PTR [rsi] - movd mm5, QWORD PTR [rdi] + movd mm4, DWORD PTR [rsi] + movd mm5, DWORD PTR [rdi] - movd mm6, QWORD PTR [rsi+rax] - movd mm7, QWORD PTR [rdi+rdx] + movd mm6, DWORD PTR [rsi+rax] + movd mm7, DWORD PTR [rdi+rdx] punpcklbw mm4, mm6 punpcklbw mm5, mm7 @@ -330,7 +329,7 
@@ sym(vp8_sad4x4_mmx): psrlq mm0, 32 paddw mm0, mm1 - movd rax, mm0 + movq rax, mm0 pop rdi pop rsi @@ -417,7 +416,7 @@ x16x8sad_mmx_loop: psrlq mm0, 32 paddw mm7, mm0 - movd rax, mm7 + movq rax, mm7 pop rdi pop rsi diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm index 53240bbf1..39ed79604 100644 --- a/vp8/encoder/x86/sad_sse2.asm +++ b/vp8/encoder/x86/sad_sse2.asm @@ -1,17 +1,16 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; %include "vpx_ports/x86_abi_support.asm" -%idefine QWORD - ;unsigned int vp8_sad16x16_wmt( ; unsigned char *src_ptr, ; int src_stride, @@ -74,7 +73,7 @@ x16x16sad_wmt_loop: psrldq xmm7, 8 paddw xmm0, xmm7 - movd rax, xmm0 + movq rax, xmm0 ; begin epilog pop rdi @@ -112,7 +111,7 @@ sym(vp8_sad8x16_wmt): x8x16sad_wmt_loop: - movd rax, mm7 + movq rax, mm7 cmp rax, arg(4) jg x8x16sad_wmt_early_exit @@ -134,7 +133,7 @@ x8x16sad_wmt_loop: cmp rsi, rcx jne x8x16sad_wmt_loop - movd rax, mm7 + movq rax, mm7 x8x16sad_wmt_early_exit: @@ -173,7 +172,7 @@ sym(vp8_sad8x8_wmt): x8x8sad_wmt_loop: - movd rax, mm7 + movq rax, mm7 cmp rax, arg(4) jg x8x8sad_wmt_early_exit @@ -189,7 +188,7 @@ x8x8sad_wmt_loop: cmp rsi, rcx jne x8x8sad_wmt_loop - movd rax, mm7 + movq rax, mm7 x8x8sad_wmt_early_exit: ; begin epilog @@ -220,11 +219,11 @@ sym(vp8_sad4x4_wmt): movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride - movd mm0, QWORD PTR [rsi] - movd mm1, QWORD PTR [rdi] + movd mm0, DWORD PTR [rsi] + movd mm1, DWORD PTR [rdi] - movd mm2, QWORD PTR [rsi+rax] - movd mm3, QWORD PTR [rdi+rdx] + movd mm2, DWORD PTR [rsi+rax] + movd mm3, DWORD PTR [rdi+rdx] punpcklbw mm0, mm2 punpcklbw mm1, mm3 @@ -233,19 +232,19 @@ sym(vp8_sad4x4_wmt): lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] - movd mm4, QWORD PTR [rsi] + movd mm4, DWORD PTR [rsi] - movd mm5, QWORD PTR [rdi] - movd mm6, QWORD PTR [rsi+rax] + movd mm5, DWORD PTR [rdi] + movd mm6, DWORD PTR [rsi+rax] - movd mm7, QWORD PTR [rdi+rdx] + movd mm7, DWORD PTR [rdi+rdx] punpcklbw mm4, mm6 punpcklbw mm5, mm7 psadbw mm4, mm5 paddw mm0, mm4 - movd rax, mm0 + movq rax, mm0 ; begin epilog pop rdi @@ -282,7 +281,7 @@ sym(vp8_sad16x8_wmt): x16x8sad_wmt_loop: - movd rax, mm7 + movq rax, mm7 cmp rax, arg(4) jg x16x8sad_wmt_early_exit @@ -316,7 +315,7 @@ x16x8sad_wmt_loop: cmp rsi, rcx jne x16x8sad_wmt_loop - movd rax, mm7 + movq rax, mm7 x16x8sad_wmt_early_exit: diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm 
index 38cc02957..575417516 100644 --- a/vp8/encoder/x86/sad_sse3.asm +++ b/vp8/encoder/x86/sad_sse3.asm @@ -1,32 +1,176 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; - %include "vpx_ports/x86_abi_support.asm" -%idefine QWORD +%macro STACK_FRAME_CREATE_X3 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define ref_ptr rdi + %define ref_stride rdx + %define end_ptr rcx + %define ret_var rbx + %define result_ptr arg(4) + %define max_err arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, [rsi] - lddqu xmm5, [rdi] - lddqu xmm6, [rdi+1] - lddqu xmm7, [rdi+2] + mov rsi, arg(0) ; src_ptr + mov rdi, arg(2) ; ref_ptr + + movsxd rax, dword ptr arg(1) ; src_stride + movsxd rdx, dword ptr arg(3) ; ref_stride +%else + %ifidn __OUTPUT_FORMAT__,x64 + %define src_ptr rcx + %define src_stride rdx + %define ref_ptr r8 + %define ref_stride r9 + %define end_ptr r10 + %define ret_var r11 + %define result_ptr [rsp+8+4*8] + %define max_err [rsp+8+4*8] + %else + %define src_ptr rdi + %define src_stride rsi + %define ref_ptr rdx + %define ref_stride rcx + %define end_ptr r9 + %define ret_var r10 + %define result_ptr r8 + %define max_err r8 + %endif +%endif + +%endmacro + +%macro STACK_FRAME_DESTROY_X3 0 + %define src_ptr + %define 
src_stride + %define ref_ptr + %define ref_stride + %define end_ptr + %define ret_var + %define result_ptr + %define max_err + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + %endif +%endif + ret +%endmacro + +%macro STACK_FRAME_CREATE_X4 0 +%if ABI_IS_32BIT + %define src_ptr rsi + %define src_stride rax + %define r0_ptr rcx + %define r1_ptr rdx + %define r2_ptr rbx + %define r3_ptr rdi + %define ref_stride rbp + %define result_ptr arg(4) + push rbp + mov rbp, rsp + push rsi + push rdi + push rbx + + push rbp + mov rdi, arg(2) ; ref_ptr_base + + LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi + + mov rsi, arg(0) ; src_ptr + + movsxd rbx, dword ptr arg(1) ; src_stride + movsxd rbp, dword ptr arg(3) ; ref_stride + + xchg rbx, rax +%else + %ifidn __OUTPUT_FORMAT__,x64 + %define src_ptr rcx + %define src_stride rdx + %define r0_ptr rsi + %define r1_ptr r10 + %define r2_ptr r11 + %define r3_ptr r8 + %define ref_stride r9 + %define result_ptr [rsp+16+4*8] + push rsi + + LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr + %else + %define src_ptr rdi + %define src_stride rsi + %define r0_ptr r9 + %define r1_ptr r10 + %define r2_ptr r11 + %define r3_ptr rdx + %define ref_stride rcx + %define result_ptr r8 + + LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr + + %endif +%endif +%endmacro + +%macro STACK_FRAME_DESTROY_X4 0 + %define src_ptr + %define src_stride + %define r0_ptr + %define r1_ptr + %define r2_ptr + %define r3_ptr + %define ref_stride + %define result_ptr + +%if ABI_IS_32BIT + pop rbx + pop rdi + pop rsi + pop rbp +%else + %ifidn __OUTPUT_FORMAT__,x64 + pop rsi + %endif +%endif + ret +%endmacro + +%macro PROCESS_16X2X3 5 +%if %1==0 + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm5, XMMWORD PTR [%3] + lddqu xmm6, XMMWORD PTR [%3+1] + lddqu xmm7, XMMWORD PTR [%3+2] psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, [rsi] - lddqu xmm1, [rdi] - lddqu xmm2, [rdi+1] - lddqu xmm3, [rdi+2] + movdqa 
xmm0, XMMWORD PTR [%2] + lddqu xmm1, XMMWORD PTR [%3] + lddqu xmm2, XMMWORD PTR [%3+1] + lddqu xmm3, XMMWORD PTR [%3+2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 @@ -36,13 +180,15 @@ paddw xmm6, xmm2 paddw xmm7, xmm3 %endif - movdqa xmm0, QWORD PTR [rsi+rax] - lddqu xmm1, QWORD PTR [rdi+rdx] - lddqu xmm2, QWORD PTR [rdi+rdx+1] - lddqu xmm3, QWORD PTR [rdi+rdx+2] + movdqa xmm0, XMMWORD PTR [%2+%4] + lddqu xmm1, XMMWORD PTR [%3+%5] + lddqu xmm2, XMMWORD PTR [%3+%5+1] + lddqu xmm3, XMMWORD PTR [%3+%5+2] - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] +%if %1==0 || %1==1 + lea %2, [%2+%4*2] + lea %3, [%3+%5*2] +%endif psadbw xmm1, xmm0 psadbw xmm2, xmm0 @@ -53,21 +199,21 @@ paddw xmm7, xmm3 %endmacro -%macro PROCESS_8X2X3 1 -%if %1 - movq mm0, [rsi] - movq mm5, [rdi] - movq mm6, [rdi+1] - movq mm7, [rdi+2] +%macro PROCESS_8X2X3 5 +%if %1==0 + movq mm0, QWORD PTR [%2] + movq mm5, QWORD PTR [%3] + movq mm6, QWORD PTR [%3+1] + movq mm7, QWORD PTR [%3+2] psadbw mm5, mm0 psadbw mm6, mm0 psadbw mm7, mm0 %else - movq mm0, [rsi] - movq mm1, [rdi] - movq mm2, [rdi+1] - movq mm3, [rdi+2] + movq mm0, QWORD PTR [%2] + movq mm1, QWORD PTR [%3] + movq mm2, QWORD PTR [%3+1] + movq mm3, QWORD PTR [%3+2] psadbw mm1, mm0 psadbw mm2, mm0 @@ -77,13 +223,15 @@ paddw mm6, mm2 paddw mm7, mm3 %endif - movq mm0, QWORD PTR [rsi+rax] - movq mm1, QWORD PTR [rdi+rdx] - movq mm2, QWORD PTR [rdi+rdx+1] - movq mm3, QWORD PTR [rdi+rdx+2] + movq mm0, QWORD PTR [%2+%4] + movq mm1, QWORD PTR [%3+%5] + movq mm2, QWORD PTR [%3+%5+1] + movq mm3, QWORD PTR [%3+%5+2] - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] +%if %1==0 || %1==1 + lea %2, [%2+%4*2] + lea %3, [%3+%5*2] +%endif psadbw mm1, mm0 psadbw mm2, mm0 @@ -102,115 +250,117 @@ mov %5, [%1+REG_SZ_BYTES*3] %endmacro -%macro PROCESS_16X2X4 1 -%if %1 - movdqa xmm0, [rsi] - lddqu xmm4, [rcx] - lddqu xmm5, [rdx] - lddqu xmm6, [rbx] - lddqu xmm7, [rdi] +%macro PROCESS_16X2X4 8 +%if %1==0 + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm4, XMMWORD PTR [%3] + lddqu xmm5, 
XMMWORD PTR [%4] + lddqu xmm6, XMMWORD PTR [%5] + lddqu xmm7, XMMWORD PTR [%6] psadbw xmm4, xmm0 psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, [rsi] - lddqu xmm1, [rcx] - lddqu xmm2, [rdx] - lddqu xmm3, [rbx] + movdqa xmm0, XMMWORD PTR [%2] + lddqu xmm1, XMMWORD PTR [%3] + lddqu xmm2, XMMWORD PTR [%4] + lddqu xmm3, XMMWORD PTR [%5] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm4, xmm1 - lddqu xmm1, [rdi] + lddqu xmm1, XMMWORD PTR [%6] paddw xmm5, xmm2 paddw xmm6, xmm3 psadbw xmm1, xmm0 paddw xmm7, xmm1 %endif - movdqa xmm0, QWORD PTR [rsi+rax] - lddqu xmm1, QWORD PTR [rcx+rbp] - lddqu xmm2, QWORD PTR [rdx+rbp] - lddqu xmm3, QWORD PTR [rbx+rbp] + movdqa xmm0, XMMWORD PTR [%2+%7] + lddqu xmm1, XMMWORD PTR [%3+%8] + lddqu xmm2, XMMWORD PTR [%4+%8] + lddqu xmm3, XMMWORD PTR [%5+%8] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm4, xmm1 - lddqu xmm1, QWORD PTR [rdi+rbp] + lddqu xmm1, XMMWORD PTR [%6+%8] paddw xmm5, xmm2 paddw xmm6, xmm3 - lea rsi, [rsi+rax*2] - lea rcx, [rcx+rbp*2] +%if %1==0 || %1==1 + lea %2, [%2+%7*2] + lea %3, [%3+%8*2] - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] - - lea rdi, [rdi+rbp*2] + lea %4, [%4+%8*2] + lea %5, [%5+%8*2] + lea %6, [%6+%8*2] +%endif psadbw xmm1, xmm0 paddw xmm7, xmm1 %endmacro -%macro PROCESS_8X2X4 1 -%if %1 - movq mm0, [rsi] - movq mm4, [rcx] - movq mm5, [rdx] - movq mm6, [rbx] - movq mm7, [rdi] +%macro PROCESS_8X2X4 8 +%if %1==0 + movq mm0, QWORD PTR [%2] + movq mm4, QWORD PTR [%3] + movq mm5, QWORD PTR [%4] + movq mm6, QWORD PTR [%5] + movq mm7, QWORD PTR [%6] psadbw mm4, mm0 psadbw mm5, mm0 psadbw mm6, mm0 psadbw mm7, mm0 %else - movq mm0, [rsi] - movq mm1, [rcx] - movq mm2, [rdx] - movq mm3, [rbx] + movq mm0, QWORD PTR [%2] + movq mm1, QWORD PTR [%3] + movq mm2, QWORD PTR [%4] + movq mm3, QWORD PTR [%5] psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm4, mm1 - movq mm1, [rdi] + movq mm1, QWORD PTR [%6] paddw mm5, mm2 paddw mm6, mm3 psadbw mm1, mm0 paddw 
mm7, mm1 %endif - movq mm0, QWORD PTR [rsi+rax] - movq mm1, QWORD PTR [rcx+rbp] - movq mm2, QWORD PTR [rdx+rbp] - movq mm3, QWORD PTR [rbx+rbp] + movq mm0, QWORD PTR [%2+%7] + movq mm1, QWORD PTR [%3+%8] + movq mm2, QWORD PTR [%4+%8] + movq mm3, QWORD PTR [%5+%8] psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm4, mm1 - movq mm1, QWORD PTR [rdi+rbp] + movq mm1, QWORD PTR [%6+%8] paddw mm5, mm2 paddw mm6, mm3 - lea rsi, [rsi+rax*2] - lea rcx, [rcx+rbp*2] +%if %1==0 || %1==1 + lea %2, [%2+%7*2] + lea %3, [%3+%8*2] - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] - - lea rdi, [rdi+rbp*2] + lea %4, [%4+%8*2] + lea %5, [%5+%8*2] + lea %6, [%6+%8*2] +%endif psadbw mm1, mm0 paddw mm7, mm1 @@ -224,54 +374,39 @@ ; int *results) global sym(vp8_sad16x16x3_sse3) sym(vp8_sad16x16x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - - mov rdi, arg(4) ;Results + mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+8], xmm0 + movd 
[rcx+8], xmm0 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad16x8x3_sse3( ; unsigned char *src_ptr, @@ -281,50 +416,35 @@ sym(vp8_sad16x16x3_sse3): ; int *results) global sym(vp8_sad16x8x3_sse3) sym(vp8_sad16x8x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - - mov rdi, arg(4) ;Results + mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad8x16x3_sse3( ; unsigned char *src_ptr, @@ -334,40 +454,26 @@ sym(vp8_sad16x8x3_sse3): ; int *results) global sym(vp8_sad8x16x3_sse3) sym(vp8_sad8x16x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride 
+ PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 + mov rcx, result_ptr - mov rdi, arg(4) ;Results + punpckldq mm5, mm6 - movd [rdi], mm5 - movd [rdi+4], mm6 - movd [rdi+8], mm7 + movq [rcx], mm5 + movd [rcx+8], mm7 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad8x8x3_sse3( ; unsigned char *src_ptr, @@ -377,36 +483,22 @@ sym(vp8_sad8x16x3_sse3): ; int *results) global sym(vp8_sad8x8x3_sse3) sym(vp8_sad8x8x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride + PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride - PROCESS_8X2X3 1 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 - PROCESS_8X2X3 0 + mov rcx, result_ptr - mov rdi, arg(4) ;Results + punpckldq mm5, mm6 - movd [rdi], mm5 - movd [rdi+4], mm6 - movd [rdi+8], mm7 + movq [rcx], mm5 + movd [rcx+8], mm7 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;void int vp8_sad4x4x3_sse3( ; unsigned char *src_ptr, @@ -416,33 +508,23 @@ sym(vp8_sad8x8x3_sse3): ; int *results) global sym(vp8_sad4x4x3_sse3) sym(vp8_sad4x4x3_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) 
;ref_stride + movd mm0, DWORD PTR [src_ptr] + movd mm1, DWORD PTR [ref_ptr] - movd mm0, QWORD PTR [rsi] - movd mm1, QWORD PTR [rdi] - - movd mm2, QWORD PTR [rsi+rax] - movd mm3, QWORD PTR [rdi+rdx] + movd mm2, DWORD PTR [src_ptr+src_stride] + movd mm3, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm2 punpcklbw mm1, mm3 - movd mm4, QWORD PTR [rdi+1] - movd mm5, QWORD PTR [rdi+2] + movd mm4, DWORD PTR [ref_ptr+1] + movd mm5, DWORD PTR [ref_ptr+2] - movd mm2, QWORD PTR [rdi+rdx+1] - movd mm3, QWORD PTR [rdi+rdx+2] + movd mm2, DWORD PTR [ref_ptr+ref_stride+1] + movd mm3, DWORD PTR [ref_ptr+ref_stride+2] psadbw mm1, mm0 @@ -452,29 +534,27 @@ sym(vp8_sad4x4x3_sse3): psadbw mm4, mm0 psadbw mm5, mm0 + lea src_ptr, [src_ptr+src_stride*2] + lea ref_ptr, [ref_ptr+ref_stride*2] + movd mm0, DWORD PTR [src_ptr] + movd mm2, DWORD PTR [ref_ptr] - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - movd mm0, QWORD PTR [rsi] - movd mm2, QWORD PTR [rdi] - - movd mm3, QWORD PTR [rsi+rax] - movd mm6, QWORD PTR [rdi+rdx] + movd mm3, DWORD PTR [src_ptr+src_stride] + movd mm6, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm3 punpcklbw mm2, mm6 - movd mm3, QWORD PTR [rdi+1] - movd mm7, QWORD PTR [rdi+2] + movd mm3, DWORD PTR [ref_ptr+1] + movd mm7, DWORD PTR [ref_ptr+2] psadbw mm2, mm0 paddw mm1, mm2 - movd mm2, QWORD PTR [rdi+rdx+1] - movd mm6, QWORD PTR [rdi+rdx+2] + movd mm2, DWORD PTR [ref_ptr+ref_stride+1] + movd mm6, DWORD PTR [ref_ptr+ref_stride+2] punpcklbw mm3, mm2 punpcklbw mm7, mm6 @@ -485,19 +565,14 @@ sym(vp8_sad4x4x3_sse3): paddw mm3, mm4 paddw mm7, mm5 - mov rdi, arg(4) ;Results - movd [rdi], mm1 + mov rcx, result_ptr - movd [rdi+4], mm3 - movd [rdi+8], mm7 + punpckldq mm1, mm3 + movq [rcx], mm1 + movd [rcx+8], mm7 - ; begin epilog - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X3 ;unsigned int vp8_sad16x16_sse3( ; unsigned char *src_ptr, @@ -508,51 +583,40 @@ sym(vp8_sad4x4x3_sse3): ;%define lddqu movdqu global sym(vp8_sad16x16_sse3) 
sym(vp8_sad16x16_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rbx - push rsi - push rdi - ; end prolog - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr + STACK_FRAME_CREATE_X3 - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride + lea end_ptr, [src_ptr+src_stride*8] - lea rcx, [rsi+rbx*8] - - lea rcx, [rcx+rbx*8] + lea end_ptr, [end_ptr+src_stride*8] pxor mm7, mm7 -vp8_sad16x16_sse3_loop: +.vp8_sad16x16_sse3_loop: - movd rax, mm7 - cmp rax, arg(4) - jg vp8_sad16x16_early_exit + movq ret_var, mm7 + cmp ret_var, max_err + jg .vp8_sad16x16_early_exit - movq mm0, QWORD PTR [rsi] - movq mm2, QWORD PTR [rsi+8] + movq mm0, QWORD PTR [src_ptr] + movq mm2, QWORD PTR [src_ptr+8] - movq mm1, QWORD PTR [rdi] - movq mm3, QWORD PTR [rdi+8] + movq mm1, QWORD PTR [ref_ptr] + movq mm3, QWORD PTR [ref_ptr+8] - movq mm4, QWORD PTR [rsi+rbx] - movq mm5, QWORD PTR [rdi+rdx] + movq mm4, QWORD PTR [src_ptr+src_stride] + movq mm5, QWORD PTR [ref_ptr+ref_stride] psadbw mm0, mm1 psadbw mm2, mm3 - movq mm1, QWORD PTR [rsi+rbx+8] - movq mm3, QWORD PTR [rdi+rdx+8] + movq mm1, QWORD PTR [src_ptr+src_stride+8] + movq mm3, QWORD PTR [ref_ptr+ref_stride+8] psadbw mm4, mm5 psadbw mm1, mm3 - lea rsi, [rsi+rbx*2] - lea rdi, [rdi+rdx*2] + lea src_ptr, [src_ptr+src_stride*2] + lea ref_ptr, [ref_ptr+ref_stride*2] paddw mm0, mm2 paddw mm4, mm1 @@ -560,20 +624,16 @@ vp8_sad16x16_sse3_loop: paddw mm7, mm0 paddw mm7, mm4 - cmp rsi, rcx - jne vp8_sad16x16_sse3_loop + cmp src_ptr, end_ptr + jne .vp8_sad16x16_sse3_loop - movd rax, mm7 + movq ret_var, mm7 -vp8_sad16x16_early_exit: +.vp8_sad16x16_early_exit: - ; begin epilog - pop rdi - pop rsi - pop rbx - UNSHADOW_ARGS - pop rbp - ret + mov rax, ret_var + + STACK_FRAME_DESTROY_X3 ;void vp8_sad16x16x4d_sse3( ; unsigned char *src_ptr, @@ -583,69 +643,48 @@ vp8_sad16x16_early_exit: ; int *results) global sym(vp8_sad16x16x4d_sse3) sym(vp8_sad16x16x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 
- push rsi - push rdi - push rbx - ; end prolog - push rbp - mov rdi, arg(2) ; ref_ptr_base + STACK_FRAME_CREATE_X4 - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_16X2X4 1 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 + PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr movq xmm0, xmm4 psrldq xmm4, 8 paddw xmm0, xmm4 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+12], xmm0 + movd [rcx+12], xmm0 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 ;void vp8_sad16x8x4d_sse3( ; unsigned char *src_ptr, @@ -655,65 +694,44 @@ sym(vp8_sad16x16x4d_sse3): ; int *results) global sym(vp8_sad16x8x4d_sse3) sym(vp8_sad16x8x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - push rbp - mov rdi, arg(2) ; ref_ptr_base + 
STACK_FRAME_CREATE_X4 - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_16X2X4 1 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 - PROCESS_16X2X4 0 + PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr movq xmm0, xmm4 psrldq xmm4, 8 paddw xmm0, xmm4 - movd [rdi], xmm0 + movd [rcx], xmm0 ;- movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 - movd [rdi+4], xmm0 + movd [rcx+4], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 - movd [rdi+8], xmm0 + movd [rcx+8], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 - movd [rdi+12], xmm0 + movd [rcx+12], xmm0 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 ;void int vp8_sad8x16x4d_sse3( ; unsigned char *src_ptr, @@ -723,50 +741,30 @@ sym(vp8_sad16x8x4d_sse3): ; int *results) global sym(vp8_sad8x16x4d_sse3) sym(vp8_sad8x16x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - push rbp - mov rdi, arg(2) ; ref_ptr_base + STACK_FRAME_CREATE_X4 - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_8X2X4 1 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 + PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, 
ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr - movd [rdi], mm4 - movd [rdi+4], mm5 - movd [rdi+8], mm6 - movd [rdi+12], mm7 + punpckldq mm4, mm5 + punpckldq mm6, mm7 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + movq [rcx], mm4 + movq [rcx+8], mm6 + + STACK_FRAME_DESTROY_X4 ;void int vp8_sad8x8x4d_sse3( ; unsigned char *src_ptr, @@ -776,46 +774,26 @@ sym(vp8_sad8x16x4d_sse3): ; int *results) global sym(vp8_sad8x8x4d_sse3) sym(vp8_sad8x8x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - push rbp - mov rdi, arg(2) ; ref_ptr_base + STACK_FRAME_CREATE_X4 - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi - - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - PROCESS_8X2X4 1 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 - PROCESS_8X2X4 0 + PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride + PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride +%if ABI_IS_32BIT pop rbp - mov rdi, arg(4) ;Results +%endif + mov rcx, result_ptr - movd [rdi], mm4 - movd [rdi+4], mm5 - movd [rdi+8], mm6 - movd [rdi+12], mm7 + punpckldq mm4, mm5 + punpckldq mm6, mm7 - ; begin 
epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + movq [rcx], mm4 + movq [rcx+8], mm6 + + STACK_FRAME_DESTROY_X4 ;void int vp8_sad4x4x4d_sse3( ; unsigned char *src_ptr, @@ -825,43 +803,26 @@ sym(vp8_sad8x8x4d_sse3): ; int *results) global sym(vp8_sad4x4x4d_sse3) sym(vp8_sad4x4x4d_sse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - push rsi - push rdi - push rbx - ; end prolog - push rbp - mov rdi, arg(2) ; ref_ptr_base + STACK_FRAME_CREATE_X4 - LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi + movd mm0, DWORD PTR [src_ptr] + movd mm1, DWORD PTR [r0_ptr] - mov rsi, arg(0) ;src_ptr - - movsxd rbx, dword ptr arg(1) ;src_stride - movsxd rbp, dword ptr arg(3) ;ref_stride - - xchg rbx, rax - - movd mm0, QWORD PTR [rsi] - movd mm1, QWORD PTR [rcx] - - movd mm2, QWORD PTR [rsi+rax] - movd mm3, QWORD PTR [rcx+rbp] + movd mm2, DWORD PTR [src_ptr+src_stride] + movd mm3, DWORD PTR [r0_ptr+ref_stride] punpcklbw mm0, mm2 punpcklbw mm1, mm3 - movd mm4, QWORD PTR [rdx] - movd mm5, QWORD PTR [rbx] + movd mm4, DWORD PTR [r1_ptr] + movd mm5, DWORD PTR [r2_ptr] - movd mm6, QWORD PTR [rdi] - movd mm2, QWORD PTR [rdx+rbp] + movd mm6, DWORD PTR [r3_ptr] + movd mm2, DWORD PTR [r1_ptr+ref_stride] - movd mm3, QWORD PTR [rbx+rbp] - movd mm7, QWORD PTR [rdi+rbp] + movd mm3, DWORD PTR [r2_ptr+ref_stride] + movd mm7, DWORD PTR [r3_ptr+ref_stride] psadbw mm1, mm0 @@ -876,37 +837,40 @@ sym(vp8_sad4x4x4d_sse3): - lea rsi, [rsi+rax*2] - lea rcx, [rcx+rbp*2] + lea src_ptr, [src_ptr+src_stride*2] + lea r0_ptr, [r0_ptr+ref_stride*2] - lea rdx, [rdx+rbp*2] - lea rbx, [rbx+rbp*2] + lea r1_ptr, [r1_ptr+ref_stride*2] + lea r2_ptr, [r2_ptr+ref_stride*2] - lea rdi, [rdi+rbp*2] + lea r3_ptr, [r3_ptr+ref_stride*2] - movd mm0, QWORD PTR [rsi] - movd mm2, QWORD PTR [rcx] + movd mm0, DWORD PTR [src_ptr] + movd mm2, DWORD PTR [r0_ptr] - movd mm3, QWORD PTR [rsi+rax] - movd mm7, QWORD PTR [rcx+rbp] + movd mm3, DWORD PTR [src_ptr+src_stride] + movd mm7, DWORD PTR [r0_ptr+ref_stride] punpcklbw 
mm0, mm3 punpcklbw mm2, mm7 - movd mm3, QWORD PTR [rdx] - movd mm7, QWORD PTR [rbx] + movd mm3, DWORD PTR [r1_ptr] + movd mm7, DWORD PTR [r2_ptr] psadbw mm2, mm0 +%if ABI_IS_32BIT mov rax, rbp pop rbp - mov rsi, arg(4) ;Results +%define ref_stride rax +%endif + mov rsi, result_ptr paddw mm1, mm2 movd [rsi], mm1 - movd mm2, QWORD PTR [rdx+rax] - movd mm1, QWORD PTR [rbx+rax] + movd mm2, DWORD PTR [r1_ptr+ref_stride] + movd mm1, DWORD PTR [r2_ptr+ref_stride] punpcklbw mm3, mm2 punpcklbw mm7, mm1 @@ -914,8 +878,8 @@ sym(vp8_sad4x4x4d_sse3): psadbw mm3, mm0 psadbw mm7, mm0 - movd mm2, QWORD PTR [rdi] - movd mm1, QWORD PTR [rdi+rax] + movd mm2, DWORD PTR [r3_ptr] + movd mm1, DWORD PTR [r3_ptr+ref_stride] paddw mm3, mm4 paddw mm7, mm5 @@ -930,10 +894,4 @@ sym(vp8_sad4x4x4d_sse3): movd [rsi+12], mm2 - ; begin epilog - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret + STACK_FRAME_DESTROY_X4 diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm new file mode 100644 index 000000000..21e2e5007 --- /dev/null +++ b/vp8/encoder/x86/sad_sse4.asm @@ -0,0 +1,353 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
+; + + +%include "vpx_ports/x86_abi_support.asm" + +%macro PROCESS_16X2X8 1 +%if %1 + movdqa xmm0, XMMWORD PTR [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + movq xmm2, MMWORD PTR [rdi+16] + punpcklqdq xmm1, xmm3 + punpcklqdq xmm3, xmm2 + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm1, xmm2 + paddw xmm1, xmm3 + paddw xmm1, xmm4 +%else + movdqa xmm0, XMMWORD PTR [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + movq xmm2, MMWORD PTR [rdi+16] + punpcklqdq xmm5, xmm3 + punpcklqdq xmm3, xmm2 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm5, xmm2 + paddw xmm5, xmm3 + paddw xmm5, xmm4 + + paddw xmm1, xmm5 +%endif + movdqa xmm0, XMMWORD PTR [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, MMWORD PTR [rdi+ rdx+8] + movq xmm2, MMWORD PTR [rdi+ rdx+16] + punpcklqdq xmm5, xmm3 + punpcklqdq xmm3, xmm2 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + + psrldq xmm0, 8 + movdqa xmm4, xmm3 + mpsadbw xmm3, xmm0, 0x0 + mpsadbw xmm4, xmm0, 0x5 + + paddw xmm5, xmm2 + paddw xmm5, xmm3 + paddw xmm5, xmm4 + + paddw xmm1, xmm5 +%endmacro + +%macro PROCESS_8X2X8 1 +%if %1 + movq xmm0, MMWORD PTR [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm1, xmm3 + + movdqa xmm2, xmm1 + mpsadbw xmm1, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm1, xmm2 +%else + movq xmm0, MMWORD PTR [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm5, xmm3 + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm5, xmm2 + + paddw xmm1, xmm5 +%endif + movq xmm0, MMWORD PTR [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, 
MMWORD PTR [rdi+ rdx+8] + punpcklqdq xmm5, xmm3 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + movdqa xmm2, xmm5 + mpsadbw xmm5, xmm0, 0x0 + mpsadbw xmm2, xmm0, 0x5 + paddw xmm5, xmm2 + + paddw xmm1, xmm5 +%endmacro + +%macro PROCESS_4X2X8 1 +%if %1 + movd xmm0, [rsi] + movq xmm1, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm1, xmm3 + + mpsadbw xmm1, xmm0, 0x0 +%else + movd xmm0, [rsi] + movq xmm5, MMWORD PTR [rdi] + movq xmm3, MMWORD PTR [rdi+8] + punpcklqdq xmm5, xmm3 + + mpsadbw xmm5, xmm0, 0x0 + + paddw xmm1, xmm5 +%endif + movd xmm0, [rsi + rax] + movq xmm5, MMWORD PTR [rdi+ rdx] + movq xmm3, MMWORD PTR [rdi+ rdx+8] + punpcklqdq xmm5, xmm3 + + lea rsi, [rsi+rax*2] + lea rdi, [rdi+rdx*2] + + mpsadbw xmm5, xmm0, 0x0 + + paddw xmm1, xmm5 +%endmacro + + +;void vp8_sad16x16x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array); +global sym(vp8_sad16x16x8_sse4) +sym(vp8_sad16x16x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_16X2X8 1 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad16x8x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad16x8x8_sse4) +sym(vp8_sad16x8x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + 
PROCESS_16X2X8 1 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + PROCESS_16X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad8x8x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad8x8x8_sse4) +sym(vp8_sad8x8x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_8X2X8 1 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad8x16x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad8x16x8_sse4) +sym(vp8_sad8x16x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_8X2X8 1 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + PROCESS_8X2X8 0 + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_sad4x4x8_sse4( +; const unsigned char *src_ptr, +; int src_stride, +; const unsigned char *ref_ptr, +; int ref_stride, +; unsigned short *sad_array +;); +global sym(vp8_sad4x4x8_sse4) +sym(vp8_sad4x4x8_sse4): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + push rsi + push rdi + ; end prolog + + mov rsi, arg(0) ;src_ptr + mov rdi, arg(2) ;ref_ptr + + movsxd rax, dword ptr
arg(1) ;src_stride + movsxd rdx, dword ptr arg(3) ;ref_stride + + PROCESS_4X2X8 1 + PROCESS_4X2X8 0 + + mov rdi, arg(4) ;Results + movdqu XMMWORD PTR [rdi], xmm1 + + ; begin epilog + pop rdi + pop rsi + UNSHADOW_ARGS + pop rbp + ret + + + + diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm index 1bb956121..69c5eaedc 100644 --- a/vp8/encoder/x86/sad_ssse3.asm +++ b/vp8/encoder/x86/sad_ssse3.asm @@ -1,32 +1,31 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; %include "vpx_ports/x86_abi_support.asm" -%idefine QWORD - %macro PROCESS_16X2X3 1 %if %1 - movdqa xmm0, [rsi] - lddqu xmm5, [rdi] - lddqu xmm6, [rdi+1] - lddqu xmm7, [rdi+2] + movdqa xmm0, XMMWORD PTR [rsi] + lddqu xmm5, XMMWORD PTR [rdi] + lddqu xmm6, XMMWORD PTR [rdi+1] + lddqu xmm7, XMMWORD PTR [rdi+2] psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, [rsi] - lddqu xmm1, [rdi] - lddqu xmm2, [rdi+1] - lddqu xmm3, [rdi+2] + movdqa xmm0, XMMWORD PTR [rsi] + lddqu xmm1, XMMWORD PTR [rdi] + lddqu xmm2, XMMWORD PTR [rdi+1] + lddqu xmm3, XMMWORD PTR [rdi+2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 @@ -36,10 +35,10 @@ paddw xmm6, xmm2 paddw xmm7, xmm3 %endif - movdqa xmm0, QWORD PTR [rsi+rax] - lddqu xmm1, QWORD PTR [rdi+rdx] - lddqu xmm2, QWORD PTR [rdi+rdx+1] - lddqu xmm3, QWORD PTR [rdi+rdx+2] + movdqa xmm0, XMMWORD PTR [rsi+rax] + lddqu xmm1, XMMWORD PTR [rdi+rdx] + lddqu xmm2, XMMWORD PTR [rdi+rdx+1] + lddqu xmm3, XMMWORD PTR [rdi+rdx+2] lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] @@ -55,9 +54,9 @@ %macro PROCESS_16X2X3_OFFSET 2 %if %1 - movdqa xmm0, [rsi] - movdqa xmm4, [rdi] - movdqa xmm7, [rdi+16] + movdqa xmm0, XMMWORD PTR [rsi] + movdqa xmm4, XMMWORD PTR [rdi] + movdqa xmm7, XMMWORD PTR [rdi+16] movdqa xmm5, xmm7 palignr xmm5, xmm4, %2 @@ -71,9 +70,9 @@ psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else - movdqa xmm0, [rsi] - movdqa xmm4, [rdi] - movdqa xmm3, [rdi+16] + movdqa xmm0, XMMWORD PTR [rsi] + movdqa xmm4, XMMWORD PTR [rdi] + movdqa xmm3, XMMWORD PTR [rdi+16] movdqa xmm1, xmm3 palignr xmm1, xmm4, %2 @@ -91,9 +90,9 @@ paddw xmm6, xmm2 paddw xmm7, xmm3 %endif - movdqa xmm0, QWORD PTR [rsi+rax] - movdqa xmm4, QWORD PTR [rdi+rdx] - movdqa xmm3, QWORD PTR [rdi+rdx+16] + movdqa xmm0, XMMWORD PTR [rsi+rax] + movdqa xmm4, XMMWORD PTR [rdi+rdx] + movdqa xmm3, XMMWORD PTR [rdi+rdx+16] movdqa xmm1, xmm3 palignr xmm1, xmm4, %2 diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm index ce3e61066..a47e1f0d6 100644 --- 
a/vp8/encoder/x86/subtract_mmx.asm +++ b/vp8/encoder/x86/subtract_mmx.asm @@ -1,20 +1,21 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride, -; unsigned short *diff, unsigned char *Predictor, +; short *diff, unsigned char *Predictor, ; int pitch); global sym(vp8_subtract_b_mmx_impl) -sym(vp8_subtract_b_mmx_impl) +sym(vp8_subtract_b_mmx_impl): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 @@ -150,7 +151,7 @@ submby_loop: ;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) global sym(vp8_subtract_mbuv_mmx) -sym(vp8_subtract_mbuv_mmx) +sym(vp8_subtract_mbuv_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm new file mode 100644 index 000000000..3fb23d097 --- /dev/null +++ b/vp8/encoder/x86/subtract_sse2.asm @@ -0,0 +1,356 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. 
All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, +; short *diff, unsigned char *Predictor, +; int pitch); +global sym(vp8_subtract_b_sse2_impl) +sym(vp8_subtract_b_sse2_impl): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdi, arg(2) ;diff + mov rax, arg(3) ;Predictor + mov rsi, arg(0) ;z + movsxd rdx, dword ptr arg(1);src_stride; + movsxd rcx, dword ptr arg(4);pitch + pxor mm7, mm7 + + movd mm0, [rsi] + movd mm1, [rax] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + psubw mm0, mm1 + movq MMWORD PTR [rdi], mm0 + + movd mm0, [rsi+rdx] + movd mm1, [rax+rcx] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + psubw mm0, mm1 + movq MMWORD PTR [rdi+rcx*2], mm0 + + movd mm0, [rsi+rdx*2] + movd mm1, [rax+rcx*2] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + psubw mm0, mm1 + movq MMWORD PTR [rdi+rcx*4], mm0 + + lea rsi, [rsi+rdx*2] + lea rcx, [rcx+rcx*2] + + movd mm0, [rsi+rdx] + movd mm1, [rax+rcx] + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 + psubw mm0, mm1 + movq MMWORD PTR [rdi+rcx*2], mm0 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride) +global sym(vp8_subtract_mby_sse2) +sym(vp8_subtract_mby_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 4 + SAVE_XMM + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rsi, arg(1) ;src + mov rdi, arg(0) ;diff + + mov rax, arg(2) ;pred + movsxd rdx, dword ptr arg(3) ;stride + + mov rcx, 8 ; do two lines at one time + +submby_loop: + movdqa xmm0, XMMWORD PTR [rsi] ; src + movdqa xmm1, XMMWORD PTR [rax] ; pred + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information 
+ + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi +16], xmm2 + + movdqa xmm4, XMMWORD PTR [rsi + rdx] + movdqa xmm5, XMMWORD PTR [rax + 16] + + movdqa xmm6, xmm4 + psubb xmm4, xmm5 + + pxor xmm5, [GLOBAL(t80)] ;convert to signed values + pxor xmm6, [GLOBAL(t80)] + pcmpgtb xmm5, xmm6 ; obtain sign information + + movdqa xmm6, xmm4 + movdqa xmm7, xmm5 + punpcklbw xmm4, xmm5 ; put sign back to subtraction + punpckhbw xmm6, xmm7 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi +32], xmm4 + movdqa XMMWORD PTR [rdi +48], xmm6 + + add rdi, 64 + add rax, 32 + lea rsi, [rsi+rdx*2] + + sub rcx, 1 + jnz submby_loop + + pop rdi + pop rsi + ; begin epilog + RESTORE_GOT + RESTORE_XMM + UNSHADOW_ARGS + pop rbp + ret + + +;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride) +global sym(vp8_subtract_mbuv_sse2) +sym(vp8_subtract_mbuv_sse2): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 5 + GET_GOT rbx + push rsi + push rdi + ; end prolog + + mov rdi, arg(0) ;diff + mov rax, arg(3) ;pred + mov rsi, arg(1) ;z = usrc + add rdi, 256*2 ;diff = diff + 256 (shorts) + add rax, 256 ;Predictor = pred + 256 + movsxd rdx, dword ptr arg(4) ;stride; + lea rcx, [rdx + rdx*2] + + ;u + ;line 0 1 + movq xmm0, MMWORD PTR [rsi] ; src + movq xmm2, MMWORD PTR [rsi+rdx] + movdqa xmm1, XMMWORD PTR [rax] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi +16], xmm2 + + ;line 2 3 + movq 
xmm0, MMWORD PTR [rsi+rdx*2] ; src + movq xmm2, MMWORD PTR [rsi+rcx] + movdqa xmm1, XMMWORD PTR [rax+16] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 32], xmm0 + movdqa XMMWORD PTR [rdi + 48], xmm2 + + ;line 4 5 + lea rsi, [rsi + rdx*4] + + movq xmm0, MMWORD PTR [rsi] ; src + movq xmm2, MMWORD PTR [rsi+rdx] + movdqa xmm1, XMMWORD PTR [rax + 32] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 64], xmm0 + movdqa XMMWORD PTR [rdi + 80], xmm2 + + ;line 6 7 + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src + movq xmm2, MMWORD PTR [rsi+rcx] + movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 96], xmm0 + movdqa XMMWORD PTR [rdi + 112], xmm2 + + ;v + mov rsi, arg(2) ;z = vsrc + add rdi, 64*2 ;diff = diff + 320 (shorts) + add rax, 64 ;Predictor = pred + 320 + + ;line 0 1 + movq xmm0, MMWORD PTR [rsi] ; src + movq xmm2, MMWORD PTR [rsi+rdx] + movdqa xmm1, XMMWORD PTR [rax] ; 
pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi], xmm0 + movdqa XMMWORD PTR [rdi +16], xmm2 + + ;line 2 3 + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src + movq xmm2, MMWORD PTR [rsi+rcx] + movdqa xmm1, XMMWORD PTR [rax+16] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 32], xmm0 + movdqa XMMWORD PTR [rdi + 48], xmm2 + + ;line 4 5 + lea rsi, [rsi + rdx*4] + + movq xmm0, MMWORD PTR [rsi] ; src + movq xmm2, MMWORD PTR [rsi+rdx] + movdqa xmm1, XMMWORD PTR [rax + 32] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 64], xmm0 + movdqa XMMWORD PTR [rdi + 80], xmm2 + + ;line 6 7 + movq xmm0, MMWORD PTR [rsi+rdx*2] ; src + movq xmm2, MMWORD PTR [rsi+rcx] + movdqa xmm1, XMMWORD PTR [rax+ 48] ; pred + punpcklqdq xmm0, xmm2 + + movdqa xmm2, xmm0 + psubb xmm0, xmm1 ; subtraction with sign missed + + pxor xmm1, [GLOBAL(t80)] ;convert to signed values + pxor xmm2, [GLOBAL(t80)] + pcmpgtb xmm1, xmm2 ; obtain sign 
information + + movdqa xmm2, xmm0 + movdqa xmm3, xmm1 + punpcklbw xmm0, xmm1 ; put sign back to subtraction + punpckhbw xmm2, xmm3 ; put sign back to subtraction + + movdqa XMMWORD PTR [rdi + 96], xmm0 + movdqa XMMWORD PTR [rdi + 112], xmm2 + + ; begin epilog + pop rdi + pop rsi + RESTORE_GOT + UNSHADOW_ARGS + pop rbp + ret + +SECTION_RODATA +align 16 +t80: + times 16 db 0x80 diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm index d0da82ad4..67a9b4d3e 100644 --- a/vp8/encoder/x86/variance_impl_mmx.asm +++ b/vp8/encoder/x86/variance_impl_mmx.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. 
; @@ -497,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx): psrlq mm7, 32 paddd mm0, mm7 - movd rax, mm0 + movq rax, mm0 ; begin epilog @@ -555,7 +556,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx): pmullw mm3, [rax+8] ; paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movq mm5, mm1 @@ -579,7 +580,7 @@ filter_block2d_bil4x4_var_mmx_loop: pmullw mm3, [rax+8] ; paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movq mm3, mm5 ; @@ -591,7 +592,7 @@ filter_block2d_bil4x4_var_mmx_loop: paddw mm1, mm3 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; movd mm3, [rdi] ; @@ -709,10 +710,10 @@ sym(vp8_filter_block2d_bil_var_mmx): paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm2, mmx_filter_shift ; movq mm5, mm1 @@ -748,10 +749,10 @@ filter_block2d_bil_var_mmx_loop: paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm2, mmx_filter_shift ; movq mm3, mm5 ; @@ -772,8 +773,8 @@ filter_block2d_bil_var_mmx_loop: paddw mm1, mm3 ; paddw mm2, mm4 ; - paddw mm1, [mmx_bi_rd GLOBAL] ; - paddw mm2, [mmx_bi_rd GLOBAL] ; + paddw mm1, [GLOBAL(mmx_bi_rd)] ; + paddw mm2, [GLOBAL(mmx_bi_rd)] ; psraw mm1, mmx_filter_shift ; psraw mm2, mmx_filter_shift ; diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm index 7e5ee284b..cefa0a956 100644 --- a/vp8/encoder/x86/variance_impl_sse2.asm +++ b/vp8/encoder/x86/variance_impl_sse2.asm @@ -1,10 +1,11 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; @@ -57,7 +58,7 @@ NEXTROW: movdqa xmm3,xmm4 psrldq xmm4,4 paddd xmm4,xmm3 - movd rax,xmm4 + movq rax,xmm4 ; begin epilog @@ -470,7 +471,7 @@ sym(vp8_get8x8var_sse2): mov rax, arg(5) ;[Sum] mov rdi, arg(4) ;[SSE] - movd rdx, xmm7 + movq rdx, xmm7 movsx rcx, dx mov dword ptr [rax], ecx @@ -531,7 +532,7 @@ sym(vp8_filter_block2d_bil_var_sse2): pmullw xmm3, [rax+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movdqa xmm5, xmm1 @@ -553,7 +554,7 @@ filter_block2d_bil_var_sse2_loop: pmullw xmm3, [rax+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movdqa xmm3, xmm5 ; @@ -564,7 +565,7 @@ filter_block2d_bil_var_sse2_loop: pmullw xmm1, [rdx+16] ; paddw xmm1, xmm3 ; - paddw xmm1, [xmm_bi_rd GLOBAL] ; + paddw xmm1, [GLOBAL(xmm_bi_rd)] ; psraw xmm1, xmm_filter_shift ; movq xmm3, QWORD PTR [rdi] ; diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c index 4a5b25b0d..2df73a635 100644 --- a/vp8/encoder/x86/variance_mmx.c +++ b/vp8/encoder/x86/variance_mmx.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -14,7 +15,7 @@ extern void filter_block1d_h6_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, @@ -24,7 +25,7 @@ extern void filter_block1d_h6_mmx ); extern void filter_block1d_v6_mmx ( - short *src_ptr, + const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, @@ -36,34 +37,34 @@ extern void filter_block1d_v6_mmx extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr); extern unsigned int vp8_get8x8var_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum ); extern unsigned int vp8_get4x4var_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum ); extern unsigned int vp8_get4x4sse_cs_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride ); extern void vp8_filter_block2d_bil4x4_var_mmx ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, const short *HFilter, 
const short *VFilter, @@ -72,9 +73,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx ); extern void vp8_filter_block2d_bil_var_mmx ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, const short *HFilter, @@ -125,9 +126,9 @@ void vp8_test_get_mb_ss(void) unsigned int vp8_get16x16var_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned *SSE, unsigned *SUM @@ -156,9 +157,9 @@ unsigned int vp8_get16x16var_mmx( unsigned int vp8_variance4x4_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -172,9 +173,9 @@ unsigned int vp8_variance4x4_mmx( } unsigned int vp8_variance8x8_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -189,9 +190,9 @@ unsigned int vp8_variance8x8_mmx( } unsigned int vp8_mse16x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -211,9 +212,9 @@ unsigned int vp8_mse16x16_mmx( unsigned int vp8_variance16x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, int *sse) { @@ -233,9 +234,9 @@ unsigned int vp8_variance16x16_mmx( } unsigned int vp8_variance16x8_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -254,9 +255,9 @@ unsigned int vp8_variance16x8_mmx( unsigned int 
vp8_variance8x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -295,11 +296,11 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) = unsigned int vp8_sub_pixel_variance4x4_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse) @@ -319,11 +320,11 @@ unsigned int vp8_sub_pixel_variance4x4_mmx unsigned int vp8_sub_pixel_variance8x8_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -343,11 +344,11 @@ unsigned int vp8_sub_pixel_variance8x8_mmx unsigned int vp8_sub_pixel_variance16x16_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -382,11 +383,11 @@ unsigned int vp8_sub_pixel_variance16x16_mmx } unsigned int vp8_sub_pixel_mse16x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -397,11 +398,11 @@ unsigned int vp8_sub_pixel_mse16x16_mmx( unsigned int vp8_sub_pixel_variance16x8_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -434,11 +435,11 @@ unsigned int vp8_sub_pixel_variance16x8_mmx unsigned int vp8_sub_pixel_variance8x16_mmx ( - unsigned char *src_ptr, + const unsigned char 
*src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, int *sse ) @@ -456,9 +457,9 @@ unsigned int vp8_sub_pixel_variance8x16_mmx } unsigned int vp8_i_variance16x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -479,9 +480,9 @@ unsigned int vp8_i_variance16x16_mmx( } unsigned int vp8_i_variance8x16_mmx( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -500,11 +501,11 @@ unsigned int vp8_i_variance8x16_mmx( unsigned int vp8_i_sub_pixel_variance16x16_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -559,11 +560,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_mmx unsigned int vp8_i_sub_pixel_variance8x16_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -594,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx *sse = xxsum0; return (xxsum0 - ((xsum0 * xsum0) >> 7)); } + + +unsigned int vp8_variance_halfpixvar16x16_h_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, 
source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, + ref_ptr, recon_stride, sse); +} diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c index ea80753bd..006e0a24a 100644 --- a/vp8/encoder/x86/variance_sse2.c +++ b/vp8/encoder/x86/variance_sse2.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -12,16 +13,16 @@ #include "pragmas.h" #include "vpx_ports/mem.h" -extern void filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d_v6_mmx(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); -extern void filter_block1d8_v6_sse2(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); +extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); +extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); +extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); +extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter); extern void vp8_filter_block2d_bil4x4_var_mmx ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char 
*src_ptr, int src_pixels_per_line, const short *HFilter, const short *VFilter, @@ -31,9 +32,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx extern unsigned int vp8_get4x4var_mmx ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *SSE, int *Sum @@ -41,38 +42,38 @@ extern unsigned int vp8_get4x4var_mmx unsigned int vp8_get_mb_ss_sse2 ( - short *src_ptr + const short *src_ptr ); unsigned int vp8_get16x16var_sse2 ( - unsigned char *src_ptr, - int source_stride, - unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *SSE, + int *Sum ); unsigned int vp8_get16x16pred_error_sse2 ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_stride ); unsigned int vp8_get8x8var_sse2 ( - unsigned char *src_ptr, - int source_stride, - unsigned char *ref_ptr, - int recon_stride, - unsigned int *SSE, - int *Sum + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *SSE, + int *Sum ); void vp8_filter_block2d_bil_var_sse2 ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, const short *HFilter, @@ -82,9 +83,9 @@ void vp8_filter_block2d_bil_var_sse2 ); void vp8_half_horiz_vert_variance16x_h_sse2 ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, int *sum, @@ -92,9 +93,9 @@ void vp8_half_horiz_vert_variance16x_h_sse2 ); void vp8_half_horiz_variance16x_h_sse2 ( - unsigned char *ref_ptr, + const unsigned char 
*ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, int *sum, @@ -102,9 +103,9 @@ void vp8_half_horiz_variance16x_h_sse2 ); void vp8_half_vert_variance16x_h_sse2 ( - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int ref_pixels_per_line, - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, unsigned int Height, int *sum, @@ -114,9 +115,9 @@ void vp8_half_vert_variance16x_h_sse2 DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]); unsigned int vp8_variance4x4_wmt( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride) { unsigned int var; @@ -131,9 +132,9 @@ unsigned int vp8_variance4x4_wmt( unsigned int vp8_variance8x8_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride) { unsigned int var; @@ -148,9 +149,9 @@ unsigned int vp8_variance8x8_wmt unsigned int vp8_variance16x16_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -163,9 +164,9 @@ unsigned int vp8_variance16x16_wmt return (sse0 - ((sum0 * sum0) >> 8)); } unsigned int vp8_mse16x16_wmt( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -181,9 +182,9 @@ unsigned int vp8_mse16x16_wmt( unsigned int vp8_variance16x8_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -202,9 +203,9 @@ unsigned int vp8_variance16x8_wmt unsigned int vp8_variance8x16_wmt ( - unsigned char *src_ptr, + const 
unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -238,11 +239,11 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) = }; unsigned int vp8_sub_pixel_variance4x4_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -262,11 +263,11 @@ unsigned int vp8_sub_pixel_variance4x4_wmt unsigned int vp8_sub_pixel_variance8x8_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -287,11 +288,11 @@ unsigned int vp8_sub_pixel_variance8x8_wmt unsigned int vp8_sub_pixel_variance16x16_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -363,11 +364,11 @@ unsigned int vp8_sub_pixel_variance16x16_wmt } unsigned int vp8_sub_pixel_mse16x16_wmt( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -378,11 +379,11 @@ unsigned int vp8_sub_pixel_mse16x16_wmt( unsigned int vp8_sub_pixel_variance16x8_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse @@ -416,11 +417,11 @@ unsigned int vp8_sub_pixel_variance16x8_wmt unsigned int vp8_sub_pixel_variance8x16_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - 
unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -439,9 +440,9 @@ unsigned int vp8_sub_pixel_variance8x16_wmt } unsigned int vp8_i_variance16x16_wmt( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -463,9 +464,9 @@ unsigned int vp8_i_variance16x16_wmt( } unsigned int vp8_i_variance8x16_wmt( - unsigned char *src_ptr, + const unsigned char *src_ptr, int source_stride, - unsigned char *ref_ptr, + const unsigned char *ref_ptr, int recon_stride, unsigned int *sse) { @@ -485,11 +486,11 @@ unsigned int vp8_i_variance8x16_wmt( unsigned int vp8_i_sub_pixel_variance16x16_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -500,11 +501,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_wmt unsigned int vp8_i_sub_pixel_variance8x16_wmt ( - unsigned char *src_ptr, + const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, - unsigned char *dst_ptr, + const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse ) @@ -512,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse); } + + +unsigned int vp8_variance_halfpixvar16x16_h_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + 
&xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h index 35fc90c48..6bea15ebc 100644 --- a/vp8/encoder/x86/variance_x86.h +++ b/vp8/encoder/x86/variance_x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -34,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx); extern prototype_getmbss(vp8_get_mb_ss_mmx); extern prototype_variance(vp8_mse16x16_mmx); @@ -88,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx + #undef vp8_variance_subpixmse16x16 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx @@ -129,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt); +extern 
prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt); extern prototype_getmbss(vp8_get_mb_ss_sse2); extern prototype_variance(vp8_mse16x16_wmt); @@ -182,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt + #undef vp8_variance_subpixmse16x16 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt @@ -240,7 +265,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3); #define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3 #undef vp8_variance_sad16x16x4d -#define vp8_variance_sad16x16x4 vp8_sad16x16x4d_sse3 +#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3 #undef vp8_variance_sad16x8x4d #define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3 @@ -272,4 +297,31 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3); #endif #endif + +#if HAVE_SSE4_1 +extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4); +extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4); + +#if !CONFIG_RUNTIME_CPU_DETECT +#undef vp8_variance_sad16x16x8 +#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4 + +#undef vp8_variance_sad16x8x8 +#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4 + +#undef vp8_variance_sad8x16x8 +#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4 + +#undef vp8_variance_sad8x8x8 +#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4 + +#undef 
vp8_variance_sad4x4x8 +#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4 + +#endif +#endif + #endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index f1391ba8c..781079849 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -21,26 +22,20 @@ void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch) vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch); } -void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch) -{ - vp8_fast_fdct4x4_mmx(input, output , pitch); - vp8_fast_fdct4x4_mmx(input + 4, output + 16, pitch); -} - int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr, short *qcoeff_ptr, short *dequant_ptr, short *scan_mask, short *round_ptr, short *quant_ptr, short *dqcoeff_ptr); void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d) { - short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; - short *coeff_ptr = &b->coeff[0]; - short *zbin_ptr = &b->zbin[0][0]; - short *round_ptr = &b->round[0][0]; - short *quant_ptr = &b->quant[0][0]; - short *qcoeff_ptr = d->qcoeff; + short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = &d->dequant[0][0]; + short *dequant_ptr = d->dequant; d->eob = vp8_fast_quantize_b_impl_mmx( coeff_ptr, @@ -86,30 +81,22 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) #endif #if HAVE_SSE2 -void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch) -{ - vp8_short_fdct4x4_wmt(input, output, pitch); - vp8_short_fdct4x4_wmt(input + 4, output + 16, pitch); -} - -int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr, +int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr, short *qcoeff_ptr, short *dequant_ptr, short *scan_mask, short *round_ptr, short *quant_ptr, short *dqcoeff_ptr); -void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d) +void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) { - short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; - short *coeff_ptr = &b->coeff[0]; - short *zbin_ptr = &b->zbin[0][0]; - short *round_ptr = &b->round[0][0]; - 
short *quant_ptr = &b->quant[0][0]; - short *qcoeff_ptr = d->qcoeff; + short *scan_mask = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr; + short *coeff_ptr = b->coeff; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; - short *dequant_ptr = &d->dequant[0][0]; + short *dequant_ptr = d->dequant; - d->eob = vp8_fast_quantize_b_impl_sse( + d->eob = vp8_fast_quantize_b_impl_sse2( coeff_ptr, - zbin_ptr, qcoeff_ptr, dequant_ptr, scan_mask, @@ -120,6 +107,41 @@ void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d) ); } + +int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr, + short *qcoeff_ptr,short *dequant_ptr, + const int *default_zig_zag, short *round_ptr, + short *quant_ptr, short *dqcoeff_ptr, + unsigned short zbin_oq_value, + short *zbin_boost_ptr); + +void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d) +{ + short *zbin_boost_ptr = b->zrun_zbin_boost; + short *coeff_ptr = b->coeff; + short *zbin_ptr = b->zbin; + short *round_ptr = b->round; + short *quant_ptr = b->quant; + short *qcoeff_ptr = d->qcoeff; + short *dqcoeff_ptr = d->dqcoeff; + short *dequant_ptr = d->dequant; + short zbin_oq_value = b->zbin_extra; + + d->eob = vp8_regular_quantize_b_impl_sse2( + coeff_ptr, + zbin_ptr, + qcoeff_ptr, + dequant_ptr, + vp8_default_zig_zag1d, + + round_ptr, + quant_ptr, + dqcoeff_ptr, + zbin_oq_value, + zbin_boost_ptr + ); +} + int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc); int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc) { @@ -136,8 +158,39 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb) return vp8_mbuverror_xmm_impl(s_ptr, d_ptr); } +void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride, + short *diff, unsigned char *predictor, + int pitch); +void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch) +{ + unsigned char *z = *(be->base_src) + be->src; + unsigned int src_stride = be->src_stride; + short *diff = &be->src_diff[0]; + unsigned char 
*predictor = &bd->predictor[0]; + vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch); +} + #endif +#if HAVE_SSSE3 +int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr, + short *qcoeff_ptr, short *dequant_ptr, + short *round_ptr, + short *quant_ptr, short *dqcoeff_ptr); +void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) +{ + d->eob = vp8_fast_quantize_b_impl_ssse3( + b->coeff, + d->qcoeff, + d->dequant, + b->round, + b->quant, + d->dqcoeff + ); +} +#endif + + void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT @@ -147,6 +200,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) int wmt_enabled = flags & HAS_SSE2; int SSE3Enabled = flags & HAS_SSE3; int SSSE3Enabled = flags & HAS_SSSE3; + int SSE4_1Enabled = flags & HAS_SSE4_1; /* Note: * @@ -157,7 +211,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) /* Override default functions with fastest ones for this CPU. */ #if HAVE_MMX - if (mmx_enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; @@ -177,6 +230,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; @@ -189,8 +245,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_mmx; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_mmx; - cpi->rtcd.fdct.fast4x4 = vp8_fast_fdct4x4_mmx; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_mmx; + cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_mmx; + cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_mmx; + 
cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; cpi->rtcd.encodemb.berr = vp8_block_error_mmx; @@ -200,12 +257,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.encodemb.submby = vp8_subtract_mby_mmx; cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_mmx; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx; + /*cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_mmx;*/ } - #endif -#if HAVE_SSE2 +#if HAVE_SSE2 if (wmt_enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; @@ -225,6 +281,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; @@ -235,26 +294,26 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.get16x16var = vp8_get16x16var_sse2; /* cpi->rtcd.variance.get4x4sse_cs not implemented for wmt */; -#if 0 - /* short SSE2 DCT currently disabled, does not match the MMX version */ - cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_wmt; - cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_wmt; -#endif - /* cpi->rtcd.fdct.fast4x4 not implemented for wmt */; - cpi->rtcd.fdct.fast8x4 = vp8_fast_fdct8x4_wmt; - cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2; + cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_sse2; + cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_sse2; + cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_sse2; + cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_sse2; + + cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_sse2 ; cpi->rtcd.encodemb.berr = vp8_block_error_xmm; cpi->rtcd.encodemb.mberr = 
vp8_mbblock_error_xmm; cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; - /* cpi->rtcd.encodemb.sub* not implemented for wmt */ + cpi->rtcd.encodemb.subb = vp8_subtract_b_sse2; + cpi->rtcd.encodemb.submby = vp8_subtract_mby_sse2; + cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; - cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse; + /*cpi->rtcd.quantize.quantb = vp8_regular_quantize_b_sse2;*/ + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_sse2; } - #endif -#if HAVE_SSE3 +#if HAVE_SSE3 if (SSE3Enabled) { cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; @@ -272,16 +331,30 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; } - #endif -#if HAVE_SSSE3 +#if HAVE_SSSE3 if (SSSE3Enabled) { cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; + + cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3; + } +#endif + +#if HAVE_SSE4_1 + if (SSE4_1Enabled) + { + cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; + cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; + cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; + cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; + cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; + cpi->rtcd.search.full_search = vp8_full_search_sadx8; + } +#endif -#endif #endif } diff --git a/vp8/exports_dec b/vp8/exports_dec new file mode 100644 index 000000000..100ac5c27 --- /dev/null +++ b/vp8/exports_dec @@ -0,0 +1,2 @@ +data vpx_codec_vp8_dx_algo +text vpx_codec_vp8_dx diff --git a/vp8/exports_enc b/vp8/exports_enc new file mode 100644 index 000000000..29ff35ef7 --- /dev/null +++ b/vp8/exports_enc @@ -0,0 +1,2 @@ +data vpx_codec_vp8_cx_algo +text vpx_codec_vp8_cx diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk index a9efbd753..bb3f8259c 100644 --- a/vp8/vp8_common.mk +++ b/vp8/vp8_common.mk @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. 
All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## @@ -26,7 +27,6 @@ VP8_COMMON_SRCS-yes += common/onyxd.h CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common -VP8_COMMON_SRCS-yes += common/segmentation_common.c VP8_COMMON_SRCS-yes += common/alloccommon.c VP8_COMMON_SRCS-yes += common/blockd.c VP8_COMMON_SRCS-yes += common/coefupdateprobs.h @@ -63,7 +63,6 @@ VP8_COMMON_SRCS-yes += common/recon.h VP8_COMMON_SRCS-yes += common/reconinter.h VP8_COMMON_SRCS-yes += common/reconintra.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h -VP8_COMMON_SRCS-yes += common/segmentation_common.h VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/subpixel.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h @@ -97,42 +96,37 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c -VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.h -VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm 
VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm +VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm +VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm endif +VP8_COMMON_SRCS-$(ARCH_ARM) += common/arm/arm_systemdependent.c + # common (c) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/bilinearfilter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/filter_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/loopfilter_arm.c -VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/recon_arm.c -VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra4x4_arm.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/reconintra_arm.c -VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/systemdependent.c VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/vpx_asm_offsets.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/filter_c.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/recon.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/reconintra4x4.c -VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6) += common/generic/systemdependent.c - # common (armv6) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/bilinearfilter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x4_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem8x8_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/copymem16x16_v6$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/dc_only_idct_add_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/iwalsh_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/filter_v6$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV6) += common/arm/armv6/idct_v6$(ASM) @@ -149,17 
+143,12 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/bilinearpredict16x16_neon$(ASM VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x4_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem8x8_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/copymem16x16_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/dc_only_idct_add_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/iwalsh_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilter_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterhorizontaledge_uv_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterhorizontaledge_y_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterverticaledge_uv_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/loopfilterverticaledge_y_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterhorizontaledge_uv_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterhorizontaledge_y_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterverticaledge_uv_neon$(ASM) -VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilterverticaledge_y_neon$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/mbloopfilter_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon2b_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon4b_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/reconb_neon$(ASM) @@ -172,6 +161,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/sixtappredict16x16_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon16x16mb_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/buildintrapredictorsmby_neon$(ASM) VP8_COMMON_SRCS-$(HAVE_ARMV7) += 
common/arm/neon/save_neon_reg$(ASM) +VP8_COMMON_SRCS-$(HAVE_ARMV7) += common/arm/neon/recon_neon.c # diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index 32c5f3b21..967fdb5a7 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -13,6 +14,7 @@ #include "vpx_version.h" #include "onyx_int.h" #include "vpx/vp8e.h" +#include "vp8/encoder/firstpass.h" #include "onyx.h" #include #include @@ -52,19 +54,19 @@ static const struct extraconfig_map extracfg_map[] = NULL, #if !(CONFIG_REALTIME_ONLY) VP8_BEST_QUALITY_ENCODING, /* Encoding Mode */ - -4, /* cpu_used */ + 0, /* cpu_used */ #else VP8_REAL_TIME_ENCODING, /* Encoding Mode */ - -8, /* cpu_used */ + 4, /* cpu_used */ #endif 0, /* enable_auto_alt_ref */ 0, /* noise_sensitivity */ 0, /* Sharpness */ - 800, /* static_thresh */ + 0, /* static_thresh */ VP8_ONE_TOKENPARTITION, /* token_partitions */ - 0, /* arnr_max_frames */ - 0, /* arnr_strength */ - 0, /* arnr_type*/ + 0, /* arnr_max_frames */ + 3, /* arnr_strength */ + 3, /* arnr_type*/ 0, /* experimental mode */ } } @@ -109,10 +111,15 @@ update_error_state(vpx_codec_alg_priv_t *ctx, } while(0) #define RANGE_CHECK(p,memb,lo,hi) do {\ - if(!((p)->memb >= (lo) && (p)->memb <= hi)) \ + if(!(((p)->memb == lo || (p)->memb > 
(lo)) && (p)->memb <= hi)) \ ERROR(#memb " out of range ["#lo".."#hi"]");\ } while(0) +#define RANGE_CHECK_HI(p,memb,hi) do {\ + if(!((p)->memb <= (hi))) \ + ERROR(#memb " out of range [.."#hi"]");\ + } while(0) + #define RANGE_CHECK_LO(p,memb,lo) do {\ if(!((p)->memb >= (lo))) \ ERROR(#memb " out of range ["#lo"..]");\ @@ -130,24 +137,24 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, g_h, 2, 16384); RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, cfg->g_timebase.den); - RANGE_CHECK(cfg, g_profile, 0, 3); - RANGE_CHECK(cfg, rc_min_quantizer, 0, 63); - RANGE_CHECK(cfg, rc_max_quantizer, 0, 63); - RANGE_CHECK(cfg, g_threads, 0, 64); + RANGE_CHECK_HI(cfg, g_profile, 3); + RANGE_CHECK_HI(cfg, rc_min_quantizer, 63); + RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); + RANGE_CHECK_HI(cfg, g_threads, 64); #if !(CONFIG_REALTIME_ONLY) - RANGE_CHECK(cfg, g_lag_in_frames, 0, 25); + RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); #else - RANGE_CHECK(cfg, g_lag_in_frames, 0, 0); + RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #endif RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_CBR); - RANGE_CHECK(cfg, rc_undershoot_pct, 0, 100); - RANGE_CHECK(cfg, rc_2pass_vbr_bias_pct, 0, 100); + RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); + RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); //RANGE_CHECK_BOOL(cfg, g_delete_firstpassfile); RANGE_CHECK_BOOL(cfg, rc_resize_allowed); - RANGE_CHECK(cfg, rc_dropframe_thresh, 0, 100); - RANGE_CHECK(cfg, rc_resize_up_thresh, 0, 100); - RANGE_CHECK(cfg, rc_resize_down_thresh, 0, 100); + RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); + RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); + RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); #if !(CONFIG_REALTIME_ONLY) RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); #else @@ -166,7 +173,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, #if !(CONFIG_REALTIME_ONLY) 
RANGE_CHECK(vp8_cfg, encoding_mode, VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING); RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); - RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 6); + RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); #else RANGE_CHECK(vp8_cfg, encoding_mode, VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING); @@ -177,29 +184,32 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, #endif RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); - RANGE_CHECK(vp8_cfg, Sharpness, 0, 7); - RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 25); - RANGE_CHECK(vp8_cfg, arnr_strength, 0, 6); - RANGE_CHECK(vp8_cfg, arnr_type, 0, 0xffffffff); + RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); + RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); + RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); + RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); if (cfg->g_pass == VPX_RC_LAST_PASS) { - int n_doubles = cfg->rc_twopass_stats_in.sz / sizeof(double); - int n_packets = cfg->rc_twopass_stats_in.sz / sizeof(FIRSTPASS_STATS); - double frames; + int mb_r = (cfg->g_h + 15) / 16; + int mb_c = (cfg->g_w + 15) / 16; + size_t packet_sz = vp8_firstpass_stats_sz(mb_r * mb_c); + int n_packets = cfg->rc_twopass_stats_in.sz / packet_sz; + FIRSTPASS_STATS *stats; if (!cfg->rc_twopass_stats_in.buf) ERROR("rc_twopass_stats_in.buf not set."); - if (cfg->rc_twopass_stats_in.sz % sizeof(FIRSTPASS_STATS)) + if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); - if (cfg->rc_twopass_stats_in.sz < 2 * sizeof(FIRSTPASS_STATS)) + if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) ERROR("rc_twopass_stats_in requires at least two packets."); - frames = ((double *)cfg->rc_twopass_stats_in.buf)[n_doubles - 1]; + stats = (void*)((char *)cfg->rc_twopass_stats_in.buf + + (n_packets - 1) * packet_sz); - if ((int)(frames + 0.5) != n_packets - 1) + if ((int)(stats->count + 0.5) != n_packets - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } 
@@ -297,9 +307,9 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->under_shoot_pct = cfg.rc_undershoot_pct; //oxcf->over_shoot_pct = cfg.rc_overshoot_pct; - oxcf->maximum_buffer_size = cfg.rc_buf_sz / 1000; - oxcf->starting_buffer_level = cfg.rc_buf_initial_sz / 1000; - oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz / 1000; + oxcf->maximum_buffer_size = cfg.rc_buf_sz; + oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; + oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; oxcf->two_pass_vbrmin_section = cfg.rc_2pass_vbr_minsection_pct; @@ -774,12 +784,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, { pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; - // TODO: ideally this timestamp should be as close as - // possible to the prior PTS so that if a decoder uses - // pts to schedule when to do this, we start right after - // last frame was decoded. Maybe should be set to - // last time stamp. Invisible frames have no duration.. - pkt.data.frame.pts --; + // This timestamp should be as close as possible to the + // prior PTS so that if a decoder uses pts to schedule when + // to do this, we start right after last frame was decoded. + // Invisible frames have no duration. 
+ pkt.data.frame.pts = ((cpi->last_time_stamp_seen + * ctx->cfg.g_timebase.den + round) + / ctx->cfg.g_timebase.num / 10000000) + 1; pkt.data.frame.duration = 0; } @@ -846,7 +857,9 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { +#if CONFIG_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); + (void)ctr_id; if (data) { @@ -855,6 +868,12 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, } else return VPX_CODEC_INVALID_PARAM; +#else + (void)ctx; + (void)ctr_id; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif } @@ -862,8 +881,16 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; + vp8_ppflags_t flags = {0}; - if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag)) + if (ctx->preview_ppcfg.post_proc_flag) + { + flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; + flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; + flags.noise_level = ctx->preview_ppcfg.noise_level; + } + + if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { /* @@ -1044,7 +1071,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = 0, /* g_lag_in_frames */ - 70, /* rc_dropframe_thresh */ + 0, /* rc_dropframe_thresh */ 0, /* rc_resize_allowed */ 60, /* rc_resize_down_thresold */ 30, /* rc_resize_up_thresold */ @@ -1086,9 +1113,9 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = #ifndef VERSION_STRING #define VERSION_STRING #endif -vpx_codec_iface_t vpx_codec_vp8_cx_algo = +CODEC_INTERFACE(vpx_codec_vp8_cx) = { - "vpx Technologies VP8 Encoder" VERSION_STRING, + "WebM Project VP8 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, /* vpx_codec_caps_t caps; */ @@ -1207,7 +1234,7 @@ static vpx_codec_err_t api1_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_iface_t vpx_enc_vp8_algo = { - "vpx 
Technologies VP8 Encoder (Deprecated API)" VERSION_STRING, + "WebM Project VP8 Encoder (Deprecated API)" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_ENCODER, /* vpx_codec_caps_t caps; */ diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c index 19c59cd80..9dd492217 100644 --- a/vp8/vp8_dx_iface.c +++ b/vp8/vp8_dx_iface.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -40,7 +41,7 @@ typedef enum VP8_SEG_ALG_PRIV = 256, VP8_SEG_MAX } mem_seg_id_t; -#define NELEMENTS(x) (sizeof(x)/sizeof(x[0])) +#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0]))) static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t); @@ -64,12 +65,19 @@ struct vpx_codec_alg_priv vpx_codec_priv_t base; vpx_codec_mmap_t mmaps[NELEMENTS(vp8_mem_req_segs)-1]; vpx_codec_dec_cfg_t cfg; - vp8_stream_info_t si; + vp8_stream_info_t si; int defer_alloc; int decoder_init; VP8D_PTR pbi; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; +#if CONFIG_POSTPROC_VISUALIZER + unsigned int dbg_postproc_flag; + int dbg_color_ref_frame_flag; + int dbg_color_mb_modes_flag; + int dbg_color_b_modes_flag; + int dbg_display_mv_flag; +#endif vpx_image_t img; int img_setup; int img_avail; @@ -169,7 +177,7 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap) } } -static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, int id) +static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id) { int i; @@ -195,9 +203,6 @@ static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx) ctx->pbi->fb_storage_ptr[0] = mmap_lkup(ctx, VP6_SEG_IMG0_STRG); ctx->pbi->fb_storage_ptr[1] = mmap_lkup(ctx, VP6_SEG_IMG1_STRG); ctx->pbi->fb_storage_ptr[2] = mmap_lkup(ctx, VP6_SEG_IMG2_STRG); - #if CONFIG_NEW_TOKENS - ctx->pbi->token_graph = mmap_lkup(ctx, VP6_SEG_TOKEN_GRAPH); - #endif #if CONFIG_POSTPROC ctx->pbi->postproc.deblock.fragment_variances = mmap_lkup(ctx, VP6_SEG_DEBLOCKER); ctx->pbi->fb_storage_ptr[3] = mmap_lkup(ctx, VP6_SEG_PP_IMG_STRG); @@ -225,11 +230,12 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx) res = vp8_mmap_alloc(&mmap); if (!res) + { vp8_init_ctx(ctx, &mmap); - ctx->priv->alg_priv->defer_alloc = 1; - /*post processing level initialized to do nothing */ - + ctx->priv->alg_priv->defer_alloc = 1; + /*post processing level initialized to do nothing */ + } } return res; @@ -254,15 +260,18 @@ static vpx_codec_err_t 
vp8_peek_si(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si) { - vpx_codec_err_t res = VPX_CODEC_OK; - { - /*Parse from VP8 compressed data, the implies knowledge of the - *VP8 bitsteam. - * First 3 byte header including version, frame type and an offset - * Next 3 bytes are image sizewith 12 bit each for width and height - */ + if(data + data_sz <= data) + res = VPX_CODEC_INVALID_PARAM; + else + { + /* Parse uncompressed part of key frame header. + * 3 bytes:- including version, frame type and an offset + * 3 bytes:- sync code (0x9d, 0x01, 0x2a) + * 4 bytes:- including image width and height in the lowest 14 bits + * of each 2-byte value. + */ si->is_kf = 0; if (data_sz >= 10 && !(data[0] & 0x01)) /* I-Frame */ @@ -270,14 +279,14 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t *data, const uint8_t *c = data + 3; si->is_kf = 1; - // vet via sync code + /* vet via sync code */ if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a) res = VPX_CODEC_UNSUP_BITSTREAM; si->w = swap2(*(const unsigned short *)(c + 3)) & 0x3fff; si->h = swap2(*(const unsigned short *)(c + 5)) & 0x3fff; - //printf("w=%d, h=%d\n", si->w, si->h); + /*printf("w=%d, h=%d\n", si->w, si->h);*/ if (!(si->h | si->w)) res = VPX_CODEC_UNSUP_BITSTREAM; } @@ -332,7 +341,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, ctx->img_avail = 0; - /* Determine the stream parameters */ + /* Determine the stream parameters. Note that we rely on peek_si to + * validate that we have a buffer that does not wrap around the top + * of the heap.
+ */ if (!ctx->si.h) res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si); @@ -411,15 +423,27 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, { YV12_BUFFER_CONFIG sd; INT64 time_stamp = 0, time_end_stamp = 0; - int ppflag = 0; - int ppdeblocking = 0; - int ppnoise = 0; + vp8_ppflags_t flags = {0}; if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - ppflag = ctx->postproc_cfg.post_proc_flag; - ppdeblocking = ctx->postproc_cfg.deblocking_level; - ppnoise = ctx->postproc_cfg.noise_level; + flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag +#if CONFIG_POSTPROC_VISUALIZER + + | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0) + | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0) +#endif + ; + flags.deblocking_level = ctx->postproc_cfg.deblocking_level; + flags.noise_level = ctx->postproc_cfg.noise_level; +#if CONFIG_POSTPROC_VISUALIZER + flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag; + flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag; + flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag; + flags.display_mv_flag = ctx->dbg_display_mv_flag; +#endif } if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) @@ -428,7 +452,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, res = update_error_state(ctx, &pbi->common.error); } - if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag)) + if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags)) { /* Align width/height */ unsigned int a_w = (sd.y_width + 15) & ~15; @@ -529,7 +553,7 @@ static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t *ctx, done = 1; - if (ctx->priv->alg_priv) + if (!res && ctx->priv->alg_priv) { for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++) { @@ 
-641,12 +665,38 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, #endif } +static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx, + int ctrl_id, + va_list args) +{ +#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC + int data = va_arg(args, int); + +#define MAP(id, var) case id: var = data; break; + + switch (ctrl_id) + { + MAP (VP8_SET_DBG_COLOR_REF_FRAME, ctx->dbg_color_ref_frame_flag); + MAP (VP8_SET_DBG_COLOR_MB_MODES, ctx->dbg_color_mb_modes_flag); + MAP (VP8_SET_DBG_COLOR_B_MODES, ctx->dbg_color_b_modes_flag); + MAP (VP8_SET_DBG_DISPLAY_MV, ctx->dbg_display_mv_flag); + } + + return VPX_CODEC_OK; +#else + return VPX_CODEC_INCAPABLE; +#endif +} vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = { - {VP8_SET_REFERENCE, vp8_set_reference}, - {VP8_COPY_REFERENCE, vp8_get_reference}, - {VP8_SET_POSTPROC, vp8_set_postproc}, + {VP8_SET_REFERENCE, vp8_set_reference}, + {VP8_COPY_REFERENCE, vp8_get_reference}, + {VP8_SET_POSTPROC, vp8_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, vp8_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, vp8_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, vp8_set_dbg_options}, + {VP8_SET_DBG_DISPLAY_MV, vp8_set_dbg_options}, { -1, NULL}, }; @@ -654,9 +704,9 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = #ifndef VERSION_STRING #define VERSION_STRING #endif -vpx_codec_iface_t vpx_codec_vp8_dx_algo = +CODEC_INTERFACE(vpx_codec_vp8_dx) = { - "vpx Technologies VP8 Decoder" VERSION_STRING, + "WebM Project VP8 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC, /* vpx_codec_caps_t caps; */ @@ -671,7 +721,14 @@ vpx_codec_iface_t vpx_codec_vp8_dx_algo = vp8_decode, /* vpx_codec_decode_fn_t decode; */ vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ }, - {NOT_IMPLEMENTED} /* encoder functions */ + { /* encoder functions */ + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED + } }; /* @@ -679,7 +736,7 @@ 
vpx_codec_iface_t vpx_codec_vp8_dx_algo = */ vpx_codec_iface_t vpx_codec_vp8_algo = { - "vpx Technologies VP8 Decoder (Deprecated API)" VERSION_STRING, + "WebM Project VP8 Decoder (Deprecated API)" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC, /* vpx_codec_caps_t caps; */ @@ -694,5 +751,12 @@ vpx_codec_iface_t vpx_codec_vp8_algo = vp8_decode, /* vpx_codec_decode_fn_t decode; */ vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ }, - {NOT_IMPLEMENTED} /* encoder functions */ + { /* encoder functions */ + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED, + NOT_IMPLEMENTED + } }; diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 651ee7767..683d785e6 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -1,14 +1,18 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
## include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk + +VP8_CX_EXPORTS += exports_enc + VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes) VP8_CX_SRCS-no += $(VP8_COMMON_SRCS-no) VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) @@ -70,10 +74,16 @@ VP8_CX_SRCS-yes += encoder/quantize.c VP8_CX_SRCS-yes += encoder/ratectrl.c VP8_CX_SRCS-yes += encoder/rdopt.c VP8_CX_SRCS-yes += encoder/sad_c.c -VP8_CX_SRCS-yes += encoder/ssim.c +VP8_CX_SRCS-yes += encoder/segmentation.c +VP8_CX_SRCS-yes += encoder/segmentation.h +VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c VP8_CX_SRCS-yes += encoder/tokenize.c VP8_CX_SRCS-yes += encoder/treewriter.c VP8_CX_SRCS-yes += encoder/variance_c.c +VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h +VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c +VP8_CX_SRCS-yes += encoder/temporal_filter.c +VP8_CX_SRCS-yes += encoder/temporal_filter.h ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c @@ -83,19 +93,24 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h +VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm -VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm 
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm +VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm +VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm +VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk index f0753d93e..da27e0897 100644 --- a/vp8/vp8cx_arm.mk +++ b/vp8/vp8cx_arm.mk @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. 
## @@ -12,17 +13,21 @@ #File list for arm # encoder -VP8_CX_SRCS-$(HAVE_ARMV6) += encoder/arm/csystemdependent.c +VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/arm_csystemdependent.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/encodemb_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/quantize_arm.c VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/picklpf_arm.c -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/boolhuff_arm.c -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/mcomp_arm.c +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6) += encoder/generic/csystemdependent.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/boolhuff.c -VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7) += encoder/mcomp.c +VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE) += encoder/boolhuff.c + +#File list for armv5te +# encoder +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/boolhuff_armv5te$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM) +VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM) #File list for armv6 # encoder @@ -43,10 +48,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_memcpy_neon$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM) -VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/boolhuff_armv7$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM) VP8_CX_SRCS-$(HAVE_ARMV7) += encoder/arm/vpx_vp8_enc_asm_offsets.c diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk index 76368eb53..1acd67453 
100644 --- a/vp8/vp8dx.mk +++ b/vp8/vp8dx.mk @@ -1,14 +1,18 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk + +VP8_DX_EXPORTS += exports_dec + VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes) VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) @@ -26,7 +30,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder # common #define ARM #define DISABLE_THREAD -#define INLINE=__forceinline #INCLUDES += algo/vpx_common/vpx_mem/include #INCLUDES += common @@ -40,7 +43,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder # decoder #define ARM #define DISABLE_THREAD -#define INLINE=__forceinline #INCLUDES += algo/vpx_common/vpx_mem/include #INCLUDES += common @@ -52,23 +54,26 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder VP8_DX_SRCS-yes += decoder/dboolhuff.c VP8_DX_SRCS-yes += decoder/decodemv.c VP8_DX_SRCS-yes += decoder/decodframe.c -VP8_DX_SRCS-yes += decoder/demode.c VP8_DX_SRCS-yes += decoder/dequantize.c VP8_DX_SRCS-yes += decoder/detokenize.c VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c VP8_DX_SRCS-yes += decoder/dboolhuff.h VP8_DX_SRCS-yes += decoder/decodemv.h VP8_DX_SRCS-yes += decoder/decoderthreading.h -VP8_DX_SRCS-yes += decoder/demode.h VP8_DX_SRCS-yes += decoder/dequantize.h 
VP8_DX_SRCS-yes += decoder/detokenize.h VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h VP8_DX_SRCS-yes += decoder/onyxd_if.c VP8_DX_SRCS-yes += decoder/threading.c +VP8_DX_SRCS-yes += decoder/idct_blk.c +VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h +VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes)) VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm +VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c +VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk index 1b4a7ecf7..0803a9cb0 100644 --- a/vp8/vp8dx_arm.mk +++ b/vp8/vp8dx_arm.mk @@ -1,44 +1,32 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## #VP8_DX_SRCS list is modified according to different platforms. 
-#File list for arm -# decoder -#VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/decodframe_arm.c -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dsystemdependent.c +VP8_DX_SRCS-$(ARCH_ARM) += decoder/arm/arm_dsystemdependent.c -#VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/decodframe.c -VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/dequantize.c -VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6) += decoder/generic/dsystemdependent.c +VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/dequantize_arm.c +VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK) += decoder/arm/detokenize$(ASM) #File list for armv6 -# decoder -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantdcidct_v6$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantidct_v6$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_dc_idct_v6$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequant_idct_v6$(ASM) VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/dequantize_v6$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV6) += decoder/arm/armv6/idct_blk_v6.c #File list for neon -# decoder -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantdcidct_neon$(ASM) -VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantidct_neon$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_full_2x_neon$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_dc_0_2x_neon$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequant_idct_neon$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM) +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM) VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/dequantizeb_neon$(ASM) - - -#for new token test -ifeq ($(ARCH_ARM),yes) -VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm_sjl.c -VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm_v6$(ASM) -VP8_DX_SRCS-$(CONFIG_NEW_TOKENS) += decoder/onyxd_if_sjl.c - -VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS) += decoder/arm/detokenize_arm.c 
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS) += decoder/onyxd_if.c -endif +VP8_DX_SRCS-$(HAVE_ARMV7) += decoder/arm/neon/idct_blk_neon.c diff --git a/vpx/exports b/vpx/exports deleted file mode 100644 index f5e7473bc..000000000 --- a/vpx/exports +++ /dev/null @@ -1,17 +0,0 @@ -text vpx_dec_control -text vpx_dec_decode -text vpx_dec_destroy -text vpx_dec_err_to_string -text vpx_dec_error -text vpx_dec_error_detail -text vpx_dec_get_caps -text vpx_dec_get_frame -text vpx_dec_get_mem_map -text vpx_dec_get_stream_info -text vpx_dec_iface_name -text vpx_dec_init_ver -text vpx_dec_peek_stream_info -text vpx_dec_register_put_frame_cb -text vpx_dec_register_put_slice_cb -text vpx_dec_set_mem_map -text vpx_dec_xma_init_ver diff --git a/vpx/exports_com b/vpx/exports_com new file mode 100644 index 000000000..2ab05099f --- /dev/null +++ b/vpx/exports_com @@ -0,0 +1,16 @@ +text vpx_codec_build_config +text vpx_codec_control_ +text vpx_codec_destroy +text vpx_codec_err_to_string +text vpx_codec_error +text vpx_codec_error_detail +text vpx_codec_get_caps +text vpx_codec_iface_name +text vpx_codec_version +text vpx_codec_version_extra_str +text vpx_codec_version_str +text vpx_img_alloc +text vpx_img_flip +text vpx_img_free +text vpx_img_set_rect +text vpx_img_wrap diff --git a/vpx/exports_dec b/vpx/exports_dec new file mode 100644 index 000000000..ed121f7ec --- /dev/null +++ b/vpx/exports_dec @@ -0,0 +1,9 @@ +text vpx_codec_dec_init_ver +text vpx_codec_decode +text vpx_codec_get_frame +text vpx_codec_get_mem_map +text vpx_codec_get_stream_info +text vpx_codec_peek_stream_info +text vpx_codec_register_put_frame_cb +text vpx_codec_register_put_slice_cb +text vpx_codec_set_mem_map diff --git a/vpx/exports_enc b/vpx/exports_enc new file mode 100644 index 000000000..3d5674926 --- /dev/null +++ b/vpx/exports_enc @@ -0,0 +1,8 @@ +text vpx_codec_enc_config_default +text vpx_codec_enc_config_set +text vpx_codec_enc_init_ver +text vpx_codec_encode +text vpx_codec_get_cx_data +text 
vpx_codec_get_global_headers +text vpx_codec_get_preview_frame +text vpx_codec_set_cx_data_buf diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h index e95d603e8..dcb451dca 100644 --- a/vpx/internal/vpx_codec_internal.h +++ b/vpx/internal/vpx_codec_internal.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -55,7 +56,7 @@ * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ -#define VPX_CODEC_INTERNAL_ABI_VERSION (2) /**<\hideinitializer*/ +#define VPX_CODEC_INTERNAL_ABI_VERSION (3) /**<\hideinitializer*/ typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t; @@ -137,7 +138,7 @@ typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, * provide type safety for the exchanged data or assign meanings to the * control codes. Those details should be specified in the algorithm's * header file. In particular, the ctrl_id parameter is guaranteed to exist - * in the algorithm's control mapping table, and the data paramter may be NULL. + * in the algorithm's control mapping table, and the data parameter may be NULL. 
* * * \param[in] ctx Pointer to this instance's context @@ -339,7 +340,6 @@ struct vpx_codec_priv vpx_codec_iface_t *iface; struct vpx_codec_alg_priv *alg_priv; const char *err_detail; - unsigned int eval_counter; vpx_codec_flags_t init_flags; struct { @@ -389,6 +389,20 @@ struct vpx_codec_priv #define RECAST(id, x) id##__convert(x) +/* CODEC_INTERFACE convenience macro + * + * By convention, each codec interface is a struct with extern linkage, where + * the symbol is suffixed with _algo. A getter function is also defined to + * return a pointer to the struct, since in some cases it's easier to work + * with text symbols than data symbols (see issue #169). This function has + * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE + * macro is provided to define this getter function automatically. + */ +#define CODEC_INTERFACE(id)\ +vpx_codec_iface_t* id(void) { return &id##_algo; }\ +vpx_codec_iface_t id##_algo + + /* Internal Utility Functions * * The following functions are indended to be used inside algorithms as diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c index 14f4be32e..9c1558c1f 100644 --- a/vpx/src/vpx_codec.c +++ b/vpx/src/vpx_codec.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c index d0de8b4bc..b52470b51 100644 --- a/vpx/src/vpx_decoder.c +++ b/vpx/src/vpx_decoder.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -121,22 +122,10 @@ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; - -#if CONFIG_EVAL_LIMIT - else if (ctx->priv->eval_counter >= 500) - { - ctx->priv->err_detail = "Evaluation limit exceeded."; - res = VPX_CODEC_ERROR; - } - -#endif else { res = ctx->iface->dec.decode(ctx->priv->alg_priv, data, data_sz, user_priv, deadline); -#if CONFIG_EVAL_LIMIT - ctx->priv->eval_counter++; -#endif } return SAVE_STATUS(ctx, res); diff --git a/vpx/src/vpx_decoder_compat.c b/vpx/src/vpx_decoder_compat.c index 96594fe2f..e264734fe 100644 --- a/vpx/src/vpx_decoder_compat.c +++ b/vpx/src/vpx_decoder_compat.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c index a9a40de71..10929590b 100644 --- a/vpx/src/vpx_encoder.c +++ b/vpx/src/vpx_encoder.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -126,15 +127,6 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, res = VPX_CODEC_ERROR; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; - -#if CONFIG_EVAL_LIMIT - else if (ctx->priv->eval_counter >= 500) - { - ctx->priv->err_detail = "Evaluation limit exceeded."; - res = VPX_CODEC_ERROR; - } - -#endif else { /* Execute in a normalized floating point environment, if the platform @@ -144,10 +136,6 @@ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts, duration, flags, deadline); FLOATING_POINT_RESTORE(); - -#if CONFIG_EVAL_LIMIT - ctx->priv->eval_counter++; -#endif } return SAVE_STATUS(ctx, res); @@ -315,7 +303,7 @@ const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list pkt = (const void *) * iter; - if (pkt - list->pkts < list->cnt) + if ((size_t)(pkt - list->pkts) < list->cnt) *iter = pkt + 1; else pkt = NULL; diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c index 55ee391fc..7a4e27062 100644 --- a/vpx/src/vpx_image.c +++ b/vpx/src/vpx_image.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx/vp8.h b/vpx/vp8.h index 6778b7e9c..32c01325f 100644 --- a/vpx/vp8.h +++ b/vpx/vp8.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -37,9 +38,13 @@ */ enum vp8_dec_control_id { - VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ - VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ - VP8_SET_POSTPROC = 3, /**< set decoder's the post processing settings */ + VP8_SET_REFERENCE = 1, /**< pass in an external frame into decoder to be used as reference frame */ + VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ + VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ + VP8_SET_DBG_COLOR_REF_FRAME = 4, /**< set the reference frames to color for each macroblock */ + VP8_SET_DBG_COLOR_MB_MODES = 5, /**< set which macro block modes to color */ + VP8_SET_DBG_COLOR_B_MODES = 6, /**< set which blocks modes to color */ + VP8_SET_DBG_DISPLAY_MV = 7, /**< set which motion vector modes to draw */ VP8_COMMON_CTRL_ID_MAX }; @@ -49,10 +54,14 @@ enum vp8_dec_control_id */ enum vp8_postproc_level { - VP8_NOFILTERING = 0, - VP8_DEBLOCK = 1, - VP8_DEMACROBLOCK = 2, - VP8_ADDNOISE = 4, + VP8_NOFILTERING = 0, + VP8_DEBLOCK = 1<<0, + VP8_DEMACROBLOCK = 
1<<1, + VP8_ADDNOISE = 1<<2, + VP8_DEBUG_TXT_FRAME_INFO = 1<<3, /**< print frame information */ + VP8_DEBUG_TXT_MBLK_MODES = 1<<4, /**< print macro block modes over each macro block */ + VP8_DEBUG_TXT_DC_DIFF = 1<<5, /**< print dc diff for each macro block */ + VP8_DEBUG_TXT_RATE_INFO = 1<<6, /**< print video rate info (encoder only) */ }; /*!\brief post process flags @@ -64,9 +73,9 @@ enum vp8_postproc_level typedef struct vp8_postproc_cfg { - int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ - int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ - int noise_level; /**< the strength of additive noise, valid range [0, 16] */ + int post_proc_flag; /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */ + int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ + int noise_level; /**< the strength of additive noise, valid range [0, 16] */ } vp8_postproc_cfg_t; /*!\brief reference frame type @@ -94,12 +103,16 @@ typedef struct vpx_ref_frame /*!\brief vp8 decoder control funciton parameter type * - * defines the data type for each of VP8 decoder control funciton requires + * defines the data type for each of VP8 decoder control function requires */ VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES, int) +VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV, int) /*! @} - end defgroup vp8 */ diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 6a8cf653e..5ab6fbfb7 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,8 @@ * This interface provides the capability to encode raw VP8 streams, as would * be found in AVI files. */ -extern vpx_codec_iface_t vpx_codec_vp8_cx_algo; +extern vpx_codec_iface_t vpx_codec_vp8_cx_algo; +extern vpx_codec_iface_t* vpx_codec_vp8_cx(void); #if CONFIG_EXPERIMENTAL diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h index 8203557ba..fccd407f3 100644 --- a/vpx/vp8dx.h +++ b/vpx/vp8dx.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -28,7 +29,8 @@ * This interface provides the capability to decode raw VP8 streams, as would * be found in AVI files and other non-Flash uses. 
*/ -extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; +extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; +extern vpx_codec_iface_t* vpx_codec_vp8_dx(void); /* Include controls common to both the encoder and decoder */ #include "vp8.h" diff --git a/vpx/vp8e.h b/vpx/vp8e.h index 85ca39f3a..abfce333a 100644 --- a/vpx/vp8e.h +++ b/vpx/vp8e.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h index 145ca29fa..899b27cca 100644 --- a/vpx/vpx_codec.h +++ b/vpx/vpx_codec.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -61,7 +62,7 @@ extern "C" { /*!\brief Decorator indicating a function is potentially unused */ #ifdef UNUSED #elif __GNUC__ -#define UNUSED __attribute__ ((unused)); +#define UNUSED __attribute__ ((unused)) #else #define UNUSED #endif @@ -127,7 +128,7 @@ extern "C" { /*!\brief An iterator reached the end of list. * */ - VPX_CODEC_LIST_END, + VPX_CODEC_LIST_END } vpx_codec_err_t; diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk index 223f9ad36..4f1d74bd4 100644 --- a/vpx/vpx_codec.mk +++ b/vpx/vpx_codec.mk @@ -1,10 +1,11 @@ ## -## Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## -## Use of this source code is governed by a BSD-style license and patent -## grant that can be found in the LICENSE file in the root of the source -## tree. All contributing project authors may be found in the AUTHORS -## file in the root of the source tree. +## Use of this source code is governed by a BSD-style license +## that can be found in the LICENSE file in the root of the source +## tree. An additional intellectual property rights grant can be found +## in the file PATENTS. All contributing project authors may +## be found in the AUTHORS file in the root of the source tree. ## diff --git a/vpx/vpx_codec_impl_bottom.h b/vpx/vpx_codec_impl_bottom.h index c52654cec..6eb79a88a 100644 --- a/vpx/vpx_codec_impl_bottom.h +++ b/vpx/vpx_codec_impl_bottom.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. 
+ * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx/vpx_codec_impl_top.h b/vpx/vpx_codec_impl_top.h index f73809a8e..c9b8cfab2 100644 --- a/vpx/vpx_codec_impl_top.h +++ b/vpx/vpx_codec_impl_top.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h index ab0818f04..6ffc2d440 100644 --- a/vpx/vpx_decoder.h +++ b/vpx/vpx_decoder.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h index 25bb5eb36..ca6f61849 100644 --- a/vpx/vpx_decoder_compat.h +++ b/vpx/vpx_decoder_compat.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -77,7 +78,7 @@ extern "C" { /*!\brief An iterator reached the end of list. * */ - VPX_DEC_LIST_END = VPX_CODEC_LIST_END, + VPX_DEC_LIST_END = VPX_CODEC_LIST_END } vpx_dec_err_t; diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 67393be5a..3acb19945 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -170,7 +171,7 @@ extern "C" { { VPX_RC_ONE_PASS, /**< Single pass mode */ VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ - VPX_RC_LAST_PASS, /**< Final pass of multi-pass mode */ + VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ }; diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h index 7b235a4c5..dcb8f31bc 100644 --- a/vpx/vpx_image.h +++ b/vpx/vpx_image.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -54,7 +55,7 @@ extern "C" { VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */ - VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4, /** < planar 4:2:0 format with vpx color space */ + VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4 /** < planar 4:2:0 format with vpx color space */ } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ @@ -116,11 +117,11 @@ extern "C" { #define VPX_PLANE_V 2 /**< V (Chroma) plane */ #define VPX_PLANE_ALPHA 3 /**< A (Transparancy) plane */ #if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT -#define PLANE_PACKED VPX_PLANE_PACKED +#define PLANE_PACKED VPX_PLANE_PACKED #define PLANE_Y VPX_PLANE_Y #define PLANE_U VPX_PLANE_U #define PLANE_V VPX_PLANE_V -#define PLANE_ALPHA VPX_PLANE_ALPHA +#define PLANE_ALPHA VPX_PLANE_ALPHA #endif unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */ int stride[4]; /**< stride between rows for each plane */ diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h index e250422b0..9a06c1ac3 100644 --- a/vpx/vpx_integer.h +++ b/vpx/vpx_integer.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h index c33240398..e54f54d9b 100644 --- a/vpx_mem/include/nds/vpx_mem_nds.h +++ b/vpx_mem/include/nds/vpx_mem_nds.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h index 3b68d8615..6e261ba7f 100644 --- a/vpx_mem/include/vpx_mem_intrnl.h +++ b/vpx_mem/include/vpx_mem_intrnl.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -14,24 +15,24 @@ #ifndef CONFIG_MEM_MANAGER # if defined(VXWORKS) -# define CONFIG_MEM_MANAGER 1 //include heap manager functionality, -//default: enabled on vxworks +# define CONFIG_MEM_MANAGER 1 /*include heap manager functionality,*/ +/*default: enabled on vxworks*/ # else -# define CONFIG_MEM_MANAGER 0 //include heap manager functionality +# define CONFIG_MEM_MANAGER 0 /*include heap manager functionality*/ # endif #endif /*CONFIG_MEM_MANAGER*/ #ifndef CONFIG_MEM_TRACKER -# define CONFIG_MEM_TRACKER 1 //include xvpx_* calls in the lib +# define CONFIG_MEM_TRACKER 1 /*include xvpx_* calls in the lib*/ #endif #ifndef CONFIG_MEM_CHECKS -# define CONFIG_MEM_CHECKS 0 //include some basic safety checks in -//vpx_memcpy, _memset, and _memmove +# define CONFIG_MEM_CHECKS 0 /*include some basic safety checks in +vpx_memcpy, _memset, and _memmove*/ #endif #ifndef USE_GLOBAL_FUNCTION_POINTERS -# define USE_GLOBAL_FUNCTION_POINTERS 0 //use function pointers instead of compiled functions. +# define USE_GLOBAL_FUNCTION_POINTERS 0 /*use function pointers instead of compiled functions.*/ #endif #if CONFIG_MEM_TRACKER @@ -45,9 +46,9 @@ #ifndef DEFAULT_ALIGNMENT # if defined(VXWORKS) -# define DEFAULT_ALIGNMENT 32 //default addr alignment to use in -//calls to vpx_* functions other -//than vpx_memalign +# define DEFAULT_ALIGNMENT 32 /*default addr alignment to use in + calls to vpx_* functions other + than vpx_memalign*/ # else # define DEFAULT_ALIGNMENT 1 # endif @@ -58,24 +59,24 @@ #endif #if CONFIG_MEM_TRACKER -# define TRY_BOUNDS_CHECK 1 //when set to 1 pads each allocation, -//integrity can be checked using -//vpx_memory_tracker_check_integrity -//or on free by defining -//TRY_BOUNDS_CHECK_ON_FREE +# define TRY_BOUNDS_CHECK 1 /*when set to 1 pads each allocation, + integrity can be checked using + vpx_memory_tracker_check_integrity + or on free by defining*/ +/*TRY_BOUNDS_CHECK_ON_FREE*/ #else # define TRY_BOUNDS_CHECK 0 #endif /*CONFIG_MEM_TRACKER*/ #if 
TRY_BOUNDS_CHECK -# define TRY_BOUNDS_CHECK_ON_FREE 0 //checks mem integrity on every -//free, very expensive -# define BOUNDS_CHECK_VALUE 0xdeadbeef //value stored before/after ea. -//mem addr for bounds checking -# define BOUNDS_CHECK_PAD_SIZE 32 //size of the padding before and -//after ea allocation to be filled -//with BOUNDS_CHECK_VALUE. -//this should be a multiple of 4 +# define TRY_BOUNDS_CHECK_ON_FREE 0 /*checks mem integrity on every + free, very expensive*/ +# define BOUNDS_CHECK_VALUE 0xdeadbeef /*value stored before/after ea. + mem addr for bounds checking*/ +# define BOUNDS_CHECK_PAD_SIZE 32 /*size of the padding before and + after ea allocation to be filled + with BOUNDS_CHECK_VALUE. + this should be a multiple of 4*/ #else # define BOUNDS_CHECK_VALUE 0 # define BOUNDS_CHECK_PAD_SIZE 0 diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h index ab85d19c4..ef2b29b07 100644 --- a/vpx_mem/include/vpx_mem_tracker.h +++ b/vpx_mem/include/vpx_mem_tracker.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/intel_linux/vpx_mem.c b/vpx_mem/intel_linux/vpx_mem.c index 002e407ef..00150acd5 100644 --- a/vpx_mem/intel_linux/vpx_mem.c +++ b/vpx_mem/intel_linux/vpx_mem.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/intel_linux/vpx_mem_tracker.c b/vpx_mem/intel_linux/vpx_mem_tracker.c index fa023e348..5bed4b50d 100644 --- a/vpx_mem/intel_linux/vpx_mem_tracker.c +++ b/vpx_mem/intel_linux/vpx_mem_tracker.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c index 9abd81ee4..22c4a54ee 100644 --- a/vpx_mem/memory_manager/hmm_alloc.c +++ b/vpx_mem/memory_manager/hmm_alloc.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c index 0cacc3f8f..ad1da032e 100644 --- a/vpx_mem/memory_manager/hmm_base.c +++ b/vpx_mem/memory_manager/hmm_base.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c index dc59f5507..d92435cfa 100644 --- a/vpx_mem/memory_manager/hmm_dflt_abort.c +++ b/vpx_mem/memory_manager/hmm_dflt_abort.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c index 79d75a74b..9a4b6e416 100644 --- a/vpx_mem/memory_manager/hmm_grow.c +++ b/vpx_mem/memory_manager/hmm_grow.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c index 5ebe398e0..c3c6f2c42 100644 --- a/vpx_mem/memory_manager/hmm_largest.c +++ b/vpx_mem/memory_manager/hmm_largest.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c index 6e3f2f041..f90da9692 100644 --- a/vpx_mem/memory_manager/hmm_resize.c +++ b/vpx_mem/memory_manager/hmm_resize.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c index 5ef9b233f..78fe268ba 100644 --- a/vpx_mem/memory_manager/hmm_shrink.c +++ b/vpx_mem/memory_manager/hmm_shrink.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c index 41103c89e..3f7be8f70 100644 --- a/vpx_mem/memory_manager/hmm_true.c +++ b/vpx_mem/memory_manager/hmm_true.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h index e2733ef2f..1b2c9b738 100644 --- a/vpx_mem/memory_manager/include/cavl_if.h +++ b/vpx_mem/memory_manager/include/cavl_if.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h index 267bc7312..5e165dd4d 100644 --- a/vpx_mem/memory_manager/include/cavl_impl.h +++ b/vpx_mem/memory_manager/include/cavl_impl.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h index 933e30dd7..33004cadc 100644 --- a/vpx_mem/memory_manager/include/heapmm.h +++ b/vpx_mem/memory_manager/include/heapmm.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h index 86e4e9fa8..30b9f5045 100644 --- a/vpx_mem/memory_manager/include/hmm_cnfg.h +++ b/vpx_mem/memory_manager/include/hmm_cnfg.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h index 6e2be08fc..5d62abc59 100644 --- a/vpx_mem/memory_manager/include/hmm_intrnl.h +++ b/vpx_mem/memory_manager/include/hmm_intrnl.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/nds/vpx_mem_nds.c b/vpx_mem/nds/vpx_mem_nds.c index f2a3043b2..11ac95cba 100644 --- a/vpx_mem/nds/vpx_mem_nds.c +++ b/vpx_mem/nds/vpx_mem_nds.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ diff --git a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c index 6501855c0..d55b7d92c 100644 --- a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c +++ b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c index f6b1a3550..eade43222 100644 --- a/vpx_mem/vpx_mem.c +++ b/vpx_mem/vpx_mem.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -30,7 +31,7 @@ static unsigned long g_alloc_count = 0; # include "hmm_intrnl.h" # define SHIFT_HMM_ADDR_ALIGN_UNIT 5 -# define TOTAL_MEMORY_TO_ALLOCATE 20971520 // 20 * 1024 * 1024 +# define TOTAL_MEMORY_TO_ALLOCATE 20971520 /* 20 * 1024 * 1024 */ # define MM_DYNAMIC_MEMORY 1 # if MM_DYNAMIC_MEMORY @@ -47,7 +48,7 @@ static int g_mng_memory_allocated = 0; static int vpx_mm_create_heap_memory(); static void *vpx_mm_realloc(void *memblk, size_t size); -#endif //CONFIG_MEM_MANAGER +#endif /*CONFIG_MEM_MANAGER*/ #if USE_GLOBAL_FUNCTION_POINTERS struct GLOBAL_FUNC_POINTERS @@ -74,7 +75,7 @@ struct GLOBAL_FUNC_POINTERS # define VPX_MEMCPY_L memcpy # define VPX_MEMSET_L memset # define VPX_MEMMOVE_L memmove -#endif // USE_GLOBAL_FUNCTION_POINTERS +#endif /* USE_GLOBAL_FUNCTION_POINTERS */ unsigned int vpx_mem_get_version() { @@ -129,7 +130,7 @@ void *vpx_memalign(size_t align, size_t size) addr = hmm_alloc(&hmm_d, number_aau); #else addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE); -#endif //CONFIG_MEM_MANAGER +#endif /*CONFIG_MEM_MANAGER*/ if (addr) { @@ -268,7 +269,7 @@ void *xvpx_memalign(size_t align, size_t size, char *file, int line) } #else x = vpx_memalign(align, size); -#endif //TRY_BOUNDS_CHECK +#endif /*TRY_BOUNDS_CHECK*/ g_alloc_count++; @@ -331,9 +332,10 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line) vpx_memory_tracker_check_integrity(file, line); #endif - //have to do this regardless of success, because - //the memory that does get realloc'd may change - //the bounds values of this block + /* have to do this regardless of success, because + * the memory that does get realloc'd may change + * the bounds values of this block + */ vpx_memory_tracker_remove((size_t)memblk); #if TRY_BOUNDS_CHECK @@ -363,7 +365,7 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line) } #else x = vpx_realloc(memblk, size); -#endif //TRY_BOUNDS_CHECK +#endif /*TRY_BOUNDS_CHECK*/ if (!memblk) ++g_alloc_count; @@ -379,7 +381,7 
@@ void xvpx_free(void *p_address, char *file, int line) { #if TRY_BOUNDS_CHECK unsigned char *p_bounds_address = (unsigned char *)p_address; - //p_bounds_address -= BOUNDS_CHECK_PAD_SIZE; + /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/ #endif #if !TRY_BOUNDS_CHECK_ON_FREE @@ -393,8 +395,9 @@ void xvpx_free(void *p_address, char *file, int line) vpx_memory_tracker_check_integrity(file, line); #endif - //if the addr isn't found in the list, assume it was allocated via - //vpx_ calls not xvpx_, therefore it does not contain any padding + /* if the addr isn't found in the list, assume it was allocated via + * vpx_ calls not xvpx_, therefore it does not contain any padding + */ if (vpx_memory_tracker_remove((size_t)p_address) == -2) { p_bounds_address = p_address; @@ -420,7 +423,7 @@ void xvpx_free(void *p_address, char *file, int line) #if CONFIG_MEM_CHECKS #if defined(VXWORKS) -#include //for task_delay() +#include /*for task_delay()*/ /* This function is only used to get a stack trace of the player object so we can se where we are having a problem. */ static int get_my_tt(int task) @@ -626,7 +629,7 @@ static void *vpx_mm_realloc(void *memblk, size_t size) return p_ret; } -#endif //CONFIG_MEM_MANAGER +#endif /*CONFIG_MEM_MANAGER*/ #if USE_GLOBAL_FUNCTION_POINTERS # if CONFIG_MEM_TRACKER @@ -638,7 +641,7 @@ extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l , g_memset_func g_memset_l , g_memmove_func g_memmove_l); # endif -#endif //USE_GLOBAL_FUNCTION_POINTERS +#endif /*USE_GLOBAL_FUNCTION_POINTERS*/ int vpx_mem_set_functions(g_malloc_func g_malloc_l , g_calloc_func g_calloc_l , g_realloc_func g_realloc_l diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h index 6ccb9be55..749eaa42e 100644 --- a/vpx_mem/vpx_mem.h +++ b/vpx_mem/vpx_mem.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ @@ -25,15 +26,15 @@ /* end - vpx_mem version info */ #ifndef VPX_TRACK_MEM_USAGE -# define VPX_TRACK_MEM_USAGE 0 //enable memory tracking/integrity checks +# define VPX_TRACK_MEM_USAGE 0 /* enable memory tracking/integrity checks */ #endif #ifndef VPX_CHECK_MEM_FUNCTIONS -# define VPX_CHECK_MEM_FUNCTIONS 0 //enable basic safety checks in _memcpy, -//_memset, and _memmove +# define VPX_CHECK_MEM_FUNCTIONS 0 /* enable basic safety checks in _memcpy, + _memset, and _memmove */ #endif #ifndef REPLACE_BUILTIN_FUNCTIONS -# define REPLACE_BUILTIN_FUNCTIONS 0 //replace builtin functions with their -//vpx_ equivalents +# define REPLACE_BUILTIN_FUNCTIONS 0 /* replace builtin functions with their + vpx_ equivalents */ #endif #include @@ -73,7 +74,7 @@ extern "C" { void *vpx_memset(void *dest, int val, size_t length); void *vpx_memmove(void *dest, const void *src, size_t count); -// special memory functions + /* special memory functions */ void *vpx_mem_alloc(int id, size_t size, size_t align); void vpx_mem_free(int id, void *mem, size_t size); diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c index 4427e27fc..938ad0716 100644 --- a/vpx_mem/vpx_mem_tracker.c +++ b/vpx_mem/vpx_mem_tracker.c @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h new file mode 100644 index 000000000..81af1f11f --- /dev/null +++ b/vpx_ports/arm.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VPX_PORTS_ARM_H +#define VPX_PORTS_ARM_H +#include +#include "config.h" + +/*ARMv5TE "Enhanced DSP" instructions.*/ +#define HAS_EDSP 0x01 +/*ARMv6 "Parallel" or "Media" instructions.*/ +#define HAS_MEDIA 0x02 +/*ARMv7 optional NEON instructions.*/ +#define HAS_NEON 0x04 + +int arm_cpu_caps(void); + +#endif + diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c new file mode 100644 index 000000000..4109924cf --- /dev/null +++ b/vpx_ports/arm_cpudetect.c @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include "arm.h" + +static int arm_cpu_env_flags(int *flags) +{ + char *env; + env = getenv("VPX_SIMD_CAPS"); + if (env && *env) + { + *flags = (int)strtol(env, NULL, 0); + return 0; + } + *flags = 0; + return -1; +} + +static int arm_cpu_env_mask(void) +{ + char *env; + env = getenv("VPX_SIMD_CAPS_MASK"); + return env && *env ? (int)strtol(env, NULL, 0) : ~0; +} + + +#if defined(_MSC_VER) +/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ +#define WIN32_LEAN_AND_MEAN +#define WIN32_EXTRA_LEAN +#include + +int arm_cpu_caps(void) +{ + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) + { + return flags; + } + mask = arm_cpu_env_mask(); + /* MSVC has no inline __asm support for ARM, but it does let you __emit + * instructions via their assembled hex code. + * All of these instructions should be essentially nops. + */ +#if defined(HAVE_ARMV5TE) + if (mask & HAS_EDSP) + { + __try + { + /*PLD [r13]*/ + __emit(0xF5DDF000); + flags |= HAS_EDSP; + } + __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) + { + /*Ignore exception.*/ + } + } +#if defined(HAVE_ARMV6) + if (mask & HAS_MEDIA) + __try + { + /*SHADD8 r3,r3,r3*/ + __emit(0xE6333F93); + flags |= HAS_MEDIA; + } + __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) + { + /*Ignore exception.*/ + } + } +#if defined(HAVE_ARMV7) + if (mask & HAS_NEON) + { + __try + { + /*VORR q0,q0,q0*/ + __emit(0xF2200150); + flags |= HAS_NEON; + } + __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) + { + /*Ignore exception.*/ + } + } +#endif +#endif +#endif + return flags & mask; +} + +#elif defined(__linux__) +#include + +int arm_cpu_caps(void) +{ + FILE *fin; + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) + { + return flags; + } + mask = arm_cpu_env_mask(); + /* Reading /proc/self/auxv would be easier, but that doesn't work reliably + * on 
Android. + * This also means that detection will fail in Scratchbox. + */ + fin = fopen("/proc/cpuinfo","r"); + if(fin != NULL) + { + /* 512 should be enough for anybody (it's even enough for all the flags + * that x86 has accumulated... so far). + */ + char buf[512]; + while (fgets(buf, 511, fin) != NULL) + { +#if defined(HAVE_ARMV5TE) || defined(HAVE_ARMV7) + if (memcmp(buf, "Features", 8) == 0) + { + char *p; +#if defined(HAVE_ARMV5TE) + p=strstr(buf, " edsp"); + if (p != NULL && (p[5] == ' ' || p[5] == '\n')) + { + flags |= HAS_EDSP; + } +#if defined(HAVE_ARMV7) + p = strstr(buf, " neon"); + if (p != NULL && (p[5] == ' ' || p[5] == '\n')) + { + flags |= HAS_NEON; + } +#endif +#endif + } +#endif +#if defined(HAVE_ARMV6) + if (memcmp(buf, "CPU architecture:",17) == 0){ + int version; + version = atoi(buf+17); + if (version >= 6) + { + flags |= HAS_MEDIA; + } + } +#endif + } + fclose(fin); + } + return flags & mask; +} + +#elif !CONFIG_RUNTIME_CPU_DETECT + +int arm_cpu_caps(void) +{ + int flags; + int mask; + if (!arm_cpu_env_flags(&flags)) + { + return flags; + } + mask = arm_cpu_env_mask(); +#if defined(HAVE_ARMV5TE) + flags |= HAS_EDSP; +#endif +#if defined(HAVE_ARMV6) + flags |= HAS_MEDIA; +#endif +#if defined(HAVE_ARMV7) + flags |= HAS_NEON; +#endif + return flags & mask; +} + +#else +#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ + "available for your platform. Reconfigure without --enable-runtime-cpu-detect." +#endif diff --git a/vpx_ports/config.h b/vpx_ports/config.h index b87669ec1..1abe70da9 100644 --- a/vpx_ports/config.h +++ b/vpx_ports/config.h @@ -1,9 +1,10 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm index 03e34992c..306e235ce 100644 --- a/vpx_ports/emms.asm +++ b/vpx_ports/emms.asm @@ -1,14 +1,15 @@ ; -; Copyright (c) 2010 The VP8 project authors. All Rights Reserved. +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; -; Use of this source code is governed by a BSD-style license and patent -; grant that can be found in the LICENSE file in the root of the source -; tree. All contributing project authors may be found in the AUTHORS -; file in the root of the source tree. +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. ; -%include "x86_abi_support.asm" +%include "vpx_ports/x86_abi_support.asm" section .text global sym(vpx_reset_mmx_state) diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h index 1078169ee..9ec34fec6 100644 --- a/vpx_ports/mem.h +++ b/vpx_ports/mem.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. 
All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h index 869d583f8..c178b8b74 100644 --- a/vpx_ports/mem_ops.h +++ b/vpx_ports/mem_ops.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -59,7 +60,7 @@ #undef mem_get_be16 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) -static INLINE unsigned MEM_VALUE_T mem_get_be16(const void *vmem) +static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -71,7 +72,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be16(const void *vmem) #undef mem_get_be24 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) -static INLINE unsigned MEM_VALUE_T mem_get_be24(const void *vmem) +static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -84,7 +85,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be24(const void *vmem) #undef mem_get_be32 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) -static INLINE unsigned MEM_VALUE_T mem_get_be32(const void *vmem) +static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -98,7 +99,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be32(const void *vmem) #undef mem_get_le16 #define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16) -static INLINE unsigned MEM_VALUE_T mem_get_le16(const void *vmem) +static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -110,7 +111,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le16(const void *vmem) #undef mem_get_le24 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) -static INLINE unsigned MEM_VALUE_T mem_get_le24(const void *vmem) +static unsigned MEM_VALUE_T mem_get_le24(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -123,7 +124,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le24(const void *vmem) #undef mem_get_le32 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) -static INLINE unsigned MEM_VALUE_T mem_get_le32(const void *vmem) +static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { 
unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; @@ -136,7 +137,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le32(const void *vmem) } #define mem_get_s_generic(end,sz) \ - static INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ + static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\ const MAU_T *mem = (const MAU_T*)vmem;\ signed MEM_VALUE_T val = mem_get_##end##sz(mem);\ return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\ @@ -168,7 +169,7 @@ mem_get_s_generic(le, 32); #undef mem_put_be16 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) -static INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) +static void mem_put_be16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; @@ -178,7 +179,7 @@ static INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) #undef mem_put_be24 #define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) -static INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) +static void mem_put_be24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; @@ -189,7 +190,7 @@ static INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) #undef mem_put_be32 #define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) -static INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) +static void mem_put_be32(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; @@ -201,7 +202,7 @@ static INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) #undef mem_put_le16 #define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) -static INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) +static void mem_put_le16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; @@ -211,7 +212,7 @@ static INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) #undef mem_put_le24 #define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) -static INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) +static void mem_put_le24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; @@ -222,7 +223,7 @@ 
static INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) #undef mem_put_le32 #define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) -static INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) +static void mem_put_le32(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h index 1d0db2ccb..4c44aa260 100644 --- a/vpx_ports/mem_ops_aligned.h +++ b/vpx_ports/mem_ops_aligned.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -39,19 +40,19 @@ #define swap_endian_32_se(val,raw) swap_endian_32(val,raw) #define mem_get_ne_aligned_generic(end,sz) \ - static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ + static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ return *mem;\ } #define mem_get_sne_aligned_generic(end,sz) \ - static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ + static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ const int##sz##_t *mem = (const int##sz##_t *)vmem;\ return *mem;\ } #define mem_get_se_aligned_generic(end,sz) \ - static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ + static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\ const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\ unsigned MEM_VALUE_T val, raw = *mem;\ swap_endian_##sz(val,raw);\ @@ -59,7 +60,7 @@ } #define mem_get_sse_aligned_generic(end,sz) \ - static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ + static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\ const int##sz##_t *mem = (const int##sz##_t *)vmem;\ unsigned MEM_VALUE_T val, raw = *mem;\ swap_endian_##sz##_se(val,raw);\ @@ -67,13 +68,13 @@ } #define mem_put_ne_aligned_generic(end,sz) \ - static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ uint##sz##_t *mem = (uint##sz##_t *)vmem;\ *mem = (uint##sz##_t)val;\ } #define mem_put_se_aligned_generic(end,sz) \ - static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ + static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\ uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\ swap_endian_##sz(raw,val);\ *mem = (uint##sz##_t)raw;\ diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h index 
5c045387f..37a0c7cb2 100644 --- a/vpx_ports/vpx_timer.h +++ b/vpx_ports/vpx_timer.h @@ -1,17 +1,18 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TIMER_H #define VPX_TIMER_H -#if defined(_MSC_VER) +#if defined(_WIN32) /* * Win32 specific includes */ @@ -42,7 +43,7 @@ struct vpx_usec_timer { -#if defined(_MSC_VER) +#if defined(_WIN32) LARGE_INTEGER begin, end; #else struct timeval begin, end; @@ -50,10 +51,10 @@ struct vpx_usec_timer }; -static INLINE void +static void vpx_usec_timer_start(struct vpx_usec_timer *t) { -#if defined(_MSC_VER) +#if defined(_WIN32) QueryPerformanceCounter(&t->begin); #else gettimeofday(&t->begin, NULL); @@ -61,10 +62,10 @@ vpx_usec_timer_start(struct vpx_usec_timer *t) } -static INLINE void +static void vpx_usec_timer_mark(struct vpx_usec_timer *t) { -#if defined(_MSC_VER) +#if defined(_WIN32) QueryPerformanceCounter(&t->end); #else gettimeofday(&t->end, NULL); @@ -72,10 +73,10 @@ vpx_usec_timer_mark(struct vpx_usec_timer *t) } -static INLINE long +static long vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { -#if defined(_MSC_VER) +#if defined(_WIN32) LARGE_INTEGER freq, diff; diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h index 14244bd68..2ab66b14b 100644 --- a/vpx_ports/vpxtypes.h 
+++ b/vpx_ports/vpxtypes.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. */ diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h index 935d03762..190c8643a 100644 --- a/vpx_ports/x86.h +++ b/vpx_ports/x86.h @@ -1,10 +1,11 @@ /* - * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * - * Use of this source code is governed by a BSD-style license and patent - * grant that can be found in the LICENSE file in the root of the source - * tree. All contributing project authors may be found in the AUTHORS - * file in the root of the source tree. + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
*/ @@ -13,6 +14,26 @@ #include #include "config.h" +typedef enum +{ + VPX_CPU_UNKNOWN = -1, + VPX_CPU_AMD, + VPX_CPU_AMD_OLD, + VPX_CPU_CENTAUR, + VPX_CPU_CYRIX, + VPX_CPU_INTEL, + VPX_CPU_NEXGEN, + VPX_CPU_NSC, + VPX_CPU_RISE, + VPX_CPU_SIS, + VPX_CPU_TRANSMETA, + VPX_CPU_TRANSMETA_OLD, + VPX_CPU_UMC, + VPX_CPU_VIA, + + VPX_CPU_LAST +} vpx_cpu_t; + #if defined(__GNUC__) && __GNUC__ #if ARCH_X86_64 #define cpuid(func,ax,bx,cx,dx)\ @@ -23,12 +44,11 @@ #else #define cpuid(func,ax,bx,cx,dx)\ __asm__ __volatile__ (\ - "pushl %%ebx \n\t" \ - "cpuid \n\t" \ - "movl %%ebx, %1 \n\t" \ - "popl %%ebx \n\t" \ - : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \ - : "a" (func)); + "mov %%ebx, %%edi \n\t" \ + "cpuid \n\t" \ + "xchg %%edi, %%ebx \n\t" \ + : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \ + : "a" (func)); #endif #else #if ARCH_X86_64 @@ -54,6 +74,7 @@ void __cpuid(int CPUInfo[4], int info_type); #define HAS_SSE2 0x04 #define HAS_SSE3 0x08 #define HAS_SSSE3 0x10 +#define HAS_SSE4_1 0x20 #ifndef BIT #define BIT(n) (1<