Remove asm offset dependencies
The obj_int_extract code is no longer worth maintaining. It creates significant issues when adapting for different build systems and no longer offers as significant a performance benefit due to improvements in intrinsics. Source files will remain until the various third-party builds are updated. The neon fast quantizer has been moved to intrinsics. The armv6 version has been removed because so few remaining targets require it. Compilers and processors have improved significantly since the pack_tokens code was written. The assembly is no longer faster than the C code. The pack_tokens routines were the only optimizations for the armv5te targets, so those targets will be removed after the test infrastructure has been updated. BUG=710 Change-Id: Ic785b167cd9f95eeff31c7c76b7b736c07fb30eb
This commit is contained in:
Родитель
bf44117d5f
Коммит
6eec73a747
|
@ -1,18 +0,0 @@
|
|||
REM Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
||||
REM
|
||||
REM Use of this source code is governed by a BSD-style license
|
||||
REM that can be found in the LICENSE file in the root of the source
|
||||
REM tree. An additional intellectual property rights grant can be found
|
||||
REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
REM Arguments:
|
||||
REM %1 - Relative path to the directory containing the vp8 and vpx_scale
|
||||
REM source directories.
|
||||
REM %2 - Path to obj_int_extract.exe.
|
||||
cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
|
||||
%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
|
||||
|
||||
cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
|
||||
%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
|
|
@ -43,7 +43,7 @@
|
|||
# will remove any NEON dependency.
|
||||
|
||||
# To change to building armeabi, run ./libvpx/configure again, but with
|
||||
# --target=arm5te-android-gcc and modify the Application.mk file to
|
||||
# --target=armv6-android-gcc and modify the Application.mk file to
|
||||
# set APP_ABI := armeabi
|
||||
#
|
||||
# Running ndk-build will build libvpx and include it in your project.
|
||||
|
@ -60,7 +60,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
|||
include $(CONFIG_DIR)libs-armv7-android-gcc.mk
|
||||
LOCAL_ARM_MODE := arm
|
||||
else ifeq ($(TARGET_ARCH_ABI),armeabi)
|
||||
include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
|
||||
include $(CONFIG_DIR)libs-armv6-android-gcc.mk
|
||||
LOCAL_ARM_MODE := arm
|
||||
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
|
||||
include $(CONFIG_DIR)libs-armv8-android-gcc.mk
|
||||
|
@ -91,51 +91,8 @@ LOCAL_CFLAGS := -O3
|
|||
# like x86inc.asm and x86_abi_support.asm
|
||||
LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Template : asm_offsets_template
|
||||
# Arguments : 1: assembly offsets file to be created
|
||||
# 2: c file to base assembly offsets on
|
||||
# Returns : None
|
||||
# Usage : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>))
|
||||
# Rationale : Create offsets at compile time for structures that are
|
||||
# defined in c, but used in assembly functions.
|
||||
# -----------------------------------------------------------------------------
|
||||
define asm_offsets_template
|
||||
|
||||
_SRC:=$(2)
|
||||
_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
|
||||
|
||||
_FLAGS = $$($$(my)CFLAGS) \
|
||||
$$(call get-src-file-target-cflags,$(2)) \
|
||||
$$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
|
||||
$$(LOCAL_CFLAGS) \
|
||||
$$(NDK_APP_CFLAGS) \
|
||||
$$(call host-c-includes,$$($(my)C_INCLUDES)) \
|
||||
-DINLINE_ASM \
|
||||
-S \
|
||||
|
||||
_TEXT = "Compile $$(call get-src-file-text,$(2))"
|
||||
_CC = $$(TARGET_CC)
|
||||
|
||||
$$(eval $$(call ev-build-file))
|
||||
|
||||
$(1) : $$(_OBJ) $(2)
|
||||
@mkdir -p $$(dir $$@)
|
||||
@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
|
||||
endef
|
||||
|
||||
# Use ads2gas script to convert from RVCT format to GAS format. This
|
||||
# puts the processed file under $(ASM_CNV_PATH). Local clean rule
|
||||
# to handle removing these
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
|
||||
endif
|
||||
ifeq ($(HAVE_NEON_ASM), yes)
|
||||
ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
|
||||
endif
|
||||
|
||||
.PRECIOUS: %.asm.s
|
||||
$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
|
||||
$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
|
||||
@mkdir -p $(dir $@)
|
||||
@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
|
||||
|
||||
|
@ -224,24 +181,11 @@ endif
|
|||
clean:
|
||||
@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
|
||||
@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
|
||||
@$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
|
||||
@$(RM) -r $(ASM_CNV_PATH)
|
||||
@$(RM) $(CLEAN-OBJS)
|
||||
|
||||
include $(BUILD_SHARED_LIBRARY)
|
||||
|
||||
ifeq ($(HAVE_NEON), yes)
|
||||
$(eval $(call asm_offsets_template,\
|
||||
$(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
|
||||
$(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_VP8_ENCODER), yes)
|
||||
$(eval $(call asm_offsets_template,\
|
||||
$(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
|
||||
$(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
|
||||
$(call import-module,cpufeatures)
|
||||
endif
|
||||
|
|
|
@ -216,14 +216,6 @@ else
|
|||
$(qexec)cp $< $@
|
||||
endif
|
||||
|
||||
#
|
||||
# Rule to extract assembly constants from C sources
|
||||
#
|
||||
obj_int_extract: build/make/obj_int_extract.c
|
||||
$(if $(quiet),@echo " [HOSTCC] $@")
|
||||
$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
|
||||
CLEAN-OBJS += obj_int_extract
|
||||
|
||||
#
|
||||
# Utility functions
|
||||
#
|
||||
|
@ -424,11 +416,7 @@ ifneq ($(call enabled,DIST-SRCS),)
|
|||
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/x86-msvs/obj_int_extract.bat
|
||||
DIST-SRCS-$(CONFIG_MSVS) += build/arm-msvs/obj_int_extract.bat
|
||||
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
|
||||
# Include obj_int_extract if we use offsets from *_asm_*_offsets
|
||||
DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64) += build/make/obj_int_extract.c
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl
|
||||
DIST-SRCS-$(ARCH_ARM) += build/make/ads2armasm_ms.pl
|
||||
|
|
|
@ -822,18 +822,12 @@ process_common_toolchain() {
|
|||
soft_enable neon
|
||||
soft_enable neon_asm
|
||||
soft_enable media
|
||||
soft_enable edsp
|
||||
soft_enable fast_unaligned
|
||||
;;
|
||||
armv6)
|
||||
soft_enable media
|
||||
soft_enable edsp
|
||||
soft_enable fast_unaligned
|
||||
;;
|
||||
armv5te)
|
||||
soft_enable edsp
|
||||
disable_feature fast_unaligned
|
||||
;;
|
||||
esac
|
||||
|
||||
asm_conversion_cmd="cat"
|
||||
|
|
|
@ -295,22 +295,7 @@ generate_vcproj() {
|
|||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$debug_runtime" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="1" \
|
||||
$warn_64bit \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \
|
||||
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="0" \
|
||||
|
@ -347,11 +332,6 @@ generate_vcproj() {
|
|||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
|
@ -400,24 +380,7 @@ generate_vcproj() {
|
|||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="2" \
|
||||
FavorSizeorSpeed="1" \
|
||||
AdditionalIncludeDirectories="$incs" \
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
|
||||
RuntimeLibrary="$release_runtime" \
|
||||
UsePrecompiledHeader="0" \
|
||||
WarningLevel="3" \
|
||||
DebugInformationFormat="0" \
|
||||
$warn_64bit \
|
||||
;;
|
||||
vpx)
|
||||
tag Tool \
|
||||
Name="VCPreBuildEventTool" \
|
||||
CommandLine="call obj_int_extract.bat "$src_path_bare" $plat_no_ws\\\$(ConfigurationName)" \
|
||||
|
||||
tag Tool \
|
||||
Name="VCCLCompilerTool" \
|
||||
Optimization="2" \
|
||||
|
@ -456,11 +419,6 @@ generate_vcproj() {
|
|||
case "$target" in
|
||||
x86*)
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
GenerateDebugInformation="true" \
|
||||
;;
|
||||
*)
|
||||
tag Tool \
|
||||
Name="VCLinkerTool" \
|
||||
|
|
|
@ -262,15 +262,9 @@ case "$target" in
|
|||
asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)""
|
||||
;;
|
||||
arm*)
|
||||
platforms[0]="ARM"
|
||||
asm_Debug_cmdline="armasm -nologo "%(FullPath)""
|
||||
asm_Release_cmdline="armasm -nologo "%(FullPath)""
|
||||
if [ "$name" = "obj_int_extract" ]; then
|
||||
# We don't want to build this tool for the target architecture,
|
||||
# but for an architecture we can run locally during the build.
|
||||
platforms[0]="Win32"
|
||||
else
|
||||
platforms[0]="ARM"
|
||||
fi
|
||||
;;
|
||||
*) die "Unsupported target $target!"
|
||||
;;
|
||||
|
@ -400,23 +394,13 @@ generate_vcxproj() {
|
|||
if [ "$hostplat" == "ARM" ]; then
|
||||
hostplat=Win32
|
||||
fi
|
||||
open_tag PreBuildEvent
|
||||
tag_content Command "call obj_int_extract.bat "$src_path_bare" $hostplat\\\$(Configuration)"
|
||||
close_tag PreBuildEvent
|
||||
fi
|
||||
open_tag ClCompile
|
||||
if [ "$config" = "Debug" ]; then
|
||||
opt=Disabled
|
||||
runtime=$debug_runtime
|
||||
curlibs=$debug_libs
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
debug=DEBUG
|
||||
;;
|
||||
*)
|
||||
debug=_DEBUG
|
||||
;;
|
||||
esac
|
||||
debug=_DEBUG
|
||||
else
|
||||
opt=MaxSpeed
|
||||
runtime=$release_runtime
|
||||
|
@ -424,14 +408,7 @@ generate_vcxproj() {
|
|||
tag_content FavorSizeOrSpeed Speed
|
||||
debug=NDEBUG
|
||||
fi
|
||||
case "$name" in
|
||||
obj_int_extract)
|
||||
extradefines=";_CONSOLE"
|
||||
;;
|
||||
*)
|
||||
extradefines=";$defines"
|
||||
;;
|
||||
esac
|
||||
extradefines=";$defines"
|
||||
tag_content Optimization $opt
|
||||
tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
|
||||
tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
|
||||
|
@ -451,10 +428,6 @@ generate_vcxproj() {
|
|||
case "$proj_kind" in
|
||||
exe)
|
||||
open_tag Link
|
||||
if [ "$name" != "obj_int_extract" ]; then
|
||||
tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
|
||||
tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
|
||||
fi
|
||||
tag_content GenerateDebugInformation true
|
||||
# Console is the default normally, but if
|
||||
# AppContainerApplication is set, we need to override it.
|
||||
|
|
|
@ -379,14 +379,11 @@ if ($opts{arch} eq 'x86') {
|
|||
}
|
||||
close CONFIG_FILE;
|
||||
mips;
|
||||
} elsif ($opts{arch} eq 'armv5te') {
|
||||
@ALL_ARCHS = filter(qw/edsp/);
|
||||
arm;
|
||||
} elsif ($opts{arch} eq 'armv6') {
|
||||
@ALL_ARCHS = filter(qw/edsp media/);
|
||||
@ALL_ARCHS = filter(qw/media/);
|
||||
arm;
|
||||
} elsif ($opts{arch} eq 'armv7') {
|
||||
@ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
|
||||
@ALL_ARCHS = filter(qw/media neon_asm neon/);
|
||||
@REQUIRES = filter(keys %required ? keys %required : qw/media/);
|
||||
&require(@REQUIRES);
|
||||
arm;
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
REM Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
REM
|
||||
REM Use of this source code is governed by a BSD-style license
|
||||
REM that can be found in the LICENSE file in the root of the source
|
||||
REM tree. An additional intellectual property rights grant can be found
|
||||
REM in the file PATENTS. All contributing project authors may
|
||||
REM be found in the AUTHORS file in the root of the source tree.
|
||||
echo on
|
||||
|
||||
REM Arguments:
|
||||
REM %1 - Relative path to the directory containing the vp8 source directory.
|
||||
REM %2 - Path to obj_int_extract.exe.
|
||||
cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
|
||||
%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
|
||||
|
|
@ -451,8 +451,6 @@ process_targets() {
|
|||
enabled child || write_common_config_banner
|
||||
enabled universal || write_common_target_config_h ${BUILD_PFX}vpx_config.h
|
||||
|
||||
# TODO: add host tools target (obj_int_extract, etc)
|
||||
|
||||
# For fat binaries, call configure recursively to configure for each
|
||||
# binary architecture to be included.
|
||||
if enabled universal; then
|
||||
|
|
50
libs.mk
50
libs.mk
|
@ -17,32 +17,6 @@ else
|
|||
ASM:=.asm
|
||||
endif
|
||||
|
||||
#
|
||||
# Calculate platform- and compiler-specific offsets for hand coded assembly
|
||||
#
|
||||
ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
|
||||
OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
|
||||
define asm_offsets_template
|
||||
$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
|
||||
@echo " [CREATE] $$@"
|
||||
$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
|
||||
$$(BUILD_PFX)$(2).S: $(2)
|
||||
CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
|
||||
endef
|
||||
else
|
||||
ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
|
||||
define asm_offsets_template
|
||||
$$(BUILD_PFX)$(1): obj_int_extract
|
||||
$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
|
||||
@echo " [CREATE] $$@"
|
||||
$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
|
||||
OBJS-yes += $$(BUILD_PFX)$(2).o
|
||||
CLEAN-OBJS += $$(BUILD_PFX)$(1)
|
||||
$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
|
||||
endef
|
||||
endif # rvct
|
||||
endif # !gcc
|
||||
|
||||
#
|
||||
# Rule to generate runtime cpu detection files
|
||||
#
|
||||
|
@ -212,26 +186,6 @@ CLEAN-OBJS += libvpx_srcs.txt
|
|||
ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
|
||||
ifeq ($(CONFIG_MSVS),yes)
|
||||
|
||||
obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
|
||||
@cp $^ $@
|
||||
|
||||
obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
|
||||
obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)$(GEN_VCPROJ) \
|
||||
--exe \
|
||||
--target=$(TOOLCHAIN) \
|
||||
--name=obj_int_extract \
|
||||
--ver=$(CONFIG_VS_VERSION) \
|
||||
--proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
|
||||
--src-path-bare="$(SRC_PATH_BARE)" \
|
||||
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
|
||||
--out=$@ $^ \
|
||||
-I. \
|
||||
-I"$(SRC_PATH_BARE)" \
|
||||
|
||||
PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
|
||||
|
||||
vpx.def: $(call enabled,CODEC_EXPORTS)
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
|
||||
|
@ -246,7 +200,7 @@ ASM_INCLUDES := \
|
|||
vpx_config.asm \
|
||||
vpx_ports/x86_abi_support.asm \
|
||||
|
||||
vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
|
||||
vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
|
||||
@echo " [CREATE] $@"
|
||||
$(qexec)$(GEN_VCPROJ) \
|
||||
$(if $(CONFIG_SHARED),--dll,--lib) \
|
||||
|
@ -377,7 +331,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm
|
|||
endif
|
||||
|
||||
#
|
||||
# Add assembler dependencies for configuration and offsets
|
||||
# Add assembler dependencies for configuration.
|
||||
#
|
||||
$(filter %.s.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
$(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
##
|
||||
|
||||
# libvpx reverse dependencies (targets that depend on libvpx)
|
||||
VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
|
||||
VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
|
||||
VPX_RDEPS=$(foreach vcp,\
|
||||
$(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)
|
||||
|
||||
|
@ -17,7 +17,6 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX))
|
|||
@echo " [CREATE] $@"
|
||||
$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
|
||||
$(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
|
||||
--dep=vpx:obj_int_extract \
|
||||
--dep=test_libvpx:gtest \
|
||||
--ver=$(CONFIG_VS_VERSION)\
|
||||
--out=$@ $^
|
||||
|
|
|
@ -181,12 +181,6 @@ INSTANTIATE_TEST_CASE_P(
|
|||
&vp8_regular_quantize_b_c)));
|
||||
#endif // HAVE_SSE4_1
|
||||
|
||||
#if HAVE_MEDIA
|
||||
INSTANTIATE_TEST_CASE_P(MEDIA, QuantizeTest,
|
||||
::testing::Values(make_tuple(&vp8_fast_quantize_b_armv6,
|
||||
&vp8_fast_quantize_b_c)));
|
||||
#endif // HAVE_MEDIA
|
||||
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
|
||||
::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
|
||||
|
|
|
@ -454,8 +454,7 @@ add_proto qw/void vp8_regular_quantize_b/, "struct block *, struct blockd *";
|
|||
specialize qw/vp8_regular_quantize_b sse2 sse4_1/;
|
||||
|
||||
add_proto qw/void vp8_fast_quantize_b/, "struct block *, struct blockd *";
|
||||
specialize qw/vp8_fast_quantize_b sse2 ssse3 media neon/;
|
||||
$vp8_fast_quantize_b_media=vp8_fast_quantize_b_armv6;
|
||||
specialize qw/vp8_fast_quantize_b sse2 ssse3 neon/;
|
||||
|
||||
#
|
||||
# Block subtraction
|
||||
|
@ -473,16 +472,13 @@ specialize qw/vp8_mbuverror mmx sse2/;
|
|||
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
|
||||
|
||||
add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
|
||||
specialize qw/vp8_subtract_b mmx sse2 media neon/;
|
||||
$vp8_subtract_b_media=vp8_subtract_b_armv6;
|
||||
specialize qw/vp8_subtract_b mmx sse2 neon/;
|
||||
|
||||
add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
|
||||
specialize qw/vp8_subtract_mby mmx sse2 media neon/;
|
||||
$vp8_subtract_mby_media=vp8_subtract_mby_armv6;
|
||||
specialize qw/vp8_subtract_mby mmx sse2 neon/;
|
||||
|
||||
add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
|
||||
specialize qw/vp8_subtract_mbuv mmx sse2 media neon/;
|
||||
$vp8_subtract_mbuv_media=vp8_subtract_mbuv_armv6;
|
||||
specialize qw/vp8_subtract_mbuv mmx sse2 neon/;
|
||||
|
||||
#
|
||||
# Motion search
|
||||
|
|
|
@ -1,310 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_start_encode|
|
||||
EXPORT |vp8_encode_bool|
|
||||
EXPORT |vp8_stop_encode|
|
||||
EXPORT |vp8_encode_value|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end]
|
||||
ldr r3, [r0, #vp8_writer_error]
|
||||
mov r1, $pos
|
||||
mov r0, $start
|
||||
bl vp8_validate_buffer_arm
|
||||
pop {r0-r3, r12, lr}
|
||||
MEND
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 unsigned char *source
|
||||
; r2 unsigned char *source_end
|
||||
|vp8_start_encode| PROC
|
||||
str r2, [r0, #vp8_writer_buffer_end]
|
||||
mov r12, #0
|
||||
mov r3, #255
|
||||
mvn r2, #23
|
||||
str r12, [r0, #vp8_writer_lowvalue]
|
||||
str r3, [r0, #vp8_writer_range]
|
||||
str r2, [r0, #vp8_writer_count]
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
str r1, [r0, #vp8_writer_buffer]
|
||||
bx lr
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 int bit
|
||||
; r2 int probability
|
||||
|vp8_encode_bool| PROC
|
||||
push {r4-r10, lr}
|
||||
|
||||
mov r4, r2
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
cmp r1, #0
|
||||
mul r6, r4, r7 ; ((range-1) * probability)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * probability) >> 8)
|
||||
|
||||
addne r2, r2, r4 ; if (bit) lowvalue += split
|
||||
subne r4, r5, r4 ; if (bit) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r1, [r7, r4]
|
||||
cmpge r1, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r1, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r1, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r1 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r10, pc}
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
|vp8_stop_encode| PROC
|
||||
push {r4-r10, lr}
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
mov r10, #32
|
||||
|
||||
stop_encode_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_se ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_se
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_se
|
||||
token_zero_while_loop_se
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_se
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r1, [r7, r4]
|
||||
cmpge r1, #0xff
|
||||
beq token_zero_while_loop_se
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_se
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r1, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r1, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r1 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_se
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r10, r10, #1
|
||||
bne stop_encode_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r10, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
; r0 BOOL_CODER *br
|
||||
; r1 int data
|
||||
; r2 int bits
|
||||
|vp8_encode_value| PROC
|
||||
push {r4-r12, lr}
|
||||
|
||||
mov r10, r2
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue]
|
||||
ldr r5, [r0, #vp8_writer_range]
|
||||
ldr r3, [r0, #vp8_writer_count]
|
||||
|
||||
rsb r4, r10, #32 ; 32-n
|
||||
|
||||
; v is kept in r1 during the token pack loop
|
||||
lsl r1, r1, r4 ; r1 = v << (32 - n)
|
||||
|
||||
encode_value_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r1, r1, #1 ; bit = v >> n
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bit) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bit) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_ev ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_ev
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_ev
|
||||
token_zero_while_loop_ev
|
||||
mov r9, #0
|
||||
strb r9, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_ev
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop_ev
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r9, [r7, r4] ; w->buffer[x]
|
||||
add r9, r9, #1
|
||||
strb r9, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_ev
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r9, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r9, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r9, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_ev
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r10, r10, #1
|
||||
bne encode_value_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
END
|
|
@ -1,317 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
; More precisely: $start must not be r1, r2 or r3, and $pos must not be
; r2 or r3, because those registers are overwritten below before the
; macro parameter is read.
; The callee receives r0 = $start, r1 = $pos, r2 = the word at
; [writer, #vp8_writer_buffer_end] and r3 = the word at
; [writer, #vp8_writer_error].
; The condition flags are not saved by this macro.
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument; r0 is still the writer here
|
||||
ldr r3, [r0, #vp8_writer_error] ; 4th argument; last read through the writer pointer
|
||||
mov r1, $pos ; 2nd argument
|
||||
mov r0, $start ; 1st argument; replaces the writer pointer, so it comes last
|
||||
bl vp8_validate_buffer_arm ; external routine, not defined in this file
|
||||
pop {r0-r3, r12, lr} ; restore the caller's r0-r3/r12/lr (r0 is the writer again)
|
||||
MEND
|
||||
|
||||
|
||||
; r0 vp8_writer *w
|
||||
; r1 const TOKENEXTRA *p
|
||||
; r2 int xcount
|
||||
; r3 vp8_coef_encodings
|
||||
; s0 vp8_extra_bits
|
||||
; s1 vp8_coef_tree
; (s0/s1 are the stack-passed arguments; once the prologue below has
; run they are read from [sp, #56] and [sp, #60].)
;
; Stack frame used by this routine after the prologue:
;   [sp, #0]   stop (address just past the last TOKENEXTRA to pack)
;   [sp, #4]   b->tree, stored later by the extra-bits path
;   [sp, #8]   vp8_coef_encodings
;   [sp, #12]  not referenced by this routine
;   [sp, #16] .. [sp, #55]  the ten registers saved by the push
;   [sp, #56]  vp8_extra_bits     [sp, #60]  vp8_coef_tree
; Registers held across the packing loop:
;   r0 = w, r1 = p, r2 = lowvalue, r3 = count, r5 = range
|
||||
|vp8cx_pack_tokens_armv5| PROC
|
||||
push {r4-r12, lr} ; 10 registers = 40 bytes
|
||||
sub sp, sp, #16 ; 16 bytes of locals (layout listed above)
|
||||
|
||||
; Add size of xcount * sizeof (TOKENEXTRA) to get stop
|
||||
; sizeof (TOKENEXTRA) is 8
; NOTE(review): the element size is hard-coded here as a shift by 3,
; while the loop advances p with TOKENEXTRA_SZ; the two must agree.
|
||||
add r2, r1, r2, lsl #3 ; stop = p + xcount*sizeof(TOKENEXTRA)
|
||||
str r2, [sp, #0] ; save stop; r2 is reused for lowvalue below
|
||||
str r3, [sp, #8] ; save vp8_coef_encodings
|
||||
ldr r2, [r0, #vp8_writer_lowvalue] ; r2 = w->lowvalue
|
||||
ldr r5, [r0, #vp8_writer_range] ; r5 = w->range
|
||||
ldr r3, [r0, #vp8_writer_count] ; r3 = w->count
|
||||
b check_p_lt_stop ; enter the while loop at its condition test
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #8] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #60] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #60] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #56] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
add sp, sp, #16
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
END
|
|
@ -1,352 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_mb_row_tokens_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
; More precisely: $start must not be r1, r2 or r3, and $pos must not be
; r2 or r3, because those registers are overwritten below before the
; macro parameter is read.
; The callee receives r0 = $start, r1 = $pos, r2 = the word at
; [writer, #vp8_writer_buffer_end] and r3 = the word at
; [writer, #vp8_writer_error].
; The condition flags are not saved by this macro.
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument; r0 is still the writer here
|
||||
ldr r3, [r0, #vp8_writer_error] ; 4th argument; last read through the writer pointer
|
||||
mov r1, $pos ; 2nd argument
|
||||
mov r0, $start ; 1st argument; replaces the writer pointer, so it comes last
|
||||
bl vp8_validate_buffer_arm ; external routine, not defined in this file
|
||||
pop {r0-r3, r12, lr} ; restore the caller's r0-r3/r12/lr (r0 is the writer again)
|
||||
MEND
|
||||
|
||||
; r0 VP8_COMP *cpi
|
||||
; r1 vp8_writer *w
|
||||
; r2 vp8_coef_encodings
|
||||
; r3 vp8_extra_bits
|
||||
; s0 vp8_coef_tree
; (s0 is the stack-passed argument; once the prologue below has run it
; is read from [sp, #64].)
;
; Stack frame used by this routine after the prologue:
;   [sp, #0]   stop pointer of the token list being packed
;   [sp, #4]   b->tree, stored later by the extra-bits path
;   [sp, #8]   vp8_extra_bits
;   [sp, #12]  rows still to process (initialised to mb_rows)
;   [sp, #16]  address of the current token-list element
;   [sp, #20]  vp8_coef_encodings
;   [sp, #24] .. [sp, #63]  the ten registers saved by the push
;   [sp, #64]  vp8_coef_tree
; Registers held across the packing loop:
;   r0 = w, r1 = p, r2 = lowvalue, r3 = count, r5 = range
|
||||
|
||||
|vp8cx_pack_mb_row_tokens_armv5| PROC
|
||||
push {r4-r12, lr} ; 10 registers = 40 bytes
|
||||
sub sp, sp, #24 ; 24 bytes of locals (layout listed above)
|
||||
|
||||
; Compute address of cpi->common.mb_rows
|
||||
ldr r4, _VP8_COMP_common_ ; r4 = constant stored at that label (after ENDP)
|
||||
ldr r6, _VP8_COMMON_MBrows_ ; r6 = constant stored at that label (after ENDP)
|
||||
add r4, r0, r4 ; r4 = cpi + vp8_comp_common
|
||||
|
||||
ldr r5, [r4, r6] ; load up mb_rows
|
||||
|
||||
str r2, [sp, #20] ; save vp8_coef_encodings
|
||||
str r5, [sp, #12] ; save mb_rows
|
||||
str r3, [sp, #8] ; save vp8_extra_bits
|
||||
|
||||
ldr r4, _VP8_COMP_tplist_ ; r4 = constant stored at that label (after ENDP)
|
||||
add r4, r0, r4 ; r4 = cpi + vp8_comp_tplist
|
||||
ldr r7, [r4, #0] ; dereference cpi->tp_list
|
||||
|
||||
mov r0, r1 ; keep same as other loops: r0 = w from here on, cpi is dropped
|
||||
|
||||
ldr r2, [r0, #vp8_writer_lowvalue] ; r2 = w->lowvalue
|
||||
ldr r5, [r0, #vp8_writer_range] ; r5 = w->range (reuses r5; mb_rows is already saved)
|
||||
ldr r3, [r0, #vp8_writer_count] ; r3 = w->count
|
||||
|
||||
; One pass per row; r7 = address of the current token-list element.
; NOTE(review): there is no test before the first pass, and the row
; counter is only checked after a pass, so the body runs at least once
; even if mb_rows is 0 - confirm callers never pass that.
mb_row_loop
|
||||
|
||||
ldr r1, [r7, #tokenlist_start] ; p = first token of this row
|
||||
ldr r9, [r7, #tokenlist_stop] ; r9 = end of this row's tokens
|
||||
str r9, [sp, #0] ; save stop for later comparison
|
||||
str r7, [sp, #16] ; tokenlist address for next time
|
||||
|
||||
b check_p_lt_stop ; enter the while loop at its condition test
|
||||
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #20] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #64] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #64] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #8] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
ldr r6, [sp, #12] ; mb_rows
|
||||
ldr r7, [sp, #16] ; tokenlist address
|
||||
subs r6, r6, #1
|
||||
add r7, r7, #TOKENLIST_SZ ; next element in the array
|
||||
str r6, [sp, #12]
|
||||
bne mb_row_loop
|
||||
|
||||
str r2, [r0, #vp8_writer_lowvalue]
|
||||
str r5, [r0, #vp8_writer_range]
|
||||
str r3, [r0, #vp8_writer_count]
|
||||
add sp, sp, #24
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
; Word-sized constants for vp8cx_pack_mb_row_tokens_armv5. The PROC
; reads each one with "ldr rX, <label>" and uses it as a byte offset.
; NOTE(review): the symbols after DCD are not defined in this file;
; presumably they come from the INCLUDEd vp8_asm_enc_offsets.asm - confirm.
;
; added to the cpi pointer
_VP8_COMP_common_
|
||||
DCD vp8_comp_common
|
||||
; used as the index from (cpi + vp8_comp_common) when loading mb_rows
_VP8_COMMON_MBrows_
|
||||
DCD vp8_common_mb_rows
|
||||
; added to the cpi pointer; the word found there is the token-list pointer
_VP8_COMP_tplist_
|
||||
DCD vp8_comp_tplist
|
||||
|
||||
END
|
|
@ -1,471 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
|
||||
IMPORT |vp8_validate_buffer_arm|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA |.text|, CODE, READONLY
|
||||
|
||||
; macro for validating write buffer position
|
||||
; needs vp8_writer in r0
|
||||
; start shall not be in r1
; More precisely: $start must not be r1, r2 or r3, and $pos must not be
; r2 or r3, because those registers are overwritten below before the
; macro parameter is read.
; The callee receives r0 = $start, r1 = $pos, r2 = the word at
; [writer, #vp8_writer_buffer_end] and r3 = the word at
; [writer, #vp8_writer_error].
; The condition flags are not saved by this macro.
|
||||
MACRO
|
||||
VALIDATE_POS $start, $pos
|
||||
push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
|
||||
ldr r2, [r0, #vp8_writer_buffer_end] ; 3rd argument; r0 is still the writer here
|
||||
ldr r3, [r0, #vp8_writer_error] ; 4th argument; last read through the writer pointer
|
||||
mov r1, $pos ; 2nd argument
|
||||
mov r0, $start ; 1st argument; replaces the writer pointer, so it comes last
|
||||
bl vp8_validate_buffer_arm ; external routine, not defined in this file
|
||||
pop {r0-r3, r12, lr} ; restore the caller's r0-r3/r12/lr (r0 is the writer again)
|
||||
MEND
|
||||
|
||||
; r0 VP8_COMP *cpi
|
||||
; r1 unsigned char *cx_data
|
||||
; r2 const unsigned char *cx_data_end
|
||||
; r3 int num_part
|
||||
; s0 vp8_coef_encodings
|
||||
; s1 vp8_extra_bits,
|
||||
; s2 const vp8_tree_index *
; (s0-s2 are the stack-passed arguments; once the prologue below has
; run they are read from [sp, #80], [sp, #84] and [sp, #88]; the value
; at [sp, #88] is used as vp8_coef_tree.)
;
; Stack frame used by this routine after the prologue:
;   [sp, #0]   stop pointer of the token list being packed
;   [sp, #4]   b->tree, stored later by the extra-bits path
;   [sp, #8]   cx_data_end
;   [sp, #12]  row counter for the partition being written
;   [sp, #16]  address of the current token-list element
;   [sp, #20]  num_part
;   [sp, #24]  ptr, the output position (starts at cx_data)
;   [sp, #28]  i, the partition index
;   [sp, #32]  token-list element at which the current partition starts
;   [sp, #36]  mb_rows
;   [sp, #40] .. [sp, #79]  the ten registers saved by the push
|
||||
|
||||
|vp8cx_pack_tokens_into_partitions_armv5| PROC
|
||||
push {r4-r12, lr} ; 10 registers = 40 bytes
|
||||
sub sp, sp, #40 ; 40 bytes of locals (layout listed above)
|
||||
|
||||
; Compute address of cpi->common.mb_rows
|
||||
ldr r4, _VP8_COMP_common_ ; r4 = constant stored at that label (after ENDP)
|
||||
ldr r6, _VP8_COMMON_MBrows_ ; r6 = constant stored at that label (after ENDP)
|
||||
add r4, r0, r4 ; r4 = cpi + vp8_comp_common
|
||||
|
||||
ldr r5, [r4, r6] ; load up mb_rows
|
||||
|
||||
str r5, [sp, #36] ; save mb_rows
|
||||
str r1, [sp, #24] ; save ptr = cx_data
|
||||
str r3, [sp, #20] ; save num_part
|
||||
str r2, [sp, #8] ; save cx_data_end
|
||||
|
||||
ldr r4, _VP8_COMP_tplist_ ; r4 = constant stored at that label (after ENDP)
|
||||
add r4, r0, r4 ; r4 = cpi + vp8_comp_tplist
|
||||
ldr r7, [r4, #0] ; dereference cpi->tp_list
|
||||
str r7, [sp, #32] ; store start of cpi->tp_list
|
||||
|
||||
ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
|
||||
add r0, r0, r11 ; r0 = cpi + vp8_comp_bc; numparts_loop adds vp8_writer_sz before first use
|
||||
|
||||
mov r11, #0 ; r11 = i = 0; numparts_loop reads it on its first pass
|
||||
str r11, [sp, #28] ; i
|
||||
|
||||
; Top of the per-partition loop. On entry: r0 = writer used by the
; previous pass (or cpi + vp8_comp_bc on the first pass), r11 = i,
; r7 = token-list element at which this partition starts.
numparts_loop
|
||||
ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
|
||||
add r0, r2 ; bc[i + 1]
|
||||
|
||||
ldr r10, [sp, #24] ; ptr
|
||||
ldr r5, [sp, #36] ; move mb_rows to the counting section
|
||||
subs r5, r5, r11 ; move start point with each partition
; The flags set by this subs are consumed by the "ble end_partition"
; further down; none of the ldr/str/mov/mvn instructions in between
; updates the flags.
|
||||
; mb_rows starts at i
|
||||
str r5, [sp, #12] ; row counter for this partition = mb_rows - i
|
||||
|
||||
; Reset all of the VP8 Writer data for each partition that
|
||||
; is processed.
|
||||
; start_encode
; lowvalue, range and count are only set up in r2, r5 and r3 here;
; pos, buffer and buffer_end are written to the writer structure.
|
||||
|
||||
ldr r3, [sp, #8] ; r3 = cx_data_end
|
||||
str r3, [r0, #vp8_writer_buffer_end]
|
||||
|
||||
mov r2, #0 ; vp8_writer_lowvalue
|
||||
mov r5, #255 ; vp8_writer_range
|
||||
mvn r3, #23 ; vp8_writer_count (= -24)
|
||||
|
||||
str r2, [r0, #vp8_writer_pos] ; pos = 0 (r2 is zero here)
|
||||
str r10, [r0, #vp8_writer_buffer] ; buffer = ptr
|
||||
|
||||
ble end_partition ; if (mb_rows <= 0) end partition
|
||||
|
||||
; One pass per row of this partition; r7 = address of the current
; token-list element.
mb_row_loop
|
||||
|
||||
ldr r1, [r7, #tokenlist_start] ; p = first token of this row
|
||||
ldr r9, [r7, #tokenlist_stop] ; r9 = end of this row's tokens
|
||||
str r9, [sp, #0] ; save stop for later comparison
|
||||
str r7, [sp, #16] ; tokenlist address for next time
|
||||
|
||||
b check_p_lt_stop ; enter the while loop at its condition test
|
||||
|
||||
; actual work gets done here!
|
||||
|
||||
while_p_lt_stop
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r4, [sp, #80] ; vp8_coef_encodings
|
||||
mov lr, #0
|
||||
add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
|
||||
ldr r9, [r1, #tokenextra_context_tree] ; pp
|
||||
|
||||
ldrb r7, [r1, #tokenextra_skip_eob_node]
|
||||
|
||||
ldr r6, [r4, #vp8_token_value] ; v
|
||||
ldr r8, [r4, #vp8_token_len] ; n
|
||||
|
||||
; vp8 specific skip_eob_node
|
||||
cmp r7, #0
|
||||
movne lr, #2 ; i = 2
|
||||
subne r8, r8, #1 ; --n
|
||||
|
||||
rsb r4, r8, #32 ; 32-n
|
||||
ldr r10, [sp, #88] ; vp8_coef_tree
|
||||
|
||||
; v is kept in r12 during the token pack loop
|
||||
lsl r12, r6, r4 ; r12 = v << 32 - n
|
||||
|
||||
; loop start
|
||||
token_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
; Decisions are made based on the bit value shifted
|
||||
; off of v, so set a flag here based on this.
|
||||
; This value is referred to as "bb"
|
||||
lsls r12, r12, #1 ; bb = v >> n
|
||||
mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
|
||||
|
||||
; bb can only be 0 or 1. So only execute this statement
|
||||
; if bb == 1, otherwise it will act like i + 0
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start
|
||||
token_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
; r10 is used earlier in the loop, but r10 is used as
|
||||
; temp variable here. So after r10 is used, reload
|
||||
; vp8_coef_tree_dcd into r10
|
||||
ldr r10, [sp, #88] ; vp8_coef_tree
|
||||
|
||||
token_count_lt_zero
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne token_loop
|
||||
|
||||
ldrb r6, [r1, #tokenextra_token] ; t
|
||||
ldr r7, [sp, #84] ; vp8_extra_bits
|
||||
; Add t * sizeof (vp8_extra_bit_struct) to get the desired
|
||||
; element. Here vp8_extra_bit_struct == 16
|
||||
add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
|
||||
|
||||
ldr r4, [r12, #vp8_extra_bit_struct_base_val]
|
||||
cmp r4, #0
|
||||
beq skip_extra_bits
|
||||
|
||||
; if( b->base_val)
|
||||
ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
|
||||
ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
|
||||
cmp r8, #0 ; if( L)
|
||||
beq no_extra_bits
|
||||
|
||||
ldr r9, [r12, #vp8_extra_bit_struct_prob]
|
||||
asr r7, lr, #1 ; v=e>>1
|
||||
|
||||
ldr r10, [r12, #vp8_extra_bit_struct_tree]
|
||||
str r10, [sp, #4] ; b->tree
|
||||
|
||||
rsb r4, r8, #32
|
||||
lsl r12, r7, r4
|
||||
|
||||
mov lr, #0 ; i = 0
|
||||
|
||||
extra_bits_loop
|
||||
ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
|
||||
sub r7, r5, #1 ; range-1
|
||||
lsls r12, r12, #1 ; v >> n
|
||||
mul r6, r4, r7 ; (range-1) * pp[i>>1]
|
||||
addcs lr, lr, #1 ; i + bb
|
||||
|
||||
mov r7, #1
|
||||
ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
|
||||
add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
|
||||
|
||||
addcs r2, r2, r4 ; if (bb) lowvalue += split
|
||||
subcs r4, r5, r4 ; if (bb) range = range-split
|
||||
|
||||
clz r6, r4
|
||||
sub r6, r6, #24
|
||||
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi extra_count_lt_zero ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset= shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl extra_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos - 1
|
||||
b extra_zero_while_start
|
||||
extra_zero_while_loop
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
extra_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq extra_zero_while_loop
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4]
|
||||
extra_high_bit_not_set
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
|
||||
ldr r10, [sp, #4] ; b->tree
|
||||
extra_count_lt_zero
|
||||
lsl r2, r2, r6
|
||||
|
||||
subs r8, r8, #1 ; --n
|
||||
bne extra_bits_loop ; while (n)
|
||||
|
||||
no_extra_bits
|
||||
ldr lr, [r1, #4] ; e = p->Extra
|
||||
add r4, r5, #1 ; range + 1
|
||||
tst lr, #1
|
||||
lsr r4, r4, #1 ; split = (range + 1) >> 1
|
||||
addne r2, r2, r4 ; lowvalue += split
|
||||
subne r4, r5, r4 ; range = range-split
|
||||
tst r2, #0x80000000 ; lowvalue & 0x80000000
|
||||
lsl r5, r4, #1 ; range <<= 1
|
||||
beq end_high_bit_not_set
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mov r7, #0
|
||||
sub r4, r4, #1
|
||||
b end_zero_while_start
|
||||
end_zero_while_loop
|
||||
strb r7, [r6, r4]
|
||||
sub r4, r4, #1 ; x--
|
||||
end_zero_while_start
|
||||
cmp r4, #0
|
||||
ldrge r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r12, [r6, r4]
|
||||
cmpge r12, #0xff
|
||||
beq end_zero_while_loop
|
||||
|
||||
ldr r6, [r0, #vp8_writer_buffer]
|
||||
ldrb r7, [r6, r4]
|
||||
add r7, r7, #1
|
||||
strb r7, [r6, r4]
|
||||
end_high_bit_not_set
|
||||
adds r3, r3, #1 ; ++count
|
||||
lsl r2, r2, #1 ; lowvalue <<= 1
|
||||
bne end_count_zero
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos]
|
||||
mvn r3, #7 ; count = -8
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
lsr r6, r2, #24 ; lowvalue >> 24
|
||||
add r12, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r12, [r0, #vp8_writer_pos]
|
||||
|
||||
VALIDATE_POS r7, r12 ; validate_buffer at pos
|
||||
|
||||
strb r6, [r7, r4]
|
||||
end_count_zero
|
||||
skip_extra_bits
|
||||
add r1, r1, #TOKENEXTRA_SZ ; ++p
|
||||
check_p_lt_stop
|
||||
ldr r4, [sp, #0] ; stop
|
||||
cmp r1, r4 ; while( p < stop)
|
||||
bcc while_p_lt_stop
|
||||
|
||||
ldr r10, [sp, #20] ; num_parts
|
||||
mov r1, #TOKENLIST_SZ
|
||||
mul r1, r10, r1
|
||||
|
||||
ldr r6, [sp, #12] ; mb_rows
|
||||
ldr r7, [sp, #16] ; tokenlist address
|
||||
subs r6, r6, r10
|
||||
add r7, r7, r1 ; next element in the array
|
||||
str r6, [sp, #12]
|
||||
bgt mb_row_loop
|
||||
|
||||
end_partition
|
||||
mov r12, #32
|
||||
|
||||
stop_encode_loop
|
||||
sub r7, r5, #1 ; range-1
|
||||
|
||||
mov r4, r7, lsl #7 ; ((range-1) * 128)
|
||||
|
||||
mov r7, #1
|
||||
add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
|
||||
|
||||
; Counting the leading zeros is used to normalize range.
|
||||
clz r6, r4
|
||||
sub r6, r6, #24 ; shift
|
||||
|
||||
; Flag is set on the sum of count. This flag is used later
|
||||
; to determine if count >= 0
|
||||
adds r3, r3, r6 ; count += shift
|
||||
lsl r5, r4, r6 ; range <<= shift
|
||||
bmi token_count_lt_zero_se ; if(count >= 0)
|
||||
|
||||
sub r6, r6, r3 ; offset = shift - count
|
||||
sub r4, r6, #1 ; offset-1
|
||||
lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
|
||||
bpl token_high_bit_not_set_se
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; x
|
||||
sub r4, r4, #1 ; x = w->pos-1
|
||||
b token_zero_while_start_se
|
||||
token_zero_while_loop_se
|
||||
mov r10, #0
|
||||
strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
|
||||
sub r4, r4, #1 ; x--
|
||||
token_zero_while_start_se
|
||||
cmp r4, #0
|
||||
ldrge r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r11, [r7, r4]
|
||||
cmpge r11, #0xff
|
||||
beq token_zero_while_loop_se
|
||||
|
||||
ldr r7, [r0, #vp8_writer_buffer]
|
||||
ldrb r10, [r7, r4] ; w->buffer[x]
|
||||
add r10, r10, #1
|
||||
strb r10, [r7, r4] ; w->buffer[x] + 1
|
||||
token_high_bit_not_set_se
|
||||
rsb r4, r6, #24 ; 24-offset
|
||||
ldr r10, [r0, #vp8_writer_buffer]
|
||||
lsr r7, r2, r4 ; lowvalue >> (24-offset)
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
lsl r2, r2, r6 ; lowvalue <<= offset
|
||||
mov r6, r3 ; shift = count
|
||||
add r11, r4, #1 ; w->pos++
|
||||
bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
|
||||
str r11, [r0, #vp8_writer_pos]
|
||||
sub r3, r3, #8 ; count -= 8
|
||||
|
||||
VALIDATE_POS r10, r11 ; validate_buffer at pos
|
||||
|
||||
strb r7, [r10, r4] ; w->buffer[w->pos++]
|
||||
|
||||
token_count_lt_zero_se
|
||||
lsl r2, r2, r6 ; lowvalue <<= shift
|
||||
|
||||
subs r12, r12, #1
|
||||
bne stop_encode_loop
|
||||
|
||||
ldr r4, [r0, #vp8_writer_pos] ; w->pos
|
||||
ldr r12, [sp, #24] ; ptr
|
||||
add r12, r12, r4 ; ptr += w->pos
|
||||
str r12, [sp, #24]
|
||||
|
||||
ldr r11, [sp, #28] ; i
|
||||
ldr r10, [sp, #20] ; num_parts
|
||||
|
||||
add r11, r11, #1 ; i++
|
||||
str r11, [sp, #28]
|
||||
|
||||
ldr r7, [sp, #32] ; cpi->tp_list[i]
|
||||
mov r1, #TOKENLIST_SZ
|
||||
add r7, r7, r1 ; next element in cpi->tp_list
|
||||
str r7, [sp, #32] ; cpi->tp_list[i+1]
|
||||
|
||||
cmp r10, r11
|
||||
bgt numparts_loop
|
||||
|
||||
add sp, sp, #40
|
||||
pop {r4-r12, pc}
|
||||
ENDP
|
||||
|
||||
; Word-sized constants for vp8cx_pack_tokens_into_partitions_armv5. The
; PROC reads each one with "ldr rX, <label>".
; NOTE(review): the symbols after DCD are not defined in this file;
; presumably they come from the INCLUDEd vp8_asm_enc_offsets.asm - confirm.
;
; added to the cpi pointer
_VP8_COMP_common_
|
||||
DCD vp8_comp_common
|
||||
; used as the index from (cpi + vp8_comp_common) when loading mb_rows
_VP8_COMMON_MBrows_
|
||||
DCD vp8_common_mb_rows
|
||||
; added to the cpi pointer; the word found there is the token-list pointer
_VP8_COMP_tplist_
|
||||
DCD vp8_comp_tplist
|
||||
; added to the cpi pointer to form the initial writer pointer in r0
_VP8_COMP_bc_
|
||||
DCD vp8_comp_bc
|
||||
; added to r0 at the top of every partition pass to step to the next writer
_vp8_writer_sz_
|
||||
DCD vp8_writer_sz
|
||||
|
||||
END
|
|
@ -1,225 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_fast_quantize_b_armv6|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 BLOCK *b
|
||||
; r1 BLOCKD *d
|
||||
|vp8_fast_quantize_b_armv6| PROC
|
||||
stmfd sp!, {r1, r4-r11, lr}
|
||||
|
||||
ldr r3, [r0, #vp8_block_coeff] ; coeff
|
||||
ldr r4, [r0, #vp8_block_quant_fast] ; quant_fast
|
||||
ldr r5, [r0, #vp8_block_round] ; round
|
||||
ldr r6, [r1, #vp8_blockd_qcoeff] ; qcoeff
|
||||
ldr r7, [r1, #vp8_blockd_dqcoeff] ; dqcoeff
|
||||
ldr r8, [r1, #vp8_blockd_dequant] ; dequant
|
||||
|
||||
ldr r2, loop_count ; loop_count=0x1000000. 'lsls' instruction
|
||||
; is used to update the counter so that
|
||||
; it can be used to mark nonzero
|
||||
; quantized coefficient pairs.
|
||||
|
||||
mov r1, #0 ; flags for quantized coeffs
|
||||
|
||||
; PART 1: quantization and dequantization loop
|
||||
loop
|
||||
ldr r9, [r3], #4 ; [z1 | z0]
|
||||
ldr r10, [r5], #4 ; [r1 | r0]
|
||||
ldr r11, [r4], #4 ; [q1 | q0]
|
||||
|
||||
ssat16 lr, #1, r9 ; [sz1 | sz0]
|
||||
eor r9, r9, lr ; [z1 ^ sz1 | z0 ^ sz0]
|
||||
ssub16 r9, r9, lr ; x = (z ^ sz) - sz
|
||||
sadd16 r9, r9, r10 ; [x1+r1 | x0+r0]
|
||||
|
||||
ldr r12, [r3], #4 ; [z3 | z2]
|
||||
|
||||
smulbb r0, r9, r11 ; [(x0+r0)*q0]
|
||||
smultt r9, r9, r11 ; [(x1+r1)*q1]
|
||||
|
||||
ldr r10, [r5], #4 ; [r3 | r2]
|
||||
|
||||
ssat16 r11, #1, r12 ; [sz3 | sz2]
|
||||
eor r12, r12, r11 ; [z3 ^ sz3 | z2 ^ sz2]
|
||||
pkhtb r0, r9, r0, asr #16 ; [y1 | y0]
|
||||
ldr r9, [r4], #4 ; [q3 | q2]
|
||||
ssub16 r12, r12, r11 ; x = (z ^ sz) - sz
|
||||
|
||||
sadd16 r12, r12, r10 ; [x3+r3 | x2+r2]
|
||||
|
||||
eor r0, r0, lr ; [(y1 ^ sz1) | (y0 ^ sz0)]
|
||||
|
||||
smulbb r10, r12, r9 ; [(x2+r2)*q2]
|
||||
smultt r12, r12, r9 ; [(x3+r3)*q3]
|
||||
|
||||
ssub16 r0, r0, lr ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r0, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #24 ; add flag for nonzero coeffs
|
||||
|
||||
str r0, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r9, [r8], #4 ; [dq1 | dq0]
|
||||
|
||||
pkhtb r10, r12, r10, asr #16 ; [y3 | y2]
|
||||
eor r10, r10, r11 ; [(y3 ^ sz3) | (y2 ^ sz2)]
|
||||
ssub16 r10, r10, r11 ; x = (y ^ sz) - sz
|
||||
|
||||
cmp r10, #0 ; check if zero
|
||||
orrne r1, r1, r2, lsr #23 ; add flag for nonzero coeffs
|
||||
|
||||
str r10, [r6], #4 ; *qcoeff++ = x
|
||||
ldr r11, [r8], #4 ; [dq3 | dq2]
|
||||
|
||||
smulbb r12, r0, r9 ; [x0*dq0]
|
||||
smultt r0, r0, r9 ; [x1*dq1]
|
||||
|
||||
smulbb r9, r10, r11 ; [x2*dq2]
|
||||
smultt r10, r10, r11 ; [x3*dq3]
|
||||
|
||||
lsls r2, r2, #2 ; update loop counter
|
||||
strh r12, [r7, #0] ; dqcoeff[0] = [x0*dq0]
|
||||
strh r0, [r7, #2] ; dqcoeff[1] = [x1*dq1]
|
||||
strh r9, [r7, #4] ; dqcoeff[2] = [x2*dq2]
|
||||
strh r10, [r7, #6] ; dqcoeff[3] = [x3*dq3]
|
||||
add r7, r7, #8 ; dqcoeff += 8
|
||||
bne loop
|
||||
|
||||
; PART 2: check position for eob...
|
||||
ldr r11, [sp, #0] ; restore BLOCKD pointer
|
||||
mov lr, #0 ; init eob
|
||||
cmp r1, #0 ; coeffs after quantization?
|
||||
ldr r12, [r11, #vp8_blockd_eob]
|
||||
beq end ; skip eob calculations if all zero
|
||||
|
||||
ldr r0, [r11, #vp8_blockd_qcoeff]
|
||||
|
||||
; check shortcut for nonzero qcoeffs
|
||||
tst r1, #0x80
|
||||
bne quant_coeff_15_14
|
||||
tst r1, #0x20
|
||||
bne quant_coeff_13_11
|
||||
tst r1, #0x8
|
||||
bne quant_coeff_12_7
|
||||
tst r1, #0x40
|
||||
bne quant_coeff_10_9
|
||||
tst r1, #0x10
|
||||
bne quant_coeff_8_3
|
||||
tst r1, #0x2
|
||||
bne quant_coeff_6_5
|
||||
tst r1, #0x4
|
||||
bne quant_coeff_4_2
|
||||
b quant_coeff_1_0
|
||||
|
||||
quant_coeff_15_14
|
||||
ldrh r2, [r0, #30] ; rc=15, i=15
|
||||
mov lr, #16
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #28] ; rc=14, i=14
|
||||
mov lr, #15
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_13_11
|
||||
ldrh r2, [r0, #22] ; rc=11, i=13
|
||||
mov lr, #14
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_12_7
|
||||
ldrh r3, [r0, #14] ; rc=7, i=12
|
||||
mov lr, #13
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #20] ; rc=10, i=11
|
||||
mov lr, #12
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_10_9
|
||||
ldrh r3, [r0, #26] ; rc=13, i=10
|
||||
mov lr, #11
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #24] ; rc=12, i=9
|
||||
mov lr, #10
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_8_3
|
||||
ldrh r3, [r0, #18] ; rc=9, i=8
|
||||
mov lr, #9
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #12] ; rc=6, i=7
|
||||
mov lr, #8
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_6_5
|
||||
ldrh r3, [r0, #6] ; rc=3, i=6
|
||||
mov lr, #7
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #4] ; rc=2, i=5
|
||||
mov lr, #6
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_4_2
|
||||
ldrh r3, [r0, #10] ; rc=5, i=4
|
||||
mov lr, #5
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
ldrh r2, [r0, #16] ; rc=8, i=3
|
||||
mov lr, #4
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
ldrh r3, [r0, #8] ; rc=4, i=2
|
||||
mov lr, #3
|
||||
cmp r3, #0
|
||||
bne end
|
||||
|
||||
quant_coeff_1_0
|
||||
ldrh r2, [r0, #2] ; rc=1, i=1
|
||||
mov lr, #2
|
||||
cmp r2, #0
|
||||
bne end
|
||||
|
||||
mov lr, #1 ; rc=0, i=0
|
||||
|
||||
end
|
||||
strb lr, [r12]
|
||||
ldmfd sp!, {r1, r4-r11, pc}
|
||||
|
||||
ENDP
|
||||
|
||||
loop_count
|
||||
DCD 0x1000000
|
||||
|
||||
END
|
||||
|
|
@ -1,272 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_subtract_mby_armv6|
|
||||
EXPORT |vp8_subtract_mbuv_armv6|
|
||||
EXPORT |vp8_subtract_b_armv6|
|
||||
|
||||
INCLUDE vp8_asm_enc_offsets.asm
|
||||
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
; r0 BLOCK *be
|
||||
; r1 BLOCKD *bd
|
||||
; r2 int pitch
|
||||
|vp8_subtract_b_armv6| PROC
|
||||
|
||||
stmfd sp!, {r4-r9}
|
||||
|
||||
ldr r4, [r0, #vp8_block_base_src]
|
||||
ldr r5, [r0, #vp8_block_src]
|
||||
ldr r6, [r0, #vp8_block_src_diff]
|
||||
|
||||
ldr r3, [r4]
|
||||
ldr r7, [r0, #vp8_block_src_stride]
|
||||
add r3, r3, r5 ; src = *base_src + src
|
||||
ldr r8, [r1, #vp8_blockd_predictor]
|
||||
|
||||
mov r9, #4 ; loop count
|
||||
|
||||
loop_block
|
||||
|
||||
ldr r0, [r3], r7 ; src
|
||||
ldr r1, [r8], r2 ; pred
|
||||
|
||||
uxtb16 r4, r0 ; [s2 | s0]
|
||||
uxtb16 r5, r1 ; [p2 | p0]
|
||||
uxtb16 r0, r0, ror #8 ; [s3 | s1]
|
||||
uxtb16 r1, r1, ror #8 ; [p3 | p1]
|
||||
|
||||
usub16 r4, r4, r5 ; [d2 | d0]
|
||||
usub16 r5, r0, r1 ; [d3 | d1]
|
||||
|
||||
subs r9, r9, #1 ; decrement loop counter
|
||||
|
||||
pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
|
||||
pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
|
||||
|
||||
str r0, [r6, #0] ; diff
|
||||
str r1, [r6, #4] ; diff
|
||||
|
||||
add r6, r6, r2, lsl #1 ; update diff pointer
|
||||
bne loop_block
|
||||
|
||||
ldmfd sp!, {r4-r9}
|
||||
mov pc, lr
|
||||
|
||||
ENDP
|
||||
|
||||
|
||||
; r0 short *diff
|
||||
; r1 unsigned char *usrc
|
||||
; r2 unsigned char *vsrc
|
||||
; r3 int src_stride
|
||||
; sp unsigned char *upred
|
||||
; sp unsigned char *vpred
|
||||
; sp int pred_stride
|
||||
|vp8_subtract_mbuv_armv6| PROC
|
||||
|
||||
stmfd sp!, {r4-r11}
|
||||
|
||||
add r0, r0, #512 ; set *diff point to Cb
|
||||
mov r4, #8 ; loop count
|
||||
ldr r5, [sp, #32] ; upred
|
||||
ldr r12, [sp, #40] ; pred_stride
|
||||
|
||||
; Subtract U block
|
||||
loop_u
|
||||
ldr r6, [r1] ; usrc (A)
|
||||
ldr r7, [r5] ; upred (A)
|
||||
|
||||
uxtb16 r8, r6 ; [s2 | s0] (A)
|
||||
uxtb16 r9, r7 ; [p2 | p0] (A)
|
||||
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
|
||||
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (A)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (A)
|
||||
|
||||
ldr r10, [r1, #4] ; usrc (B)
|
||||
ldr r11, [r5, #4] ; upred (B)
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
|
||||
|
||||
str r8, [r0], #4 ; diff (A)
|
||||
uxtb16 r8, r10 ; [s2 | s0] (B)
|
||||
str r9, [r0], #4 ; diff (A)
|
||||
|
||||
uxtb16 r9, r11 ; [p2 | p0] (B)
|
||||
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
|
||||
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (B)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (B)
|
||||
|
||||
add r1, r1, r3 ; update usrc pointer
|
||||
add r5, r5, r12 ; update upred pointer
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
|
||||
|
||||
str r8, [r0], #4 ; diff (B)
|
||||
subs r4, r4, #1 ; update loop counter
|
||||
str r9, [r0], #4 ; diff (B)
|
||||
|
||||
bne loop_u
|
||||
|
||||
ldr r5, [sp, #36] ; vpred
|
||||
mov r4, #8 ; loop count
|
||||
|
||||
; Subtract V block
|
||||
loop_v
|
||||
ldr r6, [r2] ; vsrc (A)
|
||||
ldr r7, [r5] ; vpred (A)
|
||||
|
||||
uxtb16 r8, r6 ; [s2 | s0] (A)
|
||||
uxtb16 r9, r7 ; [p2 | p0] (A)
|
||||
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
|
||||
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (A)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (A)
|
||||
|
||||
ldr r10, [r2, #4] ; vsrc (B)
|
||||
ldr r11, [r5, #4] ; vpred (B)
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
|
||||
|
||||
str r8, [r0], #4 ; diff (A)
|
||||
uxtb16 r8, r10 ; [s2 | s0] (B)
|
||||
str r9, [r0], #4 ; diff (A)
|
||||
|
||||
uxtb16 r9, r11 ; [p2 | p0] (B)
|
||||
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
|
||||
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (B)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (B)
|
||||
|
||||
add r2, r2, r3 ; update vsrc pointer
|
||||
add r5, r5, r12 ; update vpred pointer
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
|
||||
|
||||
str r8, [r0], #4 ; diff (B)
|
||||
subs r4, r4, #1 ; update loop counter
|
||||
str r9, [r0], #4 ; diff (B)
|
||||
|
||||
bne loop_v
|
||||
|
||||
ldmfd sp!, {r4-r11}
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
|
||||
; r0 short *diff
|
||||
; r1 unsigned char *src
|
||||
; r2 int src_stride
|
||||
; r3 unsigned char *pred
|
||||
; sp int pred_stride
|
||||
|vp8_subtract_mby_armv6| PROC
|
||||
|
||||
stmfd sp!, {r4-r11}
|
||||
ldr r12, [sp, #32] ; pred_stride
|
||||
mov r4, #16
|
||||
loop
|
||||
ldr r6, [r1] ; src (A)
|
||||
ldr r7, [r3] ; pred (A)
|
||||
|
||||
uxtb16 r8, r6 ; [s2 | s0] (A)
|
||||
uxtb16 r9, r7 ; [p2 | p0] (A)
|
||||
uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
|
||||
uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (A)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (A)
|
||||
|
||||
ldr r10, [r1, #4] ; src (B)
|
||||
ldr r11, [r3, #4] ; pred (B)
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
|
||||
|
||||
str r8, [r0], #4 ; diff (A)
|
||||
uxtb16 r8, r10 ; [s2 | s0] (B)
|
||||
str r9, [r0], #4 ; diff (A)
|
||||
|
||||
uxtb16 r9, r11 ; [p2 | p0] (B)
|
||||
uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
|
||||
uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (B)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (B)
|
||||
|
||||
ldr r10, [r1, #8] ; src (C)
|
||||
ldr r11, [r3, #8] ; pred (C)
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
|
||||
|
||||
str r8, [r0], #4 ; diff (B)
|
||||
uxtb16 r8, r10 ; [s2 | s0] (C)
|
||||
str r9, [r0], #4 ; diff (B)
|
||||
|
||||
uxtb16 r9, r11 ; [p2 | p0] (C)
|
||||
uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
|
||||
uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (C)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (C)
|
||||
|
||||
ldr r10, [r1, #12] ; src (D)
|
||||
ldr r11, [r3, #12] ; pred (D)
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
|
||||
|
||||
str r8, [r0], #4 ; diff (C)
|
||||
uxtb16 r8, r10 ; [s2 | s0] (D)
|
||||
str r9, [r0], #4 ; diff (C)
|
||||
|
||||
uxtb16 r9, r11 ; [p2 | p0] (D)
|
||||
uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
|
||||
uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
|
||||
|
||||
usub16 r6, r8, r9 ; [d2 | d0] (D)
|
||||
usub16 r7, r10, r11 ; [d3 | d1] (D)
|
||||
|
||||
add r1, r1, r2 ; update src pointer
|
||||
add r3, r3, r12 ; update pred pointer
|
||||
|
||||
pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
|
||||
pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
|
||||
|
||||
str r8, [r0], #4 ; diff (D)
|
||||
subs r4, r4, #1 ; update loop counter
|
||||
str r9, [r0], #4 ; diff (D)
|
||||
|
||||
bne loop
|
||||
|
||||
ldmfd sp!, {r4-r11}
|
||||
bx lr
|
||||
|
||||
ENDP
|
||||
|
||||
END
|
||||
|
|
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "vp8/encoder/boolhuff.h"
|
||||
#include "vpx/internal/vpx_codec_internal.h"
|
||||
|
||||
const unsigned int vp8_prob_cost[256] =
|
||||
{
|
||||
2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161, 1129, 1099, 1072, 1046,
|
||||
1023, 1000, 979, 959, 940, 922, 905, 889, 873, 858, 843, 829, 816, 803, 790, 778,
|
||||
767, 755, 744, 733, 723, 713, 703, 693, 684, 675, 666, 657, 649, 641, 633, 625,
|
||||
617, 609, 602, 594, 587, 580, 573, 567, 560, 553, 547, 541, 534, 528, 522, 516,
|
||||
511, 505, 499, 494, 488, 483, 477, 472, 467, 462, 457, 452, 447, 442, 437, 433,
|
||||
428, 424, 419, 415, 410, 406, 401, 397, 393, 389, 385, 381, 377, 373, 369, 365,
|
||||
361, 357, 353, 349, 346, 342, 338, 335, 331, 328, 324, 321, 317, 314, 311, 307,
|
||||
304, 301, 297, 294, 291, 288, 285, 281, 278, 275, 272, 269, 266, 263, 260, 257,
|
||||
255, 252, 249, 246, 243, 240, 238, 235, 232, 229, 227, 224, 221, 219, 216, 214,
|
||||
211, 208, 206, 203, 201, 198, 196, 194, 191, 189, 186, 184, 181, 179, 177, 174,
|
||||
172, 170, 168, 165, 163, 161, 159, 156, 154, 152, 150, 148, 145, 143, 141, 139,
|
||||
137, 135, 133, 131, 129, 127, 125, 123, 121, 119, 117, 115, 113, 111, 109, 107,
|
||||
105, 103, 101, 99, 97, 95, 93, 92, 90, 88, 86, 84, 82, 81, 79, 77,
|
||||
75, 73, 72, 70, 68, 66, 65, 63, 61, 60, 58, 56, 55, 53, 51, 50,
|
||||
48, 46, 45, 43, 41, 40, 38, 37, 35, 33, 32, 30, 29, 27, 25, 24,
|
||||
22, 21, 19, 18, 16, 15, 13, 12, 10, 9, 7, 6, 4, 3, 1, 1
|
||||
};
|
||||
|
||||
int vp8_validate_buffer_arm(const unsigned char *start,
|
||||
size_t len,
|
||||
const unsigned char *end,
|
||||
struct vpx_internal_error_info *error)
|
||||
{
|
||||
return validate_buffer(start, len, end, error);
|
||||
}
|
|
@ -159,7 +159,7 @@ static void write_split(vp8_writer *bc, int x)
|
|||
);
|
||||
}
|
||||
|
||||
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount)
|
||||
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
|
||||
{
|
||||
const TOKENEXTRA *stop = p + xcount;
|
||||
unsigned int split;
|
||||
|
@ -374,7 +374,7 @@ static void write_partition_size(unsigned char *cx_data, int size)
|
|||
|
||||
}
|
||||
|
||||
static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data,
|
||||
unsigned char * cx_data_end,
|
||||
int num_part)
|
||||
{
|
||||
|
@ -398,7 +398,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
|||
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
|
||||
int tokens = (int)(stop - p);
|
||||
|
||||
vp8_pack_tokens_c(w, p, tokens);
|
||||
vp8_pack_tokens(w, p, tokens);
|
||||
}
|
||||
|
||||
vp8_stop_encode(w);
|
||||
|
@ -407,7 +407,7 @@ static void pack_tokens_into_partitions_c(VP8_COMP *cpi, unsigned char *cx_data,
|
|||
}
|
||||
|
||||
|
||||
static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
|
||||
static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w)
|
||||
{
|
||||
int mb_row;
|
||||
|
||||
|
@ -417,7 +417,7 @@ static void pack_mb_row_tokens_c(VP8_COMP *cpi, vp8_writer *w)
|
|||
const TOKENEXTRA *stop = cpi->tplist[mb_row].stop;
|
||||
int tokens = (int)(stop - p);
|
||||
|
||||
vp8_pack_tokens_c(w, p, tokens);
|
||||
vp8_pack_tokens(w, p, tokens);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1676,7 +1676,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
|
|||
pack_mb_row_tokens(cpi, &cpi->bc[1]);
|
||||
else
|
||||
#endif
|
||||
pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
|
||||
vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
|
||||
|
||||
vp8_stop_encode(&cpi->bc[1]);
|
||||
|
||||
|
|
|
@ -16,36 +16,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if HAVE_EDSP
|
||||
void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *,
|
||||
unsigned char * cx_data,
|
||||
const unsigned char *cx_data_end,
|
||||
int num_parts,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
|
||||
vp8_token *,
|
||||
const vp8_extra_bit_struct *,
|
||||
const vp8_tree_index *);
|
||||
# define pack_tokens(a,b,c) \
|
||||
vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
# define pack_tokens_into_partitions(a,b,c,d) \
|
||||
vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
# define pack_mb_row_tokens(a,b) \
|
||||
vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
|
||||
#else
|
||||
|
||||
void vp8_pack_tokens_c(vp8_writer *w, const TOKENEXTRA *p, int xcount);
|
||||
|
||||
# define pack_tokens(a,b,c) vp8_pack_tokens_c(a,b,c)
|
||||
# define pack_tokens_into_partitions(a,b,c,d) pack_tokens_into_partitions_c(a,b,c,d)
|
||||
# define pack_mb_row_tokens(a,b) pack_mb_row_tokens_c(a,b)
|
||||
#endif
|
||||
void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -574,7 +574,7 @@ void encode_mb_row(VP8_COMP *cpi,
|
|||
/* pack tokens for this MB */
|
||||
{
|
||||
int tok_count = *tp - tp_start;
|
||||
pack_tokens(w, tp_start, tok_count);
|
||||
vp8_pack_tokens(w, tp_start, tok_count);
|
||||
}
|
||||
#endif
|
||||
/* Increment pointer into gf usage flags structure. */
|
||||
|
|
|
@ -261,7 +261,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
|
|||
/* pack tokens for this MB */
|
||||
{
|
||||
int tok_count = tp - tp_start;
|
||||
pack_tokens(w, tp_start, tok_count);
|
||||
vp8_pack_tokens(w, tp_start, tok_count);
|
||||
}
|
||||
#else
|
||||
cpi->tplist[mb_row].stop = tp;
|
||||
|
|
|
@ -10,84 +10,7 @@
|
|||
|
||||
|
||||
#include "vpx_ports/asm_offsets.h"
|
||||
#include "vpx_config.h"
|
||||
#include "block.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "onyx_int.h"
|
||||
#include "treewriter.h"
|
||||
#include "tokenize.h"
|
||||
|
||||
BEGIN
|
||||
|
||||
/* regular quantize */
|
||||
DEFINE(vp8_block_coeff, offsetof(BLOCK, coeff));
|
||||
DEFINE(vp8_block_zbin, offsetof(BLOCK, zbin));
|
||||
DEFINE(vp8_block_round, offsetof(BLOCK, round));
|
||||
DEFINE(vp8_block_quant, offsetof(BLOCK, quant));
|
||||
DEFINE(vp8_block_quant_fast, offsetof(BLOCK, quant_fast));
|
||||
DEFINE(vp8_block_zbin_extra, offsetof(BLOCK, zbin_extra));
|
||||
DEFINE(vp8_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost));
|
||||
DEFINE(vp8_block_quant_shift, offsetof(BLOCK, quant_shift));
|
||||
|
||||
DEFINE(vp8_blockd_qcoeff, offsetof(BLOCKD, qcoeff));
|
||||
DEFINE(vp8_blockd_dequant, offsetof(BLOCKD, dequant));
|
||||
DEFINE(vp8_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff));
|
||||
DEFINE(vp8_blockd_eob, offsetof(BLOCKD, eob));
|
||||
|
||||
/* subtract */
|
||||
DEFINE(vp8_block_base_src, offsetof(BLOCK, base_src));
|
||||
DEFINE(vp8_block_src, offsetof(BLOCK, src));
|
||||
DEFINE(vp8_block_src_diff, offsetof(BLOCK, src_diff));
|
||||
DEFINE(vp8_block_src_stride, offsetof(BLOCK, src_stride));
|
||||
|
||||
DEFINE(vp8_blockd_predictor, offsetof(BLOCKD, predictor));
|
||||
|
||||
/* pack tokens */
|
||||
DEFINE(vp8_writer_lowvalue, offsetof(vp8_writer, lowvalue));
|
||||
DEFINE(vp8_writer_range, offsetof(vp8_writer, range));
|
||||
DEFINE(vp8_writer_count, offsetof(vp8_writer, count));
|
||||
DEFINE(vp8_writer_pos, offsetof(vp8_writer, pos));
|
||||
DEFINE(vp8_writer_buffer, offsetof(vp8_writer, buffer));
|
||||
DEFINE(vp8_writer_buffer_end, offsetof(vp8_writer, buffer_end));
|
||||
DEFINE(vp8_writer_error, offsetof(vp8_writer, error));
|
||||
|
||||
DEFINE(tokenextra_token, offsetof(TOKENEXTRA, Token));
|
||||
DEFINE(tokenextra_extra, offsetof(TOKENEXTRA, Extra));
|
||||
DEFINE(tokenextra_context_tree, offsetof(TOKENEXTRA, context_tree));
|
||||
DEFINE(tokenextra_skip_eob_node, offsetof(TOKENEXTRA, skip_eob_node));
|
||||
DEFINE(TOKENEXTRA_SZ, sizeof(TOKENEXTRA));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct));
|
||||
|
||||
DEFINE(vp8_token_value, offsetof(vp8_token, value));
|
||||
DEFINE(vp8_token_len, offsetof(vp8_token, Len));
|
||||
|
||||
DEFINE(vp8_extra_bit_struct_tree, offsetof(vp8_extra_bit_struct, tree));
|
||||
DEFINE(vp8_extra_bit_struct_prob, offsetof(vp8_extra_bit_struct, prob));
|
||||
DEFINE(vp8_extra_bit_struct_len, offsetof(vp8_extra_bit_struct, Len));
|
||||
DEFINE(vp8_extra_bit_struct_base_val, offsetof(vp8_extra_bit_struct, base_val));
|
||||
|
||||
DEFINE(vp8_comp_tplist, offsetof(VP8_COMP, tplist));
|
||||
DEFINE(vp8_comp_common, offsetof(VP8_COMP, common));
|
||||
DEFINE(vp8_comp_bc , offsetof(VP8_COMP, bc));
|
||||
DEFINE(vp8_writer_sz , sizeof(vp8_writer));
|
||||
|
||||
DEFINE(tokenlist_start, offsetof(TOKENLIST, start));
|
||||
DEFINE(tokenlist_stop, offsetof(TOKENLIST, stop));
|
||||
DEFINE(TOKENLIST_SZ, sizeof(TOKENLIST));
|
||||
|
||||
DEFINE(vp8_common_mb_rows, offsetof(VP8_COMMON, mb_rows));
|
||||
|
||||
END
|
||||
|
||||
/* add asserts for any offset that is not supported by assembly code
|
||||
* add asserts for any size that is not supported by assembly code
|
||||
|
||||
* These are used in vp8cx_pack_tokens. They are hard coded so if their sizes
|
||||
* change they will have to be adjusted.
|
||||
*/
|
||||
|
||||
#if HAVE_EDSP
|
||||
ct_assert(TOKENEXTRA_SZ, sizeof(TOKENEXTRA) == 8)
|
||||
ct_assert(vp8_extra_bit_struct_sz, sizeof(vp8_extra_bit_struct) == 16)
|
||||
#endif
|
||||
|
|
|
@ -75,7 +75,6 @@ VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
|
|||
VP8_CX_SRCS-yes += encoder/temporal_filter.c
|
||||
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c
|
||||
VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h
|
||||
VP8_CX_SRCS-yes += encoder/vp8_asm_enc_offsets.c
|
||||
|
||||
ifeq ($(CONFIG_REALTIME_ONLY),yes)
|
||||
VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c
|
||||
|
@ -107,6 +106,3 @@ VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
|||
endif
|
||||
|
||||
VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes))
|
||||
|
||||
$(eval $(call asm_offsets_template,\
|
||||
vp8_asm_enc_offsets.asm, $(VP8_PREFIX)encoder/vp8_asm_enc_offsets.c))
|
||||
|
|
|
@ -15,20 +15,9 @@ VP8_CX_SRCS-$(ARCH_ARM) += vp8cx_arm.mk
|
|||
# encoder
|
||||
VP8_CX_SRCS-$(ARCH_ARM) += encoder/arm/dct_arm.c
|
||||
|
||||
#File list for edsp
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/boolhuff_arm.c
|
||||
VP8_CX_SRCS_REMOVE-$(HAVE_EDSP) += encoder/boolhuff.c
|
||||
VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_EDSP) += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
|
||||
|
||||
#File list for media
|
||||
# encoder
|
||||
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_subtract_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_short_fdct4x4_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_fast_quantize_b_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/vp8_mse16x16_armv6$(ASM)
|
||||
VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
|
||||
|
||||
|
|
|
@ -49,9 +49,6 @@ int arm_cpu_caps(void) {
|
|||
return flags;
|
||||
}
|
||||
mask = arm_cpu_env_mask();
|
||||
#if HAVE_EDSP
|
||||
flags |= HAS_EDSP;
|
||||
#endif /* HAVE_EDSP */
|
||||
#if HAVE_MEDIA
|
||||
flags |= HAS_MEDIA;
|
||||
#endif /* HAVE_MEDIA */
|
||||
|
@ -78,17 +75,6 @@ int arm_cpu_caps(void) {
|
|||
* instructions via their assembled hex code.
|
||||
* All of these instructions should be essentially nops.
|
||||
*/
|
||||
#if HAVE_EDSP
|
||||
if (mask & HAS_EDSP) {
|
||||
__try {
|
||||
/*PLD [r13]*/
|
||||
__emit(0xF5DDF000);
|
||||
flags |= HAS_EDSP;
|
||||
} __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
|
||||
/*Ignore exception.*/
|
||||
}
|
||||
}
|
||||
#endif /* HAVE_EDSP */
|
||||
#if HAVE_MEDIA
|
||||
if (mask & HAS_MEDIA)
|
||||
__try {
|
||||
|
@ -127,9 +113,6 @@ int arm_cpu_caps(void) {
|
|||
mask = arm_cpu_env_mask();
|
||||
features = android_getCpuFeatures();
|
||||
|
||||
#if HAVE_EDSP
|
||||
flags |= HAS_EDSP;
|
||||
#endif /* HAVE_EDSP */
|
||||
#if HAVE_MEDIA
|
||||
flags |= HAS_MEDIA;
|
||||
#endif /* HAVE_MEDIA */
|
||||
|
@ -163,23 +146,15 @@ int arm_cpu_caps(void) {
|
|||
*/
|
||||
char buf[512];
|
||||
while (fgets(buf, 511, fin) != NULL) {
|
||||
#if HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
if (memcmp(buf, "Features", 8) == 0) {
|
||||
char *p;
|
||||
#if HAVE_EDSP
|
||||
p = strstr(buf, " edsp");
|
||||
if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
|
||||
flags |= HAS_EDSP;
|
||||
}
|
||||
#endif /* HAVE_EDSP */
|
||||
#if HAVE_NEON || HAVE_NEON_ASM
|
||||
p = strstr(buf, " neon");
|
||||
if (p != NULL && (p[5] == ' ' || p[5] == '\n')) {
|
||||
flags |= HAS_NEON;
|
||||
}
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
}
|
||||
#endif /* HAVE_EDSP || HAVE_NEON || HAVE_NEON_ASM */
|
||||
#endif /* HAVE_NEON || HAVE_NEON_ASM */
|
||||
#if HAVE_MEDIA
|
||||
if (memcmp(buf, "CPU architecture:", 17) == 0) {
|
||||
int version;
|
||||
|
|
|
@ -5,7 +5,6 @@ SCALE_SRCS-yes += generic/vpx_scale.c
|
|||
SCALE_SRCS-yes += generic/yv12config.c
|
||||
SCALE_SRCS-yes += generic/yv12extend.c
|
||||
SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c
|
||||
SCALE_SRCS-yes += vpx_scale_asm_offsets.c
|
||||
SCALE_SRCS-yes += vpx_scale_rtcd.c
|
||||
SCALE_SRCS-yes += vpx_scale_rtcd.pl
|
||||
|
||||
|
@ -14,7 +13,4 @@ SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c
|
|||
|
||||
SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes)
|
||||
|
||||
$(eval $(call asm_offsets_template,\
|
||||
vpx_scale_asm_offsets.asm, vpx_scale/vpx_scale_asm_offsets.c))
|
||||
|
||||
$(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl))
|
||||
|
|
|
@ -9,32 +9,8 @@
|
|||
*/
|
||||
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx/vpx_codec.h"
|
||||
#include "vpx_ports/asm_offsets.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
BEGIN
|
||||
|
||||
/* vpx_scale */
|
||||
DEFINE(yv12_buffer_config_y_width, offsetof(YV12_BUFFER_CONFIG, y_width));
|
||||
DEFINE(yv12_buffer_config_y_height, offsetof(YV12_BUFFER_CONFIG, y_height));
|
||||
DEFINE(yv12_buffer_config_y_stride, offsetof(YV12_BUFFER_CONFIG, y_stride));
|
||||
DEFINE(yv12_buffer_config_uv_width, offsetof(YV12_BUFFER_CONFIG, uv_width));
|
||||
DEFINE(yv12_buffer_config_uv_height, offsetof(YV12_BUFFER_CONFIG, uv_height));
|
||||
DEFINE(yv12_buffer_config_uv_stride, offsetof(YV12_BUFFER_CONFIG, uv_stride));
|
||||
DEFINE(yv12_buffer_config_y_buffer, offsetof(YV12_BUFFER_CONFIG, y_buffer));
|
||||
DEFINE(yv12_buffer_config_u_buffer, offsetof(YV12_BUFFER_CONFIG, u_buffer));
|
||||
DEFINE(yv12_buffer_config_v_buffer, offsetof(YV12_BUFFER_CONFIG, v_buffer));
|
||||
DEFINE(yv12_buffer_config_border, offsetof(YV12_BUFFER_CONFIG, border));
|
||||
DEFINE(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS);
|
||||
|
||||
END
|
||||
|
||||
/* add asserts for any offset that is not supported by assembly code */
|
||||
/* add asserts for any size that is not supported by assembly code */
|
||||
|
||||
#if HAVE_NEON
|
||||
/* vp8_yv12_extend_frame_borders_neon makes several assumptions based on this */
|
||||
ct_assert(VP8BORDERINPIXELS_VAL, VP8BORDERINPIXELS == 32)
|
||||
#endif
|
||||
|
|
Загрузка…
Ссылка в новой задаче