Merge "Merge branch 'master' into nextgenv2" into nextgenv2
This commit is contained in:
Commit
06c297bd1c
1
README
1
README
|
@ -47,7 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
|
|||
--help output of the configure script. As of this writing, the list of
|
||||
available targets is:
|
||||
|
||||
armv6-darwin-gcc
|
||||
armv6-linux-rvct
|
||||
armv6-linux-gcc
|
||||
armv6-none-rvct
|
||||
|
|
|
@ -119,29 +119,25 @@ utiltest:
|
|||
test-no-data-check::
|
||||
exampletest-no-data-check utiltest-no-data-check:
|
||||
|
||||
# Add compiler flags for intrinsic files
|
||||
# Force to realign stack always on OS/2
|
||||
ifeq ($(TOOLCHAIN), x86-os2-gcc)
|
||||
STACKREALIGN=-mstackrealign
|
||||
else
|
||||
STACKREALIGN=
|
||||
CFLAGS += -mstackrealign
|
||||
endif
|
||||
|
||||
$(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
|
||||
$(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
|
||||
$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
|
||||
$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
|
||||
$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)
|
||||
$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
|
||||
$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
|
||||
$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
|
||||
$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
|
||||
$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
|
||||
$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
|
||||
$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
|
||||
$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
|
||||
$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
|
||||
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
|
||||
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
|
||||
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
|
||||
|
||||
$(BUILD_PFX)%.c.d: %.c
|
||||
$(if $(quiet),@echo " [DEP] $@")
|
||||
|
|
|
@ -185,6 +185,25 @@ add_extralibs() {
|
|||
#
|
||||
# Boolean Manipulation Functions
|
||||
#
|
||||
|
||||
# Prints an "enabling" notice unless `enabled $1` already succeeds, then
# calls `set_all yes $1`. When $1 is one of vp8, vp9 or vp10, `set_all yes`
# is also called for $1_encoder and $1_decoder.
# The exit status is that of the trailing && chain, so it is non-zero when
# $1 is not one of the listed names.
enable_codec(){
  if ! enabled $1; then
    echo " enabling $1"
  fi
  set_all yes $1

  is_in $1 vp8 vp9 vp10 \
    && set_all yes $1_encoder \
    && set_all yes $1_decoder
}
|
||||
|
||||
# Prints a "disabling" notice unless `disabled $1` already succeeds, then
# calls `set_all no $1`. When $1 is one of vp8, vp9 or vp10, `set_all no`
# is also called for $1_encoder and $1_decoder.
# The exit status is that of the trailing && chain, so it is non-zero when
# $1 is not one of the listed names.
disable_codec(){
  if ! disabled $1; then
    echo " disabling $1"
  fi
  set_all no $1

  is_in $1 vp8 vp9 vp10 \
    && set_all no $1_encoder \
    && set_all no $1_decoder
}
|
||||
|
||||
# Forwards all of its arguments to `set_all` with the value "yes".
# The arguments are expanded through an unquoted $*, so they are subject to
# word splitting before set_all sees them.
enable_feature(){
  set_all yes $*
}
|
||||
|
@ -521,22 +540,20 @@ process_common_cmdline() {
|
|||
;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
|
||||
if is_in ${option} ${ARCH_EXT_LIST}; then
|
||||
[ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
|
||||
elif [ $action = "disable" ] && ! disabled $option ; then
|
||||
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
|
||||
die_unknown $opt
|
||||
is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
|
||||
log_echo " disabling $option"
|
||||
elif [ $action = "enable" ] && ! enabled $option ; then
|
||||
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
|
||||
die_unknown $opt
|
||||
is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
|
||||
log_echo " enabling $option"
|
||||
fi
|
||||
${action}_feature $option
|
||||
;;
|
||||
--require-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then
|
||||
if is_in ${option} ${ARCH_EXT_LIST}; then
|
||||
RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
|
||||
else
|
||||
die_unknown $opt
|
||||
|
@ -638,6 +655,26 @@ show_darwin_sdk_major_version() {
|
|||
xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
|
||||
}
|
||||
|
||||
# Print the Xcode version.
# Takes the first line of `xcodebuild -version` output and prints its second
# space-separated field.
show_xcode_version() {
  xcodebuild -version | head -n1 | cut -d' ' -f2
}
|
||||
|
||||
# Fails (returns 1) when the Xcode version printed by show_xcode_version is
# less than 6.3; returns 0 otherwise.
#
# The version is queried once and split here into major and minor parts:
#  - a version without a minor component (e.g. "6") is compared as minor 0;
#  - an empty or non-numeric version cannot be compared, so it is not
#    reported as too old (returns 0) and no `[` syntax errors are printed.
# Sets the globals xcode_version, xcode_major, xcode_minor, xcode_min_major
# and xcode_min_minor.
check_xcode_minimum_version() {
  xcode_min_major=6
  xcode_min_minor=3
  xcode_version=$(show_xcode_version)

  # Major is everything before the first dot; minor is the text between the
  # first and second dots, if any.
  xcode_major=${xcode_version%%.*}
  case "${xcode_version}" in
    *.*)
      xcode_minor=${xcode_version#*.}
      xcode_minor=${xcode_minor%%.*}
      ;;
    *)
      xcode_minor=0
      ;;
  esac
  xcode_minor=${xcode_minor:-0}

  # Numeric comparison only makes sense for plain decimal components.
  case "${xcode_major}" in
    ''|*[!0-9]*) return 0 ;;
  esac
  case "${xcode_minor}" in
    *[!0-9]*) return 0 ;;
  esac

  if [ "${xcode_major}" -lt "${xcode_min_major}" ]; then
    return 1
  fi
  if [ "${xcode_major}" -eq "${xcode_min_major}" ] \
    && [ "${xcode_minor}" -lt "${xcode_min_minor}" ]; then
    return 1
  fi
  return 0
}
|
||||
|
||||
process_common_toolchain() {
|
||||
if [ -z "$toolchain" ]; then
|
||||
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
|
||||
|
@ -751,7 +788,14 @@ process_common_toolchain() {
|
|||
enabled shared && soft_enable pic
|
||||
|
||||
# Minimum iOS version for all target platforms (darwin and iphonesimulator).
|
||||
IOS_VERSION_MIN="6.0"
|
||||
# Shared library framework builds are only possible on iOS 8 and later.
|
||||
if enabled shared; then
|
||||
IOS_VERSION_OPTIONS="--enable-shared"
|
||||
IOS_VERSION_MIN="8.0"
|
||||
else
|
||||
IOS_VERSION_OPTIONS=""
|
||||
IOS_VERSION_MIN="6.0"
|
||||
fi
|
||||
|
||||
# Handle darwin variants. Newer SDKs allow targeting older
|
||||
# platforms, so use the newest one available.
|
||||
|
@ -1018,18 +1062,7 @@ EOF
|
|||
NM="$(${XCRUN_FIND} nm)"
|
||||
RANLIB="$(${XCRUN_FIND} ranlib)"
|
||||
AS_SFX=.s
|
||||
|
||||
# Special handling of ld for armv6 because libclang_rt.ios.a does
|
||||
# not contain armv6 support in Apple's clang package:
|
||||
# Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn).
|
||||
# TODO(tomfinegan): Remove this. Our minimum iOS version (6.0)
|
||||
# renders support for armv6 unnecessary because the 3GS and up
|
||||
# support neon.
|
||||
if [ "${tgt_isa}" = "armv6" ]; then
|
||||
LD="$(${XCRUN_FIND} ld)"
|
||||
else
|
||||
LD="${CXX:-$(${XCRUN_FIND} ld)}"
|
||||
fi
|
||||
LD="${CXX:-$(${XCRUN_FIND} ld)}"
|
||||
|
||||
# ASFLAGS is written here instead of using check_add_asflags
|
||||
# because we need to overwrite all of ASFLAGS and purge the
|
||||
|
@ -1055,6 +1088,19 @@ EOF
|
|||
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
|
||||
done
|
||||
|
||||
case ${tgt_isa} in
|
||||
armv7|armv7s|armv8|arm64)
|
||||
if enabled neon && ! check_xcode_minimum_version; then
|
||||
soft_disable neon
|
||||
log_echo " neon disabled: upgrade Xcode (need v6.3+)."
|
||||
if enabled neon_asm; then
|
||||
soft_disable neon_asm
|
||||
log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)."
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
|
||||
|
||||
if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
|
||||
|
@ -1069,7 +1115,7 @@ EOF
|
|||
if enabled rvct; then
|
||||
# Check if we have CodeSourcery GCC in PATH. Needed for
|
||||
# libraries
|
||||
hash arm-none-linux-gnueabi-gcc 2>&- || \
|
||||
which arm-none-linux-gnueabi-gcc 2>&- || \
|
||||
die "Couldn't find CodeSourcery GCC from PATH"
|
||||
|
||||
# Use armcc as a linker to enable translation of
|
||||
|
@ -1110,7 +1156,7 @@ EOF
|
|||
check_add_ldflags -mfp64
|
||||
;;
|
||||
i6400)
|
||||
check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
|
||||
check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
|
||||
check_add_cflags -mload-store-pairs -mhard-float -mfp64
|
||||
check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
|
||||
check_add_ldflags -mips64r6 -mabi=64 -mfp64
|
||||
|
|
|
@ -211,7 +211,7 @@ for opt in "$@"; do
|
|||
done
|
||||
|
||||
# Make one call to fix_path for file_list to improve performance.
|
||||
fix_file_list
|
||||
fix_file_list file_list
|
||||
|
||||
outfile=${outfile:-/dev/stdout}
|
||||
guid=${guid:-`generate_uuid`}
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>CFBundleDevelopmentRegion</key>
|
||||
<string>en</string>
|
||||
<key>CFBundleExecutable</key>
|
||||
<string>VPX</string>
|
||||
<key>CFBundleIdentifier</key>
|
||||
<string>org.webmproject.VPX</string>
|
||||
<key>CFBundleInfoDictionaryVersion</key>
|
||||
<string>6.0</string>
|
||||
<key>CFBundleName</key>
|
||||
<string>VPX</string>
|
||||
<key>CFBundlePackageType</key>
|
||||
<string>FMWK</string>
|
||||
<key>CFBundleShortVersionString</key>
|
||||
<string>${VERSION}</string>
|
||||
<key>CFBundleSignature</key>
|
||||
<string>????</string>
|
||||
<key>CFBundleSupportedPlatforms</key>
|
||||
<array>
|
||||
<string>iPhoneOS</string>
|
||||
</array>
|
||||
<key>CFBundleVersion</key>
|
||||
<string>${VERSION}</string>
|
||||
<key>MinimumOSVersion</key>
|
||||
<string>${IOS_VERSION_MIN}</string>
|
||||
<key>UIDeviceFamily</key>
|
||||
<array>
|
||||
<integer>1</integer>
|
||||
<integer>2</integer>
|
||||
</array>
|
||||
<key>VPXFullVersion</key>
|
||||
<string>${FULLVERSION}</string>
|
||||
</dict>
|
||||
</plist>
|
|
@ -24,6 +24,7 @@ CONFIGURE_ARGS="--disable-docs
|
|||
--disable-unit-tests"
|
||||
DIST_DIR="_dist"
|
||||
FRAMEWORK_DIR="VPX.framework"
|
||||
FRAMEWORK_LIB="VPX.framework/VPX"
|
||||
HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
|
||||
SCRIPT_DIR=$(dirname "$0")
|
||||
LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
|
||||
|
@ -137,6 +138,44 @@ create_vpx_framework_config_shim() {
|
|||
printf "#endif // ${include_guard}" >> "${config_file}"
|
||||
}
|
||||
|
||||
# Verifies that $FRAMEWORK_LIB fat library contains requested builds.
# Each positional argument is a full target name; the text before its first
# '-' is taken as the CPU. Returns 1 (after logging both lists through elog)
# when the sorted requested CPU list differs from the sorted list reported
# by `$LIPO -info`, and 0 when they match.
verify_framework_targets() {
  local requested_cpus=""
  local cpu=""

  # Extract CPU from full target name.
  for target; do
    cpu="${target%%-*}"
    case "${cpu}" in
      # lipo -info outputs i386 for libvpx x86 targets.
      x86) cpu="i386" ;;
    esac
    requested_cpus="${requested_cpus}${cpu} "
  done

  # $LIPO -info outputs a string like the following:
  #   Architectures in the fat file: $FRAMEWORK_LIB <architectures>
  # Keep only what follows the last ": ", i.e. the architecture strings.
  local lipo_info=$(${LIPO} -info ${FRAMEWORK_LIB})
  lipo_info=${lipo_info##*: }

  # Sort CPU strings to make the next step a simple string compare.
  local actual=$(echo ${lipo_info} | tr " " "\n" | sort | tr "\n" " ")
  local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ")

  vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
  vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"

  if [ "${requested}" = "${actual}" ]; then
    return 0
  fi

  elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
  elog " Requested target CPUs: ${requested}"
  elog " Actual target CPUs: ${actual}"
  return 1
}
|
||||
|
||||
# Configures and builds each target specified by $1, and then builds
|
||||
# VPX.framework.
|
||||
build_framework() {
|
||||
|
@ -157,7 +196,12 @@ build_framework() {
|
|||
for target in ${targets}; do
|
||||
build_target "${target}"
|
||||
target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
|
||||
lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
|
||||
if [ "${ENABLE_SHARED}" = "yes" ]; then
|
||||
local suffix="dylib"
|
||||
else
|
||||
local suffix="a"
|
||||
fi
|
||||
lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}"
|
||||
done
|
||||
|
||||
cd "${ORIG_PWD}"
|
||||
|
@ -176,13 +220,25 @@ build_framework() {
|
|||
# Copy in vpx_version.h.
|
||||
cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"
|
||||
|
||||
vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
|
||||
if [ "${ENABLE_SHARED}" = "yes" ]; then
|
||||
# Adjust the dylib's name so dynamic linking in apps works as expected.
|
||||
install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX
|
||||
|
||||
# Copy in Info.plist.
|
||||
cat "${SCRIPT_DIR}/ios-Info.plist" \
|
||||
| sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
|
||||
| sed "s/\${VERSION}/${VERSION}/g" \
|
||||
| sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
|
||||
> "${FRAMEWORK_DIR}/Info.plist"
|
||||
fi
|
||||
|
||||
# Confirm VPX.framework/VPX contains the targets requested.
|
||||
verify_framework_targets ${targets}
|
||||
|
||||
vlog "Created fat library ${FRAMEWORK_LIB} containing:"
|
||||
for lib in ${lib_list}; do
|
||||
vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')"
|
||||
done
|
||||
|
||||
# TODO(tomfinegan): Verify that expected targets are included within
|
||||
# VPX.framework/VPX via lipo -info.
|
||||
}
|
||||
|
||||
# Trap function. Cleans up the subtree used to build all targets contained in
|
||||
|
@ -213,6 +269,7 @@ iosbuild_usage() {
|
|||
cat << EOF
|
||||
Usage: ${0##*/} [arguments]
|
||||
--help: Display this message and exit.
|
||||
--enable-shared: Build a dynamic framework for use on iOS 8 or later.
|
||||
--extra-configure-args <args>: Extra args to pass when configuring libvpx.
|
||||
--macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
|
||||
and x86_64. Allows linking to framework when builds target MacOSX
|
||||
|
@ -251,6 +308,9 @@ while [ -n "$1" ]; do
|
|||
iosbuild_usage
|
||||
exit
|
||||
;;
|
||||
--enable-shared)
|
||||
ENABLE_SHARED=yes
|
||||
;;
|
||||
--preserve-build-output)
|
||||
PRESERVE_BUILD_OUTPUT=yes
|
||||
;;
|
||||
|
@ -278,6 +338,21 @@ while [ -n "$1" ]; do
|
|||
shift
|
||||
done
|
||||
|
||||
if [ "${ENABLE_SHARED}" = "yes" ]; then
|
||||
CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
|
||||
fi
|
||||
|
||||
FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}")
|
||||
VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
|
||||
|
||||
if [ "$ENABLE_SHARED" = "yes" ]; then
|
||||
IOS_VERSION_OPTIONS="--enable-shared"
|
||||
IOS_VERSION_MIN="8.0"
|
||||
else
|
||||
IOS_VERSION_OPTIONS=""
|
||||
IOS_VERSION_MIN="6.0"
|
||||
fi
|
||||
|
||||
if [ "${VERBOSE}" = "yes" ]; then
|
||||
cat << EOF
|
||||
BUILD_ROOT=${BUILD_ROOT}
|
||||
|
@ -285,6 +360,7 @@ cat << EOF
|
|||
CONFIGURE_ARGS=${CONFIGURE_ARGS}
|
||||
EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
|
||||
FRAMEWORK_DIR=${FRAMEWORK_DIR}
|
||||
FRAMEWORK_LIB=${FRAMEWORK_LIB}
|
||||
HEADER_DIR=${HEADER_DIR}
|
||||
LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
|
||||
LIPO=${LIPO}
|
||||
|
@ -292,8 +368,13 @@ cat << EOF
|
|||
ORIG_PWD=${ORIG_PWD}
|
||||
PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
|
||||
TARGETS="$(print_list "" ${TARGETS})"
|
||||
ENABLE_SHARED=${ENABLE_SHARED}
|
||||
OSX_TARGETS="${OSX_TARGETS}"
|
||||
SIM_TARGETS="${SIM_TARGETS}"
|
||||
SCRIPT_DIR="${SCRIPT_DIR}"
|
||||
FULLVERSION="${FULLVERSION}"
|
||||
VERSION="${VERSION}"
|
||||
IOS_VERSION_MIN="${IOS_VERSION_MIN}"
|
||||
EOF
|
||||
fi
|
||||
|
||||
|
|
|
@ -39,11 +39,12 @@ fix_path() {
|
|||
}
|
||||
|
||||
# Corrects the paths in file_list in one pass for efficiency.
|
||||
# $1 is the name of the array to be modified.
|
||||
fix_file_list() {
|
||||
# TODO(jzern): this could be more generic and take the array as a param.
|
||||
files=$(fix_path "${file_list[@]}")
|
||||
declare -n array_ref=$1
|
||||
files=$(fix_path "${array_ref[@]}")
|
||||
local IFS=$'\n'
|
||||
file_list=($files)
|
||||
array_ref=($files)
|
||||
}
|
||||
|
||||
generate_uuid() {
|
||||
|
|
|
@ -24,8 +24,9 @@ out_file=${2}
|
|||
id=${3:-VERSION_STRING}
|
||||
|
||||
git_version_id=""
|
||||
if [ -d "${source_path}/.git" ]; then
|
||||
if [ -e "${source_path}/.git" ]; then
|
||||
# Source Path is a git working copy. Check for local modifications.
|
||||
# Note that git submodules may have a file as .git, not a directory.
|
||||
export GIT_DIR="${source_path}/.git"
|
||||
git_version_id=`git describe --match=v[0-9]* 2>/dev/null`
|
||||
fi
|
||||
|
|
|
@ -98,7 +98,6 @@ EOF
|
|||
|
||||
# all_platforms is a list of all supported target platforms. Maintain
|
||||
# alphabetically by architecture, generic-gnu last.
|
||||
all_platforms="${all_platforms} armv6-darwin-gcc"
|
||||
all_platforms="${all_platforms} armv6-linux-rvct"
|
||||
all_platforms="${all_platforms} armv6-linux-gcc"
|
||||
all_platforms="${all_platforms} armv6-none-rvct"
|
||||
|
@ -191,12 +190,12 @@ if [ ${doxy_major:-0} -ge 1 ]; then
|
|||
fi
|
||||
|
||||
# disable codecs when their source directory does not exist
|
||||
[ -d "${source_path}/vp8" ] || disable_feature vp8
|
||||
[ -d "${source_path}/vp9" ] || disable_feature vp9
|
||||
[ -d "${source_path}/vp10" ] || disable_feature vp10
|
||||
[ -d "${source_path}/vp8" ] || disable_codec vp8
|
||||
[ -d "${source_path}/vp9" ] || disable_codec vp9
|
||||
[ -d "${source_path}/vp10" ] || disable_codec vp10
|
||||
|
||||
# disable vp10 codec by default
|
||||
disable_feature vp10
|
||||
disable_codec vp10
|
||||
|
||||
# install everything except the sources, by default. sources will have
|
||||
# to be enabled when doing dist builds, since that's no longer a common
|
||||
|
@ -406,15 +405,19 @@ process_cmdline() {
|
|||
for opt do
|
||||
optval="${opt#*=}"
|
||||
case "$opt" in
|
||||
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
|
||||
--disable-codecs)
|
||||
for c in ${CODEC_FAMILIES}; do disable_codec $c; done
|
||||
;;
|
||||
--enable-?*|--disable-?*)
|
||||
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
|
||||
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
|
||||
if is_in ${option} ${EXPERIMENT_LIST}; then
|
||||
if enabled experimental; then
|
||||
${action}_feature $option
|
||||
else
|
||||
log_echo "Ignoring $opt -- not in experimental mode."
|
||||
fi
|
||||
elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
|
||||
${action}_codec ${option}
|
||||
else
|
||||
process_common_cmdline $opt
|
||||
fi
|
||||
|
@ -428,14 +431,6 @@ process_cmdline() {
|
|||
post_process_cmdline() {
|
||||
c=""
|
||||
|
||||
# If the codec family is disabled, disable all components of that family.
|
||||
# If the codec family is enabled, enable all components of that family.
|
||||
log_echo "Configuring selected codecs"
|
||||
for c in ${CODECS}; do
|
||||
disabled ${c%%_*} && disable_feature ${c}
|
||||
enabled ${c%%_*} && enable_feature ${c}
|
||||
done
|
||||
|
||||
# Enable all detected codecs, if they haven't been disabled
|
||||
for c in ${CODECS}; do soft_enable $c; done
|
||||
|
||||
|
@ -530,13 +525,18 @@ process_detect() {
|
|||
# Can only build shared libs on a subset of platforms. Doing this check
|
||||
# here rather than at option parse time because the target auto-detect
|
||||
# magic happens after the command line has been parsed.
|
||||
if ! enabled linux && ! enabled os2; then
|
||||
case "${tgt_os}" in
|
||||
linux|os2|darwin*|iphonesimulator*)
|
||||
# Supported platforms
|
||||
;;
|
||||
*)
|
||||
if enabled gnu; then
|
||||
echo "--enable-shared is only supported on ELF; assuming this is OK"
|
||||
else
|
||||
die "--enable-shared only supported on ELF and OS/2 for now"
|
||||
die "--enable-shared only supported on ELF, OS/2, and Darwin for now"
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
if [ -z "$CC" ] || enabled external_build; then
|
||||
echo "Bypassing toolchain for environment detection."
|
||||
|
|
|
@ -109,8 +109,8 @@ static const char *exec_name;
|
|||
void usage_exit(void) {
|
||||
fprintf(stderr,
|
||||
"Usage: %s <codec> <width> <height> <infile> <outfile> "
|
||||
"<keyframe-interval> [<error-resilient>]\nSee comments in "
|
||||
"simple_encoder.c for more information.\n",
|
||||
"<keyframe-interval> <error-resilient> <frames to encode>\n"
|
||||
"See comments in simple_encoder.c for more information.\n",
|
||||
exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@ -147,6 +147,7 @@ static int encode_frame(vpx_codec_ctx_t *codec,
|
|||
return got_pkts;
|
||||
}
|
||||
|
||||
// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
|
||||
int main(int argc, char **argv) {
|
||||
FILE *infile = NULL;
|
||||
vpx_codec_ctx_t codec;
|
||||
|
@ -157,12 +158,11 @@ int main(int argc, char **argv) {
|
|||
VpxVideoInfo info = {0};
|
||||
VpxVideoWriter *writer = NULL;
|
||||
const VpxInterface *encoder = NULL;
|
||||
const int fps = 30; // TODO(dkovalev) add command line argument
|
||||
const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument
|
||||
const int fps = 30;
|
||||
const int bitrate = 200;
|
||||
int keyframe_interval = 0;
|
||||
|
||||
// TODO(dkovalev): Add some simple command line parsing code to make the
|
||||
// command line more flexible.
|
||||
int max_frames = 0;
|
||||
int frames_encoded = 0;
|
||||
const char *codec_arg = NULL;
|
||||
const char *width_arg = NULL;
|
||||
const char *height_arg = NULL;
|
||||
|
@ -172,7 +172,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
exec_name = argv[0];
|
||||
|
||||
if (argc < 7)
|
||||
if (argc != 9)
|
||||
die("Invalid number of arguments");
|
||||
|
||||
codec_arg = argv[1];
|
||||
|
@ -181,6 +181,7 @@ int main(int argc, char **argv) {
|
|||
infile_arg = argv[4];
|
||||
outfile_arg = argv[5];
|
||||
keyframe_interval_arg = argv[6];
|
||||
max_frames = strtol(argv[8], NULL, 0);
|
||||
|
||||
encoder = get_vpx_encoder_by_name(codec_arg);
|
||||
if (!encoder)
|
||||
|
@ -219,7 +220,7 @@ int main(int argc, char **argv) {
|
|||
cfg.g_timebase.num = info.time_base.numerator;
|
||||
cfg.g_timebase.den = info.time_base.denominator;
|
||||
cfg.rc_target_bitrate = bitrate;
|
||||
cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;
|
||||
cfg.g_error_resilient = strtol(argv[7], NULL, 0);
|
||||
|
||||
writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
|
||||
if (!writer)
|
||||
|
@ -237,6 +238,9 @@ int main(int argc, char **argv) {
|
|||
if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
|
||||
flags |= VPX_EFLAG_FORCE_KF;
|
||||
encode_frame(&codec, &raw, frame_count++, flags, writer);
|
||||
frames_encoded++;
|
||||
if (max_frames > 0 && frames_encoded >= max_frames)
|
||||
break;
|
||||
}
|
||||
|
||||
// Flush encoder.
|
||||
|
|
|
@ -59,7 +59,9 @@
|
|||
static const char *exec_name;
|
||||
|
||||
void usage_exit(void) {
|
||||
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
|
||||
fprintf(stderr,
|
||||
"Usage: %s <codec> <width> <height> <infile> <outfile> "
|
||||
"<frame limit>\n",
|
||||
exec_name);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
@ -129,7 +131,8 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
|
|||
static vpx_fixed_buf_t pass0(vpx_image_t *raw,
|
||||
FILE *infile,
|
||||
const VpxInterface *encoder,
|
||||
const vpx_codec_enc_cfg_t *cfg) {
|
||||
const vpx_codec_enc_cfg_t *cfg,
|
||||
int max_frames) {
|
||||
vpx_codec_ctx_t codec;
|
||||
int frame_count = 0;
|
||||
vpx_fixed_buf_t stats = {NULL, 0};
|
||||
|
@ -142,6 +145,8 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw,
|
|||
++frame_count;
|
||||
get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
|
||||
&stats);
|
||||
if (max_frames > 0 && frame_count >= max_frames)
|
||||
break;
|
||||
}
|
||||
|
||||
// Flush encoder.
|
||||
|
@ -159,7 +164,8 @@ static void pass1(vpx_image_t *raw,
|
|||
FILE *infile,
|
||||
const char *outfile_name,
|
||||
const VpxInterface *encoder,
|
||||
const vpx_codec_enc_cfg_t *cfg) {
|
||||
const vpx_codec_enc_cfg_t *cfg,
|
||||
int max_frames) {
|
||||
VpxVideoInfo info = {
|
||||
encoder->fourcc,
|
||||
cfg->g_w,
|
||||
|
@ -181,6 +187,9 @@ static void pass1(vpx_image_t *raw,
|
|||
while (vpx_img_read(raw, infile)) {
|
||||
++frame_count;
|
||||
encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer);
|
||||
|
||||
if (max_frames > 0 && frame_count >= max_frames)
|
||||
break;
|
||||
}
|
||||
|
||||
// Flush encoder.
|
||||
|
@ -213,11 +222,14 @@ int main(int argc, char **argv) {
|
|||
const char *const height_arg = argv[3];
|
||||
const char *const infile_arg = argv[4];
|
||||
const char *const outfile_arg = argv[5];
|
||||
int max_frames = 0;
|
||||
exec_name = argv[0];
|
||||
|
||||
if (argc != 6)
|
||||
if (argc != 7)
|
||||
die("Invalid number of arguments.");
|
||||
|
||||
max_frames = strtol(argv[6], NULL, 0);
|
||||
|
||||
encoder = get_vpx_encoder_by_name(codec_arg);
|
||||
if (!encoder)
|
||||
die("Unsupported codec.");
|
||||
|
@ -249,13 +261,13 @@ int main(int argc, char **argv) {
|
|||
|
||||
// Pass 0
|
||||
cfg.g_pass = VPX_RC_FIRST_PASS;
|
||||
stats = pass0(&raw, infile, encoder, &cfg);
|
||||
stats = pass0(&raw, infile, encoder, &cfg, max_frames);
|
||||
|
||||
// Pass 1
|
||||
rewind(infile);
|
||||
cfg.g_pass = VPX_RC_LAST_PASS;
|
||||
cfg.rc_twopass_stats_in = stats;
|
||||
pass1(&raw, infile, outfile_arg, encoder, &cfg);
|
||||
pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);
|
||||
free(stats.buf);
|
||||
|
||||
vpx_img_free(&raw);
|
||||
|
|
|
@ -715,7 +715,7 @@ int main(int argc, char **argv) {
|
|||
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
|
||||
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
|
||||
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
|
||||
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
|
||||
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
|
||||
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
|
||||
|
|
2
ivfdec.c
2
ivfdec.c
|
@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) {
|
|||
// we can guess the framerate using only the timebase in this
|
||||
// case. Other files would require reading ahead to guess the
|
||||
// timebase, like we do for webm.
|
||||
if (*num < 1000) {
|
||||
if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) {
|
||||
// Correct for the factor of 2 applied to the timebase in the encoder.
|
||||
if (*num & 1)
|
||||
*den *= 2;
|
||||
|
|
10
libs.mk
10
libs.mk
|
@ -183,6 +183,9 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
|
|||
endif
|
||||
CODEC_EXPORTS-yes += vpx/exports_com
|
||||
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
|
||||
ifeq ($(CONFIG_SPATIAL_SVC),yes)
|
||||
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
|
||||
endif
|
||||
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
|
||||
|
||||
INSTALL-LIBS-yes += include/vpx/vpx_codec.h
|
||||
|
@ -270,6 +273,12 @@ EXPORT_FILE := libvpx.syms
|
|||
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
|
||||
libvpx.dylib )
|
||||
else
|
||||
ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS))
|
||||
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
|
||||
SHARED_LIB_SUF := .dylib
|
||||
EXPORT_FILE := libvpx.syms
|
||||
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, libvpx.dylib)
|
||||
else
|
||||
ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
|
||||
LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
|
||||
SHARED_LIB_SUF := _dll.a
|
||||
|
@ -285,6 +294,7 @@ LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
|
|||
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
|
||||
$(notdir $(LIBVPX_SO_SYMLINKS)) \
|
||||
|
|
15
md5_utils.c
15
md5_utils.c
|
@ -150,12 +150,23 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
|
|||
#define MD5STEP(f,w,x,y,z,in,s) \
|
||||
(w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
|
||||
|
||||
/*
 * VPX_NO_UNSIGNED_OVERFLOW_CHECK expands to
 * __attribute__((no_sanitize("unsigned-integer-overflow"))) when building
 * with clang and __has_attribute reports no_sanitize as available; in every
 * other configuration it expands to nothing.
 * NOTE(review): presumably needed because the MD5STEP arithmetic on UWORD32
 * values is expected to wrap -- confirm against the sanitizer reports.
 */
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)
#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \
  __attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
#endif

/* Fallback: define the macro as empty so it can be used unconditionally. */
#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK
#define VPX_NO_UNSIGNED_OVERFLOW_CHECK
#endif
|
||||
|
||||
/*
|
||||
* The core of the MD5 algorithm, this alters an existing MD5 hash to
|
||||
* reflect the addition of 16 longwords of new data. MD5Update blocks
|
||||
* the data and converts bytes into longwords for this routine.
|
||||
*/
|
||||
void
|
||||
VPX_NO_UNSIGNED_OVERFLOW_CHECK void
|
||||
MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
|
||||
register UWORD32 a, b, c, d;
|
||||
|
||||
|
@ -238,4 +249,6 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
|
|||
buf[3] += d;
|
||||
}
|
||||
|
||||
#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK
|
||||
|
||||
#endif
|
||||
|
|
|
@ -32,6 +32,12 @@ class ACMRandom {
|
|||
return (value >> 15) & 0xffff;
|
||||
}
|
||||
|
||||
int16_t Rand9Signed(void) {
|
||||
// Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
|
||||
const uint32_t value = random_.Generate(512);
|
||||
return static_cast<int16_t>(value) - 256;
|
||||
}
|
||||
|
||||
uint8_t Rand8(void) {
|
||||
const uint32_t value =
|
||||
random_.Generate(testing::internal::Random::kMaxRange);
|
||||
|
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <math.h>
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// TODO(jimbankoski): make width and height integers not unsigned.
|
||||
typedef void (*AddNoiseFunc)(unsigned char *start, char *noise,
|
||||
char blackclamp[16], char whiteclamp[16],
|
||||
char bothclamp[16], unsigned int width,
|
||||
unsigned int height, int pitch);
|
||||
|
||||
class AddNoiseTest
|
||||
: public ::testing::TestWithParam<AddNoiseFunc> {
|
||||
public:
|
||||
virtual void TearDown() {
|
||||
libvpx_test::ClearSystemState();
|
||||
}
|
||||
virtual ~AddNoiseTest() {}
|
||||
};
|
||||
|
||||
// Returns the population standard deviation (divisor 6, not 5) of the six
// given samples.
double stddev6(char a, char b, char c, char d, char e, char f) {
  const char samples[6] = { a, b, c, d, e, f };
  const double mean = (a + b + c + d + e + f) / 6.0;

  // Accumulate the squared deviations in argument order.
  double sum_sq = 0.0;
  for (int i = 0; i < 6; ++i) {
    const double dev = samples[i] - mean;
    sum_sq += dev * dev;
  }
  return sqrt(sum_sq / 6.0);
}
|
||||
|
||||
// TODO(jimbankoski): The following 2 functions are duplicated in each codec.
|
||||
// For now the vp9 one has been copied into the test as is. We should normalize
|
||||
// these in vpx_dsp and not have 3 copies of these unless there is different
|
||||
// noise we add for each codec.
|
||||
|
||||
// Evaluates the normal probability density with standard deviation sigma and
// mean mu at x. Pi is approximated by the literal 3.14159265.
double gaussian(double sigma, double mu, double x) {
  const double scale = 1 / (sigma * sqrt(2.0 * 3.14159265));
  const double offset = x - mu;
  const double exponent = -offset * offset / (2 * sigma * sigma);
  return scale * exp(exponent);
}
|
||||
|
||||
int setup_noise(int size_noise, char *noise) {
|
||||
char char_dist[300];
|
||||
const int ai = 4;
|
||||
const int qi = 24;
|
||||
const double sigma = ai + .5 + .6 * (63 - qi) / 63.0;
|
||||
|
||||
/* set up a lookup table of 256 entries that matches
|
||||
* a gaussian distribution with sigma determined by q.
|
||||
*/
|
||||
int next = 0;
|
||||
|
||||
for (int i = -32; i < 32; i++) {
|
||||
int a_i = (int) (0.5 + 256 * gaussian(sigma, 0, i));
|
||||
|
||||
if (a_i) {
|
||||
for (int j = 0; j < a_i; j++) {
|
||||
char_dist[next + j] = (char)(i);
|
||||
}
|
||||
|
||||
next = next + a_i;
|
||||
}
|
||||
}
|
||||
|
||||
for (; next < 256; next++)
|
||||
char_dist[next] = 0;
|
||||
|
||||
for (int i = 0; i < size_noise; i++) {
|
||||
noise[i] = char_dist[rand() & 0xff]; // NOLINT
|
||||
}
|
||||
|
||||
// Returns the most negative value in distribution.
|
||||
return char_dist[0];
|
||||
}
|
||||
|
||||
// Runs the noise function under test on flat 64x64 images and checks that
//  - noise was added and is not constant along any run of 6 pixels, either
//    horizontally or vertically;
//  - an all-255 image does not roll over and an all-0 image does not roll
//    under (see the thresholds asserted below).
TEST_P(AddNoiseTest, CheckNoiseAdded) {
  DECLARE_ALIGNED(16, char, blackclamp[16]);
  DECLARE_ALIGNED(16, char, whiteclamp[16]);
  DECLARE_ALIGNED(16, char, bothclamp[16]);
  const int width = 64;
  const int height = 64;
  const int image_size = width * height;
  char noise[3072];

  // setup_noise() returns the most negative table value; the clamps are
  // derived from its magnitude.
  const int clamp = setup_noise(3072, noise);
  for (int i = 0; i < 16; i++) {
    blackclamp[i] = -clamp;
    whiteclamp[i] = -clamp;
    bothclamp[i] = -2 * clamp;
  }

  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  // Stop here rather than writing through a null pointer if allocation failed.
  ASSERT_TRUE(s != NULL) << "vpx_calloc(" << image_size << ", 1) failed";
  memset(s, 99, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't end up having either the same or no added
  // noise either vertically or horizontally.
  for (int i = 0; i < image_size - 6 * width - 6; ++i) {
    const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99,
                              s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99);
    const double vd = stddev6(s[i] - 99, s[i + width] - 99,
                              s[i + 2 * width] - 99, s[i + 3 * width] - 99,
                              s[i + 4 * width] - 99, s[i + 5 * width] - 99);

    EXPECT_NE(hd, 0);
    EXPECT_NE(vd, 0);
  }

  // Initialize pixels in the image to 255 and check for roll over.
  memset(s, 255, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't roll over.
  for (int i = 0; i < image_size; ++i) {
    EXPECT_GT(static_cast<int>(s[i]), 10) << "i = " << i;
  }

  // Initialize pixels in the image to 0 and check for roll under.
  memset(s, 0, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't roll under.
  for (int i = 0; i < image_size; ++i) {
    EXPECT_LT(static_cast<int>(s[i]), 245) << "i = " << i;
  }

  vpx_free(s);
}
|
||||
|
||||
// Runs the noise function under test and vpx_plane_add_noise_c() on two
// identical flat images, reseeding rand() with the same seed before each
// call, and expects byte-identical results.
TEST_P(AddNoiseTest, CheckCvsAssembly) {
  DECLARE_ALIGNED(16, char, blackclamp[16]);
  DECLARE_ALIGNED(16, char, whiteclamp[16]);
  DECLARE_ALIGNED(16, char, bothclamp[16]);
  const int width = 64;
  const int height = 64;
  const int image_size = width * height;
  char noise[3072];

  // setup_noise() returns the most negative table value; the clamps are
  // derived from its magnitude.
  const int clamp = setup_noise(3072, noise);
  for (int i = 0; i < 16; i++) {
    blackclamp[i] = -clamp;
    whiteclamp[i] = -clamp;
    bothclamp[i] = -2 * clamp;
  }

  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  // Stop here rather than writing through a null pointer if allocation failed.
  ASSERT_TRUE(s != NULL) << "vpx_calloc(" << image_size << ", 1) failed";
  uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  if (d == NULL) {
    // Release the first buffer before giving up on the test.
    vpx_free(s);
    FAIL() << "vpx_calloc(" << image_size << ", 1) failed";
  }

  memset(s, 99, image_size);
  memset(d, 99, image_size);

  // Same seed before each call, so both runs see the same rand() sequence.
  srand(0);
  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));
  srand(0);
  ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp,
                                                 whiteclamp, bothclamp,
                                                 width, height, width));

  for (int i = 0; i < image_size; ++i) {
    EXPECT_EQ(static_cast<int>(s[i]), static_cast<int>(d[i])) << "i = " << i;
  }

  vpx_free(d);
  vpx_free(s);
}
|
||||
|
||||
// The C implementation is always tested; the optimized variants only when
// the corresponding HAVE_* configuration macro is set.
INSTANTIATE_TEST_CASE_P(C, AddNoiseTest,
                        ::testing::Values(vpx_plane_add_noise_c));

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest,
                        ::testing::Values(vpx_plane_add_noise_sse2));
#endif  // HAVE_SSE2

#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest,
                        ::testing::Values(vpx_plane_add_noise_msa));
#endif  // HAVE_MSA
|
||||
} // namespace
|
|
@ -138,7 +138,8 @@ void filter_block2d_8_c(const uint8_t *src_ptr,
|
|||
// and filter_max_width = 16
|
||||
//
|
||||
uint8_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
|
||||
const int intermediate_next_stride = 1 - intermediate_height * output_width;
|
||||
const int intermediate_next_stride =
|
||||
1 - static_cast<int>(intermediate_height * output_width);
|
||||
|
||||
// Horizontal pass (src -> transposed intermediate).
|
||||
uint8_t *output_ptr = intermediate_buffer;
|
||||
|
@ -250,7 +251,8 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
|
|||
* and filter_max_width = 16
|
||||
*/
|
||||
uint16_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
|
||||
const int intermediate_next_stride = 1 - intermediate_height * output_width;
|
||||
const int intermediate_next_stride =
|
||||
1 - static_cast<int>(intermediate_height * output_width);
|
||||
|
||||
// Horizontal pass (src -> transposed intermediate).
|
||||
{
|
||||
|
|
|
@ -90,7 +90,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
|
|||
<< pkt->data.frame.pts;
|
||||
}
|
||||
|
||||
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
|
||||
const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
|
||||
|
||||
// Subtract from the buffer the bits associated with a played back frame.
|
||||
bits_in_buffer_model_ -= frame_size_in_bits;
|
||||
|
@ -450,7 +450,28 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
|
|||
int denoiser_offon_period_;
|
||||
};
|
||||
|
||||
// Check basic rate targeting,
|
||||
// Check basic rate targeting for VBR mode.
|
||||
TEST_P(DatarateTestVP9Large, BasicRateTargetingVBR) {
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.g_error_resilient = 0;
|
||||
cfg_.rc_end_usage = VPX_VBR;
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
|
||||
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
|
||||
30, 1, 0, 300);
|
||||
for (int i = 400; i <= 800; i += 400) {
|
||||
cfg_.rc_target_bitrate = i;
|
||||
ResetModel();
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
|
||||
<< " The datarate for the file is lower than target by too much!";
|
||||
ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25)
|
||||
<< " The datarate for the file is greater than target by too much!";
|
||||
}
|
||||
}
|
||||
|
||||
// Check basic rate targeting for CBR.
|
||||
TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
|
||||
cfg_.rc_buf_initial_sz = 500;
|
||||
cfg_.rc_buf_optimal_sz = 500;
|
||||
|
@ -474,7 +495,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
|
|||
}
|
||||
}
|
||||
|
||||
// Check basic rate targeting,
|
||||
// Check basic rate targeting for CBR.
|
||||
TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
|
||||
::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
|
||||
|
||||
|
|
|
@ -365,10 +365,10 @@ class Trans16x16TestBase {
|
|||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const uint32_t diff =
|
||||
const int32_t diff =
|
||||
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
|
||||
#else
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const int32_t diff = dst[j] - src[j];
|
||||
#endif
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
|
|
|
@ -147,10 +147,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
|
|||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const uint32_t diff =
|
||||
const int32_t diff =
|
||||
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
|
||||
#else
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const int32_t diff = dst[j] - src[j];
|
||||
#endif
|
||||
const uint32_t error = diff * diff;
|
||||
if (max_error < error)
|
||||
|
|
|
@ -302,22 +302,12 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8, 16)));
|
||||
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, Trans4x4WHT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0,
|
||||
VPX_BITS_8, 16)));
|
||||
#endif
|
||||
|
||||
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
|
||||
!CONFIG_EMULATE_HARDWARE
|
||||
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans4x4WHT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0,
|
||||
VPX_BITS_8, 16)));
|
||||
make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8, 16),
|
||||
make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8, 16)));
|
||||
#endif
|
||||
|
||||
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
|
||||
|
|
|
@ -425,10 +425,10 @@ class FwdTrans8x8TestBase {
|
|||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const uint32_t diff =
|
||||
const int diff =
|
||||
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
|
||||
#else
|
||||
const uint32_t diff = dst[j] - src[j];
|
||||
const int diff = dst[j] - src[j];
|
||||
#endif
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
|
||||
|
@ -458,7 +458,7 @@ class FwdTrans8x8TestBase {
|
|||
coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
|
||||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
const uint32_t diff = coeff[j] - coeff_r[j];
|
||||
const int32_t diff = coeff[j] - coeff_r[j];
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
|
||||
<< "Error: 8x8 DCT has error " << error
|
||||
|
@ -511,10 +511,10 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
|
|||
|
||||
for (int j = 0; j < kNumCoeffs; ++j) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
const uint32_t diff =
|
||||
const int diff =
|
||||
bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
|
||||
#else
|
||||
const uint32_t diff = dst[j] - ref[j];
|
||||
const int diff = dst[j] - ref[j];
|
||||
#endif
|
||||
const uint32_t error = diff * diff;
|
||||
EXPECT_EQ(0u, error)
|
||||
|
|
|
@ -0,0 +1,220 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using ::libvpx_test::ACMRandom;
|
||||
|
||||
typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
|
||||
|
||||
// One-dimensional 8-point Hadamard pass used by the reference transforms.
// Reads the 8 inputs a[0], a[a_stride], ..., a[7 * a_stride] and writes 8
// results to out[0..7] in the permuted order given by the tables below.
void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  // Stage 1: sum and difference of adjacent input pairs.
  int16_t stage1[8];
  for (int pair = 0; pair < 4; ++pair) {
    const int16_t first = a[(2 * pair) * a_stride];
    const int16_t second = a[(2 * pair + 1) * a_stride];
    stage1[2 * pair + 0] = first + second;
    stage1[2 * pair + 1] = first - second;
  }

  // Stage 2: butterflies at distance 2 inside each group of four.
  int16_t stage2[8];
  for (int base = 0; base < 8; base += 4) {
    const int16_t *const in = stage1 + base;
    int16_t *const dst = stage2 + base;
    dst[0] = in[0] + in[2];
    dst[1] = in[1] + in[3];
    dst[2] = in[0] - in[2];
    dst[3] = in[1] - in[3];
  }

  // Stage 3: butterflies at distance 4, stored in permuted positions.
  static const int kSumPos[4] = { 0, 7, 3, 4 };
  static const int kDiffPos[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    out[kSumPos[k]] = stage2[k] + stage2[k + 4];
    out[kDiffPos[k]] = stage2[k] - stage2[k + 4];
  }
}
|
||||
|
||||
void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
|
||||
int16_t buf[64];
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
hadamard_loop(a + i, a_stride, buf + i * 8);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
hadamard_loop(buf + i, 8, b + i * 8);
|
||||
}
|
||||
}
|
||||
|
||||
// Reference 16x16 Hadamard transform: four 8x8 reference transforms (one per
// quadrant) followed by a combining stage across the four 64-entry results.
void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit. */
  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);

  /* Overlay the 8x8 blocks and combine. */
  for (int idx = 0; idx < 64; ++idx) {
    int16_t *const q = b + idx;

    /* 8x8 steps the range up to 15 bits. */
    const int16_t a0 = q[0];
    const int16_t a1 = q[64];
    const int16_t a2 = q[128];
    const int16_t a3 = q[192];

    /* Prevent the result from escaping int16_t. */
    const int16_t b0 = (a0 + a1) >> 1;
    const int16_t b1 = (a0 - a1) >> 1;
    const int16_t b2 = (a2 + a3) >> 1;
    const int16_t b3 = (a2 - a3) >> 1;

    /* Store a 16 bit value. */
    q[0] = b0 + b2;
    q[64] = b1 + b3;
    q[128] = b0 - b2;
    q[192] = b1 - b3;
  }
}
|
||||
|
||||
// Shared fixture for the Hadamard tests: stores the function under test and
// a random generator that is reset to the deterministic seed for each test.
class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
 public:
  virtual void SetUp() {
    rnd_.Reset(ACMRandom::DeterministicSeed());
    h_func_ = GetParam();
  }

 protected:
  HadamardFunc h_func_;  // Implementation under test (the test parameter).
  ACMRandom rnd_;        // Source of the random input samples.
};
|
||||
|
||||
class Hadamard8x8Test : public HadamardTestBase {};
|
||||
|
||||
// Transforms one random 8x8 block (stride 8) with both the reference and the
// function under test and compares the two sets of coefficients.
TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
  const int kNumCoeffs = 64;
  DECLARE_ALIGNED(16, int16_t, a[kNumCoeffs]);
  DECLARE_ALIGNED(16, int16_t, b[kNumCoeffs]);
  int16_t b_ref[kNumCoeffs];

  memset(b_ref, 0, sizeof(b_ref));
  memset(b, 0, sizeof(b));
  for (int k = 0; k < kNumCoeffs; ++k) {
    a[k] = rnd_.Rand9Signed();
  }

  reference_hadamard8x8(a, 8, b_ref);
  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));

  // The order of the output is not important. Sort before checking.
  std::sort(b_ref, b_ref + kNumCoeffs);
  std::sort(b, b + kNumCoeffs);
  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
|
||||
|
||||
// Repeats the reference comparison for input strides 8, 16, ..., 56 over a
// single random source buffer.
TEST_P(Hadamard8x8Test, VaryStride) {
  const int kNumCoeffs = 64;
  const int kInputSize = 64 * 8;
  DECLARE_ALIGNED(16, int16_t, a[kInputSize]);
  DECLARE_ALIGNED(16, int16_t, b[kNumCoeffs]);
  int16_t b_ref[kNumCoeffs];

  for (int k = 0; k < kInputSize; ++k) {
    a[k] = rnd_.Rand9Signed();
  }

  for (int stride = 8; stride < 64; stride += 8) {
    memset(b_ref, 0, sizeof(b_ref));
    memset(b, 0, sizeof(b));

    reference_hadamard8x8(a, stride, b_ref);
    ASM_REGISTER_STATE_CHECK(h_func_(a, stride, b));

    // The order of the output is not important. Sort before checking.
    std::sort(b_ref, b_ref + kNumCoeffs);
    std::sort(b, b + kNumCoeffs);
    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
|
||||
::testing::Values(&vpx_hadamard_8x8_c));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
|
||||
::testing::Values(&vpx_hadamard_8x8_sse2));
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
|
||||
INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
|
||||
::testing::Values(&vpx_hadamard_8x8_ssse3));
|
||||
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
|
||||
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
|
||||
::testing::Values(&vpx_hadamard_8x8_neon));
|
||||
#endif // HAVE_NEON
|
||||
|
||||
class Hadamard16x16Test : public HadamardTestBase {};
|
||||
|
||||
// Transforms one random 16x16 block (stride 16) with both the reference and
// the function under test and compares the two sets of coefficients.
TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
  const int kNumCoeffs = 16 * 16;
  DECLARE_ALIGNED(16, int16_t, a[kNumCoeffs]);
  DECLARE_ALIGNED(16, int16_t, b[kNumCoeffs]);
  int16_t b_ref[kNumCoeffs];

  memset(b_ref, 0, sizeof(b_ref));
  memset(b, 0, sizeof(b));
  for (int k = 0; k < kNumCoeffs; ++k) {
    a[k] = rnd_.Rand9Signed();
  }

  reference_hadamard16x16(a, 16, b_ref);
  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));

  // The order of the output is not important. Sort before checking.
  std::sort(b_ref, b_ref + kNumCoeffs);
  std::sort(b, b + kNumCoeffs);
  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
|
||||
|
||||
// Repeats the 16x16 reference comparison for input strides 8, 16, ..., 56
// over a single random source buffer.
TEST_P(Hadamard16x16Test, VaryStride) {
  const int kNumCoeffs = 16 * 16;
  const int kInputSize = 16 * 16 * 8;
  DECLARE_ALIGNED(16, int16_t, a[kInputSize]);
  DECLARE_ALIGNED(16, int16_t, b[kNumCoeffs]);
  int16_t b_ref[kNumCoeffs];

  for (int k = 0; k < kInputSize; ++k) {
    a[k] = rnd_.Rand9Signed();
  }

  for (int stride = 8; stride < 64; stride += 8) {
    memset(b_ref, 0, sizeof(b_ref));
    memset(b, 0, sizeof(b));

    reference_hadamard16x16(a, stride, b_ref);
    ASM_REGISTER_STATE_CHECK(h_func_(a, stride, b));

    // The order of the output is not important. Sort before checking.
    std::sort(b_ref, b_ref + kNumCoeffs);
    std::sort(b, b + kNumCoeffs);
    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
  }
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
|
||||
::testing::Values(&vpx_hadamard_16x16_c));
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
|
||||
::testing::Values(&vpx_hadamard_16x16_sse2));
|
||||
#endif // HAVE_SSE2
|
||||
|
||||
#if HAVE_NEON
|
||||
INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
|
||||
::testing::Values(&vpx_hadamard_16x16_neon));
|
||||
#endif // HAVE_NEON
|
||||
} // namespace
|
|
@ -0,0 +1,119 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/i420_video_source.h"
|
||||
#include "test/util.h"
|
||||
|
||||
namespace {
|
||||
class LevelTest
|
||||
: public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
|
||||
protected:
|
||||
LevelTest()
|
||||
: EncoderTest(GET_PARAM(0)),
|
||||
encoding_mode_(GET_PARAM(1)),
|
||||
cpu_used_(GET_PARAM(2)),
|
||||
min_gf_internal_(24),
|
||||
target_level_(0),
|
||||
level_(0) {}
|
||||
virtual ~LevelTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
SetMode(encoding_mode_);
|
||||
if (encoding_mode_ != ::libvpx_test::kRealTime) {
|
||||
cfg_.g_lag_in_frames = 25;
|
||||
cfg_.rc_end_usage = VPX_VBR;
|
||||
} else {
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
cfg_.rc_end_usage = VPX_CBR;
|
||||
}
|
||||
cfg_.rc_2pass_vbr_minsection_pct = 5;
|
||||
cfg_.rc_2pass_vbr_maxsection_pct = 2000;
|
||||
cfg_.rc_target_bitrate = 400;
|
||||
cfg_.rc_max_quantizer = 63;
|
||||
cfg_.rc_min_quantizer = 0;
|
||||
}
|
||||
|
||||
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
|
||||
::libvpx_test::Encoder *encoder) {
|
||||
if (video->frame() == 0) {
|
||||
encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
|
||||
encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
|
||||
encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
|
||||
if (encoding_mode_ != ::libvpx_test::kRealTime) {
|
||||
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
|
||||
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
|
||||
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
|
||||
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
|
||||
}
|
||||
}
|
||||
encoder->Control(VP9E_GET_LEVEL, &level_);
|
||||
ASSERT_LE(level_, 51);
|
||||
ASSERT_GE(level_, 0);
|
||||
}
|
||||
|
||||
::libvpx_test::TestMode encoding_mode_;
|
||||
int cpu_used_;
|
||||
int min_gf_internal_;
|
||||
int target_level_;
|
||||
int level_;
|
||||
};
|
||||
|
||||
// Test for keeping level stats only
|
||||
TEST_P(LevelTest, TestTargetLevel0) {
|
||||
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
|
||||
40);
|
||||
target_level_ = 0;
|
||||
min_gf_internal_ = 4;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_EQ(11, level_);
|
||||
|
||||
cfg_.rc_target_bitrate = 1600;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
ASSERT_EQ(20, level_);
|
||||
}
|
||||
|
||||
// Test for level control being turned off
|
||||
TEST_P(LevelTest, TestTargetLevel255) {
|
||||
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
|
||||
30);
|
||||
target_level_ = 255;
|
||||
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
|
||||
}
|
||||
|
||||
// Checks which values VP9E_SET_TARGET_LEVEL accepts on a freshly initialized
// VP9 encoder: the listed levels plus 0 and 255 must return VPX_CODEC_OK,
// every other value in [0, 256] must return VPX_CODEC_INVALID_PARAM.
TEST_P(LevelTest, TestTargetLevelApi) {
  // NOTE(review): `video` is not used by this test; kept in case its
  // construction matters - confirm and remove if it does not.
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1);
  static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo;
  vpx_codec_ctx_t enc;
  vpx_codec_enc_cfg_t cfg;

  // These are fatal: without a valid config and an initialized context the
  // control calls below would operate on uninitialized data.
  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));

  for (int level = 0; level <= 256; ++level) {
    bool accepted;
    switch (level) {
      case 0:
      case 10:
      case 11:
      case 20:
      case 21:
      case 30:
      case 31:
      case 40:
      case 41:
      case 50:
      case 51:
      case 52:
      case 60:
      case 61:
      case 62:
      case 255:
        accepted = true;
        break;
      default:
        accepted = false;
        break;
    }

    if (accepted) {
      EXPECT_EQ(VPX_CODEC_OK,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level))
          << "level = " << level;
    } else {
      EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level))
          << "level = " << level;
    }
  }
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
}
|
||||
|
||||
VP9_INSTANTIATE_TEST_CASE(LevelTest,
|
||||
::testing::Values(::libvpx_test::kTwoPassGood,
|
||||
::libvpx_test::kOnePassGood),
|
||||
::testing::Range(0, 9));
|
||||
} // namespace
|
|
@ -430,16 +430,6 @@ TEST_P(Loop8Test9Param, ValueCheck) {
|
|||
|
||||
using std::tr1::make_tuple;
|
||||
|
||||
#if HAVE_MMX && CONFIG_USE_X86INC && !CONFIG_VP9_HIGHBITDEPTH
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, Loop8Test6Param,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_lpf_horizontal_4_mmx,
|
||||
&vpx_lpf_horizontal_4_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_4_mmx,
|
||||
&vpx_lpf_vertical_4_c, 8)));
|
||||
#endif // HAVE_MMX
|
||||
|
||||
#if HAVE_SSE2
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
|
@ -497,12 +487,16 @@ INSTANTIATE_TEST_CASE_P(
|
|||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Loop8Test6Param,
|
||||
::testing::Values(
|
||||
make_tuple(&vpx_lpf_horizontal_4_sse2,
|
||||
&vpx_lpf_horizontal_4_c, 8),
|
||||
make_tuple(&vpx_lpf_horizontal_8_sse2,
|
||||
&vpx_lpf_horizontal_8_c, 8),
|
||||
make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
|
||||
&vpx_lpf_horizontal_edge_8_c, 8),
|
||||
make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
|
||||
&vpx_lpf_horizontal_edge_16_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_4_sse2,
|
||||
&vpx_lpf_vertical_4_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_8_sse2,
|
||||
&vpx_lpf_vertical_8_c, 8),
|
||||
make_tuple(&vpx_lpf_vertical_16_sse2,
|
||||
|
|
|
@ -0,0 +1,132 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
#include "test/acm_random.h"
|
||||
#include "test/register_state_check.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using ::libvpx_test::ACMRandom;
|
||||
|
||||
typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride,
|
||||
int *min, int *max);
|
||||
|
||||
// Fixture for the 8x8 min/max tests: stores the function under test and a
// random generator that is reset to the deterministic seed for each test.
class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
 public:
  virtual void SetUp() {
    rnd_.Reset(ACMRandom::DeterministicSeed());
    mm_func_ = GetParam();
  }

 protected:
  MinMaxFunc mm_func_;  // Implementation under test (the test parameter).
  ACMRandom rnd_;       // Source of the random input samples.
};
|
||||
|
||||
void reference_minmax(const uint8_t *a, int a_stride,
|
||||
const uint8_t *b, int b_stride,
|
||||
int *min_ret, int *max_ret) {
|
||||
int min = 255;
|
||||
int max = 0;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
for (int j = 0; j < 8; j++) {
|
||||
const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
|
||||
if (min > diff) min = diff;
|
||||
if (max < diff) max = diff;
|
||||
}
|
||||
}
|
||||
|
||||
*min_ret = min;
|
||||
*max_ret = max;
|
||||
}
|
||||
|
||||
// a is all zero and b is all 255 except for one element, which is set to its
// own index; that index must be reported as the minimum and 255 as the
// maximum.
TEST_P(MinMaxTest, MinValue) {
  uint8_t a[64], b[64];
  for (int pos = 0; pos < 64; ++pos) {
    memset(a, 0, sizeof(a));
    memset(b, 255, sizeof(b));
    b[pos] = pos;  // Set a minimum difference of pos.

    int min, max;
    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
    EXPECT_EQ(255, max);
    EXPECT_EQ(pos, min);
  }
}
|
||||
|
||||
// a and b are all zero except for one element of b, which is set to its own
// index; that index must be reported as the maximum and 0 as the minimum.
TEST_P(MinMaxTest, MaxValue) {
  uint8_t a[64], b[64];
  for (int pos = 0; pos < 64; ++pos) {
    memset(a, 0, sizeof(a));
    memset(b, 0, sizeof(b));
    b[pos] = pos;  // Set a maximum difference of pos.

    int min, max;
    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
    EXPECT_EQ(pos, max);
    EXPECT_EQ(0, min);
  }
}
|
||||
|
||||
// Compares the function under test with reference_minmax() on one pair of
// random 8x8 blocks (stride 8).
TEST_P(MinMaxTest, CompareReference) {
  uint8_t a[64], b[64];
  for (int k = 0; k < 64; ++k) {
    a[k] = rnd_.Rand8();
    b[k] = rnd_.Rand8();
  }

  int min_ref, max_ref;
  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);

  int min, max;
  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));

  EXPECT_EQ(max_ref, max);
  EXPECT_EQ(min_ref, min);
}
|
||||
|
||||
// Compares the function under test with reference_minmax() for every
// combination of a/b strides in {8, 16, ..., 64} over random data.
TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
  uint8_t a[8 * 64], b[8 * 64];
  for (int k = 0; k < 8 * 64; ++k) {
    a[k] = rnd_.Rand8();
    b[k] = rnd_.Rand8();
  }

  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
      int min_ref, max_ref;
      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);

      int min, max;
      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));

      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
    }
  }
}
|
||||
|
||||
// The C implementation is always tested; the optimized variants only when
// the corresponding HAVE_* configuration macro is set.
INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
                        ::testing::Values(&vpx_minmax_8x8_sse2));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
                        ::testing::Values(&vpx_minmax_8x8_neon));
#endif  // HAVE_NEON
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "test/codec_factory.h"
|
||||
#include "test/encode_test_driver.h"
|
||||
#include "test/util.h"
|
||||
#include "test/video_source.h"
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
||||
namespace {
|
||||
|
||||
const int kVideoSourceWidth = 320;
|
||||
const int kVideoSourceHeight = 240;
|
||||
const int kFramesToEncode = 2;
|
||||
|
||||
class RealtimeTest
|
||||
: public ::libvpx_test::EncoderTest,
|
||||
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
|
||||
protected:
|
||||
RealtimeTest()
|
||||
: EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
|
||||
virtual ~RealtimeTest() {}
|
||||
|
||||
virtual void SetUp() {
|
||||
InitializeConfig();
|
||||
cfg_.g_lag_in_frames = 0;
|
||||
SetMode(::libvpx_test::kRealTime);
|
||||
}
|
||||
|
||||
virtual void BeginPassHook(unsigned int /*pass*/) {
|
||||
// TODO(tomfinegan): We're changing the pass value here to make sure
|
||||
// we get frames when real time mode is combined with |g_pass| set to
|
||||
// VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
|
||||
// the pass value based on the mode passed into EncoderTest::SetMode(),
|
||||
// which overrides the one specified in SetUp() above.
|
||||
cfg_.g_pass = VPX_RC_FIRST_PASS;
|
||||
}
|
||||
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
|
||||
frame_packets_++;
|
||||
}
|
||||
|
||||
int frame_packets_;
|
||||
};
|
||||
|
||||
// Encodes kFramesToEncode random frames and expects exactly one frame packet
// per input frame.
TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
  ::libvpx_test::RandomVideoSource source;
  source.SetSize(kVideoSourceWidth, kVideoSourceHeight);
  source.set_limit(kFramesToEncode);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&source));
  EXPECT_EQ(kFramesToEncode, frame_packets_);
}
|
||||
|
||||
VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
|
||||
::testing::Values(::libvpx_test::kRealTime));
|
||||
VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
|
||||
::testing::Values(::libvpx_test::kRealTime));
|
||||
|
||||
} // namespace
|
|
@ -36,16 +36,10 @@
|
|||
#include <windows.h>
|
||||
#include <winnt.h>
|
||||
|
||||
namespace testing {
|
||||
namespace internal {
|
||||
|
||||
inline bool operator==(const M128A& lhs, const M128A& rhs) {
|
||||
return (lhs.Low == rhs.Low && lhs.High == rhs.High);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace testing
|
||||
|
||||
namespace libvpx_test {
|
||||
|
||||
// Compares the state of xmm[6-15] at construction with their state at
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include <stdio.h>
|
||||
|
||||
#include <climits>
|
||||
#include <vector>
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
|
@ -558,9 +560,13 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
|
|||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Verify that we get 1 resize down event in this test.
|
||||
ASSERT_EQ(1, resize_count) << "Resizing should occur.";
|
||||
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
|
||||
#else
|
||||
printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
|
||||
|
@ -602,9 +608,13 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
|
|||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_DECODER
|
||||
// Verify that we get 2 resize events in this test.
|
||||
ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
|
||||
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
|
||||
#else
|
||||
printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
vpx_img_fmt_t CspForFrameNumber(int frame) {
|
||||
|
|
|
@ -749,17 +749,6 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
|
|||
|
||||
//------------------------------------------------------------------------------
|
||||
// x86 functions
|
||||
#if HAVE_MMX
|
||||
const SadMxNParam mmx_tests[] = {
|
||||
make_tuple(16, 16, &vpx_sad16x16_mmx, -1),
|
||||
make_tuple(16, 8, &vpx_sad16x8_mmx, -1),
|
||||
make_tuple(8, 16, &vpx_sad8x16_mmx, -1),
|
||||
make_tuple(8, 8, &vpx_sad8x8_mmx, -1),
|
||||
make_tuple(4, 4, &vpx_sad4x4_mmx, -1),
|
||||
};
|
||||
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
|
||||
#endif // HAVE_MMX
|
||||
|
||||
#if HAVE_SSE2
|
||||
#if CONFIG_USE_X86INC
|
||||
const SadMxNParam sse2_tests[] = {
|
||||
|
|
|
@ -23,7 +23,7 @@ simple_encoder_verify_environment() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Runs simple_encoder using the codec specified by $1.
|
||||
# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
|
||||
simple_encoder() {
|
||||
local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
|
||||
local codec="$1"
|
||||
|
@ -35,7 +35,7 @@ simple_encoder() {
|
|||
fi
|
||||
|
||||
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
|
||||
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
|
||||
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \
|
||||
${devnull}
|
||||
|
||||
[ -e "${output_file}" ] || return 1
|
||||
|
@ -47,16 +47,13 @@ simple_encoder_vp8() {
|
|||
fi
|
||||
}
|
||||
|
||||
# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
|
||||
# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
|
||||
# machine.
|
||||
DISABLED_simple_encoder_vp9() {
|
||||
simple_encoder_vp9() {
|
||||
if [ "$(vp9_encode_available)" = "yes" ]; then
|
||||
simple_encoder vp9 || return 1
|
||||
fi
|
||||
}
|
||||
|
||||
simple_encoder_tests="simple_encoder_vp8
|
||||
DISABLED_simple_encoder_vp9"
|
||||
simple_encoder_vp9"
|
||||
|
||||
run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
|
||||
|
|
|
@ -25,6 +25,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc
|
|||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h
|
||||
|
@ -43,6 +44,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
|
|||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc
|
||||
|
||||
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
|
||||
LIBVPX_TEST_SRCS-yes += decode_test_driver.h
|
||||
|
@ -108,6 +110,7 @@ LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
|
|||
LIBVPX_TEST_SRCS-yes += vp8_fragments_test.cc
|
||||
endif
|
||||
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += add_noise_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
|
||||
|
@ -148,6 +151,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
|
|||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
|
||||
|
|
|
@ -191,14 +191,15 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
|
|||
INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
|
||||
vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
|
||||
vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
|
||||
vpx_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
|
||||
NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
|
||||
vpx_tm_predictor_4x4_sse2)
|
||||
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
|
||||
|
||||
#if HAVE_SSSE3 && CONFIG_USE_X86INC
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vpx_d45_predictor_4x4_ssse3, NULL, NULL,
|
||||
vpx_d153_predictor_4x4_ssse3, vpx_d207_predictor_4x4_ssse3,
|
||||
NULL, NULL, NULL, NULL,
|
||||
vpx_d153_predictor_4x4_ssse3, NULL,
|
||||
vpx_d63_predictor_4x4_ssse3, NULL)
|
||||
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
|
||||
|
||||
|
@ -240,13 +241,13 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
|
|||
INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
|
||||
vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
|
||||
vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
|
||||
vpx_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vpx_tm_predictor_8x8_sse2)
|
||||
vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
|
||||
NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
|
||||
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
|
||||
|
||||
#if HAVE_SSSE3 && CONFIG_USE_X86INC
|
||||
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, vpx_d45_predictor_8x8_ssse3, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL,
|
||||
vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3,
|
||||
vpx_d63_predictor_8x8_ssse3, NULL)
|
||||
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
|
||||
|
|
|
@ -23,7 +23,8 @@ twopass_encoder_verify_environment() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Runs twopass_encoder using the codec specified by $1.
|
||||
# Runs twopass_encoder using the codec specified by $1 with a frame limit of
|
||||
# 100.
|
||||
twopass_encoder() {
|
||||
local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}"
|
||||
local codec="$1"
|
||||
|
@ -35,7 +36,7 @@ twopass_encoder() {
|
|||
fi
|
||||
|
||||
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
|
||||
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
|
||||
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 100 \
|
||||
${devnull}
|
||||
|
||||
[ -e "${output_file}" ] || return 1
|
||||
|
@ -47,16 +48,13 @@ twopass_encoder_vp8() {
|
|||
fi
|
||||
}
|
||||
|
||||
# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this
|
||||
# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast
|
||||
# machine.
|
||||
DISABLED_twopass_encoder_vp9() {
|
||||
twopass_encoder_vp9() {
|
||||
if [ "$(vp9_encode_available)" = "yes" ]; then
|
||||
twopass_encoder vp9 || return 1
|
||||
fi
|
||||
}
|
||||
|
||||
twopass_encoder_tests="twopass_encoder_vp8
|
||||
DISABLED_twopass_encoder_vp9"
|
||||
twopass_encoder_vp9"
|
||||
|
||||
run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
|
||||
|
|
|
@ -1062,30 +1062,6 @@ INSTANTIATE_TEST_CASE_P(
|
|||
::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#if HAVE_MMX
|
||||
INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest,
|
||||
::testing::Values(make_tuple(4, 4, &vpx_mse16x16_mmx)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest,
|
||||
::testing::Values(vpx_get_mb_ss_mmx));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, VpxVarianceTest,
|
||||
::testing::Values(make_tuple(4, 4, &vpx_variance16x16_mmx, 0),
|
||||
make_tuple(4, 3, &vpx_variance16x8_mmx, 0),
|
||||
make_tuple(3, 4, &vpx_variance8x16_mmx, 0),
|
||||
make_tuple(3, 3, &vpx_variance8x8_mmx, 0),
|
||||
make_tuple(2, 2, &vpx_variance4x4_mmx, 0)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
MMX, VpxSubpelVarianceTest,
|
||||
::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_mmx, 0),
|
||||
make_tuple(4, 3, &vpx_sub_pixel_variance16x8_mmx, 0),
|
||||
make_tuple(3, 4, &vpx_sub_pixel_variance8x16_mmx, 0),
|
||||
make_tuple(3, 3, &vpx_sub_pixel_variance8x8_mmx, 0),
|
||||
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_mmx, 0)));
|
||||
#endif // HAVE_MMX
|
||||
|
||||
#if HAVE_SSE2
|
||||
INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
|
||||
::testing::Values(vpx_get_mb_ss_sse2));
|
||||
|
@ -1126,8 +1102,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0),
|
||||
make_tuple(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0),
|
||||
make_tuple(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0),
|
||||
make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse, 0),
|
||||
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse, 0)));
|
||||
make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse2, 0),
|
||||
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse2, 0)));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VpxSubpelAvgVarianceTest,
|
||||
|
@ -1143,8 +1119,8 @@ INSTANTIATE_TEST_CASE_P(
|
|||
make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0),
|
||||
make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0),
|
||||
make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
|
||||
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse, 0),
|
||||
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0)));
|
||||
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0),
|
||||
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0)));
|
||||
#endif // CONFIG_USE_X86INC
|
||||
|
||||
#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH
|
||||
|
|
|
@ -94,8 +94,7 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
|
|||
// Test for all block size.
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, VP9DenoiserTest,
|
||||
::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
|
||||
BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32,
|
||||
BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
|
||||
BLOCK_64X64));
|
||||
::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
|
||||
BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64,
|
||||
BLOCK_64X32, BLOCK_64X64));
|
||||
} // namespace
|
||||
|
|
|
@ -62,7 +62,7 @@ class WebMVideoSource : public CompressedVideoSource {
|
|||
|
||||
void FillFrame() {
|
||||
ASSERT_TRUE(vpx_ctx_->file != NULL);
|
||||
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
|
||||
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
|
||||
ASSERT_GE(status, 0) << "webm_read_frame failed";
|
||||
if (status == 1) {
|
||||
end_of_file_ = true;
|
||||
|
@ -72,7 +72,7 @@ class WebMVideoSource : public CompressedVideoSource {
|
|||
void SeekToNextKeyFrame() {
|
||||
ASSERT_TRUE(vpx_ctx_->file != NULL);
|
||||
do {
|
||||
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
|
||||
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
|
||||
ASSERT_GE(status, 0) << "webm_read_frame failed";
|
||||
++frame_;
|
||||
if (status == 1) {
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vp10_rtcd.h"
|
||||
|
||||
|
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
|||
state->last_noise = a;
|
||||
}
|
||||
|
||||
void vp10_plane_add_noise_c(uint8_t *start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int width, unsigned int height, int pitch) {
|
||||
unsigned int i, j;
|
||||
|
||||
// TODO(jbb): why does simd code use both but c doesn't, normalize and
|
||||
// fix..
|
||||
(void) bothclamp;
|
||||
for (i = 0; i < height; i++) {
|
||||
uint8_t *pos = start + i * pitch;
|
||||
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
if (pos[j] < blackclamp[0])
|
||||
pos[j] = blackclamp[0];
|
||||
|
||||
if (pos[j] > 255 + whiteclamp[0])
|
||||
pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
pos[j] += ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
|
||||
// Current mip will be the prev_mip for the next frame.
|
||||
MODE_INFO *temp = cm->postproc_state.prev_mip;
|
||||
|
@ -727,7 +702,7 @@ int vp10_post_proc_frame(struct VP10Common *cm,
|
|||
fillrd(ppstate, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
ppstate->whiteclamp, ppstate->bothclamp,
|
||||
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
|
||||
}
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -15,13 +15,14 @@
|
|||
|
||||
#include "./vpx_config.h"
|
||||
#include "vpx_dsp/txfm_common.h"
|
||||
#include "vpx_dsp/inv_txfm.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static INLINE tran_low_t check_range(tran_high_t input) {
|
||||
static INLINE tran_high_t check_range(tran_high_t input) {
|
||||
#if CONFIG_COEFFICIENT_RANGE_CHECKING
|
||||
// For valid VP9 input streams, intermediate stage coefficients should always
|
||||
// stay within the range of a signed 16 bit integer. Coefficients can go out
|
||||
|
@ -32,17 +33,17 @@ static INLINE tran_low_t check_range(tran_high_t input) {
|
|||
assert(INT16_MIN <= input);
|
||||
assert(input <= INT16_MAX);
|
||||
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
|
||||
return (tran_low_t)input;
|
||||
return input;
|
||||
}
|
||||
|
||||
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
|
||||
static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
|
||||
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
|
||||
return check_range(rv);
|
||||
return rv;
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static INLINE tran_low_t highbd_check_range(tran_high_t input,
|
||||
int bd) {
|
||||
static INLINE tran_high_t highbd_check_range(tran_high_t input,
|
||||
int bd) {
|
||||
#if CONFIG_COEFFICIENT_RANGE_CHECKING
|
||||
// For valid highbitdepth VP9 streams, intermediate stage coefficients will
|
||||
// stay within the ranges:
|
||||
|
@ -56,13 +57,12 @@ static INLINE tran_low_t highbd_check_range(tran_high_t input,
|
|||
(void) int_min;
|
||||
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
|
||||
(void) bd;
|
||||
return (tran_low_t)input;
|
||||
return input;
|
||||
}
|
||||
|
||||
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
|
||||
int bd) {
|
||||
static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
|
||||
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
|
||||
return highbd_check_range(rv, bd);
|
||||
return rv;
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
|
@ -83,9 +83,21 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
|
|||
// bd of 10 uses trans_low with 18bits, need to remove 14bits
|
||||
// bd of 12 uses trans_low with 20bits, need to remove 12bits
|
||||
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
|
||||
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
|
||||
#else
|
||||
#define WRAPLOW(x, bd) ((int32_t)(x))
|
||||
|
||||
#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16)
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#define HIGHBD_WRAPLOW(x, bd) \
|
||||
((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd))
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#else // CONFIG_EMULATE_HARDWARE
|
||||
|
||||
#define WRAPLOW(x) ((int32_t)check_range(x))
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
#define HIGHBD_WRAPLOW(x, bd) \
|
||||
((int32_t)highbd_check_range((x), bd))
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#endif // CONFIG_EMULATE_HARDWARE
|
||||
|
||||
void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
|
||||
|
@ -107,14 +119,14 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
|
|||
|
||||
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
|
||||
int bd) {
|
||||
trans = WRAPLOW(trans, bd);
|
||||
return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
|
||||
trans = HIGHBD_WRAPLOW(trans, bd);
|
||||
return clip_pixel_highbd(dest + trans, bd);
|
||||
}
|
||||
#endif
|
||||
|
||||
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
|
||||
trans = WRAPLOW(trans, 8);
|
||||
return clip_pixel(WRAPLOW(dest + trans, 8));
|
||||
trans = WRAPLOW(trans);
|
||||
return clip_pixel(dest + trans);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -73,10 +73,6 @@ add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint
|
|||
specialize qw/vp10_post_proc_down_and_across sse2/;
|
||||
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
|
||||
|
||||
add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp10_plane_add_noise sse2/;
|
||||
$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
|
||||
|
||||
|
@ -365,9 +361,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
|
||||
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
|
||||
specialize qw/vp10_highbd_post_proc_down_and_across/;
|
||||
|
||||
add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp10_highbd_plane_add_noise/;
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -447,7 +440,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vp10_fht32x32/;
|
||||
|
||||
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
|
||||
specialize qw/vp10_fwht4x4/, "$sse2_x86inc";
|
||||
} else {
|
||||
add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp10_fht4x4 sse2/;
|
||||
|
@ -468,7 +461,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vp10_fht32x32/;
|
||||
|
||||
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
|
||||
specialize qw/vp10_fwht4x4 msa/, "$sse2_x86inc";
|
||||
}
|
||||
|
||||
add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";
|
||||
|
|
|
@ -624,68 +624,6 @@ sym(vp10_mbpost_proc_across_ip_xmm):
|
|||
%undef flimit4
|
||||
|
||||
|
||||
;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int width, unsigned int height, int pitch)
|
||||
global sym(vp10_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp10_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd42:
|
||||
|
|
|
@ -2805,6 +2805,8 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
|
|||
const double dr =
|
||||
(double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
|
||||
const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
|
||||
const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
|
||||
const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
|
||||
|
||||
if (cpi->b_calculate_psnr) {
|
||||
const double total_psnr =
|
||||
|
@ -2844,8 +2846,9 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
|
|||
SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
|
||||
}
|
||||
|
||||
fprintf(f, "%s\t Time\n", headings);
|
||||
fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
|
||||
fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
|
||||
fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
|
||||
total_encode_time, rate_err, fabs(rate_err));
|
||||
}
|
||||
|
||||
fclose(f);
|
||||
|
|
|
@ -139,7 +139,7 @@ typedef struct VP10EncoderConfig {
|
|||
int height; // height of data passed to the compressor
|
||||
unsigned int input_bit_depth; // Input bit depth.
|
||||
double init_framerate; // set to passed in framerate
|
||||
int64_t target_bandwidth; // bandwidth to be used in kilobits per second
|
||||
int64_t target_bandwidth; // bandwidth to be used in bits per second
|
||||
|
||||
int noise_sensitivity; // pre processing blur: recommendation 0
|
||||
int sharpness; // sharpening output: recommendation 0:
|
||||
|
|
|
@ -45,7 +45,6 @@
|
|||
|
||||
#define BOOST_BREAKOUT 12.5
|
||||
#define BOOST_FACTOR 12.5
|
||||
#define ERR_DIVISOR 128.0
|
||||
#define FACTOR_PT_LOW 0.70
|
||||
#define FACTOR_PT_HIGH 0.90
|
||||
#define FIRST_PASS_Q 10.0
|
||||
|
@ -230,6 +229,13 @@ static void subtract_stats(FIRSTPASS_STATS *section,
|
|||
section->duration -= frame->duration;
|
||||
}
|
||||
|
||||
// Calculate the linear size relative to a baseline of 1080P
|
||||
#define BASE_SIZE 2073600.0 // 1920x1080
|
||||
static double get_linear_size_factor(const VP10_COMP *cpi) {
|
||||
const double this_area = cpi->initial_width * cpi->initial_height;
|
||||
return pow(this_area / BASE_SIZE, 0.5);
|
||||
}
|
||||
|
||||
// Calculate an active area of the image that discounts formatting
|
||||
// bars and partially discounts other 0 energy areas.
|
||||
#define MIN_ACTIVE_AREA 0.5
|
||||
|
@ -1121,11 +1127,7 @@ static double calc_correction_factor(double err_per_mb,
|
|||
return fclamp(pow(error_term, power_term), 0.05, 5.0);
|
||||
}
|
||||
|
||||
// Larger image formats are expected to be a little harder to code relatively
|
||||
// given the same prediction error score. This in part at least relates to the
|
||||
// increased size and hence coding cost of motion vectors.
|
||||
#define EDIV_SIZE_FACTOR 800
|
||||
|
||||
#define ERR_DIVISOR 100.0
|
||||
static int get_twopass_worst_quality(const VP10_COMP *cpi,
|
||||
const double section_err,
|
||||
double inactive_zone,
|
||||
|
@ -1144,12 +1146,22 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
|
|||
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
|
||||
const double av_err_per_mb = section_err / active_mbs;
|
||||
const double speed_term = 1.0 + 0.04 * oxcf->speed;
|
||||
const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
|
||||
double ediv_size_correction;
|
||||
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
|
||||
BPER_MB_NORMBITS) / active_mbs;
|
||||
|
||||
int q;
|
||||
|
||||
// Larger image formats are expected to be a little harder to code
|
||||
// relatively given the same prediction error score. This in part at
|
||||
// least relates to the increased size and hence coding overheads of
|
||||
// motion vectors. Some account of this is made through adjustment of
|
||||
// the error divisor.
|
||||
ediv_size_correction =
|
||||
VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
|
||||
if (ediv_size_correction < 1.0)
|
||||
ediv_size_correction = -(1.0 / ediv_size_correction);
|
||||
ediv_size_correction *= 4.0;
|
||||
|
||||
// Try and pick a max Q that will be high enough to encode the
|
||||
// content at the given rate.
|
||||
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
|
||||
/* Return the buffer at the given absolute index and increment the index */
|
||||
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
|
||||
unsigned int *idx) {
|
||||
unsigned int index = *idx;
|
||||
int *idx) {
|
||||
int index = *idx;
|
||||
struct lookahead_entry *buf = ctx->buf + index;
|
||||
|
||||
assert(index < ctx->max_sz);
|
||||
|
@ -35,7 +35,7 @@ static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
|
|||
void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
|
||||
if (ctx) {
|
||||
if (ctx->buf) {
|
||||
unsigned int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ctx->max_sz; i++)
|
||||
vpx_free_frame_buffer(&ctx->buf[i].img);
|
||||
|
@ -221,9 +221,9 @@ struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
|
|||
|
||||
if (index >= 0) {
|
||||
// Forward peek
|
||||
if (index < (int)ctx->sz) {
|
||||
if (index < ctx->sz) {
|
||||
index += ctx->read_idx;
|
||||
if (index >= (int)ctx->max_sz)
|
||||
if (index >= ctx->max_sz)
|
||||
index -= ctx->max_sz;
|
||||
buf = ctx->buf + index;
|
||||
}
|
||||
|
|
|
@ -31,10 +31,10 @@ struct lookahead_entry {
|
|||
#define MAX_PRE_FRAMES 1
|
||||
|
||||
struct lookahead_ctx {
|
||||
unsigned int max_sz; /* Absolute size of the queue */
|
||||
unsigned int sz; /* Number of buffers currently in the queue */
|
||||
unsigned int read_idx; /* Read index */
|
||||
unsigned int write_idx; /* Write index */
|
||||
int max_sz; /* Absolute size of the queue */
|
||||
int sz; /* Number of buffers currently in the queue */
|
||||
int read_idx; /* Read index */
|
||||
int write_idx; /* Write index */
|
||||
struct lookahead_entry *buf; /* Buffer list */
|
||||
};
|
||||
|
||||
|
|
|
@ -1158,12 +1158,12 @@ void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr,
|
|||
|
||||
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
|
||||
unsigned t;
|
||||
int l;
|
||||
int l, m;
|
||||
t = d;
|
||||
for (l = 0; t > 1; l++)
|
||||
t >>= 1;
|
||||
t = 1 + (1 << (16 + l)) / d;
|
||||
*quant = (int16_t)(t - (1 << 16));
|
||||
m = 1 + (1 << (16 + l)) / d;
|
||||
*quant = (int16_t)(m - (1 << 16));
|
||||
*shift = 1 << (16 - l);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,104 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%define private_prefix vp10
|
||||
|
||||
%include "third_party/x86inc/x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro TRANSFORM_COLS 0
|
||||
paddw m0, m1
|
||||
movq m4, m0
|
||||
psubw m3, m2
|
||||
psubw m4, m3
|
||||
psraw m4, 1
|
||||
movq m5, m4
|
||||
psubw m5, m1 ;b1
|
||||
psubw m4, m2 ;c1
|
||||
psubw m0, m4
|
||||
paddw m3, m5
|
||||
; m0 a0
|
||||
SWAP 1, 4 ; m1 c1
|
||||
SWAP 2, 3 ; m2 d1
|
||||
SWAP 3, 5 ; m3 b1
|
||||
%endmacro
|
||||
|
||||
%macro TRANSPOSE_4X4 0
|
||||
movq m4, m0
|
||||
movq m5, m2
|
||||
punpcklwd m4, m1
|
||||
punpckhwd m0, m1
|
||||
punpcklwd m5, m3
|
||||
punpckhwd m2, m3
|
||||
movq m1, m4
|
||||
movq m3, m0
|
||||
punpckldq m1, m5
|
||||
punpckhdq m4, m5
|
||||
punpckldq m3, m2
|
||||
punpckhdq m0, m2
|
||||
SWAP 2, 3, 0, 1, 4
|
||||
%endmacro
|
||||
|
||||
INIT_MMX mmx
|
||||
cglobal fwht4x4, 3, 4, 8, input, output, stride
|
||||
lea r3q, [inputq + strideq*4]
|
||||
movq m0, [inputq] ;a1
|
||||
movq m1, [inputq + strideq*2] ;b1
|
||||
movq m2, [r3q] ;c1
|
||||
movq m3, [r3q + strideq*2] ;d1
|
||||
|
||||
TRANSFORM_COLS
|
||||
TRANSPOSE_4X4
|
||||
TRANSFORM_COLS
|
||||
TRANSPOSE_4X4
|
||||
|
||||
psllw m0, 2
|
||||
psllw m1, 2
|
||||
psllw m2, 2
|
||||
psllw m3, 2
|
||||
|
||||
%if CONFIG_VP9_HIGHBITDEPTH
|
||||
pxor m4, m4
|
||||
pxor m5, m5
|
||||
pcmpgtw m4, m0
|
||||
pcmpgtw m5, m1
|
||||
movq m6, m0
|
||||
movq m7, m1
|
||||
punpcklwd m0, m4
|
||||
punpcklwd m1, m5
|
||||
punpckhwd m6, m4
|
||||
punpckhwd m7, m5
|
||||
movq [outputq], m0
|
||||
movq [outputq + 8], m6
|
||||
movq [outputq + 16], m1
|
||||
movq [outputq + 24], m7
|
||||
pxor m4, m4
|
||||
pxor m5, m5
|
||||
pcmpgtw m4, m2
|
||||
pcmpgtw m5, m3
|
||||
movq m6, m2
|
||||
movq m7, m3
|
||||
punpcklwd m2, m4
|
||||
punpcklwd m3, m5
|
||||
punpckhwd m6, m4
|
||||
punpckhwd m7, m5
|
||||
movq [outputq + 32], m2
|
||||
movq [outputq + 40], m6
|
||||
movq [outputq + 48], m3
|
||||
movq [outputq + 56], m7
|
||||
%else
|
||||
movq [outputq], m0
|
||||
movq [outputq + 8], m1
|
||||
movq [outputq + 16], m2
|
||||
movq [outputq + 24], m3
|
||||
%endif
|
||||
|
||||
RET
|
|
@ -0,0 +1,86 @@
|
|||
;
|
||||
; Copyright (c) 2016 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%define private_prefix vp10
|
||||
|
||||
%include "third_party/x86inc/x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
%macro TRANSFORM_COLS 0
|
||||
paddw m0, m1
|
||||
movq m4, m0
|
||||
psubw m3, m2
|
||||
psubw m4, m3
|
||||
psraw m4, 1
|
||||
movq m5, m4
|
||||
psubw m5, m1 ;b1
|
||||
psubw m4, m2 ;c1
|
||||
psubw m0, m4
|
||||
paddw m3, m5
|
||||
; m0 a0
|
||||
SWAP 1, 4 ; m1 c1
|
||||
SWAP 2, 3 ; m2 d1
|
||||
SWAP 3, 5 ; m3 b1
|
||||
%endmacro
|
||||
|
||||
%macro TRANSPOSE_4X4 0
|
||||
; 00 01 02 03
|
||||
; 10 11 12 13
|
||||
; 20 21 22 23
|
||||
; 30 31 32 33
|
||||
punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13
|
||||
punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33
|
||||
mova m1, m0
|
||||
punpckldq m0, m2 ; 00 10 20 30 01 11 21 31
|
||||
punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33
|
||||
%endmacro
|
||||
|
||||
INIT_XMM sse2
|
||||
cglobal fwht4x4, 3, 4, 8, input, output, stride
|
||||
lea r3q, [inputq + strideq*4]
|
||||
movq m0, [inputq] ;a1
|
||||
movq m1, [inputq + strideq*2] ;b1
|
||||
movq m2, [r3q] ;c1
|
||||
movq m3, [r3q + strideq*2] ;d1
|
||||
|
||||
TRANSFORM_COLS
|
||||
TRANSPOSE_4X4
|
||||
SWAP 1, 2
|
||||
psrldq m1, m0, 8
|
||||
psrldq m3, m2, 8
|
||||
TRANSFORM_COLS
|
||||
TRANSPOSE_4X4
|
||||
|
||||
psllw m0, 2
|
||||
psllw m1, 2
|
||||
|
||||
%if CONFIG_VP9_HIGHBITDEPTH
|
||||
; sign extension
|
||||
mova m2, m0
|
||||
mova m3, m1
|
||||
punpcklwd m0, m0
|
||||
punpcklwd m1, m1
|
||||
punpckhwd m2, m2
|
||||
punpckhwd m3, m3
|
||||
psrad m0, 16
|
||||
psrad m1, 16
|
||||
psrad m2, 16
|
||||
psrad m3, 16
|
||||
mova [outputq], m0
|
||||
mova [outputq + 16], m2
|
||||
mova [outputq + 32], m1
|
||||
mova [outputq + 48], m3
|
||||
%else
|
||||
mova [outputq], m0
|
||||
mova [outputq + 16], m1
|
||||
%endif
|
||||
|
||||
RET
|
|
@ -104,7 +104,7 @@ VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
|
|||
endif
|
||||
|
||||
ifeq ($(CONFIG_USE_X86INC),yes)
|
||||
VP10_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
|
||||
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
|
||||
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm
|
||||
endif
|
||||
|
||||
|
@ -114,7 +114,7 @@ VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3_x86_64.asm
|
|||
endif
|
||||
endif
|
||||
|
||||
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.c
|
||||
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
|
||||
VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
|
||||
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
|
||||
VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
|
||||
|
|
|
@ -21,114 +21,6 @@ static const uint8_t bifilter4_coeff[8][2] = {
|
|||
{ 16, 112}
|
||||
};
|
||||
|
||||
void vp8_bilinear_predict4x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
|
||||
uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
|
||||
uint8x16_t q1u8, q2u8;
|
||||
uint16x8_t q1u16, q2u16;
|
||||
uint16x8_t q7u16, q8u16, q9u16;
|
||||
uint64x2_t q4u64, q5u64;
|
||||
uint64x1_t d12u64;
|
||||
uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
|
||||
|
||||
if (xoffset == 0) { // skip_1stpass_filter
|
||||
uint32x2_t d28u32 = vdup_n_u32(0);
|
||||
uint32x2_t d29u32 = vdup_n_u32(0);
|
||||
uint32x2_t d30u32 = vdup_n_u32(0);
|
||||
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
|
||||
src_ptr += src_pixels_per_line;
|
||||
d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
|
||||
d28u8 = vreinterpret_u8_u32(d28u32);
|
||||
d29u8 = vreinterpret_u8_u32(d29u32);
|
||||
d30u8 = vreinterpret_u8_u32(d30u32);
|
||||
} else {
|
||||
d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
|
||||
d6u8 = vld1_u8(src_ptr);
|
||||
|
||||
q1u8 = vcombine_u8(d2u8, d3u8);
|
||||
q2u8 = vcombine_u8(d4u8, d5u8);
|
||||
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
|
||||
|
||||
q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
|
||||
q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
|
||||
d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
|
||||
vreinterpret_u32_u8(vget_high_u8(q1u8)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
|
||||
vreinterpret_u32_u8(vget_high_u8(q2u8)));
|
||||
d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q4u64)));
|
||||
d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
|
||||
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
|
||||
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
|
||||
q9u16 = vmull_u8(d6u8, d0u8);
|
||||
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
|
||||
q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
|
||||
|
||||
d28u8 = vqrshrn_n_u16(q7u16, 7);
|
||||
d29u8 = vqrshrn_n_u16(q8u16, 7);
|
||||
d30u8 = vqrshrn_n_u16(q9u16, 7);
|
||||
}
|
||||
|
||||
// secondpass_filter
|
||||
if (yoffset == 0) { // skip_2ndpass_filter
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
|
||||
} else {
|
||||
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
|
||||
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
|
||||
|
||||
q1u16 = vmull_u8(d28u8, d0u8);
|
||||
q2u16 = vmull_u8(d29u8, d0u8);
|
||||
|
||||
d26u8 = vext_u8(d28u8, d29u8, 4);
|
||||
d27u8 = vext_u8(d29u8, d30u8, 4);
|
||||
|
||||
q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
|
||||
q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
|
||||
|
||||
d2u8 = vqrshrn_n_u16(q1u16, 7);
|
||||
d3u8 = vqrshrn_n_u16(q2u16, 7);
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_bilinear_predict8x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
|
|
|
@ -22,383 +22,6 @@ static const int8_t vp8_sub_pel_filters[8][8] = {
|
|||
{0, -1, 12, 123, -6, 0, 0, 0},
|
||||
};
|
||||
|
||||
void vp8_sixtap_predict4x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
int xoffset,
|
||||
int yoffset,
|
||||
unsigned char *dst_ptr,
|
||||
int dst_pitch) {
|
||||
unsigned char *src;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8;
|
||||
uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
|
||||
int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
|
||||
uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
|
||||
uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
|
||||
int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
|
||||
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
|
||||
uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8;
|
||||
uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64;
|
||||
uint32x2x2_t d0u32x2, d1u32x2;
|
||||
|
||||
if (xoffset == 0) { // secondpass_filter4x4_only
|
||||
uint32x2_t d27u32 = vdup_n_u32(0);
|
||||
uint32x2_t d28u32 = vdup_n_u32(0);
|
||||
uint32x2_t d29u32 = vdup_n_u32(0);
|
||||
uint32x2_t d30u32 = vdup_n_u32(0);
|
||||
uint32x2_t d31u32 = vdup_n_u32(0);
|
||||
|
||||
// load second_pass filter
|
||||
dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
|
||||
d0s8 = vdup_lane_s8(dtmps8, 0);
|
||||
d1s8 = vdup_lane_s8(dtmps8, 1);
|
||||
d2s8 = vdup_lane_s8(dtmps8, 2);
|
||||
d3s8 = vdup_lane_s8(dtmps8, 3);
|
||||
d4s8 = vdup_lane_s8(dtmps8, 4);
|
||||
d5s8 = vdup_lane_s8(dtmps8, 5);
|
||||
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
|
||||
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
|
||||
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
|
||||
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
|
||||
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
|
||||
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
|
||||
|
||||
// load src data
|
||||
src = src_ptr - src_pixels_per_line * 2;
|
||||
d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0);
|
||||
src += src_pixels_per_line;
|
||||
d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1);
|
||||
src += src_pixels_per_line;
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0);
|
||||
src += src_pixels_per_line;
|
||||
d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1);
|
||||
src += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0);
|
||||
src += src_pixels_per_line;
|
||||
d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1);
|
||||
src += src_pixels_per_line;
|
||||
d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0);
|
||||
src += src_pixels_per_line;
|
||||
d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1);
|
||||
src += src_pixels_per_line;
|
||||
d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0);
|
||||
|
||||
d27u8 = vreinterpret_u8_u32(d27u32);
|
||||
d28u8 = vreinterpret_u8_u32(d28u32);
|
||||
d29u8 = vreinterpret_u8_u32(d29u32);
|
||||
d30u8 = vreinterpret_u8_u32(d30u32);
|
||||
d31u8 = vreinterpret_u8_u32(d31u32);
|
||||
|
||||
d23u8 = vext_u8(d27u8, d28u8, 4);
|
||||
d24u8 = vext_u8(d28u8, d29u8, 4);
|
||||
d25u8 = vext_u8(d29u8, d30u8, 4);
|
||||
d26u8 = vext_u8(d30u8, d31u8, 4);
|
||||
|
||||
q3u16 = vmull_u8(d27u8, d0u8);
|
||||
q4u16 = vmull_u8(d28u8, d0u8);
|
||||
q5u16 = vmull_u8(d25u8, d5u8);
|
||||
q6u16 = vmull_u8(d26u8, d5u8);
|
||||
|
||||
q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
|
||||
q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
|
||||
q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
|
||||
q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
|
||||
|
||||
q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
|
||||
q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
|
||||
q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
|
||||
q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
|
||||
|
||||
q3s16 = vreinterpretq_s16_u16(q3u16);
|
||||
q4s16 = vreinterpretq_s16_u16(q4u16);
|
||||
q5s16 = vreinterpretq_s16_u16(q5u16);
|
||||
q6s16 = vreinterpretq_s16_u16(q6u16);
|
||||
|
||||
q5s16 = vqaddq_s16(q5s16, q3s16);
|
||||
q6s16 = vqaddq_s16(q6s16, q4s16);
|
||||
|
||||
d3u8 = vqrshrun_n_s16(q5s16, 7);
|
||||
d4u8 = vqrshrun_n_s16(q6s16, 7);
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
|
||||
return;
|
||||
}
|
||||
|
||||
// load first_pass filter
|
||||
dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
|
||||
d0s8 = vdup_lane_s8(dtmps8, 0);
|
||||
d1s8 = vdup_lane_s8(dtmps8, 1);
|
||||
d2s8 = vdup_lane_s8(dtmps8, 2);
|
||||
d3s8 = vdup_lane_s8(dtmps8, 3);
|
||||
d4s8 = vdup_lane_s8(dtmps8, 4);
|
||||
d5s8 = vdup_lane_s8(dtmps8, 5);
|
||||
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
|
||||
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
|
||||
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
|
||||
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
|
||||
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
|
||||
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
|
||||
|
||||
// First pass: output_height lines x output_width columns (9x4)
|
||||
|
||||
if (yoffset == 0) // firstpass_filter4x4_only
|
||||
src = src_ptr - 2;
|
||||
else
|
||||
src = src_ptr - 2 - (src_pixels_per_line * 2);
|
||||
|
||||
q3u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q5u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q6u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
|
||||
d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
|
||||
d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
|
||||
d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
|
||||
d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
|
||||
|
||||
// vswp here
|
||||
q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
|
||||
q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
|
||||
vreinterpret_u32_u8(d19u8));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
|
||||
vreinterpret_u32_u8(d21u8));
|
||||
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
|
||||
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
|
||||
|
||||
// keep original src data in q4 q6
|
||||
q4u64 = vreinterpretq_u64_u8(q3u8);
|
||||
q6u64 = vreinterpretq_u64_u8(q5u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
|
||||
vreinterpret_u32_u8(vget_high_u8(q3u8)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
|
||||
vreinterpret_u32_u8(vget_high_u8(q5u8)));
|
||||
q9u64 = vshrq_n_u64(q4u64, 8);
|
||||
q10u64 = vshrq_n_u64(q6u64, 8);
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
|
||||
vreinterpret_u32_u64(vget_high_u64(q9u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
|
||||
vreinterpret_u32_u64(vget_high_u64(q10u64)));
|
||||
q3u64 = vshrq_n_u64(q4u64, 32);
|
||||
q5u64 = vshrq_n_u64(q6u64, 32);
|
||||
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
|
||||
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
|
||||
vreinterpret_u32_u64(vget_high_u64(q3u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
q9u64 = vshrq_n_u64(q4u64, 16);
|
||||
q10u64 = vshrq_n_u64(q6u64, 16);
|
||||
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
|
||||
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
|
||||
vreinterpret_u32_u64(vget_high_u64(q9u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
|
||||
vreinterpret_u32_u64(vget_high_u64(q10u64)));
|
||||
q3u64 = vshrq_n_u64(q4u64, 24);
|
||||
q5u64 = vshrq_n_u64(q6u64, 24);
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
|
||||
vreinterpret_u32_u64(vget_high_u64(q3u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
|
||||
q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
|
||||
|
||||
q7s16 = vreinterpretq_s16_u16(q7u16);
|
||||
q8s16 = vreinterpretq_s16_u16(q8u16);
|
||||
q9s16 = vreinterpretq_s16_u16(q9u16);
|
||||
q10s16 = vreinterpretq_s16_u16(q10u16);
|
||||
q7s16 = vqaddq_s16(q7s16, q9s16);
|
||||
q8s16 = vqaddq_s16(q8s16, q10s16);
|
||||
|
||||
d27u8 = vqrshrun_n_s16(q7s16, 7);
|
||||
d28u8 = vqrshrun_n_s16(q8s16, 7);
|
||||
|
||||
if (yoffset == 0) { // firstpass_filter4x4_only
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
|
||||
return;
|
||||
}
|
||||
|
||||
// First Pass on rest 5-line data
|
||||
q3u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q4u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q5u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q6u8 = vld1q_u8(src);
|
||||
src += src_pixels_per_line;
|
||||
q11u8 = vld1q_u8(src);
|
||||
|
||||
d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
|
||||
d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
|
||||
d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
|
||||
d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
|
||||
|
||||
// vswp here
|
||||
q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
|
||||
q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
|
||||
vreinterpret_u32_u8(d19u8));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
|
||||
vreinterpret_u32_u8(d21u8));
|
||||
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5);
|
||||
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
|
||||
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
|
||||
q12u16 = vmull_u8(d31u8, d5u8);
|
||||
|
||||
q4u64 = vreinterpretq_u64_u8(q3u8);
|
||||
q6u64 = vreinterpretq_u64_u8(q5u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
|
||||
vreinterpret_u32_u8(vget_high_u8(q3u8)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
|
||||
vreinterpret_u32_u8(vget_high_u8(q5u8)));
|
||||
q9u64 = vshrq_n_u64(q4u64, 8);
|
||||
q10u64 = vshrq_n_u64(q6u64, 8);
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
|
||||
q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
|
||||
vreinterpret_u32_u64(vget_high_u64(q9u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
|
||||
vreinterpret_u32_u64(vget_high_u64(q10u64)));
|
||||
q3u64 = vshrq_n_u64(q4u64, 32);
|
||||
q5u64 = vshrq_n_u64(q6u64, 32);
|
||||
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1);
|
||||
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
|
||||
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
|
||||
q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
|
||||
vreinterpret_u32_u64(vget_high_u64(q3u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
q9u64 = vshrq_n_u64(q4u64, 16);
|
||||
q10u64 = vshrq_n_u64(q6u64, 16);
|
||||
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4);
|
||||
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
|
||||
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
|
||||
q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
|
||||
vreinterpret_u32_u64(vget_high_u64(q9u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
|
||||
vreinterpret_u32_u64(vget_high_u64(q10u64)));
|
||||
q3u64 = vshrq_n_u64(q4u64, 24);
|
||||
q5u64 = vshrq_n_u64(q6u64, 24);
|
||||
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2);
|
||||
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
|
||||
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
|
||||
q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
|
||||
|
||||
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
|
||||
vreinterpret_u32_u64(vget_high_u64(q3u64)));
|
||||
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
|
||||
vreinterpret_u32_u64(vget_high_u64(q5u64)));
|
||||
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3);
|
||||
q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
|
||||
q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
|
||||
q11u16 = vmull_u8(d31u8, d3u8);
|
||||
|
||||
q7s16 = vreinterpretq_s16_u16(q7u16);
|
||||
q8s16 = vreinterpretq_s16_u16(q8u16);
|
||||
q9s16 = vreinterpretq_s16_u16(q9u16);
|
||||
q10s16 = vreinterpretq_s16_u16(q10u16);
|
||||
q11s16 = vreinterpretq_s16_u16(q11u16);
|
||||
q12s16 = vreinterpretq_s16_u16(q12u16);
|
||||
q7s16 = vqaddq_s16(q7s16, q9s16);
|
||||
q8s16 = vqaddq_s16(q8s16, q10s16);
|
||||
q12s16 = vqaddq_s16(q12s16, q11s16);
|
||||
|
||||
d29u8 = vqrshrun_n_s16(q7s16, 7);
|
||||
d30u8 = vqrshrun_n_s16(q8s16, 7);
|
||||
d31u8 = vqrshrun_n_s16(q12s16, 7);
|
||||
|
||||
// Second pass: 4x4
|
||||
dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
|
||||
d0s8 = vdup_lane_s8(dtmps8, 0);
|
||||
d1s8 = vdup_lane_s8(dtmps8, 1);
|
||||
d2s8 = vdup_lane_s8(dtmps8, 2);
|
||||
d3s8 = vdup_lane_s8(dtmps8, 3);
|
||||
d4s8 = vdup_lane_s8(dtmps8, 4);
|
||||
d5s8 = vdup_lane_s8(dtmps8, 5);
|
||||
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
|
||||
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
|
||||
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
|
||||
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
|
||||
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
|
||||
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
|
||||
|
||||
d23u8 = vext_u8(d27u8, d28u8, 4);
|
||||
d24u8 = vext_u8(d28u8, d29u8, 4);
|
||||
d25u8 = vext_u8(d29u8, d30u8, 4);
|
||||
d26u8 = vext_u8(d30u8, d31u8, 4);
|
||||
|
||||
q3u16 = vmull_u8(d27u8, d0u8);
|
||||
q4u16 = vmull_u8(d28u8, d0u8);
|
||||
q5u16 = vmull_u8(d25u8, d5u8);
|
||||
q6u16 = vmull_u8(d26u8, d5u8);
|
||||
|
||||
q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
|
||||
q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
|
||||
q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
|
||||
q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
|
||||
|
||||
q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
|
||||
q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
|
||||
q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
|
||||
q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
|
||||
|
||||
q3s16 = vreinterpretq_s16_u16(q3u16);
|
||||
q4s16 = vreinterpretq_s16_u16(q4u16);
|
||||
q5s16 = vreinterpretq_s16_u16(q5u16);
|
||||
q6s16 = vreinterpretq_s16_u16(q6u16);
|
||||
|
||||
q5s16 = vqaddq_s16(q5s16, q3s16);
|
||||
q6s16 = vqaddq_s16(q6s16, q4s16);
|
||||
|
||||
d3u8 = vqrshrun_n_s16(q5s16, 7);
|
||||
d4u8 = vqrshrun_n_s16(q6s16, 7);
|
||||
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
|
||||
dst_ptr += dst_pitch;
|
||||
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_sixtap_predict8x4_neon(
|
||||
unsigned char *src_ptr,
|
||||
int src_pixels_per_line,
|
||||
|
|
|
@ -104,7 +104,7 @@ vp8_prob *vp8_mv_ref_probs(
|
|||
extern const unsigned char vp8_mbsplit_offset[4][16];
|
||||
|
||||
|
||||
static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
|
||||
{
|
||||
if (!(b & 3))
|
||||
{
|
||||
|
@ -119,7 +119,8 @@ static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
|
|||
return (cur_mb->bmi + b - 1)->mv.as_int;
|
||||
}
|
||||
|
||||
static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
|
||||
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
|
||||
int mi_stride)
|
||||
{
|
||||
if (!(b >> 2))
|
||||
{
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
#include <stdlib.h>
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vp8/common/mips/msa/vp8_macros_msa.h"
|
||||
|
||||
static const int16_t vp8_rv_msa[] =
|
||||
|
@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
|
||||
char blackclamp[16], char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
uint32_t width, uint32_t height,
|
||||
int32_t pitch)
|
||||
{
|
||||
uint32_t i, j;
|
||||
|
||||
for (i = 0; i < height / 2; ++i)
|
||||
{
|
||||
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
|
||||
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
|
||||
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
|
||||
for (j = width / 16; j--;)
|
||||
{
|
||||
v16i8 temp00_s, temp01_s;
|
||||
v16u8 temp00, temp01, black_clamp, white_clamp;
|
||||
v16u8 pos0, ref0, pos1, ref1;
|
||||
v16i8 const127 = __msa_ldi_b(127);
|
||||
|
||||
pos0 = LD_UB(pos0_ptr);
|
||||
ref0 = LD_UB(ref0_ptr);
|
||||
pos1 = LD_UB(pos1_ptr);
|
||||
ref1 = LD_UB(ref1_ptr);
|
||||
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
|
||||
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
|
||||
temp00 = (pos0 < black_clamp);
|
||||
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
|
||||
temp01 = (pos1 < black_clamp);
|
||||
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp00 = (v16u8)(temp00_s < pos0);
|
||||
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
|
||||
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
|
||||
temp01 = (temp01_s < pos1);
|
||||
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
|
||||
XORI_B2_128_UB(pos0, pos1);
|
||||
pos0 += ref0;
|
||||
ST_UB(pos0, pos0_ptr);
|
||||
pos1 += ref1;
|
||||
ST_UB(pos1, pos1_ptr);
|
||||
pos0_ptr += 16;
|
||||
pos1_ptr += 16;
|
||||
ref0_ptr += 16;
|
||||
ref1_ptr += 16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vpx_dsp_rtcd.h"
|
||||
#include "vp8_rtcd.h"
|
||||
#include "vpx_scale_rtcd.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a)
|
|||
state->last_noise = a;
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
*
|
||||
* ROUTINE : plane_add_noise_c
|
||||
*
|
||||
* INPUTS : unsigned char *Start starting address of buffer to add gaussian
|
||||
* noise to
|
||||
* unsigned int Width width of plane
|
||||
* unsigned int Height height of plane
|
||||
* int Pitch distance between subsequent lines of frame
|
||||
* int q quantizer used to determine amount of noise
|
||||
* to add
|
||||
*
|
||||
* OUTPUTS : None.
|
||||
*
|
||||
* RETURNS : void.
|
||||
*
|
||||
* FUNCTION : adds gaussian noise to a plane of pixels
|
||||
*
|
||||
* SPECIAL NOTES : None.
|
||||
*
|
||||
****************************************************************************/
|
||||
void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int Width, unsigned int Height, int Pitch)
|
||||
{
|
||||
unsigned int i, j;
|
||||
(void)bothclamp;
|
||||
|
||||
for (i = 0; i < Height; i++)
|
||||
{
|
||||
unsigned char *Pos = Start + i * Pitch;
|
||||
char *Ref = (char *)(noise + (rand() & 0xff));
|
||||
|
||||
for (j = 0; j < Width; j++)
|
||||
{
|
||||
if (Pos[j] < blackclamp[0])
|
||||
Pos[j] = blackclamp[0];
|
||||
|
||||
if (Pos[j] > 255 + whiteclamp[0])
|
||||
Pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
Pos[j] += Ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Blend the macro block with a solid colored square. Leave the
|
||||
* edges unblended to give distinction to macro blocks in areas
|
||||
* filled with the same color block.
|
||||
|
@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
|
|||
fillrd(&oci->postproc_state, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp8_plane_add_noise
|
||||
vpx_plane_add_noise
|
||||
(oci->post_proc_buffer.y_buffer,
|
||||
oci->postproc_state.noise,
|
||||
oci->postproc_state.blackclamp,
|
||||
|
|
|
@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
|
|||
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
|
||||
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
|
||||
|
||||
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
|
||||
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
|
||||
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
|
||||
# no asm yet
|
||||
|
||||
|
@ -209,7 +205,6 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
|
|||
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
|
||||
|
||||
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
|
||||
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
|
||||
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
|
||||
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
|
||||
|
@ -227,7 +222,6 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
|
|||
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
|
||||
|
||||
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
|
||||
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
|
||||
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
|
||||
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;
|
||||
|
||||
|
|
|
@ -44,8 +44,8 @@ extern "C" {
|
|||
#include <os2.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
#define THREAD_FUNCTION void
|
||||
#define THREAD_FUNCTION_RETURN void
|
||||
#define THREAD_FUNCTION void *
|
||||
#define THREAD_FUNCTION_RETURN void *
|
||||
#define THREAD_SPECIFIC_INDEX PULONG
|
||||
#define pthread_t TID
|
||||
#define pthread_attr_t ULONG
|
||||
|
|
|
@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx):
|
|||
%undef flimit2
|
||||
|
||||
|
||||
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
global sym(vp8_plane_add_noise_mmx) PRIVATE
|
||||
sym(vp8_plane_add_noise_mmx):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movq mm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb mm1, [rdx+32] ;bothclamp
|
||||
psubusb mm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movq mm2,[rdi+rax] ; get the noise for this line
|
||||
paddb mm1,mm2 ; add it in
|
||||
movq [rsi+rax],mm1 ; store the result
|
||||
|
||||
add rax,8 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
Blur:
|
||||
|
|
|
@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm):
|
|||
%undef flimit4
|
||||
|
||||
|
||||
;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int Width, unsigned int Height, int Pitch)
|
||||
global sym(vp8_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp8_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
four8s:
|
||||
|
|
|
@ -44,7 +44,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
|
|||
int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
|
||||
size_t bytes_left = br->user_buffer_end - bufptr;
|
||||
size_t bits_left = bytes_left * CHAR_BIT;
|
||||
int x = (int)(shift + CHAR_BIT - bits_left);
|
||||
int x = shift + CHAR_BIT - (int)bits_left;
|
||||
int loop_end = 0;
|
||||
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];
|
||||
|
||||
|
|
|
@ -83,7 +83,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
|
|||
}
|
||||
|
||||
{
|
||||
register unsigned int shift = vp8_norm[range];
|
||||
register int shift = vp8_norm[range];
|
||||
range <<= shift;
|
||||
value <<= shift;
|
||||
count -= shift;
|
||||
|
|
|
@ -986,7 +986,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
|||
VP8_COMMON *const pc = &pbi->common;
|
||||
MACROBLOCKD *const xd = &pbi->mb;
|
||||
const unsigned char *data = pbi->fragments.ptrs[0];
|
||||
const unsigned char *data_end = data + pbi->fragments.sizes[0];
|
||||
const unsigned int data_sz = pbi->fragments.sizes[0];
|
||||
const unsigned char *data_end = data + data_sz;
|
||||
ptrdiff_t first_partition_length_in_bytes;
|
||||
|
||||
int i, j, k, l;
|
||||
|
@ -1022,7 +1023,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
|
|||
const unsigned char *clear = data;
|
||||
if (pbi->decrypt_cb)
|
||||
{
|
||||
int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
|
||||
int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
|
||||
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
|
||||
clear = clear_buffer;
|
||||
}
|
||||
|
|
|
@ -194,7 +194,7 @@ void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul,
|
|||
return;
|
||||
}
|
||||
|
||||
if (new_row <= (-4 << 3) || new_col <= (-4 << 3))
|
||||
if (new_row <= -32 || new_col <= -32)
|
||||
{
|
||||
/* outside the frame */
|
||||
return;
|
||||
|
|
|
@ -163,7 +163,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
|
|||
{
|
||||
const TOKENEXTRA *stop = p + xcount;
|
||||
unsigned int split;
|
||||
unsigned int shift;
|
||||
int shift;
|
||||
int count = w->count;
|
||||
unsigned int range = w->range;
|
||||
unsigned int lowvalue = w->lowvalue;
|
||||
|
|
|
@ -65,7 +65,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
|
|||
int count = br->count;
|
||||
unsigned int range = br->range;
|
||||
unsigned int lowvalue = br->lowvalue;
|
||||
register unsigned int shift;
|
||||
register int shift;
|
||||
|
||||
#ifdef VP8_ENTROPY_STATS
|
||||
#if defined(SECTIONBITS_OUTPUT)
|
||||
|
|
|
@ -529,7 +529,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
|
|||
// Bias on zero motion vector sse.
|
||||
const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
|
||||
zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
|
||||
sse_diff = zero_mv_sse - best_sse;
|
||||
sse_diff = (int)zero_mv_sse - (int)best_sse;
|
||||
|
||||
saved_mbmi = *mbmi;
|
||||
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SUM_DIFF_THRESHOLD 448
|
||||
#define SUM_DIFF_THRESHOLD_HIGH 512
|
||||
#define SUM_DIFF_THRESHOLD 512
|
||||
#define SUM_DIFF_THRESHOLD_HIGH 600
|
||||
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
|
||||
|
||||
#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "onyx_int.h"
|
||||
#include "vpx_dsp/variance.h"
|
||||
#include "encodeintra.h"
|
||||
#include "vp8/common/common.h"
|
||||
#include "vp8/common/setupintrarecon.h"
|
||||
#include "vp8/common/systemdependent.h"
|
||||
#include "mcomp.h"
|
||||
|
@ -2417,7 +2418,7 @@ void vp8_second_pass(VP8_COMP *cpi)
|
|||
int tmp_q;
|
||||
int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
|
||||
|
||||
FIRSTPASS_STATS this_frame = {0};
|
||||
FIRSTPASS_STATS this_frame;
|
||||
FIRSTPASS_STATS this_frame_copy;
|
||||
|
||||
double this_frame_intra_error;
|
||||
|
@ -2425,6 +2426,8 @@ void vp8_second_pass(VP8_COMP *cpi)
|
|||
|
||||
int overhead_bits;
|
||||
|
||||
vp8_zero(this_frame);
|
||||
|
||||
if (!cpi->twopass.stats_in)
|
||||
{
|
||||
return ;
|
||||
|
@ -2808,7 +2811,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
|
|||
* static scene.
|
||||
*/
|
||||
if ( detect_transition_to_still( cpi, i,
|
||||
(cpi->key_frame_frequency-i),
|
||||
((int)(cpi->key_frame_frequency) -
|
||||
(int)i),
|
||||
loop_decay_rate,
|
||||
decay_accumulator ) )
|
||||
{
|
||||
|
|
|
@ -1591,7 +1591,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
|
|||
int col_min = ref_col - distance;
|
||||
int col_max = ref_col + distance;
|
||||
|
||||
// TODO(johannkoenig): check if this alignment is necessary.
|
||||
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
|
||||
unsigned int sad_array[3];
|
||||
|
||||
|
|
|
@ -1523,7 +1523,8 @@ static void update_layer_contexts (VP8_COMP *cpi)
|
|||
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
||||
{
|
||||
VP8_COMMON *cm = &cpi->common;
|
||||
int last_w, last_h, prev_number_of_layers;
|
||||
int last_w, last_h;
|
||||
unsigned int prev_number_of_layers;
|
||||
|
||||
if (!cpi)
|
||||
return;
|
||||
|
@ -1786,10 +1787,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
|
|||
if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height)
|
||||
cpi->force_next_frame_intra = 1;
|
||||
|
||||
if (((cm->Width + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & 0xfffffff0) !=
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
|
||||
((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
|
||||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
|
||||
{
|
||||
dealloc_raw_frame_buffers(cpi);
|
||||
|
@ -2247,6 +2246,8 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
|||
double total_encode_time = (cpi->time_receive_data +
|
||||
cpi->time_compress_data) / 1000.000;
|
||||
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
|
||||
const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
|
||||
const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
|
||||
|
||||
if (cpi->b_calculate_psnr)
|
||||
{
|
||||
|
@ -2292,12 +2293,14 @@ void vp8_remove_compressor(VP8_COMP **ptr)
|
|||
cpi->summed_weights, 8.0);
|
||||
|
||||
fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
|
||||
"GLPsnrP\tVPXSSIM\t Time(us)\n");
|
||||
"GLPsnrP\tVPXSSIM\t Time(us) Rc-Err "
|
||||
"Abs Err\n");
|
||||
fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
|
||||
"%7.3f\t%8.0f\n",
|
||||
"%7.3f\t%8.0f %7.2f %7.2f\n",
|
||||
dr, cpi->total / cpi->count, total_psnr,
|
||||
cpi->totalp / cpi->count, total_psnr2,
|
||||
total_ssim, total_encode_time);
|
||||
total_ssim, total_encode_time,
|
||||
rate_err, fabs(rate_err));
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
|
@ -5168,7 +5171,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
|
|||
vp8_second_pass(cpi);
|
||||
|
||||
encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags);
|
||||
cpi->twopass.bits_left -= 8 * *size;
|
||||
cpi->twopass.bits_left -= 8 * (int)(*size);
|
||||
|
||||
if (!cpi->common.refresh_alt_ref_frame)
|
||||
{
|
||||
|
@ -5772,7 +5775,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
|
|||
return -1;
|
||||
|
||||
// Check number of rows and columns match
|
||||
if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
|
||||
if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols)
|
||||
return -1;
|
||||
|
||||
// Range check the delta Q values and convert the external Q range values
|
||||
|
@ -5828,7 +5831,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
|
|||
|
||||
int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
|
||||
{
|
||||
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
|
||||
if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols)
|
||||
{
|
||||
if (map)
|
||||
{
|
||||
|
|
|
@ -371,7 +371,7 @@ typedef struct VP8_COMP
|
|||
double key_frame_rate_correction_factor;
|
||||
double gf_rate_correction_factor;
|
||||
|
||||
unsigned int frames_since_golden;
|
||||
int frames_since_golden;
|
||||
/* Count down till next GF */
|
||||
int frames_till_gf_update_due;
|
||||
|
||||
|
|
|
@ -90,7 +90,7 @@ static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
|
|||
{
|
||||
int i = 0;
|
||||
// No skin if block has been zero motion for long consecutive time.
|
||||
if (consec_zeromv > 80)
|
||||
if (consec_zeromv > 60)
|
||||
return 0;
|
||||
// Exit on grey.
|
||||
if (cb == 128 && cr == 128)
|
||||
|
@ -103,7 +103,7 @@ static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
|
|||
if (skin_color_diff < skin_threshold[i + 1]) {
|
||||
if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
|
||||
return 0;
|
||||
else if (consec_zeromv > 30 &&
|
||||
else if (consec_zeromv > 25 &&
|
||||
skin_color_diff > (skin_threshold[i + 1] >> 1))
|
||||
return 0;
|
||||
else
|
||||
|
|
|
@ -1899,7 +1899,8 @@ static int calculate_final_rd_costs(int this_rd,
|
|||
int prob_skip_cost;
|
||||
|
||||
prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
|
||||
prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
|
||||
prob_skip_cost -=
|
||||
(int)vp8_cost_bit(cpi->prob_skip_false, 0);
|
||||
rd->rate2 += prob_skip_cost;
|
||||
*other_cost += prob_skip_cost;
|
||||
}
|
||||
|
|
|
@ -227,12 +227,12 @@ static void invert_quant(int improved_quant, short *quant,
|
|||
if(improved_quant)
|
||||
{
|
||||
unsigned t;
|
||||
int l;
|
||||
int l, m;
|
||||
t = d;
|
||||
for(l = 0; t > 1; l++)
|
||||
t>>=1;
|
||||
t = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(t - (1<<16));
|
||||
m = 1 + (1<<(16+l))/d;
|
||||
*quant = (short)(m - (1<<16));
|
||||
*shift = l;
|
||||
/* use multiplication and constant shift by 16 */
|
||||
*shift = 1 << (16 - *shift);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "vpx/vp8cx.h"
|
||||
#include "vp8/encoder/firstpass.h"
|
||||
#include "vp8/common/onyx.h"
|
||||
#include "vp8/common/common.h"
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
@ -760,7 +761,7 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
|
|||
unsigned long duration,
|
||||
unsigned long deadline)
|
||||
{
|
||||
unsigned int new_qc;
|
||||
int new_qc;
|
||||
|
||||
#if !(CONFIG_REALTIME_ONLY)
|
||||
/* Use best quality mode if no deadline is given. */
|
||||
|
@ -785,7 +786,9 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
|
|||
new_qc = MODE_REALTIME;
|
||||
#endif
|
||||
|
||||
if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
|
||||
if (deadline == VPX_DL_REALTIME)
|
||||
new_qc = MODE_REALTIME;
|
||||
else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
|
||||
new_qc = MODE_FIRSTPASS;
|
||||
else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
|
||||
new_qc = (new_qc == MODE_BESTQUALITY)
|
||||
|
@ -1116,7 +1119,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
|
|||
{
|
||||
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
vp8_ppflags_t flags = {0};
|
||||
vp8_ppflags_t flags;
|
||||
vp8_zero(flags);
|
||||
|
||||
if (ctx->preview_ppcfg.post_proc_flag)
|
||||
{
|
||||
|
@ -1305,8 +1309,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
|||
30, /* rc_resize_up_thresold */
|
||||
|
||||
VPX_VBR, /* rc_end_usage */
|
||||
{0}, /* rc_twopass_stats_in */
|
||||
{0}, /* rc_firstpass_mb_stats_in */
|
||||
{NULL, 0}, /* rc_twopass_stats_in */
|
||||
{NULL, 0}, /* rc_firstpass_mb_stats_in */
|
||||
256, /* rc_target_bandwidth */
|
||||
4, /* rc_min_quantizer */
|
||||
63, /* rc_max_quantizer */
|
||||
|
@ -1334,6 +1338,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
|
|||
{0}, /* ts_rate_decimator */
|
||||
0, /* ts_periodicity */
|
||||
{0}, /* ts_layer_id */
|
||||
{0}, /* layer_target_bitrate */
|
||||
0 /* temporal_layering_mode */
|
||||
}},
|
||||
};
|
||||
|
||||
|
|
|
@ -522,7 +522,8 @@ static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
|
|||
{
|
||||
YV12_BUFFER_CONFIG sd;
|
||||
int64_t time_stamp = 0, time_end_stamp = 0;
|
||||
vp8_ppflags_t flags = {0};
|
||||
vp8_ppflags_t flags;
|
||||
vp8_zero(flags);
|
||||
|
||||
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
|
||||
{
|
||||
|
@ -816,11 +817,12 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
|
|||
},
|
||||
{ /* encoder functions */
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
NULL, /* vpx_codec_enc_cfg_map_t */
|
||||
NULL, /* vpx_codec_encode_fn_t */
|
||||
NULL, /* vpx_codec_get_cx_data_fn_t */
|
||||
NULL, /* vpx_codec_enc_config_set_fn_t */
|
||||
NULL, /* vpx_codec_get_global_headers_fn_t */
|
||||
NULL, /* vpx_codec_get_preview_frame_fn_t */
|
||||
NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */
|
||||
}
|
||||
};
|
||||
|
|
|
@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
|
|||
|
||||
#define VP9_FRAME_MARKER 0x2
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
|
@ -159,3 +159,18 @@ const struct {
|
|||
{0, 8 }, // 64X32 - {0b0000, 0b1000}
|
||||
{0, 0 }, // 64X64 - {0b0000, 0b0000}
|
||||
};
|
||||
|
||||
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
const uint8_t need_top_left[INTRA_MODES] = {
|
||||
0, // DC_PRED
|
||||
0, // V_PRED
|
||||
0, // H_PRED
|
||||
0, // D45_PRED
|
||||
1, // D135_PRED
|
||||
1, // D117_PRED
|
||||
1, // D153_PRED
|
||||
0, // D207_PRED
|
||||
0, // D63_PRED
|
||||
1, // TM_PRED
|
||||
};
|
||||
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
|
|
|
@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
|
|||
extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
|
||||
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
|
||||
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
|
||||
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
extern const uint8_t need_top_left[INTRA_MODES];
|
||||
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -298,196 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
|
|||
|
||||
static void filter_selectively_vert_row2(int subsampling_factor,
|
||||
uint8_t *s, int pitch,
|
||||
unsigned int mask_16x16_l,
|
||||
unsigned int mask_8x8_l,
|
||||
unsigned int mask_4x4_l,
|
||||
unsigned int mask_4x4_int_l,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
unsigned int mask_16x16,
|
||||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl) {
|
||||
const int mask_shift = subsampling_factor ? 4 : 8;
|
||||
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
|
||||
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
|
||||
const int lfl_forward = subsampling_factor ? 4 : 8;
|
||||
|
||||
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
|
||||
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
|
||||
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
|
||||
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
|
||||
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
|
||||
const unsigned int dual_one = 1 | (1 << lfl_forward);
|
||||
unsigned int mask;
|
||||
uint8_t *ss[2];
|
||||
ss[0] = s;
|
||||
|
||||
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
|
||||
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
|
||||
mask; mask >>= 1) {
|
||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||
for (mask =
|
||||
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
|
||||
mask; mask = (mask & ~dual_one) >> 1) {
|
||||
if (mask & dual_one) {
|
||||
const loop_filter_thresh *lfis[2];
|
||||
lfis[0] = lfthr + *lfl;
|
||||
lfis[1] = lfthr + *(lfl + lfl_forward);
|
||||
ss[1] = ss[0] + 8 * pitch;
|
||||
|
||||
if (mask & 1) {
|
||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||
vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr);
|
||||
} else if (mask_16x16_0 & 1) {
|
||||
vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr);
|
||||
if (mask_16x16 & dual_one) {
|
||||
if ((mask_16x16 & dual_one) == dual_one) {
|
||||
vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
|
||||
lfis[0]->hev_thr);
|
||||
} else {
|
||||
vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
|
||||
lfi1->lim, lfi1->hev_thr);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
|
||||
vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_8x8_0 | mask_8x8_1) & 1) {
|
||||
if ((mask_8x8_0 & mask_8x8_1) & 1) {
|
||||
vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
} else if (mask_8x8_0 & 1) {
|
||||
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||
if (mask_8x8 & dual_one) {
|
||||
if ((mask_8x8 & dual_one) == dual_one) {
|
||||
vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
|
||||
lfis[0]->hev_thr, lfis[1]->mblim,
|
||||
lfis[1]->lim, lfis[1]->hev_thr);
|
||||
} else {
|
||||
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
|
||||
vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_4x4_0 | mask_4x4_1) & 1) {
|
||||
if ((mask_4x4_0 & mask_4x4_1) & 1) {
|
||||
vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
} else if (mask_4x4_0 & 1) {
|
||||
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
|
||||
if (mask_4x4 & dual_one) {
|
||||
if ((mask_4x4 & dual_one) == dual_one) {
|
||||
vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
|
||||
lfis[0]->hev_thr, lfis[1]->mblim,
|
||||
lfis[1]->lim, lfis[1]->hev_thr);
|
||||
} else {
|
||||
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
|
||||
vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
|
||||
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
|
||||
vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
} else if (mask_4x4_int_0 & 1) {
|
||||
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr);
|
||||
if (mask_4x4_int & dual_one) {
|
||||
if ((mask_4x4_int & dual_one) == dual_one) {
|
||||
vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
|
||||
lfis[0]->lim, lfis[0]->hev_thr,
|
||||
lfis[1]->mblim, lfis[1]->lim,
|
||||
lfis[1]->hev_thr);
|
||||
} else {
|
||||
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
|
||||
vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s += 8;
|
||||
ss[0] += 8;
|
||||
lfl += 1;
|
||||
mask_16x16_0 >>= 1;
|
||||
mask_8x8_0 >>= 1;
|
||||
mask_4x4_0 >>= 1;
|
||||
mask_4x4_int_0 >>= 1;
|
||||
mask_16x16_1 >>= 1;
|
||||
mask_8x8_1 >>= 1;
|
||||
mask_4x4_1 >>= 1;
|
||||
mask_4x4_int_1 >>= 1;
|
||||
mask_16x16 >>= 1;
|
||||
mask_8x8 >>= 1;
|
||||
mask_4x4 >>= 1;
|
||||
mask_4x4_int >>= 1;
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
|
||||
uint16_t *s, int pitch,
|
||||
unsigned int mask_16x16_l,
|
||||
unsigned int mask_8x8_l,
|
||||
unsigned int mask_4x4_l,
|
||||
unsigned int mask_4x4_int_l,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
unsigned int mask_16x16,
|
||||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl, int bd) {
|
||||
const int mask_shift = subsampling_factor ? 4 : 8;
|
||||
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
|
||||
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
|
||||
const int lfl_forward = subsampling_factor ? 4 : 8;
|
||||
|
||||
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
|
||||
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
|
||||
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
|
||||
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
|
||||
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
|
||||
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
|
||||
const unsigned int dual_one = 1 | (1 << lfl_forward);
|
||||
unsigned int mask;
|
||||
uint16_t *ss[2];
|
||||
ss[0] = s;
|
||||
|
||||
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
|
||||
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
|
||||
mask; mask >>= 1) {
|
||||
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
|
||||
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
|
||||
for (mask =
|
||||
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
|
||||
mask; mask = (mask & ~dual_one) >> 1) {
|
||||
if (mask & dual_one) {
|
||||
const loop_filter_thresh *lfis[2];
|
||||
lfis[0] = lfthr + *lfl;
|
||||
lfis[1] = lfthr + *(lfl + lfl_forward);
|
||||
ss[1] = ss[0] + 8 * pitch;
|
||||
|
||||
if (mask & 1) {
|
||||
if ((mask_16x16_0 | mask_16x16_1) & 1) {
|
||||
if ((mask_16x16_0 & mask_16x16_1) & 1) {
|
||||
vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, bd);
|
||||
} else if (mask_16x16_0 & 1) {
|
||||
vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, bd);
|
||||
if (mask_16x16 & dual_one) {
|
||||
if ((mask_16x16 & dual_one) == dual_one) {
|
||||
vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
|
||||
lfis[0]->lim, lfis[0]->hev_thr, bd);
|
||||
} else {
|
||||
vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
|
||||
lfi1->lim, lfi1->hev_thr, bd);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
|
||||
vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, bd);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_8x8_0 | mask_8x8_1) & 1) {
|
||||
if ((mask_8x8_0 & mask_8x8_1) & 1) {
|
||||
vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr, bd);
|
||||
} else if (mask_8x8_0 & 1) {
|
||||
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, bd);
|
||||
if (mask_8x8 & dual_one) {
|
||||
if ((mask_8x8 & dual_one) == dual_one) {
|
||||
vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
|
||||
lfis[0]->lim, lfis[0]->hev_thr,
|
||||
lfis[1]->mblim, lfis[1]->lim,
|
||||
lfis[1]->hev_thr, bd);
|
||||
} else {
|
||||
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
|
||||
lfi1->lim, lfi1->hev_thr, bd);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
|
||||
vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, bd);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_4x4_0 | mask_4x4_1) & 1) {
|
||||
if ((mask_4x4_0 & mask_4x4_1) & 1) {
|
||||
vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr, bd);
|
||||
} else if (mask_4x4_0 & 1) {
|
||||
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, bd);
|
||||
if (mask_4x4 & dual_one) {
|
||||
if ((mask_4x4 & dual_one) == dual_one) {
|
||||
vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
|
||||
lfis[0]->lim, lfis[0]->hev_thr,
|
||||
lfis[1]->mblim, lfis[1]->lim,
|
||||
lfis[1]->hev_thr, bd);
|
||||
} else {
|
||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
|
||||
lfi1->lim, lfi1->hev_thr, bd);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
|
||||
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
|
||||
lfi->lim, lfi->hev_thr, bd);
|
||||
}
|
||||
}
|
||||
|
||||
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
|
||||
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
|
||||
vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
|
||||
lfi1->hev_thr, bd);
|
||||
} else if (mask_4x4_int_0 & 1) {
|
||||
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
|
||||
lfi0->hev_thr, bd);
|
||||
if (mask_4x4_int & dual_one) {
|
||||
if ((mask_4x4_int & dual_one) == dual_one) {
|
||||
vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
|
||||
lfis[0]->lim, lfis[0]->hev_thr,
|
||||
lfis[1]->mblim, lfis[1]->lim,
|
||||
lfis[1]->hev_thr, bd);
|
||||
} else {
|
||||
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
|
||||
lfi1->lim, lfi1->hev_thr, bd);
|
||||
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
|
||||
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
|
||||
lfi->mblim, lfi->lim, lfi->hev_thr, bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s += 8;
|
||||
ss[0] += 8;
|
||||
lfl += 1;
|
||||
mask_16x16_0 >>= 1;
|
||||
mask_8x8_0 >>= 1;
|
||||
mask_4x4_0 >>= 1;
|
||||
mask_4x4_int_0 >>= 1;
|
||||
mask_16x16_1 >>= 1;
|
||||
mask_8x8_1 >>= 1;
|
||||
mask_4x4_1 >>= 1;
|
||||
mask_4x4_int_1 >>= 1;
|
||||
mask_16x16 >>= 1;
|
||||
mask_8x8 >>= 1;
|
||||
mask_4x4 >>= 1;
|
||||
mask_4x4_int >>= 1;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -497,17 +469,17 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl) {
|
||||
unsigned int mask;
|
||||
int count;
|
||||
|
||||
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
|
||||
mask; mask >>= count) {
|
||||
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
|
||||
|
||||
count = 1;
|
||||
if (mask & 1) {
|
||||
const loop_filter_thresh *lfi = lfthr + *lfl;
|
||||
|
||||
if (mask_16x16 & 1) {
|
||||
if ((mask_16x16 & 3) == 3) {
|
||||
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
|
@ -520,7 +492,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||
} else if (mask_8x8 & 1) {
|
||||
if ((mask_8x8 & 3) == 3) {
|
||||
// Next block's thresholds.
|
||||
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
|
||||
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
|
||||
|
||||
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, lfin->mblim, lfin->lim,
|
||||
|
@ -549,7 +521,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||
} else if (mask_4x4 & 1) {
|
||||
if ((mask_4x4 & 3) == 3) {
|
||||
// Next block's thresholds.
|
||||
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
|
||||
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
|
||||
|
||||
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, lfin->mblim, lfin->lim,
|
||||
|
@ -574,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
|
|||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
} else if (mask_4x4_int & 1) {
|
||||
} else {
|
||||
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr);
|
||||
}
|
||||
|
@ -594,17 +566,17 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl, int bd) {
|
||||
unsigned int mask;
|
||||
int count;
|
||||
|
||||
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
|
||||
mask; mask >>= count) {
|
||||
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
|
||||
|
||||
count = 1;
|
||||
if (mask & 1) {
|
||||
const loop_filter_thresh *lfi = lfthr + *lfl;
|
||||
|
||||
if (mask_16x16 & 1) {
|
||||
if ((mask_16x16 & 3) == 3) {
|
||||
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
|
||||
|
@ -617,7 +589,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||
} else if (mask_8x8 & 1) {
|
||||
if ((mask_8x8 & 3) == 3) {
|
||||
// Next block's thresholds.
|
||||
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
|
||||
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
|
||||
|
||||
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, lfin->mblim, lfin->lim,
|
||||
|
@ -650,7 +622,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||
} else if (mask_4x4 & 1) {
|
||||
if ((mask_4x4 & 3) == 3) {
|
||||
// Next block's thresholds.
|
||||
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
|
||||
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
|
||||
|
||||
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, lfin->mblim, lfin->lim,
|
||||
|
@ -679,7 +651,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||
lfi->lim, lfi->hev_thr, bd);
|
||||
}
|
||||
}
|
||||
} else if (mask_4x4_int & 1) {
|
||||
} else {
|
||||
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
|
||||
lfi->hev_thr, bd);
|
||||
}
|
||||
|
@ -700,7 +672,6 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
|
|||
// whether there were any coefficients encoded, and the loop filter strength
|
||||
// block we are currently looking at. Shift is used to position the
|
||||
// 1's we produce.
|
||||
// TODO(JBB) Need another function for different resolution color..
|
||||
static void build_masks(const loop_filter_info_n *const lfi_n,
|
||||
const MODE_INFO *mi, const int shift_y,
|
||||
const int shift_uv,
|
||||
|
@ -935,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
|
|||
|
||||
// This function sets up the bit masks for the entire 64x64 region represented
|
||||
// by mi_row, mi_col.
|
||||
// TODO(JBB): This function only works for yv12.
|
||||
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
|
||||
MODE_INFO **mi, const int mode_info_stride,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
|
@ -971,9 +941,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
|
|||
vp9_zero(*lfm);
|
||||
assert(mip[0] != NULL);
|
||||
|
||||
// TODO(jimbankoski): Try moving most of the following code into decode
|
||||
// loop and storing lfm in the mbmi structure so that we don't have to go
|
||||
// through the recursive loop structure multiple times.
|
||||
switch (mip[0]->sb_type) {
|
||||
case BLOCK_64X64:
|
||||
build_masks(lfi_n, mip[0] , 0, 0, lfm);
|
||||
|
@ -1077,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
|
|||
}
|
||||
break;
|
||||
}
|
||||
|
||||
vp9_adjust_mask(cm, mi_row, mi_col, lfm);
|
||||
}
|
||||
|
||||
static void filter_selectively_vert(uint8_t *s, int pitch,
|
||||
|
@ -1086,13 +1051,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
|
|||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl) {
|
||||
unsigned int mask;
|
||||
|
||||
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
|
||||
mask; mask >>= 1) {
|
||||
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
|
||||
const loop_filter_thresh *lfi = lfthr + *lfl;
|
||||
|
||||
if (mask & 1) {
|
||||
if (mask_16x16 & 1) {
|
||||
|
@ -1120,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
|
|||
unsigned int mask_8x8,
|
||||
unsigned int mask_4x4,
|
||||
unsigned int mask_4x4_int,
|
||||
const loop_filter_info_n *lfi_n,
|
||||
const loop_filter_thresh *lfthr,
|
||||
const uint8_t *lfl, int bd) {
|
||||
unsigned int mask;
|
||||
|
||||
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
|
||||
mask; mask >>= 1) {
|
||||
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
|
||||
const loop_filter_thresh *lfi = lfthr + *lfl;
|
||||
|
||||
if (mask & 1) {
|
||||
if (mask_16x16 & 1) {
|
||||
|
@ -1257,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
|
|||
mask_8x8_c & border_mask,
|
||||
mask_4x4_c & border_mask,
|
||||
mask_4x4_int[r],
|
||||
&cm->lf_info, &lfl[r << 3],
|
||||
cm->lf_info.lfthr, &lfl[r << 3],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_vert(dst->buf, dst->stride,
|
||||
mask_16x16_c & border_mask,
|
||||
mask_8x8_c & border_mask,
|
||||
mask_4x4_c & border_mask,
|
||||
mask_4x4_int[r],
|
||||
&cm->lf_info, &lfl[r << 3]);
|
||||
cm->lf_info.lfthr, &lfl[r << 3]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#else
|
||||
filter_selectively_vert(dst->buf, dst->stride,
|
||||
mask_16x16_c & border_mask,
|
||||
mask_8x8_c & border_mask,
|
||||
mask_4x4_c & border_mask,
|
||||
mask_4x4_int[r],
|
||||
&cm->lf_info, &lfl[r << 3]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
dst->buf += 8 * dst->stride;
|
||||
mi_8x8 += row_step_stride;
|
||||
|
@ -1306,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
|
|||
mask_8x8_r,
|
||||
mask_4x4_r,
|
||||
mask_4x4_int_r,
|
||||
&cm->lf_info, &lfl[r << 3],
|
||||
cm->lf_info.lfthr, &lfl[r << 3],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_horiz(dst->buf, dst->stride,
|
||||
mask_16x16_r,
|
||||
mask_8x8_r,
|
||||
mask_4x4_r,
|
||||
mask_4x4_int_r,
|
||||
&cm->lf_info, &lfl[r << 3]);
|
||||
cm->lf_info.lfthr, &lfl[r << 3]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#else
|
||||
filter_selectively_horiz(dst->buf, dst->stride,
|
||||
mask_16x16_r,
|
||||
mask_8x8_r,
|
||||
mask_4x4_r,
|
||||
mask_4x4_int_r,
|
||||
&cm->lf_info, &lfl[r << 3]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
dst->buf += 8 * dst->stride;
|
||||
}
|
||||
|
@ -1344,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
|
|||
|
||||
// Vertical pass: do 2 rows at one time
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
|
||||
unsigned int mask_16x16_l = mask_16x16 & 0xffff;
|
||||
unsigned int mask_8x8_l = mask_8x8 & 0xffff;
|
||||
unsigned int mask_4x4_l = mask_4x4 & 0xffff;
|
||||
unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
|
||||
|
||||
// Disable filtering on the leftmost column.
|
||||
// Disable filtering on the leftmost column.
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
highbd_filter_selectively_vert_row2(
|
||||
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
|
||||
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
|
||||
&lfm->lfl_y[r << 3], (int)cm->bit_depth);
|
||||
highbd_filter_selectively_vert_row2(plane->subsampling_x,
|
||||
CONVERT_TO_SHORTPTR(dst->buf),
|
||||
dst->stride,
|
||||
(unsigned int)mask_16x16,
|
||||
(unsigned int)mask_8x8,
|
||||
(unsigned int)mask_4x4,
|
||||
(unsigned int)mask_4x4_int,
|
||||
cm->lf_info.lfthr,
|
||||
&lfm->lfl_y[r << 3],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
filter_selectively_vert_row2(
|
||||
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
|
||||
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
|
||||
(unsigned int)mask_16x16,
|
||||
(unsigned int)mask_8x8,
|
||||
(unsigned int)mask_4x4,
|
||||
(unsigned int)mask_4x4_int,
|
||||
cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#else
|
||||
filter_selectively_vert_row2(
|
||||
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
|
||||
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
dst->buf += 16 * dst->stride;
|
||||
mask_16x16 >>= 16;
|
||||
|
@ -1397,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
|
|||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
highbd_filter_selectively_horiz(
|
||||
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
|
||||
(int)cm->bit_depth);
|
||||
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
|
||||
dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int & 0xff,
|
||||
cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
|
||||
&lfm->lfl_y[r << 3]);
|
||||
mask_4x4_r, mask_4x4_int & 0xff,
|
||||
cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#else
|
||||
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
|
||||
&lfm->lfl_y[r << 3]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
dst->buf += 8 * dst->stride;
|
||||
|
@ -1443,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
|
|||
lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
|
||||
}
|
||||
|
||||
{
|
||||
unsigned int mask_16x16_l = mask_16x16 & 0xff;
|
||||
unsigned int mask_8x8_l = mask_8x8 & 0xff;
|
||||
unsigned int mask_4x4_l = mask_4x4 & 0xff;
|
||||
unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
|
||||
|
||||
// Disable filtering on the leftmost column.
|
||||
// Disable filtering on the leftmost column.
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
highbd_filter_selectively_vert_row2(
|
||||
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
|
||||
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
|
||||
&lfl_uv[r << 1], (int)cm->bit_depth);
|
||||
} else {
|
||||
filter_selectively_vert_row2(
|
||||
plane->subsampling_x, dst->buf, dst->stride,
|
||||
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
|
||||
&lfl_uv[r << 1]);
|
||||
}
|
||||
#else
|
||||
filter_selectively_vert_row2(
|
||||
plane->subsampling_x, dst->buf, dst->stride,
|
||||
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
|
||||
&lfl_uv[r << 1]);
|
||||
if (cm->use_highbitdepth) {
|
||||
highbd_filter_selectively_vert_row2(plane->subsampling_x,
|
||||
CONVERT_TO_SHORTPTR(dst->buf),
|
||||
dst->stride,
|
||||
(unsigned int)mask_16x16,
|
||||
(unsigned int)mask_8x8,
|
||||
(unsigned int)mask_4x4,
|
||||
(unsigned int)mask_4x4_int,
|
||||
cm->lf_info.lfthr, &lfl_uv[r << 1],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
|
||||
(unsigned int)mask_16x16,
|
||||
(unsigned int)mask_8x8,
|
||||
(unsigned int)mask_4x4,
|
||||
(unsigned int)mask_4x4_int,
|
||||
cm->lf_info.lfthr, &lfl_uv[r << 1]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
dst->buf += 16 * dst->stride;
|
||||
mask_16x16 >>= 8;
|
||||
mask_8x8 >>= 8;
|
||||
mask_4x4 >>= 8;
|
||||
mask_4x4_int >>= 8;
|
||||
}
|
||||
dst->buf += 16 * dst->stride;
|
||||
mask_16x16 >>= 8;
|
||||
mask_8x8 >>= 8;
|
||||
mask_4x4 >>= 8;
|
||||
mask_4x4_int >>= 8;
|
||||
}
|
||||
|
||||
// Horizontal pass
|
||||
|
@ -1506,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
|
|||
if (cm->use_highbitdepth) {
|
||||
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
|
||||
dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
|
||||
&lfl_uv[r << 1], (int)cm->bit_depth);
|
||||
mask_4x4_r, mask_4x4_int_r,
|
||||
cm->lf_info.lfthr, &lfl_uv[r << 1],
|
||||
(int)cm->bit_depth);
|
||||
} else {
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
|
||||
mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
|
||||
&lfl_uv[r << 1]);
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
#else
|
||||
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
|
||||
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
|
||||
&lfl_uv[r << 1]);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
dst->buf += 8 * dst->stride;
|
||||
|
@ -1552,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
|
|||
|
||||
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
|
||||
|
||||
// TODO(JBB): Make setup_mask work for non 420.
|
||||
// TODO(jimbankoski): For 444 only need to do y mask.
|
||||
vp9_adjust_mask(cm, mi_row, mi_col, lfm);
|
||||
|
||||
vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
|
||||
|
@ -1592,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
|
|||
}
|
||||
|
||||
// Used by the encoder to build the loopfilter masks.
|
||||
// TODO(slavarnway): Do the encoder the same way the decoder does it and
|
||||
// build the masks in line as part of the encode process.
|
||||
void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
|
||||
int partial_frame) {
|
||||
int start_mi_row, end_mi_row, mi_rows_to_filter;
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
|
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
|
|||
state->last_noise = a;
|
||||
}
|
||||
|
||||
// Adds noise to a width x height region of 8-bit pixels in place.
//   start   - first pixel of the region; row i begins at start + i * pitch.
//   noise   - byte table the per-row noise is read from.
//   blackclamp, whiteclamp - only element [0] of each is read here.
//   bothclamp - unused in this C version (see the TODO below).
// For every row a starting offset into noise is chosen as rand() & 0xff
// (0..255), then each pixel is raised to at least blackclamp[0], lowered to
// at most 255 + whiteclamp[0], and finally has the noise byte added to it.
// NOTE(review): rows read noise[offset + j] for j < width, so noise needs at
// least 255 + width readable bytes - confirm against the caller's buffer.
// NOTE(review): the clamp values are plain char, whose signedness is
// implementation-defined - confirm the intended value range.
void vp9_plane_add_noise_c(uint8_t *start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int width, unsigned int height, int pitch) {
|
||||
unsigned int i, j;
|
||||
|
||||
// TODO(jbb): why does simd code use both but c doesn't, normalize and
|
||||
// fix..
|
||||
(void) bothclamp;
|
||||
for (i = 0; i < height; i++) {
|
||||
uint8_t *pos = start + i * pitch;
|
||||
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
if (pos[j] < blackclamp[0])
|
||||
pos[j] = blackclamp[0];
|
||||
|
||||
if (pos[j] > 255 + whiteclamp[0])
|
||||
pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
pos[j] += ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
|
||||
// Current mip will be the prev_mip for the next frame.
|
||||
MODE_INFO *temp = cm->postproc_state.prev_mip;
|
||||
|
@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
|
|||
ppstate->last_noise != noise_level) {
|
||||
fillrd(ppstate, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
ppstate->whiteclamp, ppstate->bothclamp,
|
||||
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
|
||||
}
|
||||
|
|
|
@ -28,9 +28,9 @@ int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
|
|||
|
||||
if (left_type == above_type)
|
||||
return left_type;
|
||||
else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
|
||||
else if (left_type == SWITCHABLE_FILTERS)
|
||||
return above_type;
|
||||
else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
|
||||
else if (above_type == SWITCHABLE_FILTERS)
|
||||
return left_type;
|
||||
else
|
||||
return SWITCHABLE_FILTERS;
|
||||
|
|
|
@ -20,19 +20,6 @@
|
|||
#include "vp9/common/vp9_reconintra.h"
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
// Runs one high-bitdepth inter prediction by dispatching through the
// sf->highbd_predict function table. The table is indexed by whether
// subpel_x is non-zero, whether subpel_y is non-zero, and by ref.
// The selected function receives src/dst with their strides, the kernel
// entries kernel[subpel_x] and kernel[subpel_y] together with the steps
// xs and ys, the block size w x h, and the bit depth bd.
void high_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const int subpel_x,
|
||||
const int subpel_y,
|
||||
const struct scale_factors *sf,
|
||||
int w, int h, int ref,
|
||||
const InterpKernel *kernel,
|
||||
int xs, int ys, int bd) {
|
||||
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
||||
src, src_stride, dst, dst_stride,
|
||||
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
|
||||
}
|
||||
|
||||
void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const MV *src_mv,
|
||||
|
@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
|
|||
|
||||
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
|
||||
|
||||
high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
|
||||
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
|
||||
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
|
||||
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4,
|
||||
bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
|
@ -222,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
|
|||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
|
||||
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
|
||||
xd->bd);
|
||||
highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
|
||||
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
|
||||
xd->bd);
|
||||
} else {
|
||||
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
|
||||
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);
|
||||
|
|
|
@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
|
|||
}
|
||||
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
void high_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const int subpel_x,
|
||||
const int subpel_y,
|
||||
const struct scale_factors *sf,
|
||||
int w, int h, int ref,
|
||||
const InterpKernel *kernel,
|
||||
int xs, int ys, int bd);
|
||||
// Runs one high-bitdepth inter prediction by dispatching through the
// sf->highbd_predict function table. The table is indexed by whether
// subpel_x is non-zero, whether subpel_y is non-zero, and by ref.
// The selected function receives src/dst with their strides, the kernel
// entries kernel[subpel_x] and kernel[subpel_y] together with the steps
// xs and ys, the block size w x h, and the bit depth bd.
static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
const int subpel_x,
|
||||
const int subpel_y,
|
||||
const struct scale_factors *sf,
|
||||
int w, int h, int ref,
|
||||
const InterpKernel *kernel,
|
||||
int xs, int ys, int bd) {
|
||||
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
|
||||
src, src_stride, dst, dst_stride,
|
||||
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
|
||||
}
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
|
||||
MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi,
|
||||
|
|
|
@ -142,6 +142,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
|
|||
// 129 C D .. W X
|
||||
// 129 E F .. U V
|
||||
// 129 G H .. S T T T T T
|
||||
// For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
|
||||
|
||||
// Get current frame pointer, width and height.
|
||||
if (plane == 0) {
|
||||
|
@ -177,7 +178,6 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
|
|||
left_col[i] = ref[i * ref_stride - 1];
|
||||
}
|
||||
} else {
|
||||
// TODO(Peter): this value should probably change for high bitdepth
|
||||
vpx_memset16(left_col, base + 1, bs);
|
||||
}
|
||||
}
|
||||
|
@ -239,7 +239,6 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
|
|||
vpx_memset16(above_row + r, above_row[r - 1],
|
||||
x0 + 2 * bs - frame_width);
|
||||
}
|
||||
// TODO(Peter) this value should probably change for high bitdepth
|
||||
above_row[-1] = left_available ? above_ref[-1] : (base + 1);
|
||||
} else {
|
||||
/* faster path if the block does not need extension */
|
||||
|
@ -251,13 +250,11 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
|
|||
memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
|
||||
else
|
||||
vpx_memset16(above_row + bs, above_row[bs - 1], bs);
|
||||
// TODO(Peter): this value should probably change for high bitdepth
|
||||
above_row[-1] = left_available ? above_ref[-1] : (base + 1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
vpx_memset16(above_row, base - 1, bs * 2);
|
||||
// TODO(Peter): this value should probably change for high bitdepth
|
||||
above_row[-1] = base - 1;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
|
|||
specialize qw/vp9_post_proc_down_and_across sse2/;
|
||||
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
|
||||
|
||||
add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp9_plane_add_noise sse2/;
|
||||
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
|
||||
|
||||
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
|
||||
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
|
||||
|
||||
|
@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
|
||||
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
|
||||
specialize qw/vp9_highbd_post_proc_down_and_across/;
|
||||
|
||||
add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
|
||||
specialize qw/vp9_highbd_plane_add_noise/;
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -252,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vp9_fht16x16 sse2/;
|
||||
|
||||
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
|
||||
specialize qw/vp9_fwht4x4/, "$sse2_x86inc";
|
||||
} else {
|
||||
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
|
||||
specialize qw/vp9_fht4x4 sse2 msa/;
|
||||
|
@ -264,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
|
|||
specialize qw/vp9_fht16x16 sse2 msa/;
|
||||
|
||||
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
|
||||
specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
|
||||
specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc";
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -276,7 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
|
|||
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
|
||||
|
||||
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
|
||||
specialize qw/vp9_diamond_search_sad/;
|
||||
specialize qw/vp9_diamond_search_sad avx/;
|
||||
|
||||
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
|
||||
specialize qw/vp9_temporal_filter_apply sse2 msa/;
|
||||
|
|
|
@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
|
|||
// Resets the segment feature state held in *seg: applies vp9_zero to the
// feature_data and feature_mask arrays and sets aq_av_offset to 0.
// seg is dereferenced unconditionally, so it must not be NULL.
void vp9_clearall_segfeatures(struct segmentation *seg) {
|
||||
vp9_zero(seg->feature_data);
|
||||
vp9_zero(seg->feature_mask);
|
||||
seg->aq_av_offset = 0;
|
||||
}
|
||||
|
||||
void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
|
||||
|
|
|
@ -47,6 +47,7 @@ struct segmentation {
|
|||
|
||||
int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
|
||||
unsigned int feature_mask[MAX_SEGMENTS];
|
||||
int aq_av_offset;
|
||||
};
|
||||
|
||||
static INLINE int segfeature_active(const struct segmentation *seg,
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vpx_dsp/x86/inv_txfm_sse2.h"
|
||||
#include "vpx_dsp/x86/txfm_common_sse2.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
|
|
@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
|
|||
%undef flimit4
|
||||
|
||||
|
||||
;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
|
||||
; unsigned char blackclamp[16],
|
||||
; unsigned char whiteclamp[16],
|
||||
; unsigned char bothclamp[16],
|
||||
; unsigned int width, unsigned int height, int pitch)
|
||||
global sym(vp9_plane_add_noise_wmt) PRIVATE
|
||||
sym(vp9_plane_add_noise_wmt):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 8
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
.addnoise_loop:
|
||||
call sym(LIBVPX_RAND) WRT_PLT
|
||||
mov rcx, arg(1) ;noise
|
||||
and rax, 0xff
|
||||
add rcx, rax
|
||||
|
||||
; we rely on the fact that the clamping vectors are stored contiguously
|
||||
; in black/white/both order. Note that we have to reload this here because
|
||||
; rdx could be trashed by rand()
|
||||
mov rdx, arg(2) ; blackclamp
|
||||
|
||||
|
||||
mov rdi, rcx
|
||||
movsxd rcx, dword arg(5) ;[Width]
|
||||
mov rsi, arg(0) ;Pos
|
||||
xor rax,rax
|
||||
|
||||
.addnoise_nextset:
|
||||
movdqu xmm1,[rsi+rax] ; get the source
|
||||
|
||||
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
|
||||
paddusb xmm1, [rdx+32] ;bothclamp
|
||||
psubusb xmm1, [rdx+16] ;whiteclamp
|
||||
|
||||
movdqu xmm2,[rdi+rax] ; get the noise for this line
|
||||
paddb xmm1,xmm2 ; add it in
|
||||
movdqu [rsi+rax],xmm1 ; store the result
|
||||
|
||||
add rax,16 ; move to the next line
|
||||
|
||||
cmp rax, rcx
|
||||
jl .addnoise_nextset
|
||||
|
||||
movsxd rax, dword arg(7) ; Pitch
|
||||
add arg(0), rax ; Start += Pitch
|
||||
sub dword arg(6), 1 ; Height -= 1
|
||||
jg .addnoise_loop
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
ret
|
||||
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
rd42:
|
||||
|
|
|
@ -525,8 +525,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
|
|||
}
|
||||
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
|
||||
highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
|
||||
} else {
|
||||
inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys);
|
||||
|
@ -699,8 +699,8 @@ static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd,
|
|||
}
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
|
||||
highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
|
||||
} else {
|
||||
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
|
||||
subpel_y, sf, w, h, ref, kernel, xs, ys);
|
||||
|
@ -1315,11 +1315,16 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
|
|||
BufferPool *const pool = cm->buffer_pool;
|
||||
for (i = 0; i < REFS_PER_FRAME; ++i) {
|
||||
if (vpx_rb_read_bit(rb)) {
|
||||
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
|
||||
width = buf->y_crop_width;
|
||||
height = buf->y_crop_height;
|
||||
found = 1;
|
||||
break;
|
||||
if (cm->frame_refs[i].idx != INVALID_IDX) {
|
||||
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
|
||||
width = buf->y_crop_width;
|
||||
height = buf->y_crop_height;
|
||||
found = 1;
|
||||
break;
|
||||
} else {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Failed to decode frame size");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1334,22 +1339,23 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
|
|||
// has valid dimensions.
|
||||
for (i = 0; i < REFS_PER_FRAME; ++i) {
|
||||
RefBuffer *const ref_frame = &cm->frame_refs[i];
|
||||
has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width,
|
||||
ref_frame->buf->y_crop_height,
|
||||
width, height);
|
||||
has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX &&
|
||||
valid_ref_frame_size(ref_frame->buf->y_crop_width,
|
||||
ref_frame->buf->y_crop_height,
|
||||
width, height));
|
||||
}
|
||||
if (!has_valid_ref_frame)
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Referenced frame has invalid size");
|
||||
for (i = 0; i < REFS_PER_FRAME; ++i) {
|
||||
RefBuffer *const ref_frame = &cm->frame_refs[i];
|
||||
if (!valid_ref_frame_img_fmt(
|
||||
ref_frame->buf->bit_depth,
|
||||
ref_frame->buf->subsampling_x,
|
||||
ref_frame->buf->subsampling_y,
|
||||
cm->bit_depth,
|
||||
cm->subsampling_x,
|
||||
cm->subsampling_y))
|
||||
if (ref_frame->idx == INVALID_IDX ||
|
||||
!valid_ref_frame_img_fmt(ref_frame->buf->bit_depth,
|
||||
ref_frame->buf->subsampling_x,
|
||||
ref_frame->buf->subsampling_y,
|
||||
cm->bit_depth,
|
||||
cm->subsampling_x,
|
||||
cm->subsampling_y))
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
|
||||
"Referenced frame has incompatible color format");
|
||||
}
|
||||
|
|
|
@ -371,9 +371,9 @@ static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
|
|||
|
||||
if (left_type == above_type)
|
||||
return left_type;
|
||||
else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
|
||||
else if (left_type == SWITCHABLE_FILTERS)
|
||||
return above_type;
|
||||
else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
|
||||
else if (above_type == SWITCHABLE_FILTERS)
|
||||
return left_type;
|
||||
else
|
||||
return SWITCHABLE_FILTERS;
|
||||
|
@ -902,4 +902,10 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
|
|||
frame_mvs += cm->mi_cols;
|
||||
}
|
||||
}
|
||||
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
|
||||
(xd->above_mi == NULL || xd->left_mi == NULL) &&
|
||||
!is_inter_block(mi) && need_top_left[mi->uv_mode])
|
||||
assert(0);
|
||||
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
|
||||
}
|
||||
|
|
|
@ -505,7 +505,7 @@ vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
|
|||
uint32_t this_sz = 0;
|
||||
|
||||
for (j = 0; j < mag; ++j)
|
||||
this_sz |= (*x++) << (j * 8);
|
||||
this_sz |= ((uint32_t)(*x++)) << (j * 8);
|
||||
sizes[i] = this_sz;
|
||||
}
|
||||
*count = frames;
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_ports/system_state.h"
|
||||
|
||||
#include "vp9/encoder/vp9_aq_360.h"
|
||||
#include "vp9/encoder/vp9_aq_variance.h"
|
||||
|
||||
#include "vp9/common/vp9_seg_common.h"
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "vp9/encoder/vp9_segmentation.h"
|
||||
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
|
||||
size_t last_coded_q_map_size;
|
||||
size_t consec_zero_mv_size;
|
||||
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
|
||||
if (cr == NULL)
|
||||
return NULL;
|
||||
|
@ -40,21 +39,12 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
|
|||
}
|
||||
assert(MAXQ <= 255);
|
||||
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
|
||||
|
||||
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
|
||||
cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
|
||||
if (cr->consec_zero_mv == NULL) {
|
||||
vp9_cyclic_refresh_free(cr);
|
||||
return NULL;
|
||||
}
|
||||
memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
|
||||
return cr;
|
||||
}
|
||||
|
||||
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
|
||||
vpx_free(cr->map);
|
||||
vpx_free(cr->last_coded_q_map);
|
||||
vpx_free(cr->consec_zero_mv);
|
||||
vpx_free(cr);
|
||||
}
|
||||
|
||||
|
@ -244,7 +234,6 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
|
|||
BLOCK_SIZE bsize) {
|
||||
const VP9_COMMON *const cm = &cpi->common;
|
||||
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
|
||||
MV mv = mi->mv[0].as_mv;
|
||||
const int bw = num_8x8_blocks_wide_lookup[bsize];
|
||||
const int bh = num_8x8_blocks_high_lookup[bsize];
|
||||
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
|
||||
|
@ -268,15 +257,8 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
|
|||
clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id],
|
||||
0, MAXQ),
|
||||
cr->last_coded_q_map[map_offset]);
|
||||
// Update the consecutive zero/low_mv count.
|
||||
if (is_inter_block(mi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
|
||||
if (cr->consec_zero_mv[map_offset] < 255)
|
||||
cr->consec_zero_mv[map_offset]++;
|
||||
} else {
|
||||
cr->consec_zero_mv[map_offset] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update the actual number of blocks to which the segment delta q was applied.
|
||||
|
@ -410,13 +392,18 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
|
|||
cr->target_num_seg_blocks = 0;
|
||||
if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
|
||||
consec_zero_mv_thresh = 100;
|
||||
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
|
||||
consec_zero_mv_thresh = 80;
|
||||
}
|
||||
qindex_thresh =
|
||||
cpi->oxcf.content == VP9E_CONTENT_SCREEN
|
||||
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
|
||||
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
|
||||
// More aggressive settings for noisy content.
|
||||
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
|
||||
consec_zero_mv_thresh = 80;
|
||||
qindex_thresh =
|
||||
VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),
|
||||
7 * cm->base_qindex >> 3);
|
||||
}
|
||||
do {
|
||||
int sum_map = 0;
|
||||
// Get the mi_row/mi_col corresponding to superblock index i.
|
||||
|
@ -441,7 +428,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
|
|||
if (cr->map[bl_index2] == 0) {
|
||||
count_tot++;
|
||||
if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
|
||||
cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
|
||||
cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
|
||||
sum_map++;
|
||||
count_sel++;
|
||||
}
|
||||
|
@ -480,6 +467,8 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
|
|||
cr->percent_refresh = 5;
|
||||
cr->max_qdelta_perc = 50;
|
||||
cr->time_for_refresh = 0;
|
||||
cr->motion_thresh = 32;
|
||||
cr->rate_boost_fac = 15;
|
||||
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
|
||||
// periods of the refresh cycle, after a key frame.
|
||||
// Account for larger interval on base layer for temporal layers.
|
||||
|
@ -489,9 +478,11 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
|
|||
cr->rate_ratio_qdelta = 3.0;
|
||||
} else {
|
||||
cr->rate_ratio_qdelta = 2.0;
|
||||
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
|
||||
// Reduce the delta-qp if the estimated source noise is above threshold.
|
||||
cr->rate_ratio_qdelta = 1.5;
|
||||
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
|
||||
// Reduce the delta-qp if the estimated source noise is above threshold.
|
||||
cr->rate_ratio_qdelta = 1.7;
|
||||
cr->rate_boost_fac = 13;
|
||||
}
|
||||
}
|
||||
// Adjust some parameters for low resolutions at low bitrates.
|
||||
if (cm->width <= 352 &&
|
||||
|
@ -499,9 +490,6 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
|
|||
rc->avg_frame_bandwidth < 3400) {
|
||||
cr->motion_thresh = 4;
|
||||
cr->rate_boost_fac = 10;
|
||||
} else {
|
||||
cr->motion_thresh = 32;
|
||||
cr->rate_boost_fac = 15;
|
||||
}
|
||||
if (cpi->svc.spatial_layer_id > 0) {
|
||||
cr->motion_thresh = 4;
|
||||
|
@ -544,8 +532,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
|
|||
if (cm->frame_type == KEY_FRAME) {
|
||||
memset(cr->last_coded_q_map, MAXQ,
|
||||
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
|
||||
memset(cr->consec_zero_mv, 0,
|
||||
cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
|
||||
cr->sb_index = 0;
|
||||
}
|
||||
return;
|
||||
|
@ -620,7 +606,6 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
|
|||
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
|
||||
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
|
||||
memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
|
||||
memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
|
||||
cr->sb_index = 0;
|
||||
cpi->refresh_golden_frame = 1;
|
||||
cpi->refresh_alt_ref_frame = 1;
|
||||
|
|
|
@ -53,8 +53,6 @@ struct CYCLIC_REFRESH {
|
|||
signed char *map;
|
||||
// Map of the last q a block was coded at.
|
||||
uint8_t *last_coded_q_map;
|
||||
// Count of how many consecutive times a block uses ZEROMV for encoding.
|
||||
uint8_t *consec_zero_mv;
|
||||
// Thresholds applied to the projected rate/distortion of the coding block,
|
||||
// when deciding whether block should be refreshed.
|
||||
int64_t thresh_rate_sb;
|
||||
|
|
|
@ -167,7 +167,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
vp9_64_zeros, 0, bw, bh, &sse, &avg);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
var = sse - (((int64_t)avg * avg) / (bw * bh));
|
||||
return (256 * var) / (bw * bh);
|
||||
return (unsigned int)(((uint64_t)256 * var) / (bw * bh));
|
||||
} else {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
|
@ -185,7 +185,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
x->plane[0].src.stride,
|
||||
vp9_64_zeros, 0, &sse);
|
||||
#endif // CONFIG_VP9_HIGHBITDEPTH
|
||||
return (256 * var) >> num_pels_log2_lookup[bs];
|
||||
return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче