Merge "Merge branch 'master' into nextgenv2" into nextgenv2

This commit is contained in:
Yaowu Xu 2016-07-15 04:45:52 +00:00 коммит произвёл Gerrit Code Review
Родитель 010d4a8a93 6fe07a207b
Коммит 06c297bd1c
178 изменённых файлов: 6700 добавлений и 6498 удалений

1
README
Просмотреть файл

@ -47,7 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
--help output of the configure script. As of this writing, the list of
available targets is:
armv6-darwin-gcc
armv6-linux-rvct
armv6-linux-gcc
armv6-none-rvct

Просмотреть файл

@ -119,29 +119,25 @@ utiltest:
test-no-data-check::
exampletest-no-data-check utiltest-no-data-check:
# Add compiler flags for intrinsic files
# Force to realign stack always on OS/2
ifeq ($(TOOLCHAIN), x86-os2-gcc)
STACKREALIGN=-mstackrealign
else
STACKREALIGN=
CFLAGS += -mstackrealign
endif
$(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
$(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN)
$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN)
$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN)
$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN)
$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(STACKREALIGN)
$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN)
$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN)
$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN)
$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)
$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
$(BUILD_PFX)%.c.d: %.c
$(if $(quiet),@echo " [DEP] $@")

Просмотреть файл

@ -185,6 +185,25 @@ add_extralibs() {
#
# Boolean Manipulation Functions
#
# Switch codec $1 on.
# Prints a notice unless `enabled $1` already succeeds, then calls
# `set_all yes` for $1. When is_in finds $1 among vp8, vp9 and vp10 the
# $1_encoder and $1_decoder settings are switched on as well.
# The exit status is that of the is_in / set_all chain, so it is non-zero
# when $1 is not one of the listed names.
enable_codec(){
  if ! enabled $1; then
    echo " enabling $1"
  fi
  set_all yes $1

  # Guard clause: hand back is_in's failure status unchanged.
  is_in $1 vp8 vp9 vp10 || return
  set_all yes ${1}_encoder && set_all yes ${1}_decoder
}
# Switch codec $1 off.
# Prints a notice unless `disabled $1` already succeeds, then calls
# `set_all no` for $1. When is_in finds $1 among vp8, vp9 and vp10 the
# $1_encoder and $1_decoder settings are switched off as well.
# The exit status is that of the is_in / set_all chain, so it is non-zero
# when $1 is not one of the listed names.
disable_codec(){
  if ! disabled $1; then
    echo " disabling $1"
  fi
  set_all no $1

  # Guard clause: hand back is_in's failure status unchanged.
  is_in $1 vp8 vp9 vp10 || return
  set_all no ${1}_encoder && set_all no ${1}_decoder
}
# Calls set_all with "yes" followed by every argument of this function.
# $* is deliberately left unquoted here, so the arguments undergo word
# splitting again before set_all sees them.
enable_feature(){
set_all yes $*
}
@ -521,22 +540,20 @@ process_common_cmdline() {
;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
if echo "${ARCH_EXT_LIST}" | grep "^ *$option\$" >/dev/null; then
if is_in ${option} ${ARCH_EXT_LIST}; then
[ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} "
elif [ $action = "disable" ] && ! disabled $option ; then
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
die_unknown $opt
is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " disabling $option"
elif [ $action = "enable" ] && ! enabled $option ; then
echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
die_unknown $opt
is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt
log_echo " enabling $option"
fi
${action}_feature $option
;;
--require-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
if echo "${ARCH_EXT_LIST}" none | grep "^ *$option\$" >/dev/null; then
if is_in ${option} ${ARCH_EXT_LIST}; then
RTCD_OPTIONS="${RTCD_OPTIONS}${opt} "
else
die_unknown $opt
@ -638,6 +655,26 @@ show_darwin_sdk_major_version() {
xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1
}
# Print the Xcode version.
# Takes the first line of `xcodebuild -version` output and prints its second
# space-separated field. stderr of xcodebuild is not redirected.
show_xcode_version() {
xcodebuild -version | head -n1 | cut -d' ' -f2
}
# Fails when Xcode version is less than 6.3.
#
# Returns 1 when the version printed by show_xcode_version is older than
# 6.3, and 0 otherwise. The major and minor numbers are the first and second
# "."-separated fields of that output.
#
# When either field is empty or not a plain decimal number the function
# returns 0, which is the same outcome as before, but it no longer feeds
# the bad value to `[` and triggers a shell error message.
check_xcode_minimum_version() {
  # Ask for the version once and split it with parameter expansion instead
  # of running show_xcode_version and its pipeline once per field.
  xcode_version=$(show_xcode_version)
  xcode_major=${xcode_version%%.*}
  # Drop everything up to the first ".", then everything after the next one.
  # Without any "." both fields equal the whole string, as `cut` would give.
  xcode_minor=${xcode_version#*.}
  xcode_minor=${xcode_minor%%.*}
  xcode_min_major=6
  xcode_min_minor=3

  # Nothing comparable was reported: do not fail the check.
  case "${xcode_major}" in
    ''|*[!0-9]*) return 0 ;;
  esac
  case "${xcode_minor}" in
    ''|*[!0-9]*) return 0 ;;
  esac

  if [ "${xcode_major}" -lt "${xcode_min_major}" ]; then
    return 1
  fi
  if [ "${xcode_major}" -eq "${xcode_min_major}" ] \
    && [ "${xcode_minor}" -lt "${xcode_min_minor}" ]; then
    return 1
  fi
  return 0
}
process_common_toolchain() {
if [ -z "$toolchain" ]; then
gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
@ -751,7 +788,14 @@ process_common_toolchain() {
enabled shared && soft_enable pic
# Minimum iOS version for all target platforms (darwin and iphonesimulator).
IOS_VERSION_MIN="6.0"
# Shared library framework builds are only possible on iOS 8 and later.
if enabled shared; then
IOS_VERSION_OPTIONS="--enable-shared"
IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
IOS_VERSION_MIN="6.0"
fi
# Handle darwin variants. Newer SDKs allow targeting older
# platforms, so use the newest one available.
@ -1018,18 +1062,7 @@ EOF
NM="$(${XCRUN_FIND} nm)"
RANLIB="$(${XCRUN_FIND} ranlib)"
AS_SFX=.s
# Special handling of ld for armv6 because libclang_rt.ios.a does
# not contain armv6 support in Apple's clang package:
# Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn).
# TODO(tomfinegan): Remove this. Our minimum iOS version (6.0)
# renders support for armv6 unnecessary because the 3GS and up
# support neon.
if [ "${tgt_isa}" = "armv6" ]; then
LD="$(${XCRUN_FIND} ld)"
else
LD="${CXX:-$(${XCRUN_FIND} ld)}"
fi
LD="${CXX:-$(${XCRUN_FIND} ld)}"
# ASFLAGS is written here instead of using check_add_asflags
# because we need to overwrite all of ASFLAGS and purge the
@ -1055,6 +1088,19 @@ EOF
[ -d "${try_dir}" ] && add_ldflags -L"${try_dir}"
done
case ${tgt_isa} in
armv7|armv7s|armv8|arm64)
if enabled neon && ! check_xcode_minimum_version; then
soft_disable neon
log_echo " neon disabled: upgrade Xcode (need v6.3+)."
if enabled neon_asm; then
soft_disable neon_asm
log_echo " neon_asm disabled: upgrade Xcode (need v6.3+)."
fi
fi
;;
esac
asm_conversion_cmd="${source_path}/build/make/ads2gas_apple.pl"
if [ "$(show_darwin_sdk_major_version iphoneos)" -gt 8 ]; then
@ -1069,7 +1115,7 @@ EOF
if enabled rvct; then
# Check if we have CodeSourcery GCC in PATH. Needed for
# libraries
hash arm-none-linux-gnueabi-gcc 2>&- || \
which arm-none-linux-gnueabi-gcc 2>&- || \
die "Couldn't find CodeSourcery GCC from PATH"
# Use armcc as a linker to enable translation of
@ -1110,7 +1156,7 @@ EOF
check_add_ldflags -mfp64
;;
i6400)
check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
check_add_cflags -mips64r6 -mabi=64 -funroll-loops -msched-weight
check_add_cflags -mload-store-pairs -mhard-float -mfp64
check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64
check_add_ldflags -mips64r6 -mabi=64 -mfp64

Просмотреть файл

@ -211,7 +211,7 @@ for opt in "$@"; do
done
# Make one call to fix_path for file_list to improve performance.
fix_file_list
fix_file_list file_list
outfile=${outfile:-/dev/stdout}
guid=${guid:-`generate_uuid`}

37
build/make/ios-Info.plist Normal file
Просмотреть файл

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>en</string>
<key>CFBundleExecutable</key>
<string>VPX</string>
<key>CFBundleIdentifier</key>
<string>org.webmproject.VPX</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>VPX</string>
<key>CFBundlePackageType</key>
<string>FMWK</string>
<key>CFBundleShortVersionString</key>
<string>${VERSION}</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>iPhoneOS</string>
</array>
<key>CFBundleVersion</key>
<string>${VERSION}</string>
<key>MinimumOSVersion</key>
<string>${IOS_VERSION_MIN}</string>
<key>UIDeviceFamily</key>
<array>
<integer>1</integer>
<integer>2</integer>
</array>
<key>VPXFullVersion</key>
<string>${FULLVERSION}</string>
</dict>
</plist>

Просмотреть файл

@ -24,6 +24,7 @@ CONFIGURE_ARGS="--disable-docs
--disable-unit-tests"
DIST_DIR="_dist"
FRAMEWORK_DIR="VPX.framework"
FRAMEWORK_LIB="VPX.framework/VPX"
HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
SCRIPT_DIR=$(dirname "$0")
LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
@ -137,6 +138,44 @@ create_vpx_framework_config_shim() {
printf "#endif // ${include_guard}" >> "${config_file}"
}
# Verifies that $FRAMEWORK_LIB fat library contains requested builds.
# Arguments are full target names. Returns 1, after reporting through elog,
# when the sorted CPU list derived from the arguments differs from the sorted
# list reported by `$LIPO -info`.
verify_framework_targets() {
  local wanted_cpus=""
  local arch=""

  # The CPU is the part of each target name before its first "-".
  for target; do
    arch="${target%%-*}"
    case "${arch}" in
      # lipo -info outputs i386 for libvpx x86 targets.
      x86) arch="i386" ;;
    esac
    wanted_cpus="${wanted_cpus}${arch} "
  done

  # $LIPO -info outputs a string like the following:
  #   Architectures in the fat file: $FRAMEWORK_LIB <architectures>
  # Keep only what follows the last ": ", i.e. the architecture strings.
  local lipo_report=$(${LIPO} -info ${FRAMEWORK_LIB})
  lipo_report=${lipo_report##*: }

  # Sort both lists so that comparing them is a plain string compare.
  local actual=$(echo ${lipo_report} | tr " " "\n" | sort | tr "\n" " ")
  local requested=$(echo ${wanted_cpus} | tr " " "\n" | sort | tr "\n" " ")

  vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
  vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"

  if [ "${requested}" = "${actual}" ]; then
    return 0
  fi

  elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
  elog " Requested target CPUs: ${requested}"
  elog " Actual target CPUs: ${actual}"
  return 1
}
# Configures and builds each target specified by $1, and then builds
# VPX.framework.
build_framework() {
@ -157,7 +196,12 @@ build_framework() {
for target in ${targets}; do
build_target "${target}"
target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
if [ "${ENABLE_SHARED}" = "yes" ]; then
local suffix="dylib"
else
local suffix="a"
fi
lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}"
done
cd "${ORIG_PWD}"
@ -176,13 +220,25 @@ build_framework() {
# Copy in vpx_version.h.
cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"
vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
if [ "${ENABLE_SHARED}" = "yes" ]; then
# Adjust the dylib's name so dynamic linking in apps works as expected.
install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX
# Copy in Info.plist.
cat "${SCRIPT_DIR}/ios-Info.plist" \
| sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
| sed "s/\${VERSION}/${VERSION}/g" \
| sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
> "${FRAMEWORK_DIR}/Info.plist"
fi
# Confirm VPX.framework/VPX contains the targets requested.
verify_framework_targets ${targets}
vlog "Created fat library ${FRAMEWORK_LIB} containing:"
for lib in ${lib_list}; do
vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')"
done
# TODO(tomfinegan): Verify that expected targets are included within
# VPX.framework/VPX via lipo -info.
}
# Trap function. Cleans up the subtree used to build all targets contained in
@ -213,6 +269,7 @@ iosbuild_usage() {
cat << EOF
Usage: ${0##*/} [arguments]
--help: Display this message and exit.
--enable-shared: Build a dynamic framework for use on iOS 8 or later.
--extra-configure-args <args>: Extra args to pass when configuring libvpx.
--macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
and x86_64. Allows linking to framework when builds target MacOSX
@ -251,6 +308,9 @@ while [ -n "$1" ]; do
iosbuild_usage
exit
;;
--enable-shared)
ENABLE_SHARED=yes
;;
--preserve-build-output)
PRESERVE_BUILD_OUTPUT=yes
;;
@ -278,6 +338,21 @@ while [ -n "$1" ]; do
shift
done
if [ "${ENABLE_SHARED}" = "yes" ]; then
CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
fi
FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}")
VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
if [ "$ENABLE_SHARED" = "yes" ]; then
IOS_VERSION_OPTIONS="--enable-shared"
IOS_VERSION_MIN="8.0"
else
IOS_VERSION_OPTIONS=""
IOS_VERSION_MIN="6.0"
fi
if [ "${VERBOSE}" = "yes" ]; then
cat << EOF
BUILD_ROOT=${BUILD_ROOT}
@ -285,6 +360,7 @@ cat << EOF
CONFIGURE_ARGS=${CONFIGURE_ARGS}
EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
FRAMEWORK_DIR=${FRAMEWORK_DIR}
FRAMEWORK_LIB=${FRAMEWORK_LIB}
HEADER_DIR=${HEADER_DIR}
LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
LIPO=${LIPO}
@ -292,8 +368,13 @@ cat << EOF
ORIG_PWD=${ORIG_PWD}
PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
TARGETS="$(print_list "" ${TARGETS})"
ENABLE_SHARED=${ENABLE_SHARED}
OSX_TARGETS="${OSX_TARGETS}"
SIM_TARGETS="${SIM_TARGETS}"
SCRIPT_DIR="${SCRIPT_DIR}"
FULLVERSION="${FULLVERSION}"
VERSION="${VERSION}"
IOS_VERSION_MIN="${IOS_VERSION_MIN}"
EOF
fi

Просмотреть файл

@ -39,11 +39,12 @@ fix_path() {
}
# Corrects the paths in file_list in one pass for efficiency.
# $1 is the name of the array to be modified.
fix_file_list() {
# TODO(jzern): this could be more generic and take the array as a param.
files=$(fix_path "${file_list[@]}")
declare -n array_ref=$1
files=$(fix_path "${array_ref[@]}")
local IFS=$'\n'
file_list=($files)
array_ref=($files)
}
generate_uuid() {

Просмотреть файл

@ -24,8 +24,9 @@ out_file=${2}
id=${3:-VERSION_STRING}
git_version_id=""
if [ -d "${source_path}/.git" ]; then
if [ -e "${source_path}/.git" ]; then
# Source Path is a git working copy. Check for local modifications.
# Note that git submodules may have a file as .git, not a directory.
export GIT_DIR="${source_path}/.git"
git_version_id=`git describe --match=v[0-9]* 2>/dev/null`
fi

36
configure поставляемый
Просмотреть файл

@ -98,7 +98,6 @@ EOF
# all_platforms is a list of all supported target platforms. Maintain
# alphabetically by architecture, generic-gnu last.
all_platforms="${all_platforms} armv6-darwin-gcc"
all_platforms="${all_platforms} armv6-linux-rvct"
all_platforms="${all_platforms} armv6-linux-gcc"
all_platforms="${all_platforms} armv6-none-rvct"
@ -191,12 +190,12 @@ if [ ${doxy_major:-0} -ge 1 ]; then
fi
# disable codecs when their source directory does not exist
[ -d "${source_path}/vp8" ] || disable_feature vp8
[ -d "${source_path}/vp9" ] || disable_feature vp9
[ -d "${source_path}/vp10" ] || disable_feature vp10
[ -d "${source_path}/vp8" ] || disable_codec vp8
[ -d "${source_path}/vp9" ] || disable_codec vp9
[ -d "${source_path}/vp10" ] || disable_codec vp10
# disable vp10 codec by default
disable_feature vp10
disable_codec vp10
# install everything except the sources, by default. sources will have
# to be enabled when doing dist builds, since that's no longer a common
@ -406,15 +405,19 @@ process_cmdline() {
for opt do
optval="${opt#*=}"
case "$opt" in
--disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
--disable-codecs)
for c in ${CODEC_FAMILIES}; do disable_codec $c; done
;;
--enable-?*|--disable-?*)
eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
if is_in ${option} ${EXPERIMENT_LIST}; then
if enabled experimental; then
${action}_feature $option
else
log_echo "Ignoring $opt -- not in experimental mode."
fi
elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
${action}_codec ${option}
else
process_common_cmdline $opt
fi
@ -428,14 +431,6 @@ process_cmdline() {
post_process_cmdline() {
c=""
# If the codec family is disabled, disable all components of that family.
# If the codec family is enabled, enable all components of that family.
log_echo "Configuring selected codecs"
for c in ${CODECS}; do
disabled ${c%%_*} && disable_feature ${c}
enabled ${c%%_*} && enable_feature ${c}
done
# Enable all detected codecs, if they haven't been disabled
for c in ${CODECS}; do soft_enable $c; done
@ -530,13 +525,18 @@ process_detect() {
# Can only build shared libs on a subset of platforms. Doing this check
# here rather than at option parse time because the target auto-detect
# magic happens after the command line has been parsed.
if ! enabled linux && ! enabled os2; then
case "${tgt_os}" in
linux|os2|darwin*|iphonesimulator*)
# Supported platforms
;;
*)
if enabled gnu; then
echo "--enable-shared is only supported on ELF; assuming this is OK"
else
die "--enable-shared only supported on ELF and OS/2 for now"
die "--enable-shared only supported on ELF, OS/2, and Darwin for now"
fi
fi
;;
esac
fi
if [ -z "$CC" ] || enabled external_build; then
echo "Bypassing toolchain for environment detection."

Просмотреть файл

@ -109,8 +109,8 @@ static const char *exec_name;
void usage_exit(void) {
fprintf(stderr,
"Usage: %s <codec> <width> <height> <infile> <outfile> "
"<keyframe-interval> [<error-resilient>]\nSee comments in "
"simple_encoder.c for more information.\n",
"<keyframe-interval> <error-resilient> <frames to encode>\n"
"See comments in simple_encoder.c for more information.\n",
exec_name);
exit(EXIT_FAILURE);
}
@ -147,6 +147,7 @@ static int encode_frame(vpx_codec_ctx_t *codec,
return got_pkts;
}
// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
int main(int argc, char **argv) {
FILE *infile = NULL;
vpx_codec_ctx_t codec;
@ -157,12 +158,11 @@ int main(int argc, char **argv) {
VpxVideoInfo info = {0};
VpxVideoWriter *writer = NULL;
const VpxInterface *encoder = NULL;
const int fps = 30; // TODO(dkovalev) add command line argument
const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument
const int fps = 30;
const int bitrate = 200;
int keyframe_interval = 0;
// TODO(dkovalev): Add some simple command line parsing code to make the
// command line more flexible.
int max_frames = 0;
int frames_encoded = 0;
const char *codec_arg = NULL;
const char *width_arg = NULL;
const char *height_arg = NULL;
@ -172,7 +172,7 @@ int main(int argc, char **argv) {
exec_name = argv[0];
if (argc < 7)
if (argc != 9)
die("Invalid number of arguments");
codec_arg = argv[1];
@ -181,6 +181,7 @@ int main(int argc, char **argv) {
infile_arg = argv[4];
outfile_arg = argv[5];
keyframe_interval_arg = argv[6];
max_frames = strtol(argv[8], NULL, 0);
encoder = get_vpx_encoder_by_name(codec_arg);
if (!encoder)
@ -219,7 +220,7 @@ int main(int argc, char **argv) {
cfg.g_timebase.num = info.time_base.numerator;
cfg.g_timebase.den = info.time_base.denominator;
cfg.rc_target_bitrate = bitrate;
cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;
cfg.g_error_resilient = strtol(argv[7], NULL, 0);
writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
if (!writer)
@ -237,6 +238,9 @@ int main(int argc, char **argv) {
if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
flags |= VPX_EFLAG_FORCE_KF;
encode_frame(&codec, &raw, frame_count++, flags, writer);
frames_encoded++;
if (max_frames > 0 && frames_encoded >= max_frames)
break;
}
// Flush encoder.

Просмотреть файл

@ -59,7 +59,9 @@
static const char *exec_name;
void usage_exit(void) {
fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
fprintf(stderr,
"Usage: %s <codec> <width> <height> <infile> <outfile> "
"<frame limit>\n",
exec_name);
exit(EXIT_FAILURE);
}
@ -129,7 +131,8 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
static vpx_fixed_buf_t pass0(vpx_image_t *raw,
FILE *infile,
const VpxInterface *encoder,
const vpx_codec_enc_cfg_t *cfg) {
const vpx_codec_enc_cfg_t *cfg,
int max_frames) {
vpx_codec_ctx_t codec;
int frame_count = 0;
vpx_fixed_buf_t stats = {NULL, 0};
@ -142,6 +145,8 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw,
++frame_count;
get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
&stats);
if (max_frames > 0 && frame_count >= max_frames)
break;
}
// Flush encoder.
@ -159,7 +164,8 @@ static void pass1(vpx_image_t *raw,
FILE *infile,
const char *outfile_name,
const VpxInterface *encoder,
const vpx_codec_enc_cfg_t *cfg) {
const vpx_codec_enc_cfg_t *cfg,
int max_frames) {
VpxVideoInfo info = {
encoder->fourcc,
cfg->g_w,
@ -181,6 +187,9 @@ static void pass1(vpx_image_t *raw,
while (vpx_img_read(raw, infile)) {
++frame_count;
encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer);
if (max_frames > 0 && frame_count >= max_frames)
break;
}
// Flush encoder.
@ -213,11 +222,14 @@ int main(int argc, char **argv) {
const char *const height_arg = argv[3];
const char *const infile_arg = argv[4];
const char *const outfile_arg = argv[5];
int max_frames = 0;
exec_name = argv[0];
if (argc != 6)
if (argc != 7)
die("Invalid number of arguments.");
max_frames = strtol(argv[6], NULL, 0);
encoder = get_vpx_encoder_by_name(codec_arg);
if (!encoder)
die("Unsupported codec.");
@ -249,13 +261,13 @@ int main(int argc, char **argv) {
// Pass 0
cfg.g_pass = VPX_RC_FIRST_PASS;
stats = pass0(&raw, infile, encoder, &cfg);
stats = pass0(&raw, infile, encoder, &cfg, max_frames);
// Pass 1
rewind(infile);
cfg.g_pass = VPX_RC_LAST_PASS;
cfg.rc_twopass_stats_in = stats;
pass1(&raw, infile, outfile_arg, encoder, &cfg);
pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);
free(stats.buf);
vpx_img_free(&raw);

Просмотреть файл

@ -715,7 +715,7 @@ int main(int argc, char **argv) {
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));

Просмотреть файл

@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) {
// we can guess the framerate using only the timebase in this
// case. Other files would require reading ahead to guess the
// timebase, like we do for webm.
if (*num < 1000) {
if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) {
// Correct for the factor of 2 applied to the timebase in the encoder.
if (*num & 1)
*den *= 2;

10
libs.mk
Просмотреть файл

@ -183,6 +183,9 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
endif
CODEC_EXPORTS-yes += vpx/exports_com
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
ifeq ($(CONFIG_SPATIAL_SVC),yes)
CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
endif
CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
INSTALL-LIBS-yes += include/vpx/vpx_codec.h
@ -270,6 +273,12 @@ EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.dylib )
else
ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
SHARED_LIB_SUF := .dylib
EXPORT_FILE := libvpx.syms
LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, libvpx.dylib)
else
ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll
SHARED_LIB_SUF := _dll.a
@ -285,6 +294,7 @@ LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \
libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
endif
endif
endif
LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
$(notdir $(LIBVPX_SO_SYMLINKS)) \

Просмотреть файл

@ -150,12 +150,23 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
#define MD5STEP(f,w,x,y,z,in,s) \
(w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)
#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \
__attribute__((no_sanitize("unsigned-integer-overflow")))
#endif
#endif
#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK
#define VPX_NO_UNSIGNED_OVERFLOW_CHECK
#endif
/*
* The core of the MD5 algorithm, this alters an existing MD5 hash to
* reflect the addition of 16 longwords of new data. MD5Update blocks
* the data and converts bytes into longwords for this routine.
*/
void
VPX_NO_UNSIGNED_OVERFLOW_CHECK void
MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
register UWORD32 a, b, c, d;
@ -238,4 +249,6 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
buf[3] += d;
}
#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK
#endif

Просмотреть файл

@ -32,6 +32,12 @@ class ACMRandom {
return (value >> 15) & 0xffff;
}
int16_t Rand9Signed(void) {
  // Use 9 bits, then shift the result down by 256 so that the returned
  // values lie between -256 (0x100) and 255 (0x0FF).
  const int32_t kOffset = 256;
  const uint32_t raw = random_.Generate(512);
  return static_cast<int16_t>(static_cast<int32_t>(raw) - kOffset);
}
uint8_t Rand8(void) {
const uint32_t value =
random_.Generate(testing::internal::Random::kMaxRange);

197
test/add_noise_test.cc Normal file
Просмотреть файл

@ -0,0 +1,197 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"
namespace {
// TODO(jimbankoski): make width and height integers not unsigned.
typedef void (*AddNoiseFunc)(unsigned char *start, char *noise,
char blackclamp[16], char whiteclamp[16],
char bothclamp[16], unsigned int width,
unsigned int height, int pitch);
// Test fixture parameterised on the add-noise implementation under test.
class AddNoiseTest : public ::testing::TestWithParam<AddNoiseFunc> {
 public:
  virtual ~AddNoiseTest() {}

  // Calls libvpx_test::ClearSystemState() once each test body has finished.
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
};
// Returns the population standard deviation (divisor 6, not 5) of the six
// samples a..f.
double stddev6(char a, char b, char c, char d, char e, char f) {
  const char samples[6] = { a, b, c, d, e, f };

  // The mean is taken from the integer sum, exactly as a + b + ... + f.
  int total = 0;
  for (int i = 0; i < 6; ++i) {
    total += samples[i];
  }
  const double mean = total / 6.0;

  // Accumulate squared deviations in argument order.
  double squared_dev_sum = 0.0;
  for (int i = 0; i < 6; ++i) {
    const double delta = samples[i] - mean;
    squared_dev_sum += delta * delta;
  }
  return sqrt(squared_dev_sum / 6.0);
}
// TODO(jimbankoski): The following 2 functions are duplicated in each codec.
// For now the vp9 one has been copied into the test as is. We should normalize
// these in vpx_dsp and not have 3 copies of these unless there is different
// noise we add for each codec.
// Evaluates, at x, the bell curve
//   1 / (sigma * sqrt(2 * pi)) * exp(-(x - mu)^2 / (2 * sigma^2))
// with pi approximated as 3.14159265.
double gaussian(double sigma, double mu, double x) {
  const double scale = 1 / (sigma * sqrt(2.0 * 3.14159265));
  const double offset = x - mu;
  const double exponent = -offset * offset / (2 * sigma * sigma);
  return scale * (exp(exponent));
}
// Fills noise[0 .. size_noise) with values drawn via rand() from a 256-entry
// table shaped like a gaussian, and returns the first table entry.
int setup_noise(int size_noise, char *noise) {
  char char_dist[300];
  const int ai = 4;
  const int qi = 24;
  const double sigma = ai + .5 + .6 * (63 - qi) / 63.0;

  /* set up a lookup table of 256 entries that matches
   * a gaussian distribution with sigma determined by q.
   */
  int filled = 0;
  for (int value = -32; value < 32; value++) {
    // Number of table slots this value gets, rounded to nearest.
    const int count = (int) (0.5 + 256 * gaussian(sigma, 0, value));
    for (int k = 0; k < count; k++) {
      char_dist[filled + k] = (char)(value);
    }
    filled = filled + count;
  }

  // Pad whatever is left of the first 256 entries with zero noise.
  while (filled < 256) {
    char_dist[filled] = 0;
    filled++;
  }

  for (int n = 0; n < size_noise; n++) {
    noise[n] = char_dist[rand() & 0xff];  // NOLINT
  }

  // Returns the most negative value in distribution.
  return char_dist[0];
}
// Runs the add-noise function under test on flat 64x64 images and checks
// that (a) a mid-grey image does not stay flat in any run of six pixels,
// horizontally or vertically, and (b) all-255 and all-0 images do not wrap
// around to the opposite end of the 8-bit range.
TEST_P(AddNoiseTest, CheckNoiseAdded) {
  DECLARE_ALIGNED(16, char, blackclamp[16]);
  DECLARE_ALIGNED(16, char, whiteclamp[16]);
  DECLARE_ALIGNED(16, char, bothclamp[16]);
  const int width = 64;
  const int height = 64;
  const int image_size = width * height;
  char noise[3072];
  // Size the noise fill from the array itself so the two cannot drift apart.
  const int clamp = setup_noise(static_cast<int>(sizeof(noise)), noise);

  // clamp is the first entry of setup_noise's table (its most negative
  // value), so the clamp vectors hold its magnitude.
  for (int i = 0; i < 16; i++) {
    blackclamp[i] = -clamp;
    whiteclamp[i] = -clamp;
    bothclamp[i] = -2 * clamp;
  }

  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  // Stop here instead of dereferencing NULL if the allocation failed.
  ASSERT_TRUE(s != NULL) << "vpx_calloc failed for the test image";
  memset(s, 99, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't end up having either the same or no added
  // noise either vertically or horizontally.
  // The loop bound keeps i + 5 and i + 5 * width inside the image.
  for (int i = 0; i < image_size - 6 * width - 6; ++i) {
    const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99,
                              s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99);
    const double vd = stddev6(s[i] - 99, s[i + width] - 99,
                              s[i + 2 * width] - 99, s[i + 3 * width] - 99,
                              s[i + 4 * width] - 99, s[i + 5 * width] - 99);

    EXPECT_NE(hd, 0);
    EXPECT_NE(vd, 0);
  }

  // Initialize pixels in the image to 255 and check for roll over.
  memset(s, 255, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't roll over: a wrapped pixel would be small.
  for (int i = 0; i < image_size; ++i) {
    EXPECT_GT((int)s[i], 10) << "i = " << i;
  }

  // Initialize pixels in the image to 0 and check for roll under.
  memset(s, 0, image_size);

  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));

  // Check to make sure we don't roll under: a wrapped pixel would be large.
  for (int i = 0; i < image_size; ++i) {
    EXPECT_LT((int)s[i], 245) << "i = " << i;
  }

  vpx_free(s);
}
// Runs the add-noise function under test and vpx_plane_add_noise_c on
// identical inputs and expects byte-identical output images.
TEST_P(AddNoiseTest, CheckCvsAssembly) {
  DECLARE_ALIGNED(16, char, blackclamp[16]);
  DECLARE_ALIGNED(16, char, whiteclamp[16]);
  DECLARE_ALIGNED(16, char, bothclamp[16]);
  const int width = 64;
  const int height = 64;
  const int image_size = width * height;
  char noise[3072];
  // Size the noise fill from the array itself so the two cannot drift apart.
  const int clamp = setup_noise(static_cast<int>(sizeof(noise)), noise);

  // clamp is the first entry of setup_noise's table (its most negative
  // value), so the clamp vectors hold its magnitude.
  for (int i = 0; i < 16; i++) {
    blackclamp[i] = -clamp;
    whiteclamp[i] = -clamp;
    bothclamp[i] = -2 * clamp;
  }

  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
  // Bail out before touching either buffer if an allocation failed.
  // NOTE(review): relies on vpx_free accepting NULL - confirm in vpx_mem.
  if (s == NULL || d == NULL) {
    vpx_free(d);
    vpx_free(s);
    FAIL() << "vpx_calloc failed for the test images";
  }

  memset(s, 99, image_size);
  memset(d, 99, image_size);

  // Reseed before each call so both start from the same rand() state.
  // NOTE(review): presumably the implementations call rand() - confirm.
  srand(0);
  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
                                      bothclamp, width, height, width));
  srand(0);
  ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp,
                                                 whiteclamp, bothclamp,
                                                 width, height, width));

  for (int i = 0; i < image_size; ++i) {
    EXPECT_EQ((int)s[i], (int)d[i]) << "i = " << i;
  }

  vpx_free(d);
  vpx_free(s);
}
// Run every AddNoiseTest case against the plain C implementation.
INSTANTIATE_TEST_CASE_P(C, AddNoiseTest,
::testing::Values(vpx_plane_add_noise_c));
// The SSE2 variant is only instantiated when HAVE_SSE2 is non-zero.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest,
::testing::Values(vpx_plane_add_noise_sse2));
#endif
// The MSA variant is only instantiated when HAVE_MSA is non-zero.
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest,
::testing::Values(vpx_plane_add_noise_msa));
#endif
} // namespace

Просмотреть файл

@ -138,7 +138,8 @@ void filter_block2d_8_c(const uint8_t *src_ptr,
// and filter_max_width = 16
//
uint8_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
const int intermediate_next_stride = 1 - intermediate_height * output_width;
const int intermediate_next_stride =
1 - static_cast<int>(intermediate_height * output_width);
// Horizontal pass (src -> transposed intermediate).
uint8_t *output_ptr = intermediate_buffer;
@ -250,7 +251,8 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
* and filter_max_width = 16
*/
uint16_t intermediate_buffer[(kMaxDimension+8) * kMaxDimension];
const int intermediate_next_stride = 1 - intermediate_height * output_width;
const int intermediate_next_stride =
1 - static_cast<int>(intermediate_height * output_width);
// Horizontal pass (src -> transposed intermediate).
{

Просмотреть файл

@ -90,7 +90,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
<< pkt->data.frame.pts;
}
const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
// Subtract from the buffer the bits associated with a played back frame.
bits_in_buffer_model_ -= frame_size_in_bits;
@ -450,7 +450,28 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
int denoiser_offon_period_;
};
// Check basic rate targeting,
// Check basic rate targeting for VBR mode.
// Encodes the same clip at target bitrates of 400 and 800 and requires the
// measured datarate to land within 75%..125% of each target.
TEST_P(DatarateTestVP9Large, BasicRateTargetingVBR) {
  cfg_.rc_end_usage = VPX_VBR;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_error_resilient = 0;
  cfg_.g_lag_in_frames = 0;

  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);

  const int target_bitrates[] = { 400, 800 };
  const int num_targets =
      static_cast<int>(sizeof(target_bitrates) / sizeof(target_bitrates[0]));
  for (int idx = 0; idx < num_targets; ++idx) {
    cfg_.rc_target_bitrate = target_bitrates[idx];
    ResetModel();
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
        << " The datarate for the file is lower than target by too much!";
    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25)
        << " The datarate for the file is greater than target by too much!";
  }
}
// Check basic rate targeting for CBR.
TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
cfg_.rc_buf_initial_sz = 500;
cfg_.rc_buf_optimal_sz = 500;
@ -474,7 +495,7 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
}
}
// Check basic rate targeting,
// Check basic rate targeting for CBR.
TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);

Просмотреть файл

@ -365,10 +365,10 @@ class Trans16x16TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
const int32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
const uint32_t diff = dst[j] - src[j];
const int32_t diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error)

Просмотреть файл

@ -147,10 +147,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
const int32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
const uint32_t diff = dst[j] - src[j];
const int32_t diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
if (max_error < error)

Просмотреть файл

@ -302,22 +302,12 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8, 16)));
#endif // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
MMX, Trans4x4WHT,
::testing::Values(
make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0,
VPX_BITS_8, 16)));
#endif
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4WHT,
::testing::Values(
make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0,
VPX_BITS_8, 16)));
make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8, 16),
make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8, 16)));
#endif
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

Просмотреть файл

@ -425,10 +425,10 @@ class FwdTrans8x8TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
const uint32_t diff = dst[j] - src[j];
const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
@ -458,7 +458,7 @@ class FwdTrans8x8TestBase {
coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));
for (int j = 0; j < kNumCoeffs; ++j) {
const uint32_t diff = coeff[j] - coeff_r[j];
const int32_t diff = coeff[j] - coeff_r[j];
const uint32_t error = diff * diff;
EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
<< "Error: 8x8 DCT has error " << error
@ -511,10 +511,10 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
const int diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
const uint32_t diff = dst[j] - ref[j];
const int diff = dst[j] - ref[j];
#endif
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error)

220
test/hadamard_test.cc Normal file
Просмотреть файл

@ -0,0 +1,220 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <algorithm>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
namespace {
using ::libvpx_test::ACMRandom;
typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
// One-dimensional 8-point Hadamard pass used by the reference transforms.
// Reads eight samples a[0], a[a_stride], ..., a[7 * a_stride] and writes eight
// results to out[0..7]. Every intermediate value is stored as int16_t, so each
// stage truncates exactly like the 16-bit implementations under test.
void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  // Stage 1: sum and difference of each adjacent pair of input samples.
  int16_t stage1[8];
  for (int pair = 0; pair < 4; ++pair) {
    const int16_t first = a[(2 * pair) * a_stride];
    const int16_t second = a[(2 * pair + 1) * a_stride];
    stage1[2 * pair] = static_cast<int16_t>(first + second);
    stage1[2 * pair + 1] = static_cast<int16_t>(first - second);
  }

  // Stage 2: butterflies with a distance of two inside each group of four.
  int16_t stage2[8];
  for (int group = 0; group < 2; ++group) {
    const int16_t *const in = stage1 + 4 * group;
    int16_t *const dst = stage2 + 4 * group;
    dst[0] = static_cast<int16_t>(in[0] + in[2]);
    dst[1] = static_cast<int16_t>(in[1] + in[3]);
    dst[2] = static_cast<int16_t>(in[0] - in[2]);
    dst[3] = static_cast<int16_t>(in[1] - in[3]);
  }

  // Stage 3: combine the two groups. The sums and differences land in the
  // permuted output slots listed below rather than in natural order.
  static const int kSumSlot[4] = { 0, 7, 3, 4 };
  static const int kDiffSlot[4] = { 2, 6, 1, 5 };
  for (int k = 0; k < 4; ++k) {
    out[kSumSlot[k]] = static_cast<int16_t>(stage2[k] + stage2[k + 4]);
    out[kDiffSlot[k]] = static_cast<int16_t>(stage2[k] - stage2[k + 4]);
  }
}
// Reference 8x8 Hadamard transform built from two rounds of hadamard_loop().
// a/a_stride describe the source block; b receives the 64 output coefficients.
void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
  int16_t intermediate[64];

  // First round: for each of the 8 source columns, transform the samples
  // a[col + k * a_stride] and store the 8 results contiguously.
  int16_t *first_dst = intermediate;
  for (const int16_t *src = a; src != a + 8; ++src, first_dst += 8) {
    hadamard_loop(src, a_stride, first_dst);
  }

  // Second round: same operation on the intermediate buffer, which is a dense
  // 8x8 array (stride 8). It must start only after the first round is done.
  int16_t *second_dst = b;
  for (const int16_t *src = intermediate; src != intermediate + 8;
       ++src, second_dst += 8) {
    hadamard_loop(src, 8, second_dst);
  }
}
// Reference 16x16 Hadamard transform: four 8x8 transforms followed by one
// combining butterfly across the four 64-coefficient groups.
void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit.
   * Quadrants are processed top-left, top-right, bottom-left, bottom-right and
   * written to b + 0, b + 64, b + 128 and b + 192 respectively. */
  for (int quadrant = 0; quadrant < 4; ++quadrant) {
    const int col_offset = (quadrant % 2) * 8;
    const int row_offset = (quadrant / 2) * 8;
    reference_hadamard8x8(a + col_offset + row_offset * a_stride, a_stride,
                          b + quadrant * 64);
  }

  /* Overlay the 8x8 blocks and combine. */
  for (int idx = 0; idx < 64; ++idx) {
    /* 8x8 steps the range up to 15 bits. */
    const int16_t q0 = b[idx];
    const int16_t q1 = b[idx + 64];
    const int16_t q2 = b[idx + 128];
    const int16_t q3 = b[idx + 192];

    /* Halve the first butterfly so the result cannot escape int16_t. */
    const int16_t sum01 = (q0 + q1) >> 1;
    const int16_t diff01 = (q0 - q1) >> 1;
    const int16_t sum23 = (q2 + q3) >> 1;
    const int16_t diff23 = (q2 - q3) >> 1;

    /* Store a 16 bit value. */
    b[idx] = sum01 + sum23;
    b[idx + 64] = diff01 + diff23;
    b[idx + 128] = sum01 - sum23;
    b[idx + 192] = diff01 - diff23;
  }
}
// Fixture shared by the Hadamard tests. It is value-parameterized on a
// HadamardFunc, i.e. on the transform implementation to be exercised.
class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
public:
// Stores the function under test from the test parameter and resets the
// random generator to ACMRandom's deterministic seed.
virtual void SetUp() {
h_func_ = GetParam();
rnd_.Reset(ACMRandom::DeterministicSeed());
}
protected:
// Implementation under test; assigned in SetUp().
HadamardFunc h_func_;
// Generator for the random input samples used by the test bodies.
ACMRandom rnd_;
};
// Fixture for the 8x8 transform; adds nothing to HadamardTestBase.
class Hadamard8x8Test : public HadamardTestBase {};
// Runs one random 8x8 block (stride 8) through both the reference and the
// implementation under test and requires the same set of 64 output values.
TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
DECLARE_ALIGNED(16, int16_t, a[64]);
DECLARE_ALIGNED(16, int16_t, b[64]);
int16_t b_ref[64];
// NOTE(review): Rand9Signed() presumably yields signed 9-bit samples, in line
// with the "Input is 9 bit" remark in the 16x16 reference - confirm.
for (int i = 0; i < 64; ++i) {
a[i] = rnd_.Rand9Signed();
}
// Clear both outputs so entries a function fails to write compare as zero.
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
reference_hadamard8x8(a, 8, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
// The order of the output is not important. Sort before checking.
std::sort(b, b + 64);
std::sort(b_ref, b_ref + 64);
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
// Repeats the reference comparison for source strides 8, 16, ..., 56 over one
// shared random buffer of 64 * 8 samples.
TEST_P(Hadamard8x8Test, VaryStride) {
DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
DECLARE_ALIGNED(16, int16_t, b[64]);
int16_t b_ref[64];
for (int i = 0; i < 64 * 8; ++i) {
a[i] = rnd_.Rand9Signed();
}
// i is the source stride. The loop stops before 64, so 56 is the largest
// stride exercised.
for (int i = 8; i < 64; i += 8) {
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
reference_hadamard8x8(a, i, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
// The order of the output is not important. Sort before checking.
std::sort(b, b + 64);
std::sort(b_ref, b_ref + 64);
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
}
// The C implementation is always instantiated.
INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_c));
// Each SIMD variant below is instantiated only when its HAVE_* / CONFIG_* /
// ARCH_* guard evaluates to true in the build configuration.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_sse2));
#endif // HAVE_SSE2
#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_ssse3));
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
::testing::Values(&vpx_hadamard_8x8_neon));
#endif // HAVE_NEON
// Fixture for the 16x16 transform; adds nothing to HadamardTestBase.
class Hadamard16x16Test : public HadamardTestBase {};
// Runs one random 16x16 block (stride 16) through both the reference and the
// implementation under test and requires the same set of 256 output values.
TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
int16_t b_ref[16 * 16];
for (int i = 0; i < 16 * 16; ++i) {
a[i] = rnd_.Rand9Signed();
}
// Clear both outputs so entries a function fails to write compare as zero.
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
reference_hadamard16x16(a, 16, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
// The order of the output is not important. Sort before checking.
std::sort(b, b + 16 * 16);
std::sort(b_ref, b_ref + 16 * 16);
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
// Repeats the 16x16 reference comparison for source strides 8, 16, ..., 56
// over one shared random buffer of 16 * 16 * 8 samples.
TEST_P(Hadamard16x16Test, VaryStride) {
DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
int16_t b_ref[16 * 16];
for (int i = 0; i < 16 * 16 * 8; ++i) {
a[i] = rnd_.Rand9Signed();
}
// i is the source stride.
// NOTE(review): the first iteration uses a stride of 8, which is narrower
// than the 16-sample block width, so consecutive rows overlap in memory.
// Both functions still read the same samples - confirm this is intended.
for (int i = 8; i < 64; i += 8) {
memset(b, 0, sizeof(b));
memset(b_ref, 0, sizeof(b_ref));
reference_hadamard16x16(a, i, b_ref);
ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
// The order of the output is not important. Sort before checking.
std::sort(b, b + 16 * 16);
std::sort(b_ref, b_ref + 16 * 16);
EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
}
}
// The C implementation is always instantiated.
INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
::testing::Values(&vpx_hadamard_16x16_c));
// SIMD variants are instantiated only when the matching HAVE_* macro is set.
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
::testing::Values(&vpx_hadamard_16x16_sse2));
#endif // HAVE_SSE2
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
::testing::Values(&vpx_hadamard_16x16_neon));
#endif // HAVE_NEON
} // namespace

119
test/level_test.cc Normal file
Просмотреть файл

@ -0,0 +1,119 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
namespace {
// Encoder test fixture for the VP9 level controls. It is parameterized on the
// codec (param 0), the encoding mode (param 1) and the cpu-used value
// (param 2).
class LevelTest
: public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
protected:
// Defaults: minimum golden-frame interval 24, target level 0, and a reported
// level of 0 until the encoder is queried.
LevelTest()
: EncoderTest(GET_PARAM(0)),
encoding_mode_(GET_PARAM(1)),
cpu_used_(GET_PARAM(2)),
min_gf_internal_(24),
target_level_(0),
level_(0) {}
virtual ~LevelTest() {}
// Configures rate control: VBR with 25 frames of lag for every mode except
// kRealTime, CBR with no lag for kRealTime, then common bitrate/quantizer
// settings.
virtual void SetUp() {
InitializeConfig();
SetMode(encoding_mode_);
if (encoding_mode_ != ::libvpx_test::kRealTime) {
cfg_.g_lag_in_frames = 25;
cfg_.rc_end_usage = VPX_VBR;
} else {
cfg_.g_lag_in_frames = 0;
cfg_.rc_end_usage = VPX_CBR;
}
cfg_.rc_2pass_vbr_minsection_pct = 5;
cfg_.rc_2pass_vbr_maxsection_pct = 2000;
cfg_.rc_target_bitrate = 400;
cfg_.rc_max_quantizer = 63;
cfg_.rc_min_quantizer = 0;
}
// Applies the encoder controls once, when the source reports frame 0, and
// reads back the level on every call, asserting it lies in [0, 51].
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
if (video->frame() == 0) {
encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
// Alt-ref and ARNR settings are applied only outside real-time mode.
if (encoding_mode_ != ::libvpx_test::kRealTime) {
encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
encoder->Control(VP8E_SET_ARNR_TYPE, 3);
}
}
encoder->Control(VP9E_GET_LEVEL, &level_);
ASSERT_LE(level_, 51);
ASSERT_GE(level_, 0);
}
// Encoding mode taken from test parameter 1.
::libvpx_test::TestMode encoding_mode_;
// Value passed to VP8E_SET_CPUUSED (test parameter 2).
int cpu_used_;
// Value passed to VP9E_SET_MIN_GF_INTERVAL.
// NOTE(review): the name reads "internal" but the value is an interval.
int min_gf_internal_;
// Value passed to VP9E_SET_TARGET_LEVEL.
int target_level_;
// Level most recently read back through VP9E_GET_LEVEL.
int level_;
};
// Test for keeping level stats only
// Encodes the same 40-frame clip twice with target level 0: first at the
// fixture's default bitrate (400, set in SetUp()), expecting a reported level
// of 11, then at bitrate 1600, expecting a reported level of 20.
TEST_P(LevelTest, TestTargetLevel0) {
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
40);
target_level_ = 0;
min_gf_internal_ = 4;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_EQ(11, level_);
// Raise the target bitrate and re-run; the expected level rises with it.
cfg_.rc_target_bitrate = 1600;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
ASSERT_EQ(20, level_);
}
// Test for level control being turned off
// Encodes 30 frames with target level 255. The only checks are that the run
// completes without a fatal failure and the per-frame range assertions in
// PreEncodeFrameHook() hold.
TEST_P(LevelTest, TestTargetLevel255) {
::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
30);
target_level_ = 255;
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
// Checks argument validation of the VP9E_SET_TARGET_LEVEL control on a
// stand-alone encoder instance: every value in [0, 256] is tried, and only the
// values listed in the condition below may be accepted; all others must be
// rejected with VPX_CODEC_INVALID_PARAM.
TEST_P(LevelTest, TestTargetLevelApi) {
  static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo;
  vpx_codec_ctx_t enc;
  vpx_codec_enc_cfg_t cfg;
  // Setup failures are fatal: if the encoder could not be initialized, |enc|
  // is not a valid context and the control/destroy calls below must not run.
  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
  ASSERT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
  for (int level = 0; level <= 256; ++level) {
    if (level == 10 || level == 11 || level == 20 || level == 21 ||
        level == 30 || level == 31 || level == 40 || level == 41 ||
        level == 50 || level == 51 || level == 52 || level == 60 ||
        level == 61 || level == 62 || level == 0 || level == 255) {
      EXPECT_EQ(VPX_CODEC_OK,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
    } else {
      EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
    }
  }
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
}
// Instantiates LevelTest for VP9 over two encoding modes (two-pass good and
// one-pass good) combined with the values of ::testing::Range(0, 9), which
// the fixture reads as cpu_used_.
VP9_INSTANTIATE_TEST_CASE(LevelTest,
::testing::Values(::libvpx_test::kTwoPassGood,
::libvpx_test::kOnePassGood),
::testing::Range(0, 9));
} // namespace

Просмотреть файл

@ -430,16 +430,6 @@ TEST_P(Loop8Test9Param, ValueCheck) {
using std::tr1::make_tuple;
#if HAVE_MMX && CONFIG_USE_X86INC && !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
MMX, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_lpf_horizontal_4_mmx,
&vpx_lpf_horizontal_4_c, 8),
make_tuple(&vpx_lpf_vertical_4_mmx,
&vpx_lpf_vertical_4_c, 8)));
#endif // HAVE_MMX
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
@ -497,12 +487,16 @@ INSTANTIATE_TEST_CASE_P(
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_lpf_horizontal_4_sse2,
&vpx_lpf_horizontal_4_c, 8),
make_tuple(&vpx_lpf_horizontal_8_sse2,
&vpx_lpf_horizontal_8_c, 8),
make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
&vpx_lpf_horizontal_edge_8_c, 8),
make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
&vpx_lpf_horizontal_edge_16_c, 8),
make_tuple(&vpx_lpf_vertical_4_sse2,
&vpx_lpf_vertical_4_c, 8),
make_tuple(&vpx_lpf_vertical_8_sse2,
&vpx_lpf_vertical_8_c, 8),
make_tuple(&vpx_lpf_vertical_16_sse2,

132
test/minmax_test.cc Normal file
Просмотреть файл

@ -0,0 +1,132 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include <string.h>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "test/acm_random.h"
#include "test/register_state_check.h"
namespace {
using ::libvpx_test::ACMRandom;
typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
const uint8_t *b, int b_stride,
int *min, int *max);
// Fixture for the min/max tests, value-parameterized on the MinMaxFunc
// implementation to be exercised.
class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
public:
// Stores the function under test from the test parameter and resets the
// random generator to ACMRandom's deterministic seed.
virtual void SetUp() {
mm_func_ = GetParam();
rnd_.Reset(ACMRandom::DeterministicSeed());
}
protected:
// Implementation under test; assigned in SetUp().
MinMaxFunc mm_func_;
// Generator for the random pixel values used by the comparison tests.
ACMRandom rnd_;
};
// Reference implementation: scans the 8x8 blocks at a and b (with their own
// row strides) and reports the smallest and largest absolute per-pixel
// difference through min_ret and max_ret.
void reference_minmax(const uint8_t *a, int a_stride,
                      const uint8_t *b, int b_stride,
                      int *min_ret, int *max_ret) {
  // Start from the extremes of the possible 8-bit difference range.
  int smallest = 255;
  int largest = 0;
  for (int row = 0; row < 8; ++row) {
    const uint8_t *const a_row = a + row * a_stride;
    const uint8_t *const b_row = b + row * b_stride;
    for (int col = 0; col < 8; ++col) {
      const int delta = a_row[col] - b_row[col];
      const int magnitude = delta < 0 ? -delta : delta;
      smallest = magnitude < smallest ? magnitude : smallest;
      largest = magnitude > largest ? magnitude : largest;
    }
  }
  *min_ret = smallest;
  *max_ret = largest;
}
// For each position i in the block, a is all zeros and b is all 255 except
// b[i] == i, so the minimum difference must be i and the maximum 255.
TEST_P(MinMaxTest, MinValue) {
for (int i = 0; i < 64; i++) {
uint8_t a[64], b[64];
memset(a, 0, sizeof(a));
memset(b, 255, sizeof(b));
b[i] = i; // Set a minimum difference of i.
int min, max;
ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
EXPECT_EQ(255, max);
EXPECT_EQ(i, min);
}
}
// For each position i in the block, a and b are all zeros except b[i] == i,
// so the maximum difference must be i and the minimum 0.
TEST_P(MinMaxTest, MaxValue) {
for (int i = 0; i < 64; i++) {
uint8_t a[64], b[64];
memset(a, 0, sizeof(a));
memset(b, 0, sizeof(b));
b[i] = i; // Set a maximum difference of i.
int min, max;
ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
EXPECT_EQ(i, max);
EXPECT_EQ(0, min);
}
}
// Fills two 8x8 blocks with random bytes and requires the implementation under
// test to report the same min and max as reference_minmax().
TEST_P(MinMaxTest, CompareReference) {
uint8_t a[64], b[64];
for (int j = 0; j < 64; j++) {
a[j] = rnd_.Rand8();
b[j] = rnd_.Rand8();
}
int min_ref, max_ref, min, max;
reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
EXPECT_EQ(max_ref, max);
EXPECT_EQ(min_ref, min);
}
// Compares the implementation under test against reference_minmax() for every
// combination of a_stride and b_stride in {8, 16, ..., 64}. Both inputs are
// 8 * 64 random bytes, large enough for an 8-row block at the widest stride.
TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
  uint8_t a[8 * 64], b[8 * 64];
  for (int i = 0; i < 8 * 64; i++) {
    a[i] = rnd_.Rand8();
    b[i] = rnd_.Rand8();
  }
  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
      int min_ref, max_ref, min, max;
      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
      // Report the failing stride pair alongside any mismatch.
      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
                              << " and b_stride = " << b_stride;
    }
  }
}
// The C implementation is always instantiated; the SIMD variants only when the
// matching HAVE_* macro is set.
INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_sse2));
#endif
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
::testing::Values(&vpx_minmax_8x8_neon));
#endif
} // namespace

64
test/realtime_test.cc Normal file
Просмотреть файл

@ -0,0 +1,64 @@
/*
* Copyright (c) 2016 The WebM project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/video_source.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
namespace {
// Dimensions given to the random video source used by the test below.
const int kVideoSourceWidth = 320;
const int kVideoSourceHeight = 240;
// Frame limit set on the source; also the expected number of frame packets.
const int kFramesToEncode = 2;
// Encoder test fixture that runs in real-time mode with g_pass forced to
// VPX_RC_FIRST_PASS and counts the frame packets it is handed.
class RealtimeTest
: public ::libvpx_test::EncoderTest,
public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
protected:
RealtimeTest()
: EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
virtual ~RealtimeTest() {}
// Selects real-time mode with no frame lag.
virtual void SetUp() {
InitializeConfig();
cfg_.g_lag_in_frames = 0;
SetMode(::libvpx_test::kRealTime);
}
// Overrides the pass at the start of every pass; see the TODO for why this
// is done here rather than in SetUp().
virtual void BeginPassHook(unsigned int /*pass*/) {
// TODO(tomfinegan): We're changing the pass value here to make sure
// we get frames when real time mode is combined with |g_pass| set to
// VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
// the pass value based on the mode passed into EncoderTest::SetMode(),
// which overrides the one specified in SetUp() above.
cfg_.g_pass = VPX_RC_FIRST_PASS;
}
// Counts each packet passed to this hook; the packet contents are ignored.
virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
frame_packets_++;
}
// Number of packets seen by FramePktHook() so far.
int frame_packets_;
};
// Encodes kFramesToEncode random frames and expects exactly one frame packet
// per input frame, even though the fixture forces g_pass to first-pass.
TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
::libvpx_test::RandomVideoSource video;
video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
video.set_limit(kFramesToEncode);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
EXPECT_EQ(kFramesToEncode, frame_packets_);
}
// Instantiates the test for both VP8 and VP9, each with the single kRealTime
// mode value.
VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
::testing::Values(::libvpx_test::kRealTime));
} // namespace

Просмотреть файл

@ -36,16 +36,10 @@
#include <windows.h>
#include <winnt.h>
namespace testing {
namespace internal {
inline bool operator==(const M128A& lhs, const M128A& rhs) {
return (lhs.Low == rhs.Low && lhs.High == rhs.High);
}
} // namespace internal
} // namespace testing
namespace libvpx_test {
// Compares the state of xmm[6-15] at construction with their state at

Просмотреть файл

@ -7,6 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdio.h>
#include <climits>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
@ -558,9 +560,13 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
}
}
#if CONFIG_VP9_DECODER
// Verify that we get 1 resize down event in this test.
ASSERT_EQ(1, resize_count) << "Resizing should occur.";
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
#else
printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
#endif
}
// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
@ -602,9 +608,13 @@ TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
}
}
#if CONFIG_VP9_DECODER
// Verify that we get 2 resize events in this test.
ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
#else
printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
#endif
}
vpx_img_fmt_t CspForFrameNumber(int frame) {

Просмотреть файл

@ -749,17 +749,6 @@ INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
//------------------------------------------------------------------------------
// x86 functions
#if HAVE_MMX
const SadMxNParam mmx_tests[] = {
make_tuple(16, 16, &vpx_sad16x16_mmx, -1),
make_tuple(16, 8, &vpx_sad16x8_mmx, -1),
make_tuple(8, 16, &vpx_sad8x16_mmx, -1),
make_tuple(8, 8, &vpx_sad8x8_mmx, -1),
make_tuple(4, 4, &vpx_sad4x4_mmx, -1),
};
INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));
#endif // HAVE_MMX
#if HAVE_SSE2
#if CONFIG_USE_X86INC
const SadMxNParam sse2_tests[] = {

Просмотреть файл

@ -23,7 +23,7 @@ simple_encoder_verify_environment() {
fi
}
# Runs simple_encoder using the codec specified by $1.
# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
simple_encoder() {
local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
local codec="$1"
@ -35,7 +35,7 @@ simple_encoder() {
fi
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \
${devnull}
[ -e "${output_file}" ] || return 1
@ -47,16 +47,13 @@ simple_encoder_vp8() {
fi
}
# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
# machine.
DISABLED_simple_encoder_vp9() {
simple_encoder_vp9() {
if [ "$(vp9_encode_available)" = "yes" ]; then
simple_encoder vp9 || return 1
fi
}
simple_encoder_tests="simple_encoder_vp8
DISABLED_simple_encoder_vp9"
simple_encoder_vp9"
run_tests simple_encoder_verify_environment "${simple_encoder_tests}"

Просмотреть файл

@ -25,6 +25,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += datarate_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h
@ -43,6 +44,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc
LIBVPX_TEST_SRCS-yes += decode_test_driver.cc
LIBVPX_TEST_SRCS-yes += decode_test_driver.h
@ -108,6 +110,7 @@ LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc
LIBVPX_TEST_SRCS-yes += vp8_fragments_test.cc
endif
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += add_noise_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
@ -148,6 +151,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc

Просмотреть файл

@ -191,14 +191,15 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
vpx_h_predictor_4x4_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
vpx_tm_predictor_4x4_sse2)
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_d45_predictor_4x4_ssse3, NULL, NULL,
vpx_d153_predictor_4x4_ssse3, vpx_d207_predictor_4x4_ssse3,
NULL, NULL, NULL, NULL,
vpx_d153_predictor_4x4_ssse3, NULL,
vpx_d63_predictor_4x4_ssse3, NULL)
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC
@ -240,13 +241,13 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
vpx_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_tm_predictor_8x8_sse2)
vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
#endif // HAVE_SSE2 && CONFIG_USE_X86INC
#if HAVE_SSSE3 && CONFIG_USE_X86INC
INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
NULL, vpx_d45_predictor_8x8_ssse3, NULL, NULL,
NULL, NULL, NULL, NULL,
vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3,
vpx_d63_predictor_8x8_ssse3, NULL)
#endif // HAVE_SSSE3 && CONFIG_USE_X86INC

Просмотреть файл

@ -23,7 +23,8 @@ twopass_encoder_verify_environment() {
fi
}
# Runs twopass_encoder using the codec specified by $1.
# Runs twopass_encoder using the codec specified by $1 with a frame limit of
# 100.
twopass_encoder() {
local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}"
local codec="$1"
@ -35,7 +36,7 @@ twopass_encoder() {
fi
eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
"${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 100 \
${devnull}
[ -e "${output_file}" ] || return 1
@ -47,16 +48,13 @@ twopass_encoder_vp8() {
fi
}
# TODO(tomfinegan): Add a frame limit param to twopass_encoder and enable this
# test. VP9 is just too slow right now: This test takes 31m16s+ on a fast
# machine.
DISABLED_twopass_encoder_vp9() {
twopass_encoder_vp9() {
if [ "$(vp9_encode_available)" = "yes" ]; then
twopass_encoder vp9 || return 1
fi
}
twopass_encoder_tests="twopass_encoder_vp8
DISABLED_twopass_encoder_vp9"
twopass_encoder_vp9"
run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"

Просмотреть файл

@ -1062,30 +1062,6 @@ INSTANTIATE_TEST_CASE_P(
::testing::ValuesIn(kArrayHBDSubpelAvgVariance_c));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest,
::testing::Values(make_tuple(4, 4, &vpx_mse16x16_mmx)));
INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest,
::testing::Values(vpx_get_mb_ss_mmx));
INSTANTIATE_TEST_CASE_P(
MMX, VpxVarianceTest,
::testing::Values(make_tuple(4, 4, &vpx_variance16x16_mmx, 0),
make_tuple(4, 3, &vpx_variance16x8_mmx, 0),
make_tuple(3, 4, &vpx_variance8x16_mmx, 0),
make_tuple(3, 3, &vpx_variance8x8_mmx, 0),
make_tuple(2, 2, &vpx_variance4x4_mmx, 0)));
INSTANTIATE_TEST_CASE_P(
MMX, VpxSubpelVarianceTest,
::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_mmx, 0),
make_tuple(4, 3, &vpx_sub_pixel_variance16x8_mmx, 0),
make_tuple(3, 4, &vpx_sub_pixel_variance8x16_mmx, 0),
make_tuple(3, 3, &vpx_sub_pixel_variance8x8_mmx, 0),
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_mmx, 0)));
#endif // HAVE_MMX
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest,
::testing::Values(vpx_get_mb_ss_sse2));
@ -1126,8 +1102,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0),
make_tuple(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0),
make_tuple(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0),
make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse, 0),
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse, 0)));
make_tuple(2, 3, &vpx_sub_pixel_variance4x8_sse2, 0),
make_tuple(2, 2, &vpx_sub_pixel_variance4x4_sse2, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, VpxSubpelAvgVarianceTest,
@ -1143,8 +1119,8 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0),
make_tuple(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0),
make_tuple(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0),
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse, 0),
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse, 0)));
make_tuple(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0),
make_tuple(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0)));
#endif // CONFIG_USE_X86INC
#if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH

Просмотреть файл

@ -94,8 +94,7 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
// Test for all block size.
INSTANTIATE_TEST_CASE_P(
SSE2, VP9DenoiserTest,
::testing::Values(BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8,
BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32,
BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
BLOCK_64X64));
::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16,
BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64,
BLOCK_64X32, BLOCK_64X64));
} // namespace

Просмотреть файл

@ -62,7 +62,7 @@ class WebMVideoSource : public CompressedVideoSource {
void FillFrame() {
ASSERT_TRUE(vpx_ctx_->file != NULL);
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
ASSERT_GE(status, 0) << "webm_read_frame failed";
if (status == 1) {
end_of_file_ = true;
@ -72,7 +72,7 @@ class WebMVideoSource : public CompressedVideoSource {
void SeekToNextKeyFrame() {
ASSERT_TRUE(vpx_ctx_->file != NULL);
do {
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_);
ASSERT_GE(status, 0) << "webm_read_frame failed";
++frame_;
if (status == 1) {

Просмотреть файл

@ -13,6 +13,7 @@
#include <stdio.h>
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "./vp10_rtcd.h"
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
state->last_noise = a;
}
void vp10_plane_add_noise_c(uint8_t *start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
// TODO(jbb): why does simd code use both but c doesn't, normalize and
// fix..
(void) bothclamp;
for (i = 0; i < height; i++) {
uint8_t *pos = start + i * pitch;
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
for (j = 0; j < width; j++) {
if (pos[j] < blackclamp[0])
pos[j] = blackclamp[0];
if (pos[j] > 255 + whiteclamp[0])
pos[j] = 255 + whiteclamp[0];
pos[j] += ref[j];
}
}
}
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
@ -727,7 +702,7 @@ int vp10_post_proc_frame(struct VP10Common *cm,
fillrd(ppstate, 63 - q, noise_level);
}
vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -15,13 +15,14 @@
#include "./vpx_config.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/inv_txfm.h"
#include "vpx_ports/mem.h"
#ifdef __cplusplus
extern "C" {
#endif
static INLINE tran_low_t check_range(tran_high_t input) {
static INLINE tran_high_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid VP9 input streams, intermediate stage coefficients should always
// stay within the range of a signed 16 bit integer. Coefficients can go out
@ -32,17 +33,17 @@ static INLINE tran_low_t check_range(tran_high_t input) {
assert(INT16_MIN <= input);
assert(input <= INT16_MAX);
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
return (tran_low_t)input;
return input;
}
static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
return check_range(rv);
return rv;
}
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE tran_low_t highbd_check_range(tran_high_t input,
int bd) {
static INLINE tran_high_t highbd_check_range(tran_high_t input,
int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
// For valid highbitdepth VP9 streams, intermediate stage coefficients will
// stay within the ranges:
@ -56,13 +57,12 @@ static INLINE tran_low_t highbd_check_range(tran_high_t input,
(void) int_min;
#endif // CONFIG_COEFFICIENT_RANGE_CHECKING
(void) bd;
return (tran_low_t)input;
return input;
}
static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
int bd) {
static INLINE tran_high_t highbd_dct_const_round_shift(tran_high_t input) {
tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
return highbd_check_range(rv, bd);
return rv;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@ -83,9 +83,21 @@ static INLINE tran_low_t highbd_dct_const_round_shift(tran_high_t input,
// bd of 10 uses trans_low with 18bits, need to remove 14bits
// bd of 12 uses trans_low with 20bits, need to remove 12bits
// bd of x uses trans_low with 8+x bits, need to remove 24-x bits
#define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
#else
#define WRAPLOW(x, bd) ((int32_t)(x))
#define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16)
#if CONFIG_VP9_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd) \
((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd))
#endif // CONFIG_VP9_HIGHBITDEPTH
#else // CONFIG_EMULATE_HARDWARE
#define WRAPLOW(x) ((int32_t)check_range(x))
#if CONFIG_VP9_HIGHBITDEPTH
#define HIGHBD_WRAPLOW(x, bd) \
((int32_t)highbd_check_range((x), bd))
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_EMULATE_HARDWARE
void vp10_idct4_c(const tran_low_t *input, tran_low_t *output);
@ -107,14 +119,14 @@ void vp10_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd);
static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
int bd) {
trans = WRAPLOW(trans, bd);
return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
trans = HIGHBD_WRAPLOW(trans, bd);
return clip_pixel_highbd(dest + trans, bd);
}
#endif
static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
trans = WRAPLOW(trans, 8);
return clip_pixel(WRAPLOW(dest + trans, 8));
trans = WRAPLOW(trans);
return clip_pixel(dest + trans);
}
#ifdef __cplusplus
} // extern "C"

Просмотреть файл

@ -73,10 +73,6 @@ add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint
specialize qw/vp10_post_proc_down_and_across sse2/;
$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp10_plane_add_noise sse2/;
$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
@ -365,9 +361,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp10_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp10_highbd_post_proc_down_and_across/;
add_proto qw/void vp10_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp10_highbd_plane_add_noise/;
}
#
@ -447,7 +440,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp10_fht32x32/;
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
specialize qw/vp10_fwht4x4/, "$sse2_x86inc";
} else {
add_proto qw/void vp10_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht4x4 sse2/;
@ -468,7 +461,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp10_fht32x32/;
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
specialize qw/vp10_fwht4x4 msa/, "$sse2_x86inc";
}
add_proto qw/void vp10_fwd_idtx/, "const int16_t *src_diff, tran_low_t *coeff, int stride, int bs, int tx_type";

Просмотреть файл

@ -624,68 +624,6 @@ sym(vp10_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
;
; Adds noise to a plane of bytes in place, 16 bytes per iteration.
; For every row: a random offset (rand() & 0xff) into the noise table is
; chosen, then each 16-byte group of the row is clamped with the three
; clamp vectors using saturating byte arithmetic and the noise bytes are
; added with a wrapping byte add.
; The inner loop advances in steps of 16 until the offset reaches width, so
; a width that is not a multiple of 16 is rounded up to the next multiple.
; Both loops test their condition at the bottom, so at least one row and one
; 16-byte group are always processed.
global sym(vp10_plane_add_noise_wmt) PRIVATE
sym(vp10_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
; ---- per-row loop ----
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff ; keep the low 8 bits of the random value
add rcx, rax ; rcx = noise + random offset for this row
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx ; rdi = noise pointer for this row
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax ; rax = byte offset within the row
; ---- per-16-byte loop ----
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for this line
paddb xmm1,xmm2 ; add it in
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; advance to the next 16 bytes of this row
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop ; repeat while rows remain
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
rd42:

Просмотреть файл

@ -2805,6 +2805,8 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
const double dr =
(double)cpi->bytes * (double) 8 / (double)1000 / time_encoded;
const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1);
const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr) {
const double total_psnr =
@ -2844,8 +2846,9 @@ void vp10_remove_compressor(VP10_COMP *cpi) {
SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
}
fprintf(f, "%s\t Time\n", headings);
fprintf(f, "%s\t%8.0f\n", results, total_encode_time);
fprintf(f, "%s\t Time Rc-Err Abs Err\n", headings);
fprintf(f, "%s\t%8.0f %7.2f %7.2f\n", results,
total_encode_time, rate_err, fabs(rate_err));
}
fclose(f);

Просмотреть файл

@ -139,7 +139,7 @@ typedef struct VP10EncoderConfig {
int height; // height of data passed to the compressor
unsigned int input_bit_depth; // Input bit depth.
double init_framerate; // set to passed in framerate
int64_t target_bandwidth; // bandwidth to be used in kilobits per second
int64_t target_bandwidth; // bandwidth to be used in bits per second
int noise_sensitivity; // pre processing blur: recommendation 0
int sharpness; // sharpening output: recommendation 0:

Просмотреть файл

@ -45,7 +45,6 @@
#define BOOST_BREAKOUT 12.5
#define BOOST_FACTOR 12.5
#define ERR_DIVISOR 128.0
#define FACTOR_PT_LOW 0.70
#define FACTOR_PT_HIGH 0.90
#define FIRST_PASS_Q 10.0
@ -230,6 +229,13 @@ static void subtract_stats(FIRSTPASS_STATS *section,
section->duration -= frame->duration;
}
// Calculate the linear size relative to a baseline of 1080P.
// Returns sqrt(area / BASE_SIZE), where area is
// cpi->initial_width * cpi->initial_height: 1.0 for a frame with the same
// pixel count as 1920x1080, below 1.0 for smaller frames and above 1.0 for
// larger ones.
#define BASE_SIZE 2073600.0  // 1920x1080
static double get_linear_size_factor(const VP10_COMP *cpi) {
  // Convert to double before multiplying so the product cannot overflow
  // if the dimension fields are integer types.
  const double this_area =
      (double)cpi->initial_width * (double)cpi->initial_height;
  return pow(this_area / BASE_SIZE, 0.5);
}
// Calculate an active area of the image that discounts formatting
// bars and partially discounts other 0 energy areas.
#define MIN_ACTIVE_AREA 0.5
@ -1121,11 +1127,7 @@ static double calc_correction_factor(double err_per_mb,
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
// Larger image formats are expected to be a little harder to code relatively
// given the same prediction error score. This in part at least relates to the
// increased size and hence coding cost of motion vectors.
#define EDIV_SIZE_FACTOR 800
#define ERR_DIVISOR 100.0
static int get_twopass_worst_quality(const VP10_COMP *cpi,
const double section_err,
double inactive_zone,
@ -1144,12 +1146,22 @@ static int get_twopass_worst_quality(const VP10_COMP *cpi,
const int active_mbs = VPXMAX(1, num_mbs - (int)(num_mbs * inactive_zone));
const double av_err_per_mb = section_err / active_mbs;
const double speed_term = 1.0 + 0.04 * oxcf->speed;
const double ediv_size_correction = (double)num_mbs / EDIV_SIZE_FACTOR;
double ediv_size_correction;
const int target_norm_bits_per_mb = ((uint64_t)section_target_bandwidth <<
BPER_MB_NORMBITS) / active_mbs;
int q;
// Larger image formats are expected to be a little harder to code
// relatively given the same prediction error score. This in part at
// least relates to the increased size and hence coding overheads of
// motion vectors. Some account of this is made through adjustment of
// the error divisor.
ediv_size_correction =
VPXMAX(0.2, VPXMIN(5.0, get_linear_size_factor(cpi)));
if (ediv_size_correction < 1.0)
ediv_size_correction = -(1.0 / ediv_size_correction);
ediv_size_correction *= 4.0;
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {

Просмотреть файл

@ -20,8 +20,8 @@
/* Return the buffer at the given absolute index and increment the index */
static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
unsigned int *idx) {
unsigned int index = *idx;
int *idx) {
int index = *idx;
struct lookahead_entry *buf = ctx->buf + index;
assert(index < ctx->max_sz);
@ -35,7 +35,7 @@ static struct lookahead_entry *pop(struct lookahead_ctx *ctx,
void vp10_lookahead_destroy(struct lookahead_ctx *ctx) {
if (ctx) {
if (ctx->buf) {
unsigned int i;
int i;
for (i = 0; i < ctx->max_sz; i++)
vpx_free_frame_buffer(&ctx->buf[i].img);
@ -221,9 +221,9 @@ struct lookahead_entry *vp10_lookahead_peek(struct lookahead_ctx *ctx,
if (index >= 0) {
// Forward peek
if (index < (int)ctx->sz) {
if (index < ctx->sz) {
index += ctx->read_idx;
if (index >= (int)ctx->max_sz)
if (index >= ctx->max_sz)
index -= ctx->max_sz;
buf = ctx->buf + index;
}

Просмотреть файл

@ -31,10 +31,10 @@ struct lookahead_entry {
#define MAX_PRE_FRAMES 1
struct lookahead_ctx {
unsigned int max_sz; /* Absolute size of the queue */
unsigned int sz; /* Number of buffers currently in the queue */
unsigned int read_idx; /* Read index */
unsigned int write_idx; /* Write index */
int max_sz; /* Absolute size of the queue */
int sz; /* Number of buffers currently in the queue */
int read_idx; /* Read index */
int write_idx; /* Write index */
struct lookahead_entry *buf; /* Buffer list */
};

Просмотреть файл

@ -1158,12 +1158,12 @@ void vp10_highbd_quantize_dc(const tran_low_t *coeff_ptr,
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
unsigned t;
int l;
int l, m;
t = d;
for (l = 0; t > 1; l++)
t >>= 1;
t = 1 + (1 << (16 + l)) / d;
*quant = (int16_t)(t - (1 << 16));
m = 1 + (1 << (16 + l)) / d;
*quant = (int16_t)(m - (1 << 16));
*shift = 1 << (16 - l);
}

Просмотреть файл

Просмотреть файл

@ -1,104 +0,0 @@
;
; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%define private_prefix vp10
%include "third_party/x86inc/x86inc.asm"
SECTION .text
; TRANSFORM_COLS: one butterfly pass of the 4-point transform used by
; fwht4x4, applied to packed 16-bit words.
; In:  m0 = a1, m1 = b1, m2 = c1, m3 = d1
; Computes, lane by lane:
;   a1 += b1;  d1 -= c1;  e1 = (a1 - d1) >> 1  (arithmetic shift)
;   b1 = e1 - b1;  c1 = e1 - c1;  a1 -= c1;  d1 += b1
; Out (after the SWAP renames): m0 = a, m1 = c, m2 = d, m3 = b.
; Uses m4 and m5 as scratch.
%macro TRANSFORM_COLS 0
paddw m0, m1 ; a1 += b1
movq m4, m0
psubw m3, m2 ; d1 -= c1
psubw m4, m3 ; a1 - d1
psraw m4, 1 ; e1 = (a1 - d1) >> 1
movq m5, m4
psubw m5, m1 ;b1
psubw m4, m2 ;c1
psubw m0, m4 ; a1 -= c1
paddw m3, m5 ; d1 += b1
; m0 a0
SWAP 1, 4 ; m1 c1
SWAP 2, 3 ; m2 d1
SWAP 3, 5 ; m3 b1
%endmacro
; TRANSPOSE_4X4: transposes a 4x4 block of 16-bit words held one row per
; register in m0..m3. Uses m4 and m5 as scratch.
; Word interleaves (punpck[l/h]wd) pair up rows 0/1 and 2/3; dword
; interleaves (punpck[l/h]dq) then combine those pairs into the columns.
; The final SWAP renames registers so the transposed rows are presented
; in m0..m3 again (assuming x86inc's chained SWAP semantics).
%macro TRANSPOSE_4X4 0
movq m4, m0
movq m5, m2
punpcklwd m4, m1 ; low words of rows 0 and 1 interleaved
punpckhwd m0, m1 ; high words of rows 0 and 1 interleaved
punpcklwd m5, m3 ; low words of rows 2 and 3 interleaved
punpckhwd m2, m3 ; high words of rows 2 and 3 interleaved
movq m1, m4
movq m3, m0
punpckldq m1, m5 ; column 0
punpckhdq m4, m5 ; column 1
punpckldq m3, m2 ; column 2
punpckhdq m0, m2 ; column 3
SWAP 2, 3, 0, 1, 4
%endmacro
; fwht4x4 (MMX): 4x4 forward transform built from two
; TRANSFORM_COLS + TRANSPOSE_4X4 passes, followed by a left shift by 2.
; Arguments: input (16-bit samples), output, stride.
; Rows are read at input + k * stride * 2 bytes for k = 0..3, i.e. stride
; is counted in 16-bit elements.
; With CONFIG_VP9_HIGHBITDEPTH each 16-bit result is sign-extended to
; 32 bits before being stored (64 output bytes); otherwise the four rows
; are stored as 16-bit words (32 output bytes).
INIT_MMX mmx
cglobal fwht4x4, 3, 4, 8, input, output, stride
lea r3q, [inputq + strideq*4] ; r3 = address of row 2
movq m0, [inputq] ;a1
movq m1, [inputq + strideq*2] ;b1
movq m2, [r3q] ;c1
movq m3, [r3q + strideq*2] ;d1
TRANSFORM_COLS
TRANSPOSE_4X4
TRANSFORM_COLS
TRANSPOSE_4X4
; scale every coefficient by 4
psllw m0, 2
psllw m1, 2
psllw m2, 2
psllw m3, 2
%if CONFIG_VP9_HIGHBITDEPTH
; Build sign masks (0 > x gives all-ones words for negative values) and
; interleave them above each word to sign-extend rows 0 and 1 to 32 bits.
pxor m4, m4
pxor m5, m5
pcmpgtw m4, m0
pcmpgtw m5, m1
movq m6, m0
movq m7, m1
punpcklwd m0, m4
punpcklwd m1, m5
punpckhwd m6, m4
punpckhwd m7, m5
movq [outputq], m0
movq [outputq + 8], m6
movq [outputq + 16], m1
movq [outputq + 24], m7
; same sign extension for rows 2 and 3
pxor m4, m4
pxor m5, m5
pcmpgtw m4, m2
pcmpgtw m5, m3
movq m6, m2
movq m7, m3
punpcklwd m2, m4
punpcklwd m3, m5
punpckhwd m6, m4
punpckhwd m7, m5
movq [outputq + 32], m2
movq [outputq + 40], m6
movq [outputq + 48], m3
movq [outputq + 56], m7
%else
; 16-bit output: one 8-byte row per store
movq [outputq], m0
movq [outputq + 8], m1
movq [outputq + 16], m2
movq [outputq + 24], m3
%endif
RET

Просмотреть файл

@ -0,0 +1,86 @@
;
; Copyright (c) 2016 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%define private_prefix vp10
%include "third_party/x86inc/x86inc.asm"
SECTION .text
; TRANSFORM_COLS: one butterfly pass of the 4-point transform used by
; fwht4x4, applied to packed 16-bit words.
; In:  m0 = a1, m1 = b1, m2 = c1, m3 = d1
; Computes, lane by lane:
;   a1 += b1;  d1 -= c1;  e1 = (a1 - d1) >> 1  (arithmetic shift)
;   b1 = e1 - b1;  c1 = e1 - c1;  a1 -= c1;  d1 += b1
; Out (after the SWAP renames): m0 = a, m1 = c, m2 = d, m3 = b.
; Uses m4 and m5 as scratch. The register copies use movq, so only the low
; four words of each register carry meaningful data through this macro.
%macro TRANSFORM_COLS 0
paddw m0, m1 ; a1 += b1
movq m4, m0
psubw m3, m2 ; d1 -= c1
psubw m4, m3 ; a1 - d1
psraw m4, 1 ; e1 = (a1 - d1) >> 1
movq m5, m4
psubw m5, m1 ;b1
psubw m4, m2 ;c1
psubw m0, m4 ; a1 -= c1
paddw m3, m5 ; d1 += b1
; m0 a0
SWAP 1, 4 ; m1 c1
SWAP 2, 3 ; m2 d1
SWAP 3, 5 ; m3 b1
%endmacro
; TRANSPOSE_4X4: transposes a 4x4 block of 16-bit words.
; In:  rows 0..3 in the low four words of m0..m3 (layout shown below).
; Out: m0 = transposed rows 0 and 1 (low half, high half),
;      m1 = transposed rows 2 and 3 (low half, high half).
; m2 is overwritten with an intermediate; m3 is left unchanged.
%macro TRANSPOSE_4X4 0
; 00 01 02 03
; 10 11 12 13
; 20 21 22 23
; 30 31 32 33
punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13
punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33
mova m1, m0
punpckldq m0, m2 ; 00 10 20 30 01 11 21 31
punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33
%endmacro
; fwht4x4 (SSE2): 4x4 forward transform built from two
; TRANSFORM_COLS + TRANSPOSE_4X4 passes, followed by a left shift by 2.
; Arguments: input (16-bit samples), output, stride.
; Rows are read at input + k * stride * 2 bytes for k = 0..3, i.e. stride
; is counted in 16-bit elements.
; With CONFIG_VP9_HIGHBITDEPTH each 16-bit result is sign-extended to
; 32 bits before being stored (64 output bytes); otherwise the 16 results
; are stored as 16-bit words (32 output bytes).
; Stores use mova, so output is presumably required to be 16-byte aligned
; -- TODO confirm against callers.
; NOTE(review): strideq is used without an explicit sign extension; confirm
; this is safe for the C prototype's int stride.
INIT_XMM sse2
cglobal fwht4x4, 3, 4, 8, input, output, stride
lea r3q, [inputq + strideq*4] ; r3 = address of row 2
movq m0, [inputq] ;a1
movq m1, [inputq + strideq*2] ;b1
movq m2, [r3q] ;c1
movq m3, [r3q + strideq*2] ;d1
TRANSFORM_COLS
TRANSPOSE_4X4
; TRANSPOSE_4X4 left two transposed rows per register (m0: 0/1, m1: 2/3).
; Spread them back out to one row per register in the low halves of
; m0..m3 for the second pass.
SWAP 1, 2
psrldq m1, m0, 8 ; m1 = high half of m0
psrldq m3, m2, 8 ; m3 = high half of m2
TRANSFORM_COLS
TRANSPOSE_4X4
; scale every coefficient by 4
psllw m0, 2
psllw m1, 2
%if CONFIG_VP9_HIGHBITDEPTH
; sign extension
; Duplicate each word into both halves of a dword, then arithmetic-shift
; right by 16 so the upper half becomes the sign of the lower half.
mova m2, m0
mova m3, m1
punpcklwd m0, m0
punpcklwd m1, m1
punpckhwd m2, m2
punpckhwd m3, m3
psrad m0, 16
psrad m1, 16
psrad m2, 16
psrad m3, 16
mova [outputq], m0
mova [outputq + 16], m2
mova [outputq + 32], m1
mova [outputq + 48], m3
%else
; 16-bit output: two rows per 16-byte store
mova [outputq], m0
mova [outputq + 16], m1
%endif
RET

Просмотреть файл

@ -104,7 +104,7 @@ VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
endif
ifeq ($(CONFIG_USE_X86INC),yes)
VP10_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm
endif
@ -114,7 +114,7 @@ VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3_x86_64.asm
endif
endif
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.c
VP10_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
VP10_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP10_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c

Просмотреть файл

@ -21,114 +21,6 @@ static const uint8_t bifilter4_coeff[8][2] = {
{ 16, 112}
};
// Bilinear sub-pixel prediction of a 4x4 block using NEON intrinsics.
//
//   src_ptr             - top-left source pixel.
//   src_pixels_per_line - source stride in bytes.
//   xoffset, yoffset    - row indices into bifilter4_coeff selecting the
//                         horizontal and vertical 2-tap filters.
//   dst_ptr, dst_pitch  - destination block and its stride in bytes.
//
// First pass (horizontal): 5 source rows are turned into 5 rows of 4 pixels
// held in d28u8 (rows 0,1), d29u8 (rows 2,3) and d30u8 (row 4). When
// xoffset == 0 the rows are simply loaded 4 bytes at a time; otherwise
// 8 bytes per row are loaded and filtered with the xoffset taps.
// Second pass (vertical): when yoffset == 0 rows 0..3 are stored as is;
// otherwise each output row is a weighted sum of a row and the row below it.
// Both filter passes narrow with a rounding, saturating right shift by 7.
void vp8_bilinear_predict4x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8;
uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8;
uint8x16_t q1u8, q2u8;
uint16x8_t q1u16, q2u16;
uint16x8_t q7u16, q8u16, q9u16;
uint64x2_t q4u64, q5u64;
uint64x1_t d12u64;
uint32x2x2_t d0u32x2, d1u32x2, d2u32x2, d3u32x2;
if (xoffset == 0) { // skip_1stpass_filter
uint32x2_t d28u32 = vdup_n_u32(0);
uint32x2_t d29u32 = vdup_n_u32(0);
uint32x2_t d30u32 = vdup_n_u32(0);
// Load 4 bytes from each of 5 consecutive rows, two rows per register.
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 0);
src_ptr += src_pixels_per_line;
d28u32 = vld1_lane_u32((const uint32_t *)src_ptr, d28u32, 1);
src_ptr += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 0);
src_ptr += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src_ptr, d29u32, 1);
src_ptr += src_pixels_per_line;
d30u32 = vld1_lane_u32((const uint32_t *)src_ptr, d30u32, 0);
d28u8 = vreinterpret_u8_u32(d28u32);
d29u8 = vreinterpret_u8_u32(d29u32);
d30u8 = vreinterpret_u8_u32(d30u32);
} else {
// Load 8 bytes from each of 5 consecutive rows.
d2u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d3u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d4u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d5u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line;
d6u8 = vld1_u8(src_ptr);
q1u8 = vcombine_u8(d2u8, d3u8);
q2u8 = vcombine_u8(d4u8, d5u8);
// Broadcast the two horizontal filter taps.
d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]);
// Shifting each 64-bit row right by 8 bits moves every pixel's
// right-hand neighbour into that pixel's byte position.
q4u64 = vshrq_n_u64(vreinterpretq_u64_u8(q1u8), 8);
q5u64 = vshrq_n_u64(vreinterpretq_u64_u8(q2u8), 8);
d12u64 = vshr_n_u64(vreinterpret_u64_u8(d6u8), 8);
// vzip_u32 packs the first 4 bytes of two rows into a single register
// (val[0]), so two rows are filtered per multiply.
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q1u8)),
vreinterpret_u32_u8(vget_high_u8(q1u8)));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q2u8)),
vreinterpret_u32_u8(vget_high_u8(q2u8)));
d2u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q4u64)),
vreinterpret_u32_u64(vget_high_u64(q4u64)));
d3u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)),
vreinterpret_u32_u64(vget_high_u64(q5u64)));
// pixel * tap0 + right neighbour * tap1
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
q9u16 = vmull_u8(d6u8, d0u8);
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d2u32x2.val[0]), d1u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d3u32x2.val[0]), d1u8);
q9u16 = vmlal_u8(q9u16, vreinterpret_u8_u64(d12u64), d1u8);
// Round, shift right by 7 and narrow back to 8 bits with saturation.
d28u8 = vqrshrn_n_u16(q7u16, 7);
d29u8 = vqrshrn_n_u16(q8u16, 7);
d30u8 = vqrshrn_n_u16(q9u16, 7);
}
// secondpass_filter
if (yoffset == 0) { // skip_2ndpass_filter
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d29u8), 1);
} else {
// Broadcast the two vertical filter taps.
d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]);
d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]);
q1u16 = vmull_u8(d28u8, d0u8);
q2u16 = vmull_u8(d29u8, d0u8);
// vext by 4 bytes yields the same rows shifted down by one row
// (rows 1,2 and rows 3,4).
d26u8 = vext_u8(d28u8, d29u8, 4);
d27u8 = vext_u8(d29u8, d30u8, 4);
q1u16 = vmlal_u8(q1u16, d26u8, d1u8);
q2u16 = vmlal_u8(q2u16, d27u8, d1u8);
d2u8 = vqrshrn_n_u16(q1u16, 7);
d3u8 = vqrshrn_n_u16(q2u16, 7);
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
}
return;
}
void vp8_bilinear_predict8x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,

Просмотреть файл

@ -22,383 +22,6 @@ static const int8_t vp8_sub_pel_filters[8][8] = {
{0, -1, 12, 123, -6, 0, 0, 0},
};
// Six-tap sub-pixel prediction of a 4x4 block using NEON intrinsics.
//
//   src_ptr             - source pixel corresponding to the block's top-left.
//   src_pixels_per_line - source stride in bytes.
//   xoffset, yoffset    - row indices into vp8_sub_pel_filters selecting the
//                         horizontal and vertical filters.
//   dst_ptr, dst_pitch  - destination block and its stride in bytes.
//
// Three paths:
//   xoffset == 0 : vertical filter only, reading 9 rows starting two rows
//                  above src_ptr.
//   yoffset == 0 : horizontal filter only, reading 4 rows starting two
//                  pixels to the left of src_ptr.
//   otherwise    : horizontal filter over 9 rows (4 rows, then 5 more),
//                  followed by the vertical filter over those 9 results.
//
// The filter taps are used as absolute values; their signs are applied by
// accumulating taps 0, 2, 3 and 5 with multiply-add and taps 1 and 4 with
// multiply-subtract. Partial sums are combined with a saturating signed add
// and narrowed with a rounding, saturating right shift by 7.
void vp8_sixtap_predict4x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,
int xoffset,
int yoffset,
unsigned char *dst_ptr,
int dst_pitch) {
unsigned char *src;
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d18u8, d19u8, d20u8, d21u8;
uint8x8_t d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8, d31u8;
int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8;
uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16;
uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16;
int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16;
int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16;
uint8x16_t q3u8, q4u8, q5u8, q6u8, q11u8;
uint64x2_t q3u64, q4u64, q5u64, q6u64, q9u64, q10u64;
uint32x2x2_t d0u32x2, d1u32x2;
if (xoffset == 0) { // secondpass_filter4x4_only
uint32x2_t d27u32 = vdup_n_u32(0);
uint32x2_t d28u32 = vdup_n_u32(0);
uint32x2_t d29u32 = vdup_n_u32(0);
uint32x2_t d30u32 = vdup_n_u32(0);
uint32x2_t d31u32 = vdup_n_u32(0);
// load second_pass filter
dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
// Broadcast each of the six taps, then take absolute values.
d0s8 = vdup_lane_s8(dtmps8, 0);
d1s8 = vdup_lane_s8(dtmps8, 1);
d2s8 = vdup_lane_s8(dtmps8, 2);
d3s8 = vdup_lane_s8(dtmps8, 3);
d4s8 = vdup_lane_s8(dtmps8, 4);
d5s8 = vdup_lane_s8(dtmps8, 5);
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
// load src data
// 9 rows of 4 bytes, starting two rows above the block, two rows per
// register (d27: rows 0,1 ... d30: rows 6,7, d31: row 8).
src = src_ptr - src_pixels_per_line * 2;
d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 0);
src += src_pixels_per_line;
d27u32 = vld1_lane_u32((const uint32_t *)src, d27u32, 1);
src += src_pixels_per_line;
d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 0);
src += src_pixels_per_line;
d28u32 = vld1_lane_u32((const uint32_t *)src, d28u32, 1);
src += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 0);
src += src_pixels_per_line;
d29u32 = vld1_lane_u32((const uint32_t *)src, d29u32, 1);
src += src_pixels_per_line;
d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 0);
src += src_pixels_per_line;
d30u32 = vld1_lane_u32((const uint32_t *)src, d30u32, 1);
src += src_pixels_per_line;
d31u32 = vld1_lane_u32((const uint32_t *)src, d31u32, 0);
d27u8 = vreinterpret_u8_u32(d27u32);
d28u8 = vreinterpret_u8_u32(d28u32);
d29u8 = vreinterpret_u8_u32(d29u32);
d30u8 = vreinterpret_u8_u32(d30u32);
d31u8 = vreinterpret_u8_u32(d31u32);
// vext by 4 bytes gives the row pairs offset by one row
// (rows 1,2 / 3,4 / 5,6 / 7,8).
d23u8 = vext_u8(d27u8, d28u8, 4);
d24u8 = vext_u8(d28u8, d29u8, 4);
d25u8 = vext_u8(d29u8, d30u8, 4);
d26u8 = vext_u8(d30u8, d31u8, 4);
// Taps 0, 2, 4 accumulate in q3/q4; taps 5, 1, 3 accumulate in q5/q6.
q3u16 = vmull_u8(d27u8, d0u8);
q4u16 = vmull_u8(d28u8, d0u8);
q5u16 = vmull_u8(d25u8, d5u8);
q6u16 = vmull_u8(d26u8, d5u8);
q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
q3s16 = vreinterpretq_s16_u16(q3u16);
q4s16 = vreinterpretq_s16_u16(q4u16);
q5s16 = vreinterpretq_s16_u16(q5u16);
q6s16 = vreinterpretq_s16_u16(q6u16);
// Combine the two partial sums with a saturating signed add.
q5s16 = vqaddq_s16(q5s16, q3s16);
q6s16 = vqaddq_s16(q6s16, q4s16);
// Round, shift right by 7 and narrow to unsigned 8 bits with saturation.
d3u8 = vqrshrun_n_s16(q5s16, 7);
d4u8 = vqrshrun_n_s16(q6s16, 7);
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
return;
}
// load first_pass filter
dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]);
d0s8 = vdup_lane_s8(dtmps8, 0);
d1s8 = vdup_lane_s8(dtmps8, 1);
d2s8 = vdup_lane_s8(dtmps8, 2);
d3s8 = vdup_lane_s8(dtmps8, 3);
d4s8 = vdup_lane_s8(dtmps8, 4);
d5s8 = vdup_lane_s8(dtmps8, 5);
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
// First pass: output_height lines x output_width columns (9x4)
// Start two pixels to the left; when a vertical pass follows, also start
// two rows above.
if (yoffset == 0) // firstpass_filter4x4_only
src = src_ptr - 2;
else
src = src_ptr - 2 - (src_pixels_per_line * 2);
// Load 16 bytes from each of the first 4 rows.
q3u8 = vld1q_u8(src);
src += src_pixels_per_line;
q4u8 = vld1q_u8(src);
src += src_pixels_per_line;
q5u8 = vld1q_u8(src);
src += src_pixels_per_line;
q6u8 = vld1q_u8(src);
src += src_pixels_per_line;
// Bytes 5.. of each row: the pixels multiplied by tap 5.
d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
// vswp here
// Keep only the low 8 bytes of each row, two rows per q register.
q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
// Each vzip_u32 below packs the first 4 bytes of two rows into val[0] so
// that two rows are filtered per multiply.
d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
vreinterpret_u32_u8(d19u8));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
vreinterpret_u32_u8(d21u8));
// tap 5
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
// keep original src data in q4 q6
q4u64 = vreinterpretq_u64_u8(q3u8);
q6u64 = vreinterpretq_u64_u8(q5u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
vreinterpret_u32_u8(vget_high_u8(q3u8)));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
vreinterpret_u32_u8(vget_high_u8(q5u8)));
// Right shifts of the 64-bit rows by 8*k bits select the pixels k
// positions to the right.
q9u64 = vshrq_n_u64(q4u64, 8);
q10u64 = vshrq_n_u64(q6u64, 8);
// tap 0
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
vreinterpret_u32_u64(vget_high_u64(q9u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
vreinterpret_u32_u64(vget_high_u64(q10u64)));
q3u64 = vshrq_n_u64(q4u64, 32);
q5u64 = vshrq_n_u64(q6u64, 32);
// tap 1 (subtracted)
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
vreinterpret_u32_u64(vget_high_u64(q3u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
vreinterpret_u32_u64(vget_high_u64(q5u64)));
q9u64 = vshrq_n_u64(q4u64, 16);
q10u64 = vshrq_n_u64(q6u64, 16);
// tap 4 (subtracted)
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
vreinterpret_u32_u64(vget_high_u64(q9u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
vreinterpret_u32_u64(vget_high_u64(q10u64)));
q3u64 = vshrq_n_u64(q4u64, 24);
q5u64 = vshrq_n_u64(q6u64, 24);
// tap 2
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
vreinterpret_u32_u64(vget_high_u64(q3u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
vreinterpret_u32_u64(vget_high_u64(q5u64)));
// tap 3 is kept in a separate accumulator and merged with a saturating add
q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
q7s16 = vreinterpretq_s16_u16(q7u16);
q8s16 = vreinterpretq_s16_u16(q8u16);
q9s16 = vreinterpretq_s16_u16(q9u16);
q10s16 = vreinterpretq_s16_u16(q10u16);
q7s16 = vqaddq_s16(q7s16, q9s16);
q8s16 = vqaddq_s16(q8s16, q10s16);
// Horizontally filtered rows 0,1 (d27) and 2,3 (d28).
d27u8 = vqrshrun_n_s16(q7s16, 7);
d28u8 = vqrshrun_n_s16(q8s16, 7);
if (yoffset == 0) { // firstpass_filter4x4_only
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d27u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d28u8), 1);
return;
}
// First Pass on rest 5-line data
q3u8 = vld1q_u8(src);
src += src_pixels_per_line;
q4u8 = vld1q_u8(src);
src += src_pixels_per_line;
q5u8 = vld1q_u8(src);
src += src_pixels_per_line;
q6u8 = vld1q_u8(src);
src += src_pixels_per_line;
q11u8 = vld1q_u8(src);
d18u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5);
d19u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5);
d20u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5);
d21u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5);
// vswp here
q3u8 = vcombine_u8(vget_low_u8(q3u8), vget_low_u8(q4u8));
q5u8 = vcombine_u8(vget_low_u8(q5u8), vget_low_u8(q6u8));
d0u32x2 = vzip_u32(vreinterpret_u32_u8(d18u8), // d18 d19
vreinterpret_u32_u8(d19u8));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(d20u8), // d20 d21
vreinterpret_u32_u8(d21u8));
// The 9th row (q11) is filtered on its own in q12, using vext to select
// the pixels for each tap.
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 5);
// tap 5
q7u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d5u8);
q8u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d5u8);
q12u16 = vmull_u8(d31u8, d5u8);
q4u64 = vreinterpretq_u64_u8(q3u8);
q6u64 = vreinterpretq_u64_u8(q5u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q3u8)), // d6 d7
vreinterpret_u32_u8(vget_high_u8(q3u8)));
d1u32x2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(q5u8)), // d10 d11
vreinterpret_u32_u8(vget_high_u8(q5u8)));
q9u64 = vshrq_n_u64(q4u64, 8);
q10u64 = vshrq_n_u64(q6u64, 8);
// tap 0
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d0u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d0u8);
q12u16 = vmlal_u8(q12u16, vget_low_u8(q11u8), d0u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
vreinterpret_u32_u64(vget_high_u64(q9u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
vreinterpret_u32_u64(vget_high_u64(q10u64)));
q3u64 = vshrq_n_u64(q4u64, 32);
q5u64 = vshrq_n_u64(q6u64, 32);
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 1);
// tap 1 (subtracted)
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d1u8);
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d1u8);
q12u16 = vmlsl_u8(q12u16, d31u8, d1u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
vreinterpret_u32_u64(vget_high_u64(q3u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
vreinterpret_u32_u64(vget_high_u64(q5u64)));
q9u64 = vshrq_n_u64(q4u64, 16);
q10u64 = vshrq_n_u64(q6u64, 16);
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 4);
// tap 4 (subtracted)
q7u16 = vmlsl_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d4u8);
q8u16 = vmlsl_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d4u8);
q12u16 = vmlsl_u8(q12u16, d31u8, d4u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q9u64)), // d18 d19
vreinterpret_u32_u64(vget_high_u64(q9u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q10u64)), // d20 d211
vreinterpret_u32_u64(vget_high_u64(q10u64)));
q3u64 = vshrq_n_u64(q4u64, 24);
q5u64 = vshrq_n_u64(q6u64, 24);
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 2);
// tap 2
q7u16 = vmlal_u8(q7u16, vreinterpret_u8_u32(d0u32x2.val[0]), d2u8);
q8u16 = vmlal_u8(q8u16, vreinterpret_u8_u32(d1u32x2.val[0]), d2u8);
q12u16 = vmlal_u8(q12u16, d31u8, d2u8);
d0u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q3u64)), // d6 d7
vreinterpret_u32_u64(vget_high_u64(q3u64)));
d1u32x2 = vzip_u32(vreinterpret_u32_u64(vget_low_u64(q5u64)), // d10 d11
vreinterpret_u32_u64(vget_high_u64(q5u64)));
d31u8 = vext_u8(vget_low_u8(q11u8), vget_high_u8(q11u8), 3);
// tap 3 is kept in separate accumulators and merged with a saturating add
q9u16 = vmull_u8(vreinterpret_u8_u32(d0u32x2.val[0]), d3u8);
q10u16 = vmull_u8(vreinterpret_u8_u32(d1u32x2.val[0]), d3u8);
q11u16 = vmull_u8(d31u8, d3u8);
q7s16 = vreinterpretq_s16_u16(q7u16);
q8s16 = vreinterpretq_s16_u16(q8u16);
q9s16 = vreinterpretq_s16_u16(q9u16);
q10s16 = vreinterpretq_s16_u16(q10u16);
q11s16 = vreinterpretq_s16_u16(q11u16);
q12s16 = vreinterpretq_s16_u16(q12u16);
q7s16 = vqaddq_s16(q7s16, q9s16);
q8s16 = vqaddq_s16(q8s16, q10s16);
q12s16 = vqaddq_s16(q12s16, q11s16);
// Horizontally filtered rows 4,5 (d29), 6,7 (d30) and 8 (d31).
d29u8 = vqrshrun_n_s16(q7s16, 7);
d30u8 = vqrshrun_n_s16(q8s16, 7);
d31u8 = vqrshrun_n_s16(q12s16, 7);
// Second pass: 4x4
dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]);
d0s8 = vdup_lane_s8(dtmps8, 0);
d1s8 = vdup_lane_s8(dtmps8, 1);
d2s8 = vdup_lane_s8(dtmps8, 2);
d3s8 = vdup_lane_s8(dtmps8, 3);
d4s8 = vdup_lane_s8(dtmps8, 4);
d5s8 = vdup_lane_s8(dtmps8, 5);
d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8));
d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8));
d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8));
d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8));
d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8));
d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8));
// Row pairs offset by one row (rows 1,2 / 3,4 / 5,6 / 7,8).
d23u8 = vext_u8(d27u8, d28u8, 4);
d24u8 = vext_u8(d28u8, d29u8, 4);
d25u8 = vext_u8(d29u8, d30u8, 4);
d26u8 = vext_u8(d30u8, d31u8, 4);
// Same vertical filter sequence as the xoffset == 0 path above.
q3u16 = vmull_u8(d27u8, d0u8);
q4u16 = vmull_u8(d28u8, d0u8);
q5u16 = vmull_u8(d25u8, d5u8);
q6u16 = vmull_u8(d26u8, d5u8);
q3u16 = vmlsl_u8(q3u16, d29u8, d4u8);
q4u16 = vmlsl_u8(q4u16, d30u8, d4u8);
q5u16 = vmlsl_u8(q5u16, d23u8, d1u8);
q6u16 = vmlsl_u8(q6u16, d24u8, d1u8);
q3u16 = vmlal_u8(q3u16, d28u8, d2u8);
q4u16 = vmlal_u8(q4u16, d29u8, d2u8);
q5u16 = vmlal_u8(q5u16, d24u8, d3u8);
q6u16 = vmlal_u8(q6u16, d25u8, d3u8);
q3s16 = vreinterpretq_s16_u16(q3u16);
q4s16 = vreinterpretq_s16_u16(q4u16);
q5s16 = vreinterpretq_s16_u16(q5u16);
q6s16 = vreinterpretq_s16_u16(q6u16);
q5s16 = vqaddq_s16(q5s16, q3s16);
q6s16 = vqaddq_s16(q6s16, q4s16);
d3u8 = vqrshrun_n_s16(q5s16, 7);
d4u8 = vqrshrun_n_s16(q6s16, 7);
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d3u8), 1);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 0);
dst_ptr += dst_pitch;
vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d4u8), 1);
return;
}
void vp8_sixtap_predict8x4_neon(
unsigned char *src_ptr,
int src_pixels_per_line,

Просмотреть файл

@ -104,7 +104,7 @@ vp8_prob *vp8_mv_ref_probs(
extern const unsigned char vp8_mbsplit_offset[4][16];
static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b)
{
if (!(b & 3))
{
@ -119,7 +119,8 @@ static INLINE int left_block_mv(const MODE_INFO *cur_mb, int b)
return (cur_mb->bmi + b - 1)->mv.as_int;
}
static INLINE int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b,
int mi_stride)
{
if (!(b >> 2))
{

Просмотреть файл

@ -10,6 +10,7 @@
#include <stdlib.h>
#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
static const int16_t vp8_rv_msa[] =
@ -798,54 +799,3 @@ void vp8_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows,
}
}
}
/* MSA (MIPS SIMD) version of the plane noise adder: clamps pixels and adds
 * noise bytes to them in place.
 *
 * start_ptr   first byte of the plane to modify
 * noise       noise buffer; every row starts reading at noise + (rand() & 0xff)
 * blackclamp  only element [0] is read; it is splatted across a vector
 * whiteclamp  only element [0] is read; it is splatted across a vector
 * bothclamp   never referenced in this function
 * width       bytes per row; processed in blocks of 16 (width / 16 blocks), so
 *             any remainder past the last full block is left unmodified
 * height      rows; processed in pairs (height / 2 iterations), so an odd
 *             final row is left unmodified
 * pitch       offset between the starts of consecutive rows
 *
 * rand() is called twice per loop iteration, once for each row of the pair.
 */
void vp8_plane_add_noise_msa(uint8_t *start_ptr, char *noise,
char blackclamp[16], char whiteclamp[16],
char bothclamp[16],
uint32_t width, uint32_t height,
int32_t pitch)
{
uint32_t i, j;
/* One iteration handles rows 2*i and 2*i + 1. */
for (i = 0; i < height / 2; ++i)
{
uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
/* Independent random start offset (0..255) into the noise buffer per row. */
int8_t *ref0_ptr = (int8_t *) (noise + (rand() & 0xff));
uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
int8_t *ref1_ptr = (int8_t *) (noise + (rand() & 0xff));
/* Counts down over the full 16-byte blocks of the row. */
for (j = width / 16; j--;)
{
v16i8 temp00_s, temp01_s;
v16u8 temp00, temp01, black_clamp, white_clamp;
v16u8 pos0, ref0, pos1, ref1;
v16i8 const127 = __msa_ldi_b(127);
/* Load one vector of pixels and one vector of noise for each row. */
pos0 = LD_UB(pos0_ptr);
ref0 = LD_UB(ref0_ptr);
pos1 = LD_UB(pos1_ptr);
ref1 = LD_UB(ref1_ptr);
black_clamp = (v16u8)__msa_fill_b(blackclamp[0]);
white_clamp = (v16u8)__msa_fill_b(whiteclamp[0]);
/* Lower clamp: build a per-byte mask of pixels below black_clamp and use
 * it to select black_clamp for those bytes.
 * NOTE(review): assumes __msa_bmnz_v copies bits of its second operand
 * into the first wherever the mask is set - confirm against the MSA docs. */
temp00 = (pos0 < black_clamp);
pos0 = __msa_bmnz_v(pos0, black_clamp, temp00);
temp01 = (pos1 < black_clamp);
pos1 = __msa_bmnz_v(pos1, black_clamp, temp01);
/* NOTE(review): XORI_B2_128_UB is a project macro; presumably it XORs both
 * vectors with 128 so the comparison below can be done on signed bytes -
 * confirm in vp8_macros_msa.h. */
XORI_B2_128_UB(pos0, pos1);
/* Upper clamp: the limit is white_clamp + 127 with signed saturation;
 * bytes the mask marks as exceeding the limit are replaced by it. */
temp00_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp00 = (v16u8)(temp00_s < pos0);
pos0 = (v16u8)__msa_bmnz_v((v16u8)pos0, (v16u8)temp00_s, temp00);
temp01_s = __msa_adds_s_b((v16i8)white_clamp, const127);
temp01 = (temp01_s < pos1);
pos1 = (v16u8)__msa_bmnz_v((v16u8)pos1, (v16u8)temp01_s, temp01);
/* Second application of the same macro as above (presumably undoing the
 * earlier XOR - see the note there). */
XORI_B2_128_UB(pos0, pos1);
/* Add the noise bytes and write the result back over the source pixels. */
pos0 += ref0;
ST_UB(pos0, pos0_ptr);
pos1 += ref1;
ST_UB(pos1, pos1_ptr);
/* Advance all four cursors to the next 16-byte block. */
pos0_ptr += 16;
pos1_ptr += 16;
ref0_ptr += 16;
ref1_ptr += 16;
}
}
}

Просмотреть файл

@ -10,6 +10,7 @@
#include "vpx_config.h"
#include "vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "vpx_scale_rtcd.h"
#include "vpx_scale/yv12config.h"
@ -490,54 +491,6 @@ static void fillrd(struct postproc_state *state, int q, int a)
state->last_noise = a;
}
/*
 * vp8_plane_add_noise_c
 *
 * Adds pre-generated noise to a plane of 8-bit pixels, in place.
 *
 *   Start       first pixel of the plane
 *   noise       noise samples; each row reads Width consecutive samples
 *               beginning at a random offset in [0, 255] (rand() & 0xff),
 *               so 255 + Width samples must be readable
 *   blackclamp  only element [0] is used: pixels below it are raised to it
 *   whiteclamp  only element [0] is used: pixels above 255 + whiteclamp[0]
 *               are lowered to that value
 *   bothclamp   unused by this C version
 *   Width       pixels processed per row
 *   Height      number of rows
 *   Pitch       offset between the starts of consecutive rows
 *
 * Returns nothing.  rand() is called exactly once per row.
 */
void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
                           char blackclamp[16],
                           char whiteclamp[16],
                           char bothclamp[16],
                           unsigned int Width, unsigned int Height, int Pitch)
{
    unsigned int row;

    (void)bothclamp;

    for (row = 0; row < Height; ++row)
    {
        unsigned char *const line = Start + row * Pitch;
        /* Fresh random starting point in the noise buffer for this row. */
        const char *const grain = noise + (rand() & 0xff);
        unsigned int col = 0;

        while (col < Width)
        {
            unsigned char value = line[col];

            /* Lower bound. */
            if (value < blackclamp[0])
                value = (unsigned char)blackclamp[0];

            /* Upper bound.
             * NOTE(review): this only has an effect when whiteclamp[0] is
             * negative; confirm the sign convention used by the caller. */
            if (value > 255 + whiteclamp[0])
                value = (unsigned char)(255 + whiteclamp[0]);

            /* Noise is added with 8-bit wrap-around, as in the store back
             * to an unsigned char. */
            line[col] = (unsigned char)(value + grain[col]);
            ++col;
        }
    }
}
/* Blend the macro block with a solid colored square. Leave the
* edges unblended to give distinction to macro blocks in areas
* filled with the same color block.
@ -828,7 +781,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
fillrd(&oci->postproc_state, 63 - q, noise_level);
}
vp8_plane_add_noise
vpx_plane_add_noise
(oci->post_proc_buffer.y_buffer,
oci->postproc_state.noise,
oci->postproc_state.blackclamp,

Просмотреть файл

@ -167,10 +167,6 @@ if (vpx_config("CONFIG_POSTPROC") eq "yes") {
add_proto qw/void vp8_post_proc_down_and_across_mb_row/, "unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size";
specialize qw/vp8_post_proc_down_and_across_mb_row sse2 msa/;
add_proto qw/void vp8_plane_add_noise/, "unsigned char *s, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int w, unsigned int h, int pitch";
specialize qw/vp8_plane_add_noise mmx sse2 msa/;
$vp8_plane_add_noise_sse2=vp8_plane_add_noise_wmt;
add_proto qw/void vp8_blend_mb_inner/, "unsigned char *y, unsigned char *u, unsigned char *v, int y1, int u1, int v1, int alpha, int stride";
# no asm yet
@ -209,7 +205,6 @@ $vp8_sixtap_predict8x4_media=vp8_sixtap_predict8x4_armv6;
$vp8_sixtap_predict8x4_dspr2=vp8_sixtap_predict8x4_dspr2;
add_proto qw/void vp8_sixtap_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=817
specialize qw/vp8_sixtap_predict4x4 mmx ssse3 media dspr2 msa/;
$vp8_sixtap_predict4x4_media=vp8_sixtap_predict4x4_armv6;
$vp8_sixtap_predict4x4_dspr2=vp8_sixtap_predict4x4_dspr2;
@ -227,7 +222,6 @@ specialize qw/vp8_bilinear_predict8x4 mmx media neon msa/;
$vp8_bilinear_predict8x4_media=vp8_bilinear_predict8x4_armv6;
add_proto qw/void vp8_bilinear_predict4x4/, "unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch";
#TODO(johannkoenig): fix the neon version https://code.google.com/p/webm/issues/detail?id=892
specialize qw/vp8_bilinear_predict4x4 mmx media msa/;
$vp8_bilinear_predict4x4_media=vp8_bilinear_predict4x4_armv6;

Просмотреть файл

@ -44,8 +44,8 @@ extern "C" {
#include <os2.h>
#include <stdlib.h>
#define THREAD_FUNCTION void
#define THREAD_FUNCTION_RETURN void
#define THREAD_FUNCTION void *
#define THREAD_FUNCTION_RETURN void *
#define THREAD_SPECIFIC_INDEX PULONG
#define pthread_t TID
#define pthread_attr_t ULONG

Просмотреть файл

@ -241,68 +241,6 @@ sym(vp8_mbpost_proc_down_mmx):
%undef flimit2
;void vp8_plane_add_noise_mmx (unsigned char *Start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
;
; Clamps the pixels of a plane and adds noise bytes to them in place, one row
; at a time, 8 bytes per step.
;
; Only the blackclamp pointer, arg(2), is loaded: whiteclamp and bothclamp are
; addressed as [blackclamp+16] and [blackclamp+32]; arg(3) and arg(4) are
; never read.
; Both loops test their condition at the bottom, so at least one 8-byte step
; of one row is always processed, and a row whose Width is not a multiple of 8
; is processed up to the next multiple of 8.
; arg(0) (Start) and arg(6) (Height) are modified in their stack slots.
; NOTE(review): MMX registers are used and no emms appears in this routine;
; confirm that callers do not depend on x87 state afterwards.
global sym(vp8_plane_add_noise_mmx) PRIVATE
sym(vp8_plane_add_noise_mmx):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
; ---- per-row loop ----
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff ; keep the low 8 bits of the rand result: offset 0..255
add rcx, rax ; rcx = noise + offset
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx ; rdi = start of this row's noise
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax ; rax = byte offset within the row
; ---- per-8-byte loop within the row ----
.addnoise_nextset:
movq mm1,[rsi+rax] ; get the source
psubusb mm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb mm1, [rdx+32] ;bothclamp
psubusb mm1, [rdx+16] ;whiteclamp
movq mm2,[rdi+rax] ; get the noise for these 8 bytes
paddb mm1,mm2 ; add it in (wrapping, not saturating)
movq [rsi+rax],mm1 ; store the result
add rax,8 ; advance to the next 8 bytes of this row
cmp rax, rcx
jl .addnoise_nextset ; signed compare: loop while offset < Width
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop ; loop while the decremented Height is > 0
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
Blur:

Просмотреть файл

@ -655,68 +655,6 @@ sym(vp8_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp8_plane_add_noise_wmt (unsigned char *Start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int Width, unsigned int Height, int Pitch)
;
; Clamps the pixels of a plane and adds noise bytes to them in place, one row
; at a time, 16 bytes per step (XMM registers).
;
; Only the blackclamp pointer, arg(2), is loaded: whiteclamp and bothclamp are
; addressed as [blackclamp+16] and [blackclamp+32]; arg(3) and arg(4) are
; never read.
; Pixels and noise are accessed with movdqu (no alignment requirement); the
; clamp vectors are used directly as memory operands of psubusb/paddusb.
; NOTE(review): such memory operands normally need 16-byte alignment in
; non-VEX SSE2 code - confirm the clamp arrays are declared aligned.
; Both loops test their condition at the bottom, so at least one 16-byte step
; of one row is always processed, and a row whose Width is not a multiple of
; 16 is processed up to the next multiple of 16.
; arg(0) (Start) and arg(6) (Height) are modified in their stack slots.
global sym(vp8_plane_add_noise_wmt) PRIVATE
sym(vp8_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
; ---- per-row loop ----
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff ; keep the low 8 bits of the rand result: offset 0..255
add rcx, rax ; rcx = noise + offset
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx ; rdi = start of this row's noise
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax ; rax = byte offset within the row
; ---- per-16-byte loop within the row ----
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for these 16 bytes
paddb xmm1,xmm2 ; add it in (wrapping, not saturating)
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; advance to the next 16 bytes of this row
cmp rax, rcx
jl .addnoise_nextset ; signed compare: loop while offset < Width
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop ; loop while the decremented Height is > 0
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
four8s:

Просмотреть файл

@ -44,7 +44,7 @@ void vp8dx_bool_decoder_fill(BOOL_DECODER *br)
int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);
size_t bytes_left = br->user_buffer_end - bufptr;
size_t bits_left = bytes_left * CHAR_BIT;
int x = (int)(shift + CHAR_BIT - bits_left);
int x = shift + CHAR_BIT - (int)bits_left;
int loop_end = 0;
unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1];

Просмотреть файл

@ -83,7 +83,7 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
}
{
register unsigned int shift = vp8_norm[range];
register int shift = vp8_norm[range];
range <<= shift;
value <<= shift;
count -= shift;

Просмотреть файл

@ -986,7 +986,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
VP8_COMMON *const pc = &pbi->common;
MACROBLOCKD *const xd = &pbi->mb;
const unsigned char *data = pbi->fragments.ptrs[0];
const unsigned char *data_end = data + pbi->fragments.sizes[0];
const unsigned int data_sz = pbi->fragments.sizes[0];
const unsigned char *data_end = data + data_sz;
ptrdiff_t first_partition_length_in_bytes;
int i, j, k, l;
@ -1022,7 +1023,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
const unsigned char *clear = data;
if (pbi->decrypt_cb)
{
int n = (int)VPXMIN(sizeof(clear_buffer), data_end - data);
int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
clear = clear_buffer;
}

Просмотреть файл

@ -194,7 +194,7 @@ void vp8_calculate_overlaps(MB_OVERLAP *overlap_ul,
return;
}
if (new_row <= (-4 << 3) || new_col <= (-4 << 3))
if (new_row <= -32 || new_col <= -32)
{
/* outside the frame */
return;

Просмотреть файл

@ -163,7 +163,7 @@ void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount)
{
const TOKENEXTRA *stop = p + xcount;
unsigned int split;
unsigned int shift;
int shift;
int count = w->count;
unsigned int range = w->range;
unsigned int lowvalue = w->lowvalue;

Просмотреть файл

@ -65,7 +65,7 @@ static void vp8_encode_bool(BOOL_CODER *br, int bit, int probability)
int count = br->count;
unsigned int range = br->range;
unsigned int lowvalue = br->lowvalue;
register unsigned int shift;
register int shift;
#ifdef VP8_ENTROPY_STATS
#if defined(SECTIONBITS_OUTPUT)

Просмотреть файл

@ -529,7 +529,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
// Bias on zero motion vector sse.
const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
sse_diff = zero_mv_sse - best_sse;
sse_diff = (int)zero_mv_sse - (int)best_sse;
saved_mbmi = *mbmi;

Просмотреть файл

@ -18,8 +18,8 @@
extern "C" {
#endif
#define SUM_DIFF_THRESHOLD 448
#define SUM_DIFF_THRESHOLD_HIGH 512
#define SUM_DIFF_THRESHOLD 512
#define SUM_DIFF_THRESHOLD_HIGH 600
#define MOTION_MAGNITUDE_THRESHOLD (8*3)
#define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5)

Просмотреть файл

@ -18,6 +18,7 @@
#include "onyx_int.h"
#include "vpx_dsp/variance.h"
#include "encodeintra.h"
#include "vp8/common/common.h"
#include "vp8/common/setupintrarecon.h"
#include "vp8/common/systemdependent.h"
#include "mcomp.h"
@ -2417,7 +2418,7 @@ void vp8_second_pass(VP8_COMP *cpi)
int tmp_q;
int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame);
FIRSTPASS_STATS this_frame = {0};
FIRSTPASS_STATS this_frame;
FIRSTPASS_STATS this_frame_copy;
double this_frame_intra_error;
@ -2425,6 +2426,8 @@ void vp8_second_pass(VP8_COMP *cpi)
int overhead_bits;
vp8_zero(this_frame);
if (!cpi->twopass.stats_in)
{
return ;
@ -2808,7 +2811,8 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
* static scene.
*/
if ( detect_transition_to_still( cpi, i,
(cpi->key_frame_frequency-i),
((int)(cpi->key_frame_frequency) -
(int)i),
loop_decay_rate,
decay_accumulator ) )
{

Просмотреть файл

@ -1591,7 +1591,6 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
int col_min = ref_col - distance;
int col_max = ref_col + distance;
// TODO(johannkoenig): check if this alignment is necessary.
DECLARE_ALIGNED(16, unsigned int, sad_array8[8]);
unsigned int sad_array[3];

Просмотреть файл

@ -1523,7 +1523,8 @@ static void update_layer_contexts (VP8_COMP *cpi)
void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
{
VP8_COMMON *cm = &cpi->common;
int last_w, last_h, prev_number_of_layers;
int last_w, last_h;
unsigned int prev_number_of_layers;
if (!cpi)
return;
@ -1786,10 +1787,8 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height)
cpi->force_next_frame_intra = 1;
if (((cm->Width + 15) & 0xfffffff0) !=
cm->yv12_fb[cm->lst_fb_idx].y_width ||
((cm->Height + 15) & 0xfffffff0) !=
cm->yv12_fb[cm->lst_fb_idx].y_height ||
if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
{
dealloc_raw_frame_buffers(cpi);
@ -2247,6 +2246,8 @@ void vp8_remove_compressor(VP8_COMP **ptr)
double total_encode_time = (cpi->time_receive_data +
cpi->time_compress_data) / 1000.000;
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000;
const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
if (cpi->b_calculate_psnr)
{
@ -2292,12 +2293,14 @@ void vp8_remove_compressor(VP8_COMP **ptr)
cpi->summed_weights, 8.0);
fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t"
"GLPsnrP\tVPXSSIM\t Time(us)\n");
"GLPsnrP\tVPXSSIM\t Time(us) Rc-Err "
"Abs Err\n");
fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
"%7.3f\t%8.0f\n",
"%7.3f\t%8.0f %7.2f %7.2f\n",
dr, cpi->total / cpi->count, total_psnr,
cpi->totalp / cpi->count, total_psnr2,
total_ssim, total_encode_time);
total_ssim, total_encode_time,
rate_err, fabs(rate_err));
}
}
fclose(f);
@ -5168,7 +5171,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
vp8_second_pass(cpi);
encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags);
cpi->twopass.bits_left -= 8 * *size;
cpi->twopass.bits_left -= 8 * (int)(*size);
if (!cpi->common.refresh_alt_ref_frame)
{
@ -5772,7 +5775,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
return -1;
// Check number of rows and columns match
if (cpi->common.mb_rows != rows || cpi->common.mb_cols != cols)
if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols)
return -1;
// Range check the delta Q values and convert the external Q range values
@ -5828,7 +5831,7 @@ int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigne
int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols)
{
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols)
if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols)
{
if (map)
{

Просмотреть файл

@ -371,7 +371,7 @@ typedef struct VP8_COMP
double key_frame_rate_correction_factor;
double gf_rate_correction_factor;
unsigned int frames_since_golden;
int frames_since_golden;
/* Count down till next GF */
int frames_till_gf_update_due;

Просмотреть файл

@ -90,7 +90,7 @@ static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
{
int i = 0;
// No skin if block has been zero motion for long consecutive time.
if (consec_zeromv > 80)
if (consec_zeromv > 60)
return 0;
// Exit on grey.
if (cb == 128 && cr == 128)
@ -103,7 +103,7 @@ static int is_skin_color(int y, int cb, int cr, int consec_zeromv)
if (skin_color_diff < skin_threshold[i + 1]) {
if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2))
return 0;
else if (consec_zeromv > 30 &&
else if (consec_zeromv > 25 &&
skin_color_diff > (skin_threshold[i + 1] >> 1))
return 0;
else

Просмотреть файл

@ -1899,7 +1899,8 @@ static int calculate_final_rd_costs(int this_rd,
int prob_skip_cost;
prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1);
prob_skip_cost -= vp8_cost_bit(cpi->prob_skip_false, 0);
prob_skip_cost -=
(int)vp8_cost_bit(cpi->prob_skip_false, 0);
rd->rate2 += prob_skip_cost;
*other_cost += prob_skip_cost;
}

Просмотреть файл

@ -227,12 +227,12 @@ static void invert_quant(int improved_quant, short *quant,
if(improved_quant)
{
unsigned t;
int l;
int l, m;
t = d;
for(l = 0; t > 1; l++)
t>>=1;
t = 1 + (1<<(16+l))/d;
*quant = (short)(t - (1<<16));
m = 1 + (1<<(16+l))/d;
*quant = (short)(m - (1<<16));
*shift = l;
/* use multiplication and constant shift by 16 */
*shift = 1 << (16 - *shift);

Просмотреть файл

@ -22,6 +22,7 @@
#include "vpx/vp8cx.h"
#include "vp8/encoder/firstpass.h"
#include "vp8/common/onyx.h"
#include "vp8/common/common.h"
#include <stdlib.h>
#include <string.h>
@ -760,7 +761,7 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
unsigned long duration,
unsigned long deadline)
{
unsigned int new_qc;
int new_qc;
#if !(CONFIG_REALTIME_ONLY)
/* Use best quality mode if no deadline is given. */
@ -785,7 +786,9 @@ static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx,
new_qc = MODE_REALTIME;
#endif
if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
if (deadline == VPX_DL_REALTIME)
new_qc = MODE_REALTIME;
else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS)
new_qc = MODE_FIRSTPASS;
else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS)
new_qc = (new_qc == MODE_BESTQUALITY)
@ -1116,7 +1119,8 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
{
YV12_BUFFER_CONFIG sd;
vp8_ppflags_t flags = {0};
vp8_ppflags_t flags;
vp8_zero(flags);
if (ctx->preview_ppcfg.post_proc_flag)
{
@ -1305,8 +1309,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
30, /* rc_resize_up_thresold */
VPX_VBR, /* rc_end_usage */
{0}, /* rc_twopass_stats_in */
{0}, /* rc_firstpass_mb_stats_in */
{NULL, 0}, /* rc_twopass_stats_in */
{NULL, 0}, /* rc_firstpass_mb_stats_in */
256, /* rc_target_bandwidth */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
@ -1334,6 +1338,8 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
{0}, /* ts_rate_decimator */
0, /* ts_periodicity */
{0}, /* ts_layer_id */
{0}, /* layer_target_bitrate */
0 /* temporal_layering_mode */
}},
};

Просмотреть файл

@ -522,7 +522,8 @@ static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx,
{
YV12_BUFFER_CONFIG sd;
int64_t time_stamp = 0, time_end_stamp = 0;
vp8_ppflags_t flags = {0};
vp8_ppflags_t flags;
vp8_zero(flags);
if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
{
@ -816,11 +817,12 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
},
{ /* encoder functions */
0,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL
NULL, /* vpx_codec_enc_cfg_map_t */
NULL, /* vpx_codec_encode_fn_t */
NULL, /* vpx_codec_get_cx_data_fn_t */
NULL, /* vpx_codec_enc_config_set_fn_t */
NULL, /* vpx_codec_get_global_headers_fn_t */
NULL, /* vpx_codec_get_preview_frame_fn_t */
NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */
}
};

Просмотреть файл

@ -67,7 +67,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
#define VP9_FRAME_MARKER 0x2
#ifdef __cplusplus
} // extern "C"
#endif

Просмотреть файл

@ -159,3 +159,18 @@ const struct {
{0, 8 }, // 64X32 - {0b0000, 0b1000}
{0, 0 }, // 64X64 - {0b0000, 0b0000}
};
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
// Per-intra-mode flag table with INTRA_MODES entries, one per prediction mode
// in the order annotated below. The flag is 1 for D135_PRED, D117_PRED,
// D153_PRED and TM_PRED and 0 for every other mode.
// NOTE(review): judging by the name, a 1 presumably marks modes whose
// predictor reads the above-left pixel - confirm against the table's users.
const uint8_t need_top_left[INTRA_MODES] = {
0, // DC_PRED
0, // V_PRED
0, // H_PRED
0, // D45_PRED
1, // D135_PRED
1, // D117_PRED
1, // D153_PRED
0, // D207_PRED
0, // D63_PRED
1, // TM_PRED
};
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH

Просмотреть файл

@ -33,6 +33,9 @@ extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
extern const uint8_t need_top_left[INTRA_MODES];
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"

Просмотреть файл

@ -298,196 +298,168 @@ void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
static void filter_selectively_vert_row2(int subsampling_factor,
uint8_t *s, int pitch,
unsigned int mask_16x16_l,
unsigned int mask_8x8_l,
unsigned int mask_4x4_l,
unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n,
unsigned int mask_16x16,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
uint8_t *ss[2];
ss[0] = s;
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
mask; mask >>= 1) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
for (mask =
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
mask; mask = (mask & ~dual_one) >> 1) {
if (mask & dual_one) {
const loop_filter_thresh *lfis[2];
lfis[0] = lfthr + *lfl;
lfis[1] = lfthr + *(lfl + lfl_forward);
ss[1] = ss[0] + 8 * pitch;
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vpx_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
} else if (mask_16x16_0 & 1) {
vpx_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
if (mask_16x16 & dual_one) {
if ((mask_16x16 & dual_one) == dual_one) {
vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr);
} else {
vpx_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vpx_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_8x8_0 & 1) {
vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
if (mask_8x8 & dual_one) {
if ((mask_8x8 & dual_one) == dual_one) {
vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr, lfis[1]->mblim,
lfis[1]->lim, lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vpx_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_0 & 1) {
vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
if (mask_4x4 & dual_one) {
if ((mask_4x4 & dual_one) == dual_one) {
vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim,
lfis[0]->hev_thr, lfis[1]->mblim,
lfis[1]->lim, lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vpx_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
} else if (mask_4x4_int_0 & 1) {
vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr);
if (mask_4x4_int & dual_one) {
if ((mask_4x4_int & dual_one) == dual_one) {
vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr);
} else {
vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim,
lfi->lim, lfi->hev_thr);
}
}
}
s += 8;
ss[0] += 8;
lfl += 1;
mask_16x16_0 >>= 1;
mask_8x8_0 >>= 1;
mask_4x4_0 >>= 1;
mask_4x4_int_0 >>= 1;
mask_16x16_1 >>= 1;
mask_8x8_1 >>= 1;
mask_4x4_1 >>= 1;
mask_4x4_int_1 >>= 1;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
}
}
#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_filter_selectively_vert_row2(int subsampling_factor,
uint16_t *s, int pitch,
unsigned int mask_16x16_l,
unsigned int mask_8x8_l,
unsigned int mask_4x4_l,
unsigned int mask_4x4_int_l,
const loop_filter_info_n *lfi_n,
unsigned int mask_16x16,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
const int mask_shift = subsampling_factor ? 4 : 8;
const int mask_cutoff = subsampling_factor ? 0xf : 0xff;
const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
const int lfl_forward = subsampling_factor ? 4 : 8;
unsigned int mask_16x16_0 = mask_16x16_l & mask_cutoff;
unsigned int mask_8x8_0 = mask_8x8_l & mask_cutoff;
unsigned int mask_4x4_0 = mask_4x4_l & mask_cutoff;
unsigned int mask_4x4_int_0 = mask_4x4_int_l & mask_cutoff;
unsigned int mask_16x16_1 = (mask_16x16_l >> mask_shift) & mask_cutoff;
unsigned int mask_8x8_1 = (mask_8x8_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_1 = (mask_4x4_l >> mask_shift) & mask_cutoff;
unsigned int mask_4x4_int_1 = (mask_4x4_int_l >> mask_shift) & mask_cutoff;
const unsigned int dual_one = 1 | (1 << lfl_forward);
unsigned int mask;
uint16_t *ss[2];
ss[0] = s;
for (mask = mask_16x16_0 | mask_8x8_0 | mask_4x4_0 | mask_4x4_int_0 |
mask_16x16_1 | mask_8x8_1 | mask_4x4_1 | mask_4x4_int_1;
mask; mask >>= 1) {
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
for (mask =
(mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff;
mask; mask = (mask & ~dual_one) >> 1) {
if (mask & dual_one) {
const loop_filter_thresh *lfis[2];
lfis[0] = lfthr + *lfl;
lfis[1] = lfthr + *(lfl + lfl_forward);
ss[1] = ss[0] + 8 * pitch;
if (mask & 1) {
if ((mask_16x16_0 | mask_16x16_1) & 1) {
if ((mask_16x16_0 & mask_16x16_1) & 1) {
vpx_highbd_lpf_vertical_16_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
} else if (mask_16x16_0 & 1) {
vpx_highbd_lpf_vertical_16(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_16x16 & dual_one) {
if ((mask_16x16 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_16(s + 8 *pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)];
vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_8x8_0 | mask_8x8_1) & 1) {
if ((mask_8x8_0 & mask_8x8_1) & 1) {
vpx_highbd_lpf_vertical_8_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_8x8_0 & 1) {
vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_8x8 & dual_one) {
if ((mask_8x8 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)];
vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_4x4_0 | mask_4x4_1) & 1) {
if ((mask_4x4_0 & mask_4x4_1) & 1) {
vpx_highbd_lpf_vertical_4_dual(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_0 & 1) {
vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_4x4 & dual_one) {
if ((mask_4x4 & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)];
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim,
lfi->lim, lfi->hev_thr, bd);
}
}
if ((mask_4x4_int_0 | mask_4x4_int_1) & 1) {
if ((mask_4x4_int_0 & mask_4x4_int_1) & 1) {
vpx_highbd_lpf_vertical_4_dual(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr, bd);
} else if (mask_4x4_int_0 & 1) {
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
lfi0->hev_thr, bd);
if (mask_4x4_int & dual_one) {
if ((mask_4x4_int & dual_one) == dual_one) {
vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim,
lfis[0]->lim, lfis[0]->hev_thr,
lfis[1]->mblim, lfis[1]->lim,
lfis[1]->hev_thr, bd);
} else {
vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
lfi1->lim, lfi1->hev_thr, bd);
const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)];
vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch,
lfi->mblim, lfi->lim, lfi->hev_thr, bd);
}
}
}
s += 8;
ss[0] += 8;
lfl += 1;
mask_16x16_0 >>= 1;
mask_8x8_0 >>= 1;
mask_4x4_0 >>= 1;
mask_4x4_int_0 >>= 1;
mask_16x16_1 >>= 1;
mask_8x8_1 >>= 1;
mask_4x4_1 >>= 1;
mask_4x4_int_1 >>= 1;
mask_16x16 >>= 1;
mask_8x8 >>= 1;
mask_4x4 >>= 1;
mask_4x4_int >>= 1;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@ -497,17 +469,17 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
count = 1;
if (mask & 1) {
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@ -520,7 +492,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@ -549,7 +521,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@ -574,7 +546,7 @@ static void filter_selectively_horiz(uint8_t *s, int pitch,
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
} else if (mask_4x4_int & 1) {
} else {
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr);
}
@ -594,17 +566,17 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
int count;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= count) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
count = 1;
if (mask & 1) {
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask_16x16 & 1) {
if ((mask_16x16 & 3) == 3) {
vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
@ -617,7 +589,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else if (mask_8x8 & 1) {
if ((mask_8x8 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@ -650,7 +622,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
} else if (mask_4x4 & 1) {
if ((mask_4x4 & 3) == 3) {
// Next block's thresholds.
const loop_filter_thresh *lfin = lfi_n->lfthr + *(lfl + 1);
const loop_filter_thresh *lfin = lfthr + *(lfl + 1);
vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, lfin->mblim, lfin->lim,
@ -679,7 +651,7 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
lfi->lim, lfi->hev_thr, bd);
}
}
} else if (mask_4x4_int & 1) {
} else {
vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
lfi->hev_thr, bd);
}
@ -700,7 +672,6 @@ static void highbd_filter_selectively_horiz(uint16_t *s, int pitch,
// whether there were any coefficients encoded, and the loop filter strength
// block we are currently looking at. Shift is used to position the
// 1's we produce.
// TODO(JBB) Need another function for different resolution color..
static void build_masks(const loop_filter_info_n *const lfi_n,
const MODE_INFO *mi, const int shift_y,
const int shift_uv,
@ -935,7 +906,6 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
// TODO(JBB): This function only works for yv12.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
MODE_INFO **mi, const int mode_info_stride,
LOOP_FILTER_MASK *lfm) {
@ -971,9 +941,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
vp9_zero(*lfm);
assert(mip[0] != NULL);
// TODO(jimbankoski): Try moving most of the following code into decode
// loop and storing lfm in the mbmi structure so that we don't have to go
// through the recursive loop structure multiple times.
switch (mip[0]->sb_type) {
case BLOCK_64X64:
build_masks(lfi_n, mip[0] , 0, 0, lfm);
@ -1077,8 +1044,6 @@ void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
}
break;
}
vp9_adjust_mask(cm, mi_row, mi_col, lfm);
}
static void filter_selectively_vert(uint8_t *s, int pitch,
@ -1086,13 +1051,13 @@ static void filter_selectively_vert(uint8_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@ -1120,13 +1085,13 @@ static void highbd_filter_selectively_vert(uint16_t *s, int pitch,
unsigned int mask_8x8,
unsigned int mask_4x4,
unsigned int mask_4x4_int,
const loop_filter_info_n *lfi_n,
const loop_filter_thresh *lfthr,
const uint8_t *lfl, int bd) {
unsigned int mask;
for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int;
mask; mask >>= 1) {
const loop_filter_thresh *lfi = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi = lfthr + *lfl;
if (mask & 1) {
if (mask_16x16 & 1) {
@ -1257,23 +1222,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3],
cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
cm->lf_info.lfthr, &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_vert(dst->buf, dst->stride,
mask_16x16_c & border_mask,
mask_8x8_c & border_mask,
mask_4x4_c & border_mask,
mask_4x4_int[r],
&cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
@ -1306,23 +1266,18 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3],
cm->lf_info.lfthr, &lfl[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
cm->lf_info.lfthr, &lfl[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_horiz(dst->buf, dst->stride,
mask_16x16_r,
mask_8x8_r,
mask_4x4_r,
mask_4x4_int_r,
&cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
@ -1344,27 +1299,29 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
// Vertical pass: do 2 rows at one time
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
unsigned int mask_16x16_l = mask_16x16 & 0xffff;
unsigned int mask_8x8_l = mask_8x8 & 0xffff;
unsigned int mask_4x4_l = mask_4x4 & 0xffff;
unsigned int mask_4x4_int_l = mask_4x4_int & 0xffff;
// Disable filtering on the leftmost column.
// Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfm->lfl_y[r << 3], (int)cm->bit_depth);
highbd_filter_selectively_vert_row2(plane->subsampling_x,
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride,
(unsigned int)mask_16x16,
(unsigned int)mask_8x8,
(unsigned int)mask_4x4,
(unsigned int)mask_4x4_int,
cm->lf_info.lfthr,
&lfm->lfl_y[r << 3],
(int)cm->bit_depth);
} else {
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
(unsigned int)mask_16x16,
(unsigned int)mask_8x8,
(unsigned int)mask_4x4,
(unsigned int)mask_4x4_int,
cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride, mask_16x16_l, mask_8x8_l,
mask_4x4_l, mask_4x4_int_l, &cm->lf_info, &lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 16;
@ -1397,19 +1354,18 @@ void vp9_filter_block_plane_ss00(VP9_COMMON *const cm,
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info, &lfm->lfl_y[r << 3],
(int)cm->bit_depth);
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff,
cm->lf_info.lfthr, &lfm->lfl_y[r << 3],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
mask_4x4_r, mask_4x4_int & 0xff,
cm->lf_info.lfthr, &lfm->lfl_y[r << 3]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int & 0xff, &cm->lf_info,
&lfm->lfl_y[r << 3]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@ -1443,38 +1399,35 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
{
unsigned int mask_16x16_l = mask_16x16 & 0xff;
unsigned int mask_8x8_l = mask_8x8 & 0xff;
unsigned int mask_4x4_l = mask_4x4 & 0xff;
unsigned int mask_4x4_int_l = mask_4x4_int & 0xff;
// Disable filtering on the leftmost column.
// Disable filtering on the leftmost column.
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(
plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1], (int)cm->bit_depth);
} else {
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1]);
}
#else
filter_selectively_vert_row2(
plane->subsampling_x, dst->buf, dst->stride,
mask_16x16_l, mask_8x8_l, mask_4x4_l, mask_4x4_int_l, &cm->lf_info,
&lfl_uv[r << 1]);
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert_row2(plane->subsampling_x,
CONVERT_TO_SHORTPTR(dst->buf),
dst->stride,
(unsigned int)mask_16x16,
(unsigned int)mask_8x8,
(unsigned int)mask_4x4,
(unsigned int)mask_4x4_int,
cm->lf_info.lfthr, &lfl_uv[r << 1],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride,
(unsigned int)mask_16x16,
(unsigned int)mask_8x8,
(unsigned int)mask_4x4,
(unsigned int)mask_4x4_int,
cm->lf_info.lfthr, &lfl_uv[r << 1]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 16 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
}
dst->buf += 16 * dst->stride;
mask_16x16 >>= 8;
mask_8x8 >>= 8;
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
}
// Horizontal pass
@ -1506,17 +1459,16 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
if (cm->use_highbitdepth) {
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
&lfl_uv[r << 1], (int)cm->bit_depth);
mask_4x4_r, mask_4x4_int_r,
cm->lf_info.lfthr, &lfl_uv[r << 1],
(int)cm->bit_depth);
} else {
#endif // CONFIG_VP9_HIGHBITDEPTH
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr,
&lfl_uv[r << 1]);
#if CONFIG_VP9_HIGHBITDEPTH
}
#else
filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r,
mask_4x4_r, mask_4x4_int_r, &cm->lf_info,
&lfl_uv[r << 1]);
#endif // CONFIG_VP9_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
@ -1552,7 +1504,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm,
vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
// TODO(jimbankoski): For 444 only need to do y mask.
vp9_adjust_mask(cm, mi_row, mi_col, lfm);
vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm);
@ -1592,6 +1544,8 @@ void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame,
}
// Used by the encoder to build the loopfilter masks.
// TODO(slavarnway): Do the encoder the same way the decoder does it and
// build the masks in line as part of the encode process.
void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level,
int partial_frame) {
int start_mi_row, end_mi_row, mi_rows_to_filter;

Просмотреть файл

@ -12,6 +12,7 @@
#include <stdlib.h>
#include <stdio.h>
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_scale_rtcd.h"
#include "./vp9_rtcd.h"
@ -587,32 +588,6 @@ static void fillrd(struct postproc_state *state, int q, int a) {
state->last_noise = a;
}
void vp9_plane_add_noise_c(uint8_t *start, char *noise,
char blackclamp[16],
char whiteclamp[16],
char bothclamp[16],
unsigned int width, unsigned int height, int pitch) {
unsigned int i, j;
// TODO(jbb): why does simd code use both but c doesn't, normalize and
// fix..
(void) bothclamp;
for (i = 0; i < height; i++) {
uint8_t *pos = start + i * pitch;
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
for (j = 0; j < width; j++) {
if (pos[j] < blackclamp[0])
pos[j] = blackclamp[0];
if (pos[j] > 255 + whiteclamp[0])
pos[j] = 255 + whiteclamp[0];
pos[j] += ref[j];
}
}
}
static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO *temp = cm->postproc_state.prev_mip;
@ -726,8 +701,7 @@ int vp9_post_proc_frame(struct VP9Common *cm,
ppstate->last_noise != noise_level) {
fillrd(ppstate, 63 - q, noise_level);
}
vp9_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
vpx_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
ppstate->whiteclamp, ppstate->bothclamp,
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
}

Просмотреть файл

@ -28,9 +28,9 @@ int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
if (left_type == above_type)
return left_type;
else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
else if (left_type == SWITCHABLE_FILTERS)
return above_type;
else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
else if (above_type == SWITCHABLE_FILTERS)
return left_type;
else
return SWITCHABLE_FILTERS;

Просмотреть файл

@ -20,19 +20,6 @@
#include "vp9/common/vp9_reconintra.h"
#if CONFIG_VP9_HIGHBITDEPTH
void high_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
}
void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const MV *src_mv,
@ -50,8 +37,9 @@ void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride,
src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS);
high_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd);
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4,
bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@ -222,9 +210,9 @@ static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
xd->bd);
highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys,
xd->bd);
} else {
inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride,
subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys);

Просмотреть файл

@ -34,14 +34,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
}
#if CONFIG_VP9_HIGHBITDEPTH
void high_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd);
static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
const int subpel_x,
const int subpel_y,
const struct scale_factors *sf,
int w, int h, int ref,
const InterpKernel *kernel,
int xs, int ys, int bd) {
sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
src, src_stride, dst, dst_stride,
kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi,

Просмотреть файл

@ -142,6 +142,7 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
// 129 C D .. W X
// 129 E F .. U V
// 129 G H .. S T T T T T
// For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1.
// Get current frame pointer, width and height.
if (plane == 0) {
@ -177,7 +178,6 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
left_col[i] = ref[i * ref_stride - 1];
}
} else {
// TODO(Peter): this value should probably change for high bitdepth
vpx_memset16(left_col, base + 1, bs);
}
}
@ -239,7 +239,6 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
vpx_memset16(above_row + r, above_row[r - 1],
x0 + 2 * bs - frame_width);
}
// TODO(Peter) this value should probably change for high bitdepth
above_row[-1] = left_available ? above_ref[-1] : (base + 1);
} else {
/* faster path if the block does not need extension */
@ -251,13 +250,11 @@ static void build_intra_predictors_high(const MACROBLOCKD *xd,
memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0]));
else
vpx_memset16(above_row + bs, above_row[bs - 1], bs);
// TODO(Peter): this value should probably change for high bitdepth
above_row[-1] = left_available ? above_ref[-1] : (base + 1);
}
}
} else {
vpx_memset16(above_row, base - 1, bs * 2);
// TODO(Peter): this value should probably change for high bitdepth
above_row[-1] = base - 1;
}
}

Просмотреть файл

@ -70,10 +70,6 @@ add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8
specialize qw/vp9_post_proc_down_and_across sse2/;
$vp9_post_proc_down_and_across_sse2=vp9_post_proc_down_and_across_xmm;
add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp9_plane_add_noise sse2/;
$vp9_plane_add_noise_sse2=vp9_plane_add_noise_wmt;
add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
specialize qw/vp9_filter_by_weight16x16 sse2 msa/;
@ -169,9 +165,6 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
specialize qw/vp9_highbd_post_proc_down_and_across/;
add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
specialize qw/vp9_highbd_plane_add_noise/;
}
#
@ -252,7 +245,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
specialize qw/vp9_fwht4x4/, "$sse2_x86inc";
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2 msa/;
@ -264,7 +257,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_fht16x16 sse2 msa/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fwht4x4 msa/, "$mmx_x86inc";
specialize qw/vp9_fwht4x4 msa/, "$sse2_x86inc";
}
#
@ -276,7 +269,7 @@ $vp9_full_search_sad_sse3=vp9_full_search_sadx3;
$vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad/;
specialize qw/vp9_diamond_search_sad avx/;
add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse2 msa/;

Просмотреть файл

@ -28,6 +28,7 @@ static const int seg_feature_data_max[SEG_LVL_MAX] = {
void vp9_clearall_segfeatures(struct segmentation *seg) {
vp9_zero(seg->feature_data);
vp9_zero(seg->feature_mask);
seg->aq_av_offset = 0;
}
void vp9_enable_segfeature(struct segmentation *seg, int segment_id,

Просмотреть файл

@ -47,6 +47,7 @@ struct segmentation {
int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX];
unsigned int feature_mask[MAX_SEGMENTS];
int aq_av_offset;
};
static INLINE int segfeature_active(const struct segmentation *seg,

Просмотреть файл

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"

Просмотреть файл

@ -624,68 +624,6 @@ sym(vp9_mbpost_proc_across_ip_xmm):
%undef flimit4
;void vp9_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
; unsigned char blackclamp[16],
; unsigned char whiteclamp[16],
; unsigned char bothclamp[16],
; unsigned int width, unsigned int height, int pitch)
global sym(vp9_plane_add_noise_wmt) PRIVATE
sym(vp9_plane_add_noise_wmt):
push rbp
mov rbp, rsp
SHADOW_ARGS_TO_STACK 8
GET_GOT rbx
push rsi
push rdi
; end prolog
.addnoise_loop:
call sym(LIBVPX_RAND) WRT_PLT
mov rcx, arg(1) ;noise
and rax, 0xff
add rcx, rax
; we rely on the fact that the clamping vectors are stored contiguously
; in black/white/both order. Note that we have to reload this here because
; rdx could be trashed by rand()
mov rdx, arg(2) ; blackclamp
mov rdi, rcx
movsxd rcx, dword arg(5) ;[Width]
mov rsi, arg(0) ;Pos
xor rax,rax
.addnoise_nextset:
movdqu xmm1,[rsi+rax] ; get the source
psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
paddusb xmm1, [rdx+32] ;bothclamp
psubusb xmm1, [rdx+16] ;whiteclamp
movdqu xmm2,[rdi+rax] ; get the noise for this line
paddb xmm1,xmm2 ; add it in
movdqu [rsi+rax],xmm1 ; store the result
add rax,16 ; move to the next line
cmp rax, rcx
jl .addnoise_nextset
movsxd rax, dword arg(7) ; Pitch
add arg(0), rax ; Start += Pitch
sub dword arg(6), 1 ; Height -= 1
jg .addnoise_loop
; begin epilog
pop rdi
pop rsi
RESTORE_GOT
UNSHADOW_ARGS
pop rbp
ret
SECTION_RODATA
align 16
rd42:

Просмотреть файл

@ -525,8 +525,8 @@ static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride,
}
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
@ -699,8 +699,8 @@ static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd,
}
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
high_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd);
} else {
inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x,
subpel_y, sf, w, h, ref, kernel, xs, ys);
@ -1315,11 +1315,16 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
BufferPool *const pool = cm->buffer_pool;
for (i = 0; i < REFS_PER_FRAME; ++i) {
if (vpx_rb_read_bit(rb)) {
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
found = 1;
break;
if (cm->frame_refs[i].idx != INVALID_IDX) {
YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf;
width = buf->y_crop_width;
height = buf->y_crop_height;
found = 1;
break;
} else {
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Failed to decode frame size");
}
}
}
@ -1334,22 +1339,23 @@ static void setup_frame_size_with_refs(VP9_COMMON *cm,
// has valid dimensions.
for (i = 0; i < REFS_PER_FRAME; ++i) {
RefBuffer *const ref_frame = &cm->frame_refs[i];
has_valid_ref_frame |= valid_ref_frame_size(ref_frame->buf->y_crop_width,
ref_frame->buf->y_crop_height,
width, height);
has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX &&
valid_ref_frame_size(ref_frame->buf->y_crop_width,
ref_frame->buf->y_crop_height,
width, height));
}
if (!has_valid_ref_frame)
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame has invalid size");
for (i = 0; i < REFS_PER_FRAME; ++i) {
RefBuffer *const ref_frame = &cm->frame_refs[i];
if (!valid_ref_frame_img_fmt(
ref_frame->buf->bit_depth,
ref_frame->buf->subsampling_x,
ref_frame->buf->subsampling_y,
cm->bit_depth,
cm->subsampling_x,
cm->subsampling_y))
if (ref_frame->idx == INVALID_IDX ||
!valid_ref_frame_img_fmt(ref_frame->buf->bit_depth,
ref_frame->buf->subsampling_x,
ref_frame->buf->subsampling_y,
cm->bit_depth,
cm->subsampling_x,
cm->subsampling_y))
vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
"Referenced frame has incompatible color format");
}

Просмотреть файл

@ -371,9 +371,9 @@ static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) {
if (left_type == above_type)
return left_type;
else if (left_type == SWITCHABLE_FILTERS && above_type != SWITCHABLE_FILTERS)
else if (left_type == SWITCHABLE_FILTERS)
return above_type;
else if (left_type != SWITCHABLE_FILTERS && above_type == SWITCHABLE_FILTERS)
else if (above_type == SWITCHABLE_FILTERS)
return left_type;
else
return SWITCHABLE_FILTERS;
@ -902,4 +902,10 @@ void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd,
frame_mvs += cm->mi_cols;
}
}
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
(xd->above_mi == NULL || xd->left_mi == NULL) &&
!is_inter_block(mi) && need_top_left[mi->uv_mode])
assert(0);
#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
}

Просмотреть файл

@ -505,7 +505,7 @@ vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
uint32_t this_sz = 0;
for (j = 0; j < mag; ++j)
this_sz |= (*x++) << (j * 8);
this_sz |= ((uint32_t)(*x++)) << (j * 8);
sizes[i] = this_sz;
}
*count = frames;

Просмотреть файл

@ -13,6 +13,7 @@
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/common/vp9_seg_common.h"

Просмотреть файл

@ -22,7 +22,6 @@
#include "vp9/encoder/vp9_segmentation.h"
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
size_t consec_zero_mv_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
if (cr == NULL)
return NULL;
@ -40,21 +39,12 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
}
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
if (cr->consec_zero_mv == NULL) {
vp9_cyclic_refresh_free(cr);
return NULL;
}
memset(cr->consec_zero_mv, 0, consec_zero_mv_size);
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
vpx_free(cr->map);
vpx_free(cr->last_coded_q_map);
vpx_free(cr->consec_zero_mv);
vpx_free(cr);
}
@ -244,7 +234,6 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
BLOCK_SIZE bsize) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
MV mv = mi->mv[0].as_mv;
const int bw = num_8x8_blocks_wide_lookup[bsize];
const int bh = num_8x8_blocks_high_lookup[bsize];
const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
@ -268,15 +257,8 @@ void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id],
0, MAXQ),
cr->last_coded_q_map[map_offset]);
// Update the consecutive zero/low_mv count.
if (is_inter_block(mi) && (abs(mv.row) < 8 && abs(mv.col) < 8)) {
if (cr->consec_zero_mv[map_offset] < 255)
cr->consec_zero_mv[map_offset]++;
} else {
cr->consec_zero_mv[map_offset] = 0;
}
}
}
}
// Update the actual number of blocks that were applied the segment delta q.
@ -410,13 +392,18 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
cr->target_num_seg_blocks = 0;
if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
consec_zero_mv_thresh = 100;
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
consec_zero_mv_thresh = 80;
}
qindex_thresh =
cpi->oxcf.content == VP9E_CONTENT_SCREEN
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
// More aggressive settings for noisy content.
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
consec_zero_mv_thresh = 80;
qindex_thresh =
VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),
7 * cm->base_qindex >> 3);
}
do {
int sum_map = 0;
// Get the mi_row/mi_col corresponding to superblock index i.
@ -441,7 +428,7 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
if (cr->map[bl_index2] == 0) {
count_tot++;
if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
sum_map++;
count_sel++;
}
@ -480,6 +467,8 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->percent_refresh = 5;
cr->max_qdelta_perc = 50;
cr->time_for_refresh = 0;
cr->motion_thresh = 32;
cr->rate_boost_fac = 15;
// Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4)
// periods of the refresh cycle, after a key frame.
// Account for larger interval on base layer for temporal layers.
@ -489,9 +478,11 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_ratio_qdelta = 3.0;
} else {
cr->rate_ratio_qdelta = 2.0;
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
// Reduce the delta-qp if the estimated source noise is above threshold.
cr->rate_ratio_qdelta = 1.5;
if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
// Reduce the delta-qp if the estimated source noise is above threshold.
cr->rate_ratio_qdelta = 1.7;
cr->rate_boost_fac = 13;
}
}
// Adjust some parameters for low resolutions at low bitrates.
if (cm->width <= 352 &&
@ -499,9 +490,6 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
rc->avg_frame_bandwidth < 3400) {
cr->motion_thresh = 4;
cr->rate_boost_fac = 10;
} else {
cr->motion_thresh = 32;
cr->rate_boost_fac = 15;
}
if (cpi->svc.spatial_layer_id > 0) {
cr->motion_thresh = 4;
@ -544,8 +532,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
if (cm->frame_type == KEY_FRAME) {
memset(cr->last_coded_q_map, MAXQ,
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
memset(cr->consec_zero_mv, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
cr->sb_index = 0;
}
return;
@ -620,7 +606,6 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
memset(cr->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols);
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;

Просмотреть файл

@ -53,8 +53,6 @@ struct CYCLIC_REFRESH {
signed char *map;
// Map of the last q a block was coded at.
uint8_t *last_coded_q_map;
// Count of how many consecutive times a block uses ZEROMV for encoding.
uint8_t *consec_zero_mv;
// Thresholds applied to the projected rate/distortion of the coding block,
// when deciding whether block should be refreshed.
int64_t thresh_rate_sb;

Просмотреть файл

@ -167,7 +167,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
vp9_64_zeros, 0, bw, bh, &sse, &avg);
#endif // CONFIG_VP9_HIGHBITDEPTH
var = sse - (((int64_t)avg * avg) / (bw * bh));
return (256 * var) / (bw * bh);
return (unsigned int)(((uint64_t)256 * var) / (bw * bh));
} else {
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@ -185,7 +185,7 @@ static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x,
x->plane[0].src.stride,
vp9_64_zeros, 0, &sse);
#endif // CONFIG_VP9_HIGHBITDEPTH
return (256 * var) >> num_pels_log2_lookup[bs];
return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]);
}
}

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше