Change-Id: I38dad398844ee424a7a92a745ab703645018d02b
This commit is contained in:
James Zern 2014-08-10 16:15:18 -07:00
Родитель ccddd5d0f9
Коммит 3a7d467da9
57 изменённых файлов: 18211 добавлений и 133 удалений

Просмотреть файл

@ -9,8 +9,12 @@
##
LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
third_party/libyuv/include/libyuv/convert.h \
third_party/libyuv/include/libyuv/convert_argb.h \
third_party/libyuv/include/libyuv/convert_from.h \
third_party/libyuv/include/libyuv/cpu_id.h \
third_party/libyuv/include/libyuv/planar_functions.h \
third_party/libyuv/include/libyuv/rotate.h \
third_party/libyuv/include/libyuv/row.h \
third_party/libyuv/include/libyuv/scale.h \
third_party/libyuv/include/libyuv/scale_row.h \
@ -20,14 +24,15 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
third_party/libyuv/source/row_common.cc \
third_party/libyuv/source/row_mips.cc \
third_party/libyuv/source/row_neon.cc \
third_party/libyuv/source/row_neon64.cc \
third_party/libyuv/source/row_posix.cc \
third_party/libyuv/source/row_win.cc \
third_party/libyuv/source/scale.cc \
third_party/libyuv/source/scale.cc \
third_party/libyuv/source/scale_common.cc \
third_party/libyuv/source/scale_mips.cc \
third_party/libyuv/source/scale_neon.cc \
third_party/libyuv/source/scale_posix.cc \
third_party/libyuv/source/scale_win.cc
third_party/libyuv/source/scale_win.cc \
LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
third_party/libwebm/mkvmuxerutil.cpp \
@ -210,17 +215,18 @@ endif
# from an installed tree or a version controlled tree. Determine
# the proper paths.
ifeq ($(HAVE_ALT_TREE_LAYOUT),yes)
LIB_PATH := $(SRC_PATH_BARE)/../lib
INC_PATH := $(SRC_PATH_BARE)/../include
LIB_PATH-yes := $(SRC_PATH_BARE)/../lib
INC_PATH-yes := $(SRC_PATH_BARE)/../include
else
LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.)
INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8
INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8
INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9
INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9
LIB_PATH := $(call enabled,LIB_PATH)
INC_PATH := $(call enabled,INC_PATH)
endif
INC_PATH-$(CONFIG_LIBYUV) += $(SRC_PATH_BARE)/third_party/libyuv/include
LIB_PATH := $(call enabled,LIB_PATH)
INC_PATH := $(call enabled,INC_PATH)
INTERNAL_CFLAGS = $(addprefix -I,$(INC_PATH))
INTERNAL_LDFLAGS += $(addprefix -L,$(LIB_PATH))

5
third_party/libyuv/README.libvpx поставляемый
Просмотреть файл

@ -1,6 +1,6 @@
Name: libyuv
URL: http://code.google.com/p/libyuv/
Version: 1005
Version: 1041
License: BSD
License File: LICENSE
@ -13,5 +13,4 @@ which down-samples the original input video (f.g. 1280x720) a number of times
in order to encode multiple resolution bit streams.
Local Modifications:
Modified the original scaler code minimally with include file changes to fit
in our current build system.
None.

73
third_party/libyuv/include/libyuv/compare.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,73 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
#define INCLUDE_LIBYUV_COMPARE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Compute a hash for specified memory. Seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
// Sum Square Error - used to compute Mean Square Error or PSNR.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a,
const uint8* src_b, int count);
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
static const int kMaxPsnr = 128;
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height);
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT

254
third_party/libyuv/include/libyuv/convert.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,254 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert I444 to I420.
LIBYUV_API
int I444ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I422 to I420.
LIBYUV_API
int I422ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I411 to I420.
LIBYUV_API
int I411ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy I420 to I420.
#define I420ToI420 I420Copy
LIBYUV_API
int I420Copy(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert I400 (grey) to I420.
LIBYUV_API
int I400ToI420(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV12 to I420.
LIBYUV_API
int NV12ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert NV21 to I420.
LIBYUV_API
int NV21ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert YUY2 to I420.
LIBYUV_API
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert UYVY to I420.
LIBYUV_API
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert M420 to I420.
LIBYUV_API
int M420ToI420(const uint8* src_m420, int src_stride_m420,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert Q420 to I420.
LIBYUV_API
int Q420ToI420(const uint8* src_y, int src_stride_y,
const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ARGB little endian (bgra in memory) to I420.
LIBYUV_API
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// BGRA little endian (argb in memory) to I420.
LIBYUV_API
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// ABGR little endian (rgba in memory) to I420.
LIBYUV_API
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGBA little endian (abgr in memory) to I420.
LIBYUV_API
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB little endian (bgr in memory) to I420.
LIBYUV_API
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB big endian (rgb in memory) to I420.
LIBYUV_API
int RAWToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB16 (RGBP fourcc) little endian to I420.
LIBYUV_API
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB15 (RGBO fourcc) little endian to I420.
LIBYUV_API
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// RGB12 (R444 fourcc) little endian to I420.
LIBYUV_API
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture.
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToI420(const uint8* sample, size_t sample_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height,
int dst_width, int dst_height);
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height);
#endif
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_y" number of bytes in a row of the dst_y plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToI420(const uint8* src_frame, size_t src_size,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT

225
third_party/libyuv/include/libyuv/convert_argb.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,225 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes
#include "libyuv/convert_from.h"
#include "libyuv/planar_functions.h"
#include "libyuv/rotate.h"
// TODO(fbarchard): This set of functions should exactly match convert.h
// Add missing Q420.
// TODO(fbarchard): Add tests. Create random content of right size and convert
// with C vs Opt and or to I420 and compare.
// TODO(fbarchard): Some of these functions lack parameter setting.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Alias.
#define ARGBToARGB ARGBCopy
// Copy ARGB to ARGB.
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I420 to ARGB.
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I444 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert I400 (grey) to ARGB.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Alias.
#define YToARGB I400ToARGB_Reference
// Convert I400 to ARGB. Reverse of ARGBToI400.
LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV12 to ARGB.
LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert NV21 to ARGB.
LIBYUV_API
int NV21ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_vu, int src_stride_vu,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// TODO(fbarchard): Convert Q420 to ARGB.
// LIBYUV_API
// int Q420ToARGB(const uint8* src_y, int src_stride_y,
// const uint8* src_yuy2, int src_stride_yuy2,
// uint8* dst_argb, int dst_stride_argb,
// int width, int height);
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert UYVY to ARGB.
LIBYUV_API
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// BGRA little endian (argb in memory) to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// ABGR little endian (rgba in memory) to ARGB.
LIBYUV_API
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGBA little endian (abgr in memory) to ARGB.
LIBYUV_API
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Deprecated function name.
#define BG24ToARGB RGB24ToARGB
// RGB little endian (bgr in memory) to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB big endian (rgb in memory) to ARGB.
LIBYUV_API
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB16 (RGBP fourcc) little endian to ARGB.
LIBYUV_API
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB15 (RGBO fourcc) little endian to ARGB.
LIBYUV_API
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// RGB12 (R444 fourcc) little endian to ARGB.
LIBYUV_API
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
#ifdef HAVE_JPEG
// src_width/height provided by capture
// dst_width/height for clipping determine final size.
LIBYUV_API
int MJPGToARGB(const uint8* sample, size_t sample_size,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height,
int dst_width, int dst_height);
#endif
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
// "src_size" is needed to parse MJPG.
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
// Normally this would be the same as dst_width, with recommended alignment
// to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected. The caller should
// allocate the I420 buffer according to rotation.
// "dst_stride_u" number of bytes in a row of the dst_u plane.
// Normally this would be the same as (dst_width + 1) / 2, with
// recommended alignment to 16 bytes for better efficiency.
// If rotation of 90 or 270 is used, stride is affected.
// "crop_x" and "crop_y" are starting position for cropping.
// To center, crop_x = (src_width - dst_width) / 2
// crop_y = (src_height - dst_height) / 2
// "src_width" / "src_height" is size of src_frame in pixels.
// "src_height" can be negative indicating a vertically flipped image source.
// "crop_width" / "crop_height" is the size to crop the src to.
// Must be less than or equal to src_width/src_height
// Cropping parameters are pre-rotation.
// "rotation" can be 0, 90, 180 or 270.
// "format" is a fourcc. ie 'I420', 'YUY2'
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
LIBYUV_API
int ConvertToARGB(const uint8* src_frame, size_t src_size,
uint8* dst_argb, int dst_stride_argb,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT

173
third_party/libyuv/include/libyuv/convert_from.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,173 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// See Also convert.h for conversions from formats to I420.
// I420Copy in convert to I420ToI420.
LIBYUV_API
int I420ToI422(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int I420ToI411(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
LIBYUV_API
int I400Copy(const uint8* src_y, int src_stride_y,
uint8* dst_y, int dst_stride_y,
int width, int height);
// TODO(fbarchard): I420ToM420
// TODO(fbarchard): I420ToQ420
LIBYUV_API
int I420ToNV12(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
LIBYUV_API
int I420ToNV21(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
LIBYUV_API
int I420ToYUY2(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToUYVY(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToBGRA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToABGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int I420ToRGBA(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
LIBYUV_API
int I420ToRGB24(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRAW(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToRGB565(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
// Convert I420 to specified format.
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
LIBYUV_API
int ConvertFromI420(const uint8* y, int y_stride,
const uint8* u, int u_stride,
const uint8* v, int v_stride,
uint8* dst_sample, int dst_sample_stride,
int width, int height,
uint32 format);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT

166
third_party/libyuv/include/libyuv/convert_from_argb.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,166 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy ARGB to ARGB.
#define ARGBToARGB ARGBCopy
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Convert ARGB To BGRA.
LIBYUV_API
int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
uint8* dst_bgra, int dst_stride_bgra,
int width, int height);
// Convert ARGB To ABGR.
LIBYUV_API
int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_abgr, int dst_stride_abgr,
int width, int height);
// Convert ARGB To RGBA.
LIBYUV_API
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgba, int dst_stride_rgba,
int width, int height);
// Convert ARGB To RGB24.
LIBYUV_API
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb24, int dst_stride_rgb24,
int width, int height);
// Convert ARGB To RAW.
LIBYUV_API
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb, int dst_stride_rgb,
int width, int height);
// Convert ARGB To RGB565.
LIBYUV_API
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
uint8* dst_rgb565, int dst_stride_rgb565,
int width, int height);
// Convert ARGB To ARGB1555.
LIBYUV_API
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb1555, int dst_stride_argb1555,
int width, int height);
// Convert ARGB To ARGB4444.
LIBYUV_API
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb4444, int dst_stride_argb4444,
int width, int height);
// Convert ARGB To I444.
LIBYUV_API
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I422.
LIBYUV_API
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I420. (also in convert.h)
LIBYUV_API
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J420. (JPeg full range I420).
LIBYUV_API
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB To I411.
LIBYUV_API
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Convert ARGB to J400. (JPeg full range).
LIBYUV_API
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
uint8* dst_yj, int dst_stride_yj,
int width, int height);
// Convert ARGB to I400.
LIBYUV_API
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
int width, int height);
// Convert ARGB To NV12.
LIBYUV_API
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_uv, int dst_stride_uv,
int width, int height);
// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
// Convert ARGB To NV21.
LIBYUV_API
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
uint8* dst_y, int dst_stride_y,
uint8* dst_vu, int dst_stride_vu,
int width, int height);
// Convert ARGB To YUY2.
LIBYUV_API
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
uint8* dst_yuy2, int dst_stride_yuy2,
int width, int height);
// Convert ARGB To UYVY.
LIBYUV_API
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
uint8* dst_uyvy, int dst_stride_uyvy,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT

2
third_party/libyuv/include/libyuv/cpu_id.h поставляемый
Просмотреть файл

@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
#define INCLUDE_LIBYUV_CPU_ID_H_
#include "basic_types.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {

168
third_party/libyuv/include/libyuv/format_conversion.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,168 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert Bayer RGB formats to I420.
LIBYUV_API
int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
LIBYUV_API
int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height);
// Temporary API mapper.
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer);
// Convert I420 to Bayer RGB formats.
LIBYUV_API
int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
LIBYUV_API
int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height);
// Temporary API mapper.
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_frame, int dst_stride_frame,
int width, int height,
uint32 dst_fourcc_bayer);
// Convert Bayer RGB formats to ARGB.
LIBYUV_API
int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
LIBYUV_API
int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height);
// Temporary API mapper.
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer);
// Converts ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
LIBYUV_API
int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height);
// Temporary API mapper.
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT

193
third_party/libyuv/include/libyuv/mjpeg_decoder.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,193 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
// NOTE: For a simplified public API use convert.h MJPGToI420().
struct jpeg_common_struct;
struct jpeg_decompress_struct;
struct jpeg_source_mgr;
namespace libyuv {
#ifdef __cplusplus
extern "C" {
#endif
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
#ifdef __cplusplus
} // extern "C"
#endif
static const uint32 kUnknownDataSize = 0xFFFFFFFF;
enum JpegSubsamplingType {
kJpegYuv420,
kJpegYuv422,
kJpegYuv411,
kJpegYuv444,
kJpegYuv400,
kJpegUnknown
};
struct Buffer {
const uint8* data;
int len;
};
struct BufferVector {
Buffer* buffers;
int len;
int pos;
};
struct SetJmpErrorMgr;
// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
// simply independent JPEG images with a fixed huffman table (which is omitted).
// It is rarely used in video transmission, but is common as a camera capture
// format, especially in Logitech devices. This class implements a decoder for
// MJPEG frames.
//
// See http://tools.ietf.org/html/rfc2435
class LIBYUV_API MJpegDecoder {
public:
typedef void (*CallbackFunction)(void* opaque,
const uint8* const* data,
const int* strides,
int rows);
static const int kColorSpaceUnknown;
static const int kColorSpaceGrayscale;
static const int kColorSpaceRgb;
static const int kColorSpaceYCbCr;
static const int kColorSpaceCMYK;
static const int kColorSpaceYCCK;
MJpegDecoder();
~MJpegDecoder();
// Loads a new frame, reads its headers, and determines the uncompressed
// image format.
// Returns LIBYUV_TRUE if image looks valid and format is supported.
// If return value is LIBYUV_TRUE, then the values for all the following
// getters are populated.
// src_len is the size of the compressed mjpeg frame in bytes.
LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
// Returns width of the last loaded frame in pixels.
int GetWidth();
// Returns height of the last loaded frame in pixels.
int GetHeight();
// Returns format of the last loaded frame. The return value is one of the
// kColorSpace* constants.
int GetColorSpace();
// Number of color components in the color space.
int GetNumComponents();
// Sample factors of the n-th component.
int GetHorizSampFactor(int component);
int GetVertSampFactor(int component);
int GetHorizSubSampFactor(int component);
int GetVertSubSampFactor(int component);
// Public for testability.
int GetImageScanlinesPerImcuRow();
// Public for testability.
int GetComponentScanlinesPerImcuRow(int component);
// Width of a component in bytes.
int GetComponentWidth(int component);
// Height of a component.
int GetComponentHeight(int component);
// Width of a component in bytes with padding for DCTSIZE. Public for testing.
int GetComponentStride(int component);
// Size of a component in bytes.
int GetComponentSize(int component);
// Call this after LoadFrame() if you decide you don't want to decode it
// after all.
LIBYUV_BOOL UnloadFrame();
// Decodes the entire image into a one-buffer-per-color-component format.
// dst_width must match exactly. dst_height must be <= to image height; if
// less, the image is cropped. "planes" must have size equal to at least
// GetNumComponents() and they must point to non-overlapping buffers of size
// at least GetComponentSize(i). The pointers in planes are incremented
// to point to after the end of the written data.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
// Decodes the entire image and passes the data via repeated calls to a
// callback function. Each call will get the data for a whole number of
// image scanlines.
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height);
// The helper function which recognizes the jpeg sub-sampling type.
static JpegSubsamplingType JpegSubsamplingTypeHelper(
int* subsample_x, int* subsample_y, int number_of_components);
private:
void AllocOutputBuffers(int num_outbufs);
void DestroyOutputBuffers();
LIBYUV_BOOL StartDecode();
LIBYUV_BOOL FinishDecode();
void SetScanlinePointers(uint8** data);
LIBYUV_BOOL DecodeImcuRow();
int GetComponentScanlinePadding(int component);
// A buffer holding the input data for a frame.
Buffer buf_;
BufferVector buf_vec_;
jpeg_decompress_struct* decompress_struct_;
jpeg_source_mgr* source_mgr_;
SetJmpErrorMgr* error_mgr_;
// LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
// GetComponentScanlinePadding() != 0.)
LIBYUV_BOOL has_scanline_padding_;
// Temporaries used to point to scanline outputs.
int num_outbufs_; // Outermost size of all arrays below.
uint8*** scanlines_;
int* scanlines_sizes_;
// Temporary buffer used for decoding when we can't decode directly to the
// output buffers. Large enough for just one iMCU row.
uint8** databuf_;
int* databuf_strides_;
};
} // namespace libyuv
#endif // __cplusplus
#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT

Просмотреть файл

@ -11,11 +11,11 @@
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
#include "basic_types.h"
#include "libyuv/basic_types.h"
// TODO(fbarchard): Remove the following headers includes.
// #include "convert.h"
// #include "convert_argb.h"
#include "libyuv/convert.h"
#include "libyuv/convert_argb.h"
#ifdef __cplusplus
namespace libyuv {

117
third_party/libyuv/include/libyuv/rotate.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,117 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Supported rotation.
typedef enum RotationMode {
kRotate0 = 0, // No rotation.
kRotate90 = 90, // Rotate 90 degrees clockwise.
kRotate180 = 180, // Rotate 180 degrees.
kRotate270 = 270, // Rotate 270 degrees clockwise.
// Deprecated.
kRotateNone = 0,
kRotateClockwise = 90,
kRotateCounterClockwise = 270,
} RotationModeEnum;
// Rotate I420 frame.
LIBYUV_API
int I420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate NV12 input and store in I420.
LIBYUV_API
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int src_width, int src_height, enum RotationMode mode);
// Rotate a plane by 0, 90, 180, or 270.
LIBYUV_API
int RotatePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int src_width, int src_height, enum RotationMode mode);
// Rotate planes by 90, 180, 270. Deprecated.
LIBYUV_API
void RotatePlane90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotatePlane270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void RotateUV90(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// Rotations for when U and V are interleaved.
// These functions take one input pointer and
// split the data into two buffers while
// rotating them. Deprecated.
LIBYUV_API
void RotateUV180(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
LIBYUV_API
void RotateUV270(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
// The 90 and 270 functions are based on transposes.
// Doing a transpose with reversing the read/write
// order will result in a rotation by +- 90 degrees.
// Deprecated.
LIBYUV_API
void TransposePlane(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height);
LIBYUV_API
void TransposeUV(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width, int height);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT

33
third_party/libyuv/include/libyuv/rotate_argb.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,33 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/rotate.h" // For RotationMode.
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Rotate ARGB frame
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int src_width, int src_height, enum RotationMode mode);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT

39
third_party/libyuv/include/libyuv/row.h поставляемый
Просмотреть файл

@ -13,7 +13,11 @@
#include <stdlib.h> // For malloc.
#include "basic_types.h"
#include "libyuv/basic_types.h"
#if defined(__native_client__)
#include "ppapi/c/pp_macros.h" // For PPAPI_RELEASE
#endif
#ifdef __cplusplus
namespace libyuv {
@ -38,7 +42,8 @@ extern "C" {
var = 0
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
defined(TARGET_IPHONE_SIMULATOR)
defined(TARGET_IPHONE_SIMULATOR) || \
(defined(_MSC_VER) && defined(__clang__))
#define LIBYUV_DISABLE_X86
#endif
// True if compiling for SSSE3 as a requirement.
@ -47,7 +52,12 @@ extern "C" {
#endif
// Enable for NaCL pepper 33 for bundle and AVX2 support.
// #define NEW_BINUTILS
#if defined(__native_client__) && PPAPI_RELEASE >= 33
#define NEW_BINUTILS
#endif
#if defined(__native_client__) && defined(__arm__) && PPAPI_RELEASE < 37
#define LIBYUV_DISABLE_NEON
#endif
// The following are available on all x86 platforms:
#if !defined(LIBYUV_DISABLE_X86) && \
@ -152,6 +162,11 @@ extern "C" {
#define HAS_YUY2TOYROW_SSE2
#endif
// The following are available on x64 Visual C:
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64)
#define HAS_I422TOARGBROW_SSSE3
#endif
// GCC >= 4.7.0 required for AVX2.
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
@ -235,6 +250,10 @@ extern "C" {
#define HAS_MIRRORROW_SSE2
#endif
// The following are available on arm64 platforms:
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
#endif
// The following are available on Neon platforms:
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
@ -330,7 +349,8 @@ extern "C" {
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
#define HAS_COPYROW_MIPS
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_I422TOABGRROW_MIPS_DSPR2
@ -426,7 +446,7 @@ typedef uint8 uvec8[16];
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
#opcode " (%%r15,%%r14),%" #arg "\n" \
BUNDLEUNLOCK
#else
#else // defined(__native_client__) && defined(__x86_64__)
#define BUNDLEALIGN "\n"
#define MEMACCESS(base) "(%" #base ")"
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
@ -443,6 +463,15 @@ typedef uint8 uvec8[16];
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
#endif // defined(__native_client__) && defined(__x86_64__)
#if defined(__arm__)
#undef MEMACCESS
#if defined(__native_client__)
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
#else
#define MEMACCESS(base) "\n"
#endif
#endif
void I444ToARGBRow_NEON(const uint8* src_y,

2
third_party/libyuv/include/libyuv/scale.h поставляемый
Просмотреть файл

@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_H_
#include "basic_types.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {

57
third_party/libyuv/include/libyuv/scale_argb.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,57 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ARGB_H_
#include "libyuv/basic_types.h"
#include "libyuv/scale.h" // For FilterMode
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering);
// Clipped scale takes destination rectangle coordinates for clip values.
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
// TODO(fbarchard): Implement this.
// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint32 src_fourcc,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
uint32 dst_fourcc,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT

Просмотреть файл

@ -11,7 +11,7 @@
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
#define INCLUDE_LIBYUV_SCALE_ROW_H_
#include "basic_types.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {

16
third_party/libyuv/include/libyuv/version.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,16 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
#define INCLUDE_LIBYUV_VERSION_H_
#define LIBYUV_VERSION 1041
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT

182
third_party/libyuv/include/libyuv/video_common.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,182 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Common definitions for video, including fourcc and VideoFormat.
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////
// Definition of FourCC codes
//////////////////////////////////////////////////////////////////////////////
// Convert four characters to a FourCC code.
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
// constants are used in a switch.
#ifdef __cplusplus
#define FOURCC(a, b, c, d) ( \
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
#else
#define FOURCC(a, b, c, d) ( \
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
#endif
// Some pages discussing FourCC codes:
// http://www.fourcc.org/yuv.php
// http://v4l2spec.bytesex.org/spec/book1.htm
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
// FourCC codes grouped according to implementation efficiency.
// Primary formats should convert in 1 efficient step.
// Secondary formats are converted in 2 steps.
// Auxilliary formats call primary converters.
enum FourCC {
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
// 2 Secondary YUV formats: row biplanar.
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
// 4 Secondary RGB formats: 4 Bayer Patterns.
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
// 1 Primary Compressed YUV format.
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
// 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
// 1 Auxiliary compressed YUV format set aside for capturer.
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
// Match any fourcc.
FOURCC_ANY = -1,
};
enum FourCCBpp {
// Canonical fourcc codes used in our code.
FOURCC_BPP_I420 = 12,
FOURCC_BPP_I422 = 16,
FOURCC_BPP_I444 = 24,
FOURCC_BPP_I411 = 12,
FOURCC_BPP_I400 = 8,
FOURCC_BPP_NV21 = 12,
FOURCC_BPP_NV12 = 12,
FOURCC_BPP_YUY2 = 16,
FOURCC_BPP_UYVY = 16,
FOURCC_BPP_M420 = 12,
FOURCC_BPP_Q420 = 12,
FOURCC_BPP_ARGB = 32,
FOURCC_BPP_BGRA = 32,
FOURCC_BPP_ABGR = 32,
FOURCC_BPP_RGBA = 32,
FOURCC_BPP_24BG = 24,
FOURCC_BPP_RAW = 24,
FOURCC_BPP_RGBP = 16,
FOURCC_BPP_RGBO = 16,
FOURCC_BPP_R444 = 16,
FOURCC_BPP_RGGB = 8,
FOURCC_BPP_BGGR = 8,
FOURCC_BPP_GRBG = 8,
FOURCC_BPP_GBRG = 8,
FOURCC_BPP_YV12 = 12,
FOURCC_BPP_YV16 = 16,
FOURCC_BPP_YV24 = 24,
FOURCC_BPP_YU12 = 12,
FOURCC_BPP_J420 = 12,
FOURCC_BPP_J400 = 8,
FOURCC_BPP_MJPG = 0, // 0 means unknown.
FOURCC_BPP_H264 = 0,
FOURCC_BPP_IYUV = 12,
FOURCC_BPP_YU16 = 16,
FOURCC_BPP_YU24 = 24,
FOURCC_BPP_YUYV = 16,
FOURCC_BPP_YUVS = 16,
FOURCC_BPP_HDYC = 16,
FOURCC_BPP_2VUY = 16,
FOURCC_BPP_JPEG = 1,
FOURCC_BPP_DMB1 = 1,
FOURCC_BPP_BA81 = 8,
FOURCC_BPP_RGB3 = 24,
FOURCC_BPP_BGR3 = 24,
FOURCC_BPP_CM32 = 32,
FOURCC_BPP_CM24 = 24,
// Match any fourcc.
FOURCC_BPP_ANY = 0, // 0 means unknown.
};
// Converts fourcc aliases into canonical ones.
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT

325
third_party/libyuv/source/compare.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,325 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/compare.h"
#include <float.h>
#include <math.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
// This module is for Visual C x86
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
#define HAS_HASHDJB2_SSE41
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
#if _MSC_VER >= 1700
#define HAS_HASHDJB2_AVX2
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
#endif
#endif // HAS_HASHDJB2_SSE41
// hash seed of 5381 recommended.
LIBYUV_API
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
const int kBlockSize = 1 << 15; // 32768;
int remainder;
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
#if defined(HAS_HASHDJB2_SSE41)
if (TestCpuFlag(kCpuHasSSE41)) {
HashDjb2_SSE = HashDjb2_SSE41;
}
#endif
#if defined(HAS_HASHDJB2_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
HashDjb2_SSE = HashDjb2_AVX2;
}
#endif
while (count >= (uint64)(kBlockSize)) {
seed = HashDjb2_SSE(src, kBlockSize, seed);
src += kBlockSize;
count -= kBlockSize;
}
remainder = (int)(count) & ~15;
if (remainder) {
seed = HashDjb2_SSE(src, remainder, seed);
src += remainder;
count -= remainder;
}
remainder = (int)(count) & 15;
if (remainder) {
seed = HashDjb2_C(src, remainder, seed);
}
return seed;
}
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
#if !defined(LIBYUV_DISABLE_NEON) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SUMSQUAREERROR_NEON
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
#endif
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
#define HAS_SUMSQUAREERROR_SSE2
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
#endif
// Visual C 2012 required for AVX2.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
#define HAS_SUMSQUAREERROR_AVX2
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
#endif
// TODO(fbarchard): Refactor into row function.
LIBYUV_API
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
int count) {
// SumSquareError returns values 0 to 65535 for each squared difference.
// Up to 65536 of those can be summed and remain within a uint32.
// After each block of 65536 pixels, accumulate into a uint64.
const int kBlockSize = 65536;
int remainder = count & (kBlockSize - 1) & ~31;
uint64 sse = 0;
int i;
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
SumSquareError_C;
#if defined(HAS_SUMSQUAREERROR_NEON)
if (TestCpuFlag(kCpuHasNEON)) {
SumSquareError = SumSquareError_NEON;
}
#endif
#if defined(HAS_SUMSQUAREERROR_SSE2)
if (TestCpuFlag(kCpuHasSSE2) &&
IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
// Note only used for multiples of 16 so count is not checked.
SumSquareError = SumSquareError_SSE2;
}
#endif
#if defined(HAS_SUMSQUAREERROR_AVX2)
if (TestCpuFlag(kCpuHasAVX2)) {
// Note only used for multiples of 32 so count is not checked.
SumSquareError = SumSquareError_AVX2;
}
#endif
#ifdef _OPENMP
#pragma omp parallel for reduction(+: sse)
#endif
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
}
src_a += count & ~(kBlockSize - 1);
src_b += count & ~(kBlockSize - 1);
if (remainder) {
sse += SumSquareError(src_a, src_b, remainder);
src_a += remainder;
src_b += remainder;
}
remainder = count & 31;
if (remainder) {
sse += SumSquareError_C(src_a, src_b, remainder);
}
return sse;
}
LIBYUV_API
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height) {
uint64 sse = 0;
int h;
// Coalesce rows.
if (stride_a == width &&
stride_b == width) {
width *= height;
height = 1;
stride_a = stride_b = 0;
}
for (h = 0; h < height; ++h) {
sse += ComputeSumSquareError(src_a, src_b, width);
src_a += stride_a;
src_b += stride_b;
}
return sse;
}
LIBYUV_API
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
double psnr;
if (sse > 0) {
double mse = (double)(count) / (double)(sse);
psnr = 10.0 * log10(255.0 * 255.0 * mse);
} else {
psnr = kMaxPsnr; // Limit to prevent divide by 0
}
if (psnr > kMaxPsnr)
psnr = kMaxPsnr;
return psnr;
}
LIBYUV_API
double CalcFramePsnr(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height) {
const uint64 samples = width * height;
const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
src_b, stride_b,
width, height);
return SumSquareErrorToPsnr(sse, samples);
}
LIBYUV_API
double I420Psnr(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height) {
const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
src_y_b, stride_y_b,
width, height);
const int width_uv = (width + 1) >> 1;
const int height_uv = (height + 1) >> 1;
const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
src_u_b, stride_u_b,
width_uv, height_uv);
const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
src_v_b, stride_v_b,
width_uv, height_uv);
const uint64 samples = width * height + 2 * (width_uv * height_uv);
const uint64 sse = sse_y + sse_u + sse_v;
return SumSquareErrorToPsnr(sse, samples);
}
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
static double Ssim8x8_C(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b) {
int64 sum_a = 0;
int64 sum_b = 0;
int64 sum_sq_a = 0;
int64 sum_sq_b = 0;
int64 sum_axb = 0;
int i;
for (i = 0; i < 8; ++i) {
int j;
for (j = 0; j < 8; ++j) {
sum_a += src_a[j];
sum_b += src_b[j];
sum_sq_a += src_a[j] * src_a[j];
sum_sq_b += src_b[j] * src_b[j];
sum_axb += src_a[j] * src_b[j];
}
src_a += stride_a;
src_b += stride_b;
}
{
const int64 count = 64;
// scale the constants by number of pixels
const int64 c1 = (cc1 * count * count) >> 12;
const int64 c2 = (cc2 * count * count) >> 12;
const int64 sum_a_x_sum_b = sum_a * sum_b;
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
const int64 sum_a_sq = sum_a*sum_a;
const int64 sum_b_sq = sum_b*sum_b;
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
(count * sum_sq_a - sum_a_sq +
count * sum_sq_b - sum_b_sq + c2);
if (ssim_d == 0.0) {
return DBL_MAX;
}
return ssim_n * 1.0 / ssim_d;
}
}
// We are using a 8x8 moving window with starting location of each 8x8 window
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
// block boundaries to penalize blocking artifacts.
LIBYUV_API
double CalcFrameSsim(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b,
int width, int height) {
int samples = 0;
double ssim_total = 0;
double (*Ssim8x8)(const uint8* src_a, int stride_a,
const uint8* src_b, int stride_b) = Ssim8x8_C;
// sample point start with each 4x4 location
int i;
for (i = 0; i < height - 8; i += 4) {
int j;
for (j = 0; j < width - 8; j += 4) {
ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
samples++;
}
src_a += stride_a * 4;
src_b += stride_b * 4;
}
ssim_total /= samples;
return ssim_total;
}
LIBYUV_API
double I420Ssim(const uint8* src_y_a, int stride_y_a,
const uint8* src_u_a, int stride_u_a,
const uint8* src_v_a, int stride_v_a,
const uint8* src_y_b, int stride_y_b,
const uint8* src_u_b, int stride_u_b,
const uint8* src_v_b, int stride_v_b,
int width, int height) {
const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
src_y_b, stride_y_b, width, height);
const int width_uv = (width + 1) >> 1;
const int height_uv = (height + 1) >> 1;
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
src_u_b, stride_u_b,
width_uv, height_uv);
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
src_v_b, stride_v_b,
width_uv, height_uv);
return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

42
third_party/libyuv/source/compare_common.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,42 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse = 0u;
int i;
for (i = 0; i < count; ++i) {
int diff = src_a[i] - src_b[i];
sse += (uint32)(diff * diff);
}
return sse;
}
// hash seed of 5381 recommended.
// Internal C version of HashDjb2 with int sized count for efficiency.
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
uint32 hash = seed;
int i;
for (i = 0; i < count; ++i) {
hash += (hash << 5) + src[i];
}
return hash;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

64
third_party/libyuv/source/compare_neon.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,64 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
volatile uint32 sse;
asm volatile (
"vmov.u8 q8, #0 \n"
"vmov.u8 q10, #0 \n"
"vmov.u8 q9, #0 \n"
"vmov.u8 q11, #0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n"
MEMACCESS(1)
"vld1.8 {q1}, [%1]! \n"
"subs %2, %2, #16 \n"
"vsubl.u8 q2, d0, d2 \n"
"vsubl.u8 q3, d1, d3 \n"
"vmlal.s16 q8, d4, d4 \n"
"vmlal.s16 q9, d6, d6 \n"
"vmlal.s16 q10, d5, d5 \n"
"vmlal.s16 q11, d7, d7 \n"
"bgt 1b \n"
"vadd.u32 q8, q8, q9 \n"
"vadd.u32 q10, q10, q11 \n"
"vadd.u32 q11, q8, q10 \n"
"vpaddl.u32 q1, q11 \n"
"vadd.u64 d0, d2, d3 \n"
"vmov.32 %3, d0[0] \n"
: "+r"(src_a),
"+r"(src_b),
"+r"(count),
"=r"(sse)
:
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
return sse;
}
#endif // __ARM_NEON__
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

158
third_party/libyuv/source/compare_posix.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,158 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
uint32 sse;
asm volatile ( // NOLINT
"pxor %%xmm0,%%xmm0 \n"
"pxor %%xmm5,%%xmm5 \n"
LABELALIGN
"1: \n"
"movdqa " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"movdqa " MEMACCESS(1) ",%%xmm2 \n"
"lea " MEMLEA(0x10, 1) ",%1 \n"
"sub $0x10,%2 \n"
"movdqa %%xmm1,%%xmm3 \n"
"psubusb %%xmm2,%%xmm1 \n"
"psubusb %%xmm3,%%xmm2 \n"
"por %%xmm2,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm5,%%xmm1 \n"
"punpckhbw %%xmm5,%%xmm2 \n"
"pmaddwd %%xmm1,%%xmm1 \n"
"pmaddwd %%xmm2,%%xmm2 \n"
"paddd %%xmm1,%%xmm0 \n"
"paddd %%xmm2,%%xmm0 \n"
"jg 1b \n"
"pshufd $0xee,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"pshufd $0x1,%%xmm0,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"movd %%xmm0,%3 \n"
: "+r"(src_a), // %0
"+r"(src_b), // %1
"+r"(count), // %2
"=g"(sse) // %3
:
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
#endif
); // NOLINT
return sse;
}
#endif // defined(__x86_64__) || defined(__i386__)
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
uint32 hash;
asm volatile ( // NOLINT
"movd %2,%%xmm0 \n"
"pxor %%xmm7,%%xmm7 \n"
"movdqa %4,%%xmm6 \n"
LABELALIGN
"1: \n"
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
"lea " MEMLEA(0x10, 0) ",%0 \n"
"pmulld %%xmm6,%%xmm0 \n"
"movdqa %5,%%xmm5 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklbw %%xmm7,%%xmm2 \n"
"movdqa %%xmm2,%%xmm3 \n"
"punpcklwd %%xmm7,%%xmm3 \n"
"pmulld %%xmm5,%%xmm3 \n"
"movdqa %6,%%xmm5 \n"
"movdqa %%xmm2,%%xmm4 \n"
"punpckhwd %%xmm7,%%xmm4 \n"
"pmulld %%xmm5,%%xmm4 \n"
"movdqa %7,%%xmm5 \n"
"punpckhbw %%xmm7,%%xmm1 \n"
"movdqa %%xmm1,%%xmm2 \n"
"punpcklwd %%xmm7,%%xmm2 \n"
"pmulld %%xmm5,%%xmm2 \n"
"movdqa %8,%%xmm5 \n"
"punpckhwd %%xmm7,%%xmm1 \n"
"pmulld %%xmm5,%%xmm1 \n"
"paddd %%xmm4,%%xmm3 \n"
"paddd %%xmm2,%%xmm1 \n"
"sub $0x10,%1 \n"
"paddd %%xmm3,%%xmm1 \n"
"pshufd $0xe,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"pshufd $0x1,%%xmm1,%%xmm2 \n"
"paddd %%xmm2,%%xmm1 \n"
"paddd %%xmm1,%%xmm0 \n"
"jg 1b \n"
"movd %%xmm0,%3 \n"
: "+r"(src), // %0
"+r"(count), // %1
"+rm"(seed), // %2
"=g"(hash) // %3
: "m"(kHash16x33), // %4
"m"(kHashMul0), // %5
"m"(kHashMul1), // %6
"m"(kHashMul2), // %7
"m"(kHashMul3) // %8
: "memory", "cc"
#if defined(__SSE2__)
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
#endif
); // NOLINT
return hash;
}
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

232
third_party/libyuv/source/compare_win.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,232 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
__declspec(naked) __declspec(align(16))
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
pxor xmm0, xmm0
pxor xmm5, xmm5
align 4
wloop:
movdqa xmm1, [eax]
lea eax, [eax + 16]
movdqa xmm2, [edx]
lea edx, [edx + 16]
sub ecx, 16
movdqa xmm3, xmm1 // abs trick
psubusb xmm1, xmm2
psubusb xmm2, xmm3
por xmm1, xmm2
movdqa xmm2, xmm1
punpcklbw xmm1, xmm5
punpckhbw xmm2, xmm5
pmaddwd xmm1, xmm1
pmaddwd xmm2, xmm2
paddd xmm0, xmm1
paddd xmm0, xmm2
jg wloop
pshufd xmm1, xmm0, 0xee
paddd xmm0, xmm1
pshufd xmm1, xmm0, 0x01
paddd xmm0, xmm1
movd eax, xmm0
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
#pragma warning(disable: 4752)
__declspec(naked) __declspec(align(16))
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
__asm {
mov eax, [esp + 4] // src_a
mov edx, [esp + 8] // src_b
mov ecx, [esp + 12] // count
vpxor ymm0, ymm0, ymm0 // sum
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
sub edx, eax
align 4
wloop:
vmovdqu ymm1, [eax]
vmovdqu ymm2, [eax + edx]
lea eax, [eax + 32]
sub ecx, 32
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
vpsubusb ymm2, ymm2, ymm1
vpor ymm1, ymm2, ymm3
vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
vpunpckhbw ymm1, ymm1, ymm5
vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
vpmaddwd ymm1, ymm1, ymm1
vpaddd ymm0, ymm0, ymm1
vpaddd ymm0, ymm0, ymm2
jg wloop
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
vpaddd ymm0, ymm0, ymm1
vpermq ymm1, ymm0, 0x02 // high + low lane.
vpaddd ymm0, ymm0, ymm1
vmovd eax, xmm0
vzeroupper
ret
}
}
#endif // _MSC_VER >= 1700
#define HAS_HASHDJB2_SSE41
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
static uvec32 kHashMul0 = {
0x0c3525e1, // 33 ^ 15
0xa3476dc1, // 33 ^ 14
0x3b4039a1, // 33 ^ 13
0x4f5f0981, // 33 ^ 12
};
static uvec32 kHashMul1 = {
0x30f35d61, // 33 ^ 11
0x855cb541, // 33 ^ 10
0x040a9121, // 33 ^ 9
0x747c7101, // 33 ^ 8
};
static uvec32 kHashMul2 = {
0xec41d4e1, // 33 ^ 7
0x4cfa3cc1, // 33 ^ 6
0x025528a1, // 33 ^ 5
0x00121881, // 33 ^ 4
};
static uvec32 kHashMul3 = {
0x00008c61, // 33 ^ 3
0x00000441, // 33 ^ 2
0x00000021, // 33 ^ 1
0x00000001, // 33 ^ 0
};
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
_asm _emit 0x40 _asm _emit reg
__declspec(naked) __declspec(align(16))
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
pxor xmm7, xmm7 // constant 0 for unpck
movdqa xmm6, kHash16x33
align 4
wloop:
movdqu xmm1, [eax] // src[0-15]
lea eax, [eax + 16]
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
movdqa xmm5, kHashMul0
movdqa xmm2, xmm1
punpcklbw xmm2, xmm7 // src[0-7]
movdqa xmm3, xmm2
punpcklwd xmm3, xmm7 // src[0-3]
pmulld(0xdd) // pmulld xmm3, xmm5
movdqa xmm5, kHashMul1
movdqa xmm4, xmm2
punpckhwd xmm4, xmm7 // src[4-7]
pmulld(0xe5) // pmulld xmm4, xmm5
movdqa xmm5, kHashMul2
punpckhbw xmm1, xmm7 // src[8-15]
movdqa xmm2, xmm1
punpcklwd xmm2, xmm7 // src[8-11]
pmulld(0xd5) // pmulld xmm2, xmm5
movdqa xmm5, kHashMul3
punpckhwd xmm1, xmm7 // src[12-15]
pmulld(0xcd) // pmulld xmm1, xmm5
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop
movd eax, xmm0 // return hash
ret
}
}
// Visual C 2012 required for AVX2.
#if _MSC_VER >= 1700
__declspec(naked) __declspec(align(16))
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
__asm {
mov eax, [esp + 4] // src
mov ecx, [esp + 8] // count
movd xmm0, [esp + 12] // seed
movdqa xmm6, kHash16x33
align 4
wloop:
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
pmulld xmm0, xmm6 // hash *= 33 ^ 16
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
pmulld xmm3, kHashMul0
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
pmulld xmm4, kHashMul1
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
pmulld xmm2, kHashMul2
lea eax, [eax + 16]
pmulld xmm1, kHashMul3
paddd xmm3, xmm4 // add 16 results
paddd xmm1, xmm2
sub ecx, 16
paddd xmm1, xmm3
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
paddd xmm1, xmm2
pshufd xmm2, xmm1, 0x01
paddd xmm1, xmm2
paddd xmm0, xmm1
jg wloop
movd eax, xmm0 // return hash
ret
}
}
#endif // _MSC_VER >= 1700
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

1513
third_party/libyuv/source/convert.cc поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

938
third_party/libyuv/source/convert_argb.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,938 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Copy ARGB with optional flipping
LIBYUV_API
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
if (!src_argb || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
width * 4, height);
return 0;
}
// Convert I444 to ARGB.
LIBYUV_API
int I444ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I444ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I444ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u == width &&
src_stride_v == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I444TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I444ToARGBRow = I444ToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_I444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I444ToARGBRow = I444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I444ToARGBRow = I444ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert I422 to ARGB.
LIBYUV_API
int I422ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u * 2 == width &&
src_stride_v * 2 == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert I411 to ARGB.
LIBYUV_API
int I411ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I411ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I411ToARGBRow_C;
if (!src_y || !src_u || !src_v ||
!dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
src_stride_u * 4 == width &&
src_stride_v * 4 == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
}
#if defined(HAS_I411TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I411ToARGBRow = I411ToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_I411TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I411ToARGBRow = I411ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I411ToARGBRow = I411ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
src_u += src_stride_u;
src_v += src_stride_v;
}
return 0;
}
// Convert I400 to ARGB.
LIBYUV_API
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*YToARGBRow)(const uint8* y_buf,
uint8* rgb_buf,
int width) = YToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
// Coalesce rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = dst_stride_argb = 0;
}
#if defined(HAS_YTOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
YToARGBRow = YToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
YToARGBRow = YToARGBRow_SSE2;
}
}
#elif defined(HAS_YTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
YToARGBRow = YToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
YToARGBRow = YToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
YToARGBRow(src_y, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
}
return 0;
}
// Convert I400 to ARGB.
LIBYUV_API
int I400ToARGB(const uint8* src_y, int src_stride_y,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
I400ToARGBRow_C;
if (!src_y || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_y = src_y + (height - 1) * src_stride_y;
src_stride_y = -src_stride_y;
}
// Coalesce rows.
if (src_stride_y == width &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_y = dst_stride_argb = 0;
}
#if defined(HAS_I400TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
I400ToARGBRow = I400ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I400ToARGBRow = I400ToARGBRow_SSE2;
}
}
}
#elif defined(HAS_I400TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I400ToARGBRow = I400ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I400ToARGBRow = I400ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
I400ToARGBRow(src_y, dst_argb, width);
src_y += src_stride_y;
dst_argb += dst_stride_argb;
}
return 0;
}
// Shuffle table for converting BGRA to ARGB.
static uvec8 kShuffleMaskBGRAToARGB = {
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
};
// Shuffle table for converting ABGR to ARGB.
static uvec8 kShuffleMaskABGRToARGB = {
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
};
// Shuffle table for converting RGBA to ARGB.
static uvec8 kShuffleMaskRGBAToARGB = {
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
};
// Convert BGRA to ARGB.
LIBYUV_API
int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return ARGBShuffle(src_bgra, src_stride_bgra,
dst_argb, dst_stride_argb,
(const uint8*)(&kShuffleMaskBGRAToARGB),
width, height);
}
// Convert ARGB to BGRA (same as BGRAToARGB).
LIBYUV_API
int ARGBToBGRA(const uint8* src_bgra, int src_stride_bgra,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return ARGBShuffle(src_bgra, src_stride_bgra,
dst_argb, dst_stride_argb,
(const uint8*)(&kShuffleMaskBGRAToARGB),
width, height);
}
// Convert ABGR to ARGB.
LIBYUV_API
int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return ARGBShuffle(src_abgr, src_stride_abgr,
dst_argb, dst_stride_argb,
(const uint8*)(&kShuffleMaskABGRToARGB),
width, height);
}
// Convert ARGB to ABGR to (same as ABGRToARGB).
LIBYUV_API
int ARGBToABGR(const uint8* src_abgr, int src_stride_abgr,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return ARGBShuffle(src_abgr, src_stride_abgr,
dst_argb, dst_stride_argb,
(const uint8*)(&kShuffleMaskABGRToARGB),
width, height);
}
// Convert RGBA to ARGB.
LIBYUV_API
int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
return ARGBShuffle(src_rgba, src_stride_rgba,
dst_argb, dst_stride_argb,
(const uint8*)(&kShuffleMaskRGBAToARGB),
width, height);
}
// Convert RGB24 to ARGB.
LIBYUV_API
int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RGB24ToARGBRow_C;
if (!src_rgb24 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
src_stride_rgb24 = -src_stride_rgb24;
}
// Coalesce rows.
if (src_stride_rgb24 == width * 3 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_rgb24 = dst_stride_argb = 0;
}
#if defined(HAS_RGB24TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
}
}
#elif defined(HAS_RGB24TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB24ToARGBRow(src_rgb24, dst_argb, width);
src_rgb24 += src_stride_rgb24;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert RAW to ARGB.
LIBYUV_API
int RAWToARGB(const uint8* src_raw, int src_stride_raw,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
RAWToARGBRow_C;
if (!src_raw || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_raw = src_raw + (height - 1) * src_stride_raw;
src_stride_raw = -src_stride_raw;
}
// Coalesce rows.
if (src_stride_raw == width * 3 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_raw = dst_stride_argb = 0;
}
#if defined(HAS_RAWTOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
RAWToARGBRow = RAWToARGBRow_SSSE3;
}
}
#elif defined(HAS_RAWTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RAWToARGBRow = RAWToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RAWToARGBRow = RAWToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
RAWToARGBRow(src_raw, dst_argb, width);
src_raw += src_stride_raw;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert RGB565 to ARGB.
LIBYUV_API
int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
RGB565ToARGBRow_C;
if (!src_rgb565 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
src_stride_rgb565 = -src_stride_rgb565;
}
// Coalesce rows.
if (src_stride_rgb565 == width * 2 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_rgb565 = dst_stride_argb = 0;
}
#if defined(HAS_RGB565TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
}
}
#elif defined(HAS_RGB565TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
RGB565ToARGBRow(src_rgb565, dst_argb, width);
src_rgb565 += src_stride_rgb565;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert ARGB1555 to ARGB.
LIBYUV_API
int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
int pix) = ARGB1555ToARGBRow_C;
if (!src_argb1555 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
src_stride_argb1555 = -src_stride_argb1555;
}
// Coalesce rows.
if (src_stride_argb1555 == width * 2 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_argb1555 = dst_stride_argb = 0;
}
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
src_argb1555 += src_stride_argb1555;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert ARGB4444 to ARGB.
LIBYUV_API
int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
int pix) = ARGB4444ToARGBRow_C;
if (!src_argb4444 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
src_stride_argb4444 = -src_stride_argb4444;
}
// Coalesce rows.
if (src_stride_argb4444 == width * 2 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_argb4444 = dst_stride_argb = 0;
}
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
}
}
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
src_argb4444 += src_stride_argb4444;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert NV12 to ARGB.
LIBYUV_API
int NV12ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV12ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV12TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_uv += src_stride_uv;
}
}
return 0;
}
// Convert NV21 to ARGB.
LIBYUV_API
int NV21ToARGB(const uint8* src_y, int src_stride_y,
const uint8* src_uv, int src_stride_uv,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*NV21ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV21ToARGBRow_C;
if (!src_y || !src_uv || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV21TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
}
}
}
#endif
#if defined(HAS_NV21TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV21ToARGBRow = NV21ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_uv, dst_argb, width);
dst_argb += dst_stride_argb;
src_y += src_stride_y;
if (y & 1) {
src_uv += src_stride_uv;
}
}
return 0;
}
// Convert M420 to ARGB.
LIBYUV_API
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*NV12ToARGBRow)(const uint8* y_buf,
const uint8* uv_buf,
uint8* rgb_buf,
int width) = NV12ToARGBRow_C;
if (!src_m420 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
#if defined(HAS_NV12TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_NV12TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
NV12ToARGBRow = NV12ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height - 1; y += 2) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
dst_argb + dst_stride_argb, width);
dst_argb += dst_stride_argb * 2;
src_m420 += src_stride_m420 * 3;
}
if (height & 1) {
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
}
return 0;
}
// Convert YUY2 to ARGB.
LIBYUV_API
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
YUY2ToARGBRow_C;
if (!src_yuy2 || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
src_stride_yuy2 = -src_stride_yuy2;
}
// Coalesce rows.
if (src_stride_yuy2 == width * 2 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_yuy2 = dst_stride_argb = 0;
}
#if defined(HAS_YUY2TOARGBROW_SSSE3)
// Posix is 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_YUY2TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
YUY2ToARGBRow = YUY2ToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
YUY2ToARGBRow(src_yuy2, dst_argb, width);
src_yuy2 += src_stride_yuy2;
dst_argb += dst_stride_argb;
}
return 0;
}
// Convert UYVY to ARGB.
LIBYUV_API
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
uint8* dst_argb, int dst_stride_argb,
int width, int height) {
int y;
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
UYVYToARGBRow_C;
if (!src_uyvy || !dst_argb ||
width <= 0 || height == 0) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
src_stride_uyvy = -src_stride_uyvy;
}
// Coalesce rows.
if (src_stride_uyvy == width * 2 &&
dst_stride_argb == width * 4) {
width *= height;
height = 1;
src_stride_uyvy = dst_stride_argb = 0;
}
#if defined(HAS_UYVYTOARGBROW_SSSE3)
// Posix is 16, Windows is 8.
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
UYVYToARGBRow = UYVYToARGBRow_SSSE3;
}
}
}
#elif defined(HAS_UYVYTOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
UYVYToARGBRow = UYVYToARGBRow_NEON;
}
}
#endif
for (y = 0; y < height; ++y) {
UYVYToARGBRow(src_uyvy, dst_argb, width);
src_uyvy += src_stride_uyvy;
dst_argb += dst_stride_argb;
}
return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

1210
third_party/libyuv/source/convert_from.cc поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

1113
third_party/libyuv/source/convert_from_argb.cc поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

392
third_party/libyuv/source/convert_jpeg.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,392 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#ifdef HAVE_JPEG
struct I420Buffers {
uint8* y;
int y_stride;
uint8* u;
int u_stride;
uint8* v;
int v_stride;
int w;
int h;
};
static void JpegCopyI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I420Copy(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI422ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I422ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI444ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I444ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI411ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I411ToI420(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
static void JpegI400ToI420(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
I420Buffers* dest = (I420Buffers*)(opaque);
I400ToI420(data[0], strides[0],
dest->y, dest->y_stride,
dest->u, dest->u_stride,
dest->v, dest->v_stride,
dest->w, rows);
dest->y += rows * dest->y_stride;
dest->u += ((rows + 1) >> 1) * dest->u_stride;
dest->v += ((rows + 1) >> 1) * dest->v_stride;
dest->h -= rows;
}
// Query size of MJPG in pixels.
LIBYUV_API
int MJPGSize(const uint8* sample, size_t sample_size,
int* width, int* height) {
MJpegDecoder mjpeg_decoder;
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
if (ret) {
*width = mjpeg_decoder.GetWidth();
*height = mjpeg_decoder.GetHeight();
}
mjpeg_decoder.UnloadFrame();
return ret ? 0 : -1; // -1 for runtime failure.
}
// MJPG (Motion JPeg) to I420
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToI420(const uint8* sample,
size_t sample_size,
uint8* y, int y_stride,
uint8* u, int u_stride,
uint8* v, int v_stride,
int w, int h,
int dw, int dh) {
if (sample_size == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;
}
// TODO(fbarchard): Port MJpeg to C.
MJpegDecoder mjpeg_decoder;
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
if (ret && (mjpeg_decoder.GetWidth() != w ||
mjpeg_decoder.GetHeight() != h)) {
// ERROR: MJPEG frame has unexpected dimensions
mjpeg_decoder.UnloadFrame();
return 1; // runtime failure
}
if (ret) {
I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
// YUV420
if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
// YUV422
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
// YUV444
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
// YUV411
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
// YUV400
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceGrayscale &&
mjpeg_decoder.GetNumComponents() == 1 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
} else {
// TODO(fbarchard): Implement conversion for any other colorspace/sample
// factors that occur in practice. 411 is supported by libjpeg
// ERROR: Unable to convert MJPEG frame because format is not supported
mjpeg_decoder.UnloadFrame();
return 1;
}
}
return ret ? 0 : 1;
}
#ifdef HAVE_JPEG
struct ARGBBuffers {
uint8* argb;
int argb_stride;
int w;
int h;
};
static void JpegI420ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I420ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI422ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I422ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI444ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I444ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI411ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I411ToARGB(data[0], strides[0],
data[1], strides[1],
data[2], strides[2],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
static void JpegI400ToARGB(void* opaque,
const uint8* const* data,
const int* strides,
int rows) {
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
I400ToARGB(data[0], strides[0],
dest->argb, dest->argb_stride,
dest->w, rows);
dest->argb += rows * dest->argb_stride;
dest->h -= rows;
}
// MJPG (Motion JPeg) to ARGB
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
LIBYUV_API
int MJPGToARGB(const uint8* sample,
size_t sample_size,
uint8* argb, int argb_stride,
int w, int h,
int dw, int dh) {
if (sample_size == kUnknownDataSize) {
// ERROR: MJPEG frame size unknown
return -1;
}
// TODO(fbarchard): Port MJpeg to C.
MJpegDecoder mjpeg_decoder;
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
if (ret && (mjpeg_decoder.GetWidth() != w ||
mjpeg_decoder.GetHeight() != h)) {
// ERROR: MJPEG frame has unexpected dimensions
mjpeg_decoder.UnloadFrame();
return 1; // runtime failure
}
if (ret) {
ARGBBuffers bufs = { argb, argb_stride, dw, dh };
// YUV420
if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
// YUV422
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
// YUV444
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
// YUV411
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceYCbCr &&
mjpeg_decoder.GetNumComponents() == 3 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
// YUV400
} else if (mjpeg_decoder.GetColorSpace() ==
MJpegDecoder::kColorSpaceGrayscale &&
mjpeg_decoder.GetNumComponents() == 1 &&
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
} else {
// TODO(fbarchard): Implement conversion for any other colorspace/sample
// factors that occur in practice. 411 is supported by libjpeg
// ERROR: Unable to convert MJPEG frame because format is not supported
mjpeg_decoder.UnloadFrame();
return 1;
}
}
return ret ? 0 : 1;
}
#endif
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

327
third_party/libyuv/source/convert_to_argb.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,327 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/convert_argb.h"
#include "libyuv/cpu_id.h"
#include "libyuv/format_conversion.h"
#ifdef HAVE_JPEG
#include "libyuv/mjpeg_decoder.h"
#endif
#include "libyuv/rotate_argb.h"
#include "libyuv/row.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToARGB(const uint8* sample, size_t sample_size,
uint8* crop_argb, int argb_stride,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 fourcc) {
uint32 format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
const uint8* src;
const uint8* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
// and then rotate the I420 to the final destination buffer.
// For in-place conversion, if destination crop_argb is same as source sample,
// also enable temporary buffer.
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
crop_argb == sample;
uint8* tmp_argb = crop_argb;
int tmp_argb_stride = argb_stride;
uint8* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (crop_argb == NULL || sample == NULL ||
src_width <= 0 || crop_width <= 0 ||
src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
inv_crop_height = -inv_crop_height;
}
if (need_buf) {
int argb_size = crop_width * abs_crop_height * 4;
rotate_buffer = (uint8*)malloc(argb_size);
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
crop_argb = rotate_buffer;
argb_stride = crop_width;
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToARGB(src, aligned_src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToARGB(src, aligned_src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RGB24ToARGB(src, src_width * 3,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RAW:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RAWToARGB(src, src_width * 3,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_BGRA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = BGRAToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = RGBAToARGB(src, src_width * 4,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB1555ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_R444:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB4444ToARGB(src, src_width * 2,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
r = NV12ToARGB(src, src_width,
src_uv, aligned_src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
// Call NV12 but with u and v parameters swapped.
r = NV21ToARGB(src, src_width,
src_uv, aligned_src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
case FOURCC_M420:
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
r = M420ToARGB(src, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
// case FOURCC_Q420:
// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
// src_width + crop_x * 2;
// r = Q420ToARGB(src, src_width * 3,
// src_uv, src_width * 3,
// crop_argb, argb_stride,
// crop_width, inv_crop_height);
// break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
src_v = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
r = I420ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
r = I422ToARGB(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
if (format == FOURCC_YV24) {
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
} else {
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToARGB(src_y, src_width,
src_u, src_width,
src_v, src_width,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I411: {
int quarterwidth = (src_width + 3) / 4;
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u = sample + src_width * abs_src_height +
quarterwidth * crop_y + crop_x / 4;
const uint8* src_v = sample + src_width * abs_src_height +
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
r = I411ToARGB(src_y, src_width,
src_u, quarterwidth,
src_v, quarterwidth,
crop_argb, argb_stride,
crop_width, inv_crop_height);
break;
}
#ifdef HAVE_JPEG
case FOURCC_MJPG:
r = MJPGToARGB(sample, sample_size,
crop_argb, argb_stride,
src_width, abs_src_height, crop_width, inv_crop_height);
break;
#endif
default:
r = -1; // unknown fourcc - return failure code.
}
if (need_buf) {
if (!r) {
r = ARGBRotate(crop_argb, argb_stride,
tmp_argb, tmp_argb_stride,
crop_width, abs_crop_height, rotation);
}
free(rotate_buffer);
}
return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

383
third_party/libyuv/source/convert_to_i420.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,383 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
#include "libyuv/convert.h"
#include "libyuv/format_conversion.h"
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Convert camera sample to I420 with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
// With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToI420(const uint8* sample,
size_t sample_size,
uint8* y, int y_stride,
uint8* u, int u_stride,
uint8* v, int v_stride,
int crop_x, int crop_y,
int src_width, int src_height,
int crop_width, int crop_height,
enum RotationMode rotation,
uint32 fourcc) {
uint32 format = CanonicalFourCC(fourcc);
int aligned_src_width = (src_width + 1) & ~1;
const uint8* src;
const uint8* src_uv;
int abs_src_height = (src_height < 0) ? -src_height : src_height;
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
int r = 0;
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
format != FOURCC_NV12 && format != FOURCC_NV21 &&
format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
uint8* tmp_y = y;
uint8* tmp_u = u;
uint8* tmp_v = v;
int tmp_y_stride = y_stride;
int tmp_u_stride = u_stride;
int tmp_v_stride = v_stride;
uint8* rotate_buffer = NULL;
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
if (!y || !u || !v || !sample ||
src_width <= 0 || crop_width <= 0 ||
src_height == 0 || crop_height == 0) {
return -1;
}
if (src_height < 0) {
inv_crop_height = -inv_crop_height;
}
// One pass rotation is available for some formats. For the rest, convert
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
// and then rotate the I420 to the final destination buffer.
// For in-place conversion, if destination y is same as source sample,
// also enable temporary buffer.
if (need_buf) {
int y_size = crop_width * abs_crop_height;
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
if (!rotate_buffer) {
return 1; // Out of memory runtime error.
}
y = rotate_buffer;
u = y + y_size;
v = u + uv_size;
y_stride = crop_width;
u_stride = v_stride = ((crop_width + 1) / 2);
}
switch (format) {
// Single plane formats
case FOURCC_YUY2:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = YUY2ToI420(src, aligned_src_width * 2,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_UYVY:
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
r = UYVYToI420(src, aligned_src_width * 2,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBP:
src = sample + (src_width * crop_y + crop_x) * 2;
r = RGB565ToI420(src, src_width * 2,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBO:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB1555ToI420(src, src_width * 2,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_R444:
src = sample + (src_width * crop_y + crop_x) * 2;
r = ARGB4444ToI420(src, src_width * 2,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_24BG:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RGB24ToI420(src, src_width * 3,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RAW:
src = sample + (src_width * crop_y + crop_x) * 3;
r = RAWToI420(src, src_width * 3,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ARGB:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ARGBToI420(src, src_width * 4,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_BGRA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = BGRAToI420(src, src_width * 4,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_ABGR:
src = sample + (src_width * crop_y + crop_x) * 4;
r = ABGRToI420(src, src_width * 4,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGBA:
src = sample + (src_width * crop_y + crop_x) * 4;
r = RGBAToI420(src, src_width * 4,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
// TODO(fbarchard): Support cropping Bayer by odd numbers
// by adjusting fourcc.
case FOURCC_BGGR:
src = sample + (src_width * crop_y + crop_x);
r = BayerBGGRToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GBRG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGBRGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_GRBG:
src = sample + (src_width * crop_y + crop_x);
r = BayerGRBGToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_RGGB:
src = sample + (src_width * crop_y + crop_x);
r = BayerRGGBToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_I400:
src = sample + src_width * crop_y + crop_x;
r = I400ToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
// Biplanar formats
case FOURCC_NV12:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height, rotation);
break;
case FOURCC_NV21:
src = sample + (src_width * crop_y + crop_x);
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
// Call NV12 but with u and v parameters swapped.
r = NV12ToI420Rotate(src, src_width,
src_uv, aligned_src_width,
y, y_stride,
v, v_stride,
u, u_stride,
crop_width, inv_crop_height, rotation);
break;
case FOURCC_M420:
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
r = M420ToI420(src, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
case FOURCC_Q420:
src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
src_width + crop_x * 2;
r = Q420ToI420(src, src_width * 3,
src_uv, src_width * 3,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
// Triplanar formats
case FOURCC_I420:
case FOURCC_YU12:
case FOURCC_YV12: {
const uint8* src_y = sample + (src_width * crop_y + crop_x);
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
int halfheight = (abs_src_height + 1) / 2;
if (format == FOURCC_YV12) {
src_v = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
(halfwidth * crop_y + crop_x) / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
}
r = I420Rotate(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height, rotation);
break;
}
case FOURCC_I422:
case FOURCC_YV16: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
int halfwidth = (src_width + 1) / 2;
if (format == FOURCC_YV16) {
src_v = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_u = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
} else {
src_u = sample + src_width * abs_src_height +
halfwidth * crop_y + crop_x / 2;
src_v = sample + src_width * abs_src_height +
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
}
r = I422ToI420(src_y, src_width,
src_u, halfwidth,
src_v, halfwidth,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I444:
case FOURCC_YV24: {
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u;
const uint8* src_v;
if (format == FOURCC_YV24) {
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
} else {
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
}
r = I444ToI420(src_y, src_width,
src_u, src_width,
src_v, src_width,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
}
case FOURCC_I411: {
int quarterwidth = (src_width + 3) / 4;
const uint8* src_y = sample + src_width * crop_y + crop_x;
const uint8* src_u = sample + src_width * abs_src_height +
quarterwidth * crop_y + crop_x / 4;
const uint8* src_v = sample + src_width * abs_src_height +
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
r = I411ToI420(src_y, src_width,
src_u, quarterwidth,
src_v, quarterwidth,
y, y_stride,
u, u_stride,
v, v_stride,
crop_width, inv_crop_height);
break;
}
#ifdef HAVE_JPEG
case FOURCC_MJPG:
r = MJPGToI420(sample, sample_size,
y, y_stride,
u, u_stride,
v, v_stride,
src_width, abs_src_height, crop_width, inv_crop_height);
break;
#endif
default:
r = -1; // unknown fourcc - return failure code.
}
if (need_buf) {
if (!r) {
r = I420Rotate(y, y_stride,
u, u_stride,
v, v_stride,
tmp_y, tmp_y_stride,
tmp_u, tmp_u_stride,
tmp_v, tmp_v_stride,
crop_width, abs_crop_height, rotation);
}
free(rotate_buffer);
}
return r;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

15
third_party/libyuv/source/cpu_id.cc поставляемый
Просмотреть файл

@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "libyuv/cpu_id.h"
#ifdef _MSC_VER
#if defined(_MSC_VER) && !defined(__clang__)
#include <intrin.h> // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
@ -27,7 +27,7 @@
#include <stdio.h>
#include <string.h>
#include "third_party/libyuv/include/libyuv/basic_types.h" // For CPU_X86
#include "libyuv/basic_types.h" // For CPU_X86
#ifdef __cplusplus
namespace libyuv {
@ -48,7 +48,7 @@ extern "C" {
defined(__i386__) || defined(__x86_64__))
LIBYUV_API
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
#if defined(_MSC_VER)
#if defined(_MSC_VER) && !defined(__clang__)
#if (_MSC_FULL_VER >= 160040219)
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
#elif defined(_M_IX86)
@ -188,10 +188,14 @@ LIBYUV_API SAFEBUFFERS
int InitCpuFlags(void) {
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
CpuId(0, 0, cpu_info0);
CpuId(1, 0, cpu_info1);
CpuId(7, 0, cpu_info7);
if (cpu_info0[0] >= 7) {
CpuId(7, 0, cpu_info7);
}
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
@ -199,6 +203,7 @@ int InitCpuFlags(void) {
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
kCpuHasX86;
#ifdef HAS_XGETBV
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
TestOsSaveYmm()) { // Saves YMM.

552
third_party/libyuv/source/format_conversion.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,552 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/format_conversion.h"
#include "libyuv/basic_types.h"
#include "libyuv/cpu_id.h"
#include "libyuv/video_common.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// generate a selector mask useful for pshufb
static uint32 GenerateSelector(int select0, int select1) {
return (uint32)(select0) |
(uint32)((select1 + 4) << 8) |
(uint32)((select0 + 8) << 16) |
(uint32)((select1 + 12) << 24);
}
static int MakeSelectors(const int blue_index,
const int green_index,
const int red_index,
uint32 dst_fourcc_bayer,
uint32* index_map) {
// Now build a lookup table containing the indices for the four pixels in each
// 2x2 Bayer grid.
switch (dst_fourcc_bayer) {
case FOURCC_BGGR:
index_map[0] = GenerateSelector(blue_index, green_index);
index_map[1] = GenerateSelector(green_index, red_index);
break;
case FOURCC_GBRG:
index_map[0] = GenerateSelector(green_index, blue_index);
index_map[1] = GenerateSelector(red_index, green_index);
break;
case FOURCC_RGGB:
index_map[0] = GenerateSelector(red_index, green_index);
index_map[1] = GenerateSelector(green_index, blue_index);
break;
case FOURCC_GRBG:
index_map[0] = GenerateSelector(green_index, red_index);
index_map[1] = GenerateSelector(blue_index, green_index);
break;
default:
return -1; // Bad FourCC
}
return 0;
}
// Converts 32 bit ARGB to Bayer RGB formats.
LIBYUV_API
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer) {
int y;
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
uint32 index_map[2];
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
if (MakeSelectors(blue_index, green_index, red_index,
dst_fourcc_bayer, index_map)) {
return -1; // Bad FourCC
}
for (y = 0; y < height; ++y) {
ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
src_argb += src_stride_argb;
dst_bayer += dst_stride_bayer;
}
return 0;
}
#define AVG(a, b) (((a) + (b)) >> 1)
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 r = src_bayer1[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
g = src_bayer0[1];
r = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer0[0];
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = AVG(r, src_bayer1[1]);
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[0];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer1[1];
dst_argb[7] = 255U;
}
}
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 g = src_bayer0[1];
uint8 b = src_bayer1[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[7] = 255U;
g = src_bayer0[1];
b = src_bayer1[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer1[1]);
dst_argb[1] = AVG(g, src_bayer0[1]);
dst_argb[2] = src_bayer0[0];
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[1];
dst_argb[5] = src_bayer0[1];
dst_argb[6] = src_bayer0[0];
dst_argb[7] = 255U;
}
}
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 b = src_bayer0[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
dst_argb[4] = src_bayer0[1];
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[7] = 255U;
b = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = AVG(b, src_bayer0[1]);
dst_argb[1] = src_bayer0[0];
dst_argb[2] = src_bayer1[0];
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer0[1];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer1[0];
dst_argb[7] = 255U;
}
}
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
uint8* dst_argb, int pix) {
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
uint8 r = src_bayer0[1];
int x;
for (x = 0; x < pix - 2; x += 2) {
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
r = src_bayer0[1];
src_bayer0 += 2;
src_bayer1 += 2;
dst_argb += 8;
}
dst_argb[0] = src_bayer1[0];
dst_argb[1] = src_bayer0[0];
dst_argb[2] = AVG(r, src_bayer0[1]);
dst_argb[3] = 255U;
if (!(pix & 1)) {
dst_argb[4] = src_bayer1[0];
dst_argb[5] = src_bayer0[0];
dst_argb[6] = src_bayer0[1];
dst_argb[7] = 255U;
}
}
// Converts any Bayer RGB format to ARGB.
LIBYUV_API
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
uint32 src_fourcc_bayer) {
int y;
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
if (height < 0) {
height = -height;
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
dst_stride_argb = -dst_stride_argb;
}
switch (src_fourcc_bayer) {
case FOURCC_BGGR:
BayerRow0 = BayerRowBG;
BayerRow1 = BayerRowGR;
break;
case FOURCC_GBRG:
BayerRow0 = BayerRowGB;
BayerRow1 = BayerRowRG;
break;
case FOURCC_GRBG:
BayerRow0 = BayerRowGR;
BayerRow1 = BayerRowBG;
break;
case FOURCC_RGGB:
BayerRow0 = BayerRowRG;
BayerRow1 = BayerRowGB;
break;
default:
return -1; // Bad FourCC
}
for (y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
dst_argb + dst_stride_argb, width);
src_bayer += src_stride_bayer * 2;
dst_argb += dst_stride_argb * 2;
}
if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
}
return 0;
}
// Converts any Bayer RGB format to ARGB.
LIBYUV_API
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_y, int dst_stride_y,
uint8* dst_u, int dst_stride_u,
uint8* dst_v, int dst_stride_v,
int width, int height,
uint32 src_fourcc_bayer) {
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
uint8* dst_argb, int pix);
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
ARGBToYRow_C;
// Negative height means invert the image.
if (height < 0) {
int halfheight;
height = -height;
halfheight = (height + 1) >> 1;
dst_y = dst_y + (height - 1) * dst_stride_y;
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
dst_stride_y = -dst_stride_y;
dst_stride_u = -dst_stride_u;
dst_stride_v = -dst_stride_v;
}
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
ARGBToYRow = ARGBToYRow_Any_SSSE3;
if (IS_ALIGNED(width, 16)) {
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
ARGBToUVRow = ARGBToUVRow_SSSE3;
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
ARGBToYRow = ARGBToYRow_SSSE3;
}
}
}
#elif defined(HAS_ARGBTOYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToYRow = ARGBToYRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToYRow = ARGBToYRow_NEON;
}
if (width >= 16) {
ARGBToUVRow = ARGBToUVRow_Any_NEON;
if (IS_ALIGNED(width, 16)) {
ARGBToUVRow = ARGBToUVRow_NEON;
}
}
}
#endif
switch (src_fourcc_bayer) {
case FOURCC_BGGR:
BayerRow0 = BayerRowBG;
BayerRow1 = BayerRowGR;
break;
case FOURCC_GBRG:
BayerRow0 = BayerRowGB;
BayerRow1 = BayerRowRG;
break;
case FOURCC_GRBG:
BayerRow0 = BayerRowGR;
BayerRow1 = BayerRowBG;
break;
case FOURCC_RGGB:
BayerRow0 = BayerRowRG;
BayerRow1 = BayerRowGB;
break;
default:
return -1; // Bad FourCC
}
{
// Allocate 2 rows of ARGB.
const int kRowSize = (width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
int y;
for (y = 0; y < height - 1; y += 2) {
BayerRow0(src_bayer, src_stride_bayer, row, width);
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
row + kRowSize, width);
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
src_bayer += src_stride_bayer * 2;
dst_y += dst_stride_y * 2;
dst_u += dst_stride_u;
dst_v += dst_stride_v;
}
if (height & 1) {
BayerRow0(src_bayer, src_stride_bayer, row, width);
ARGBToUVRow(row, 0, dst_u, dst_v, width);
ARGBToYRow(row, dst_y, width);
}
free_aligned_buffer_64(row);
}
return 0;
}
// Convert I420 to Bayer.
LIBYUV_API
int I420ToBayer(const uint8* src_y, int src_stride_y,
const uint8* src_u, int src_stride_u,
const uint8* src_v, int src_stride_v,
uint8* dst_bayer, int dst_stride_bayer,
int width, int height,
uint32 dst_fourcc_bayer) {
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
uint32 selector, int pix) = ARGBToBayerRow_C;
const int blue_index = 0; // Offsets for ARGB format
const int green_index = 1;
const int red_index = 2;
uint32 index_map[2];
// Negative height means invert the image.
if (height < 0) {
int halfheight;
height = -height;
halfheight = (height + 1) >> 1;
src_y = src_y + (height - 1) * src_stride_y;
src_u = src_u + (halfheight - 1) * src_stride_u;
src_v = src_v + (halfheight - 1) * src_stride_v;
src_stride_y = -src_stride_y;
src_stride_u = -src_stride_u;
src_stride_v = -src_stride_v;
}
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
}
}
#elif defined(HAS_ARGBTOBAYERROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
if (IS_ALIGNED(width, 8)) {
ARGBToBayerRow = ARGBToBayerRow_NEON;
}
}
#endif
if (MakeSelectors(blue_index, green_index, red_index,
dst_fourcc_bayer, index_map)) {
return -1; // Bad FourCC
}
{
// Allocate a row of ARGB.
align_buffer_64(row, width * 4);
int y;
for (y = 0; y < height; ++y) {
I422ToARGBRow(src_y, src_u, src_v, row, width);
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
dst_bayer += dst_stride_bayer;
src_y += src_stride_y;
if (y & 1) {
src_u += src_stride_u;
src_v += src_stride_v;
}
}
free_aligned_buffer_64(row);
}
return 0;
}
#define MAKEBAYERFOURCC(BAYER) \
LIBYUV_API \
int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_y, int dst_stride_y, \
uint8* dst_u, int dst_stride_u, \
uint8* dst_v, int dst_stride_v, \
int width, int height) { \
return BayerToI420(src_bayer, src_stride_bayer, \
dst_y, dst_stride_y, \
dst_u, dst_stride_u, \
dst_v, dst_stride_v, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
const uint8* src_u, int src_stride_u, \
const uint8* src_v, int src_stride_v, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return I420ToBayer(src_y, src_stride_y, \
src_u, src_stride_u, \
src_v, src_stride_v, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
uint8* dst_bayer, int dst_stride_bayer, \
int width, int height) { \
return ARGBToBayer(src_argb, src_stride_argb, \
dst_bayer, dst_stride_bayer, \
width, height, \
FOURCC_##BAYER); \
} \
\
LIBYUV_API \
int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
uint8* dst_argb, int dst_stride_argb, \
int width, int height) { \
return BayerToARGB(src_bayer, src_stride_bayer, \
dst_argb, dst_stride_argb, \
width, height, \
FOURCC_##BAYER); \
}
MAKEBAYERFOURCC(BGGR)
MAKEBAYERFOURCC(GBRG)
MAKEBAYERFOURCC(GRBG)
MAKEBAYERFOURCC(RGGB)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

566
third_party/libyuv/source/mjpeg_decoder.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,566 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#ifdef HAVE_JPEG
#include <assert.h>
#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
!defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP
#endif
struct FILE; // For jpeglib.h.
// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
extern "C" {
#endif
#include <jpeglib.h>
#ifdef __cplusplus
} // extern "C"
#endif
#include "libyuv/planar_functions.h" // For CopyPlane().
namespace libyuv {
#ifdef HAVE_SETJMP
struct SetJmpErrorMgr {
jpeg_error_mgr base; // Must be at the top
jmp_buf setjmp_buffer;
};
#endif
const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo,
long num_bytes); // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
MJpegDecoder::MJpegDecoder()
: has_scanline_padding_(LIBYUV_FALSE),
num_outbufs_(0),
scanlines_(NULL),
scanlines_sizes_(NULL),
databuf_(NULL),
databuf_strides_(NULL) {
decompress_struct_ = new jpeg_decompress_struct;
source_mgr_ = new jpeg_source_mgr;
#ifdef HAVE_SETJMP
error_mgr_ = new SetJmpErrorMgr;
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
// Override standard exit()-based error handler.
error_mgr_->base.error_exit = &ErrorHandler;
#endif
decompress_struct_->client_data = NULL;
source_mgr_->init_source = &init_source;
source_mgr_->fill_input_buffer = &fill_input_buffer;
source_mgr_->skip_input_data = &skip_input_data;
source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
source_mgr_->term_source = &term_source;
jpeg_create_decompress(decompress_struct_);
decompress_struct_->src = source_mgr_;
buf_vec_.buffers = &buf_;
buf_vec_.len = 1;
}
MJpegDecoder::~MJpegDecoder() {
jpeg_destroy_decompress(decompress_struct_);
delete decompress_struct_;
delete source_mgr_;
#ifdef HAVE_SETJMP
delete error_mgr_;
#endif
DestroyOutputBuffers();
}
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
if (!ValidateJpeg(src, src_len)) {
return LIBYUV_FALSE;
}
buf_.data = src;
buf_.len = (int)(src_len);
buf_vec_.pos = 0;
decompress_struct_->client_data = &buf_vec_;
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called jpeg_read_header, it experienced an error, and we called
// longjmp() and rewound the stack to here. Return error.
return LIBYUV_FALSE;
}
#endif
if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
// ERROR: Bad MJPEG header
return LIBYUV_FALSE;
}
AllocOutputBuffers(GetNumComponents());
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_size = GetComponentScanlinesPerImcuRow(i);
if (scanlines_sizes_[i] != scanlines_size) {
if (scanlines_[i]) {
delete scanlines_[i];
}
scanlines_[i] = new uint8* [scanlines_size];
scanlines_sizes_[i] = scanlines_size;
}
// We allocate padding for the final scanline to pad it up to DCTSIZE bytes
// to avoid memory errors, since jpeglib only reads full MCUs blocks. For
// the preceding scanlines, the padding is not needed/wanted because the
// following addresses will already be valid (they are the initial bytes of
// the next scanline) and will be overwritten when jpeglib writes out that
// next scanline.
int databuf_stride = GetComponentStride(i);
int databuf_size = scanlines_size * databuf_stride;
if (databuf_strides_[i] != databuf_stride) {
if (databuf_[i]) {
delete databuf_[i];
}
databuf_[i] = new uint8[databuf_size];
databuf_strides_[i] = databuf_stride;
}
if (GetComponentStride(i) != GetComponentWidth(i)) {
has_scanline_padding_ = LIBYUV_TRUE;
}
}
return LIBYUV_TRUE;
}
static int DivideAndRoundUp(int numerator, int denominator) {
return (numerator + denominator - 1) / denominator;
}
static int DivideAndRoundDown(int numerator, int denominator) {
return numerator / denominator;
}
// Returns width of the last loaded frame.
int MJpegDecoder::GetWidth() {
return decompress_struct_->image_width;
}
// Returns height of the last loaded frame.
int MJpegDecoder::GetHeight() {
return decompress_struct_->image_height;
}
// Returns format of the last loaded frame. The return value is one of the
// kColorSpace* constants.
int MJpegDecoder::GetColorSpace() {
return decompress_struct_->jpeg_color_space;
}
// Number of color components in the color space.
int MJpegDecoder::GetNumComponents() {
return decompress_struct_->num_components;
}
// Sample factors of the n-th component.
int MJpegDecoder::GetHorizSampFactor(int component) {
return decompress_struct_->comp_info[component].h_samp_factor;
}
int MJpegDecoder::GetVertSampFactor(int component) {
return decompress_struct_->comp_info[component].v_samp_factor;
}
int MJpegDecoder::GetHorizSubSampFactor(int component) {
return decompress_struct_->max_h_samp_factor /
GetHorizSampFactor(component);
}
int MJpegDecoder::GetVertSubSampFactor(int component) {
return decompress_struct_->max_v_samp_factor /
GetVertSampFactor(component);
}
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
return decompress_struct_->max_v_samp_factor * DCTSIZE;
}
int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
int vs = GetVertSubSampFactor(component);
return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
}
int MJpegDecoder::GetComponentWidth(int component) {
int hs = GetHorizSubSampFactor(component);
return DivideAndRoundUp(GetWidth(), hs);
}
int MJpegDecoder::GetComponentHeight(int component) {
int vs = GetVertSubSampFactor(component);
return DivideAndRoundUp(GetHeight(), vs);
}
// Get width in bytes padded out to a multiple of DCTSIZE
int MJpegDecoder::GetComponentStride(int component) {
return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
}
int MJpegDecoder::GetComponentSize(int component) {
return GetComponentWidth(component) * GetComponentHeight(component);
}
LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called jpeg_abort_decompress, it experienced an error, and we called
// longjmp() and rewound the stack to here. Return error.
return LIBYUV_FALSE;
}
#endif
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
uint8** planes, int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// Compute amount of lines to skip to implement vertical crop.
// TODO(fbarchard): Ensure skip is a multiple of maximum component
// subsample. ie 2
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
// There is no API to skip lines in the output data, so we read them
// into the temp buffer.
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip. Must read it and then
// copy the parts we want into the destination.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
int rows_to_skip =
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
rows_to_skip;
int data_to_skip = rows_to_skip * GetComponentStride(i);
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
}
}
// Read full MCUs but cropped horizontally
for (; lines_left > GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
int scanlines_to_copy =
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
CopyPlane(databuf_[i], GetComponentStride(i),
planes[i], GetComponentWidth(i),
GetComponentWidth(i), scanlines_to_copy);
planes[i] += scanlines_to_copy * GetComponentWidth(i);
}
}
return FinishDecode();
}
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
int dst_width, int dst_height) {
if (dst_width != GetWidth() ||
dst_height > GetHeight()) {
// ERROR: Bad dimensions
return LIBYUV_FALSE;
}
#ifdef HAVE_SETJMP
if (setjmp(error_mgr_->setjmp_buffer)) {
// We called into jpeglib, it experienced an error sometime during this
// function call, and we called longjmp() and rewound the stack to here.
// Return error.
return LIBYUV_FALSE;
}
#endif
if (!StartDecode()) {
return LIBYUV_FALSE;
}
SetScanlinePointers(databuf_);
int lines_left = dst_height;
// TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
int skip = (GetHeight() - dst_height) / 2;
if (skip > 0) {
while (skip >= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
skip -= GetImageScanlinesPerImcuRow();
}
if (skip > 0) {
// Have a partial iMCU row left over to skip.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
for (int i = 0; i < num_outbufs_; ++i) {
// TODO(fbarchard): Compute skip to avoid this
assert(skip % GetVertSubSampFactor(i) == 0);
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
// Change our own data buffer pointers so we can pass them to the
// callback.
databuf_[i] += data_to_skip;
}
int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
(*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
// Now change them back.
for (int i = 0; i < num_outbufs_; ++i) {
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
int data_to_skip = rows_to_skip * GetComponentStride(i);
databuf_[i] -= data_to_skip;
}
lines_left -= scanlines_to_copy;
}
}
// Read full MCUs until we get to the crop point.
for (; lines_left >= GetImageScanlinesPerImcuRow();
lines_left -= GetImageScanlinesPerImcuRow()) {
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
(*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
}
if (lines_left > 0) {
// Have a partial iMCU row left over to decode.
if (!DecodeImcuRow()) {
FinishDecode();
return LIBYUV_FALSE;
}
(*fn)(opaque, databuf_, databuf_strides_, lines_left);
}
return FinishDecode();
}
void init_source(j_decompress_ptr cinfo) {
fill_input_buffer(cinfo);
}
boolean fill_input_buffer(j_decompress_ptr cinfo) {
BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
if (buf_vec->pos >= buf_vec->len) {
assert(0 && "No more data");
// ERROR: No more data
return FALSE;
}
cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
++buf_vec->pos;
return TRUE;
}
void skip_input_data(j_decompress_ptr cinfo,
long num_bytes) { // NOLINT
cinfo->src->next_input_byte += num_bytes;
}
void term_source(j_decompress_ptr cinfo) {
// Nothing to do.
}
#ifdef HAVE_SETJMP
void ErrorHandler(j_common_ptr cinfo) {
// This is called when a jpeglib command experiences an error. Unfortunately
// jpeglib's error handling model is not very flexible, because it expects the
// error handler to not return--i.e., it wants the program to terminate. To
// recover from errors we use setjmp() as shown in their example. setjmp() is
// C's implementation for the "call with current continuation" functionality
// seen in some functional programming languages.
// A formatted message can be output, but is unsafe for release.
#ifdef DEBUG
char buf[JMSG_LENGTH_MAX];
(*cinfo->err->format_message)(cinfo, buf);
// ERROR: Error in jpeglib: buf
#endif
SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
// This rewinds the call stack to the point of the corresponding setjmp()
// and causes it to return (for a second time) with value 1.
longjmp(mgr->setjmp_buffer, 1);
}
#endif
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
if (num_outbufs != num_outbufs_) {
// We could perhaps optimize this case to resize the output buffers without
// necessarily having to delete and recreate each one, but it's not worth
// it.
DestroyOutputBuffers();
scanlines_ = new uint8** [num_outbufs];
scanlines_sizes_ = new int[num_outbufs];
databuf_ = new uint8* [num_outbufs];
databuf_strides_ = new int[num_outbufs];
for (int i = 0; i < num_outbufs; ++i) {
scanlines_[i] = NULL;
scanlines_sizes_[i] = 0;
databuf_[i] = NULL;
databuf_strides_[i] = 0;
}
num_outbufs_ = num_outbufs;
}
}
void MJpegDecoder::DestroyOutputBuffers() {
for (int i = 0; i < num_outbufs_; ++i) {
delete [] scanlines_[i];
delete [] databuf_[i];
}
delete [] scanlines_;
delete [] databuf_;
delete [] scanlines_sizes_;
delete [] databuf_strides_;
scanlines_ = NULL;
databuf_ = NULL;
scanlines_sizes_ = NULL;
databuf_strides_ = NULL;
num_outbufs_ = 0;
}
// JDCT_IFAST and do_block_smoothing improve performance substantially.
LIBYUV_BOOL MJpegDecoder::StartDecode() {
decompress_struct_->raw_data_out = TRUE;
decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
decompress_struct_->dither_mode = JDITHER_NONE;
// Not applicable to 'raw':
decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
// Only for buffered mode:
decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
// Blocky but fast:
decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
if (!jpeg_start_decompress(decompress_struct_)) {
// ERROR: Couldn't start JPEG decompressor";
return LIBYUV_FALSE;
}
return LIBYUV_TRUE;
}
LIBYUV_BOOL MJpegDecoder::FinishDecode() {
// jpeglib considers it an error if we finish without decoding the whole
// image, so we call "abort" rather than "finish".
jpeg_abort_decompress(decompress_struct_);
return LIBYUV_TRUE;
}
void MJpegDecoder::SetScanlinePointers(uint8** data) {
for (int i = 0; i < num_outbufs_; ++i) {
uint8* data_i = data[i];
for (int j = 0; j < scanlines_sizes_[i]; ++j) {
scanlines_[i][j] = data_i;
data_i += GetComponentStride(i);
}
}
}
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
jpeg_read_raw_data(decompress_struct_,
scanlines_,
GetImageScanlinesPerImcuRow());
}
// The helper function which recognizes the jpeg sub-sampling type.
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
int* subsample_x, int* subsample_y, int number_of_components) {
if (number_of_components == 3) { // Color images.
if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
subsample_x[1] == 2 && subsample_y[1] == 2 &&
subsample_x[2] == 2 && subsample_y[2] == 2) {
return kJpegYuv420;
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
subsample_x[1] == 2 && subsample_y[1] == 1 &&
subsample_x[2] == 2 && subsample_y[2] == 1) {
return kJpegYuv422;
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
subsample_x[1] == 1 && subsample_y[1] == 1 &&
subsample_x[2] == 1 && subsample_y[2] == 1) {
return kJpegYuv444;
}
} else if (number_of_components == 1) { // Grey-scale images.
if (subsample_x[0] == 1 && subsample_y[0] == 1) {
return kJpegYuv400;
}
}
return kJpegUnknown;
}
} // namespace libyuv
#endif // HAVE_JPEG

47
third_party/libyuv/source/mjpeg_validate.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,47 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/mjpeg_decoder.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// Helper function to validate the jpeg appears intact.
// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
size_t i;
if (sample_size < 64) {
// ERROR: Invalid jpeg size: sample_size
return LIBYUV_FALSE;
}
if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
// ERROR: Invalid jpeg initial start code
return LIBYUV_FALSE;
}
for (i = sample_size - 2; i > 1;) {
if (sample[i] != 0xd9) {
if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
return LIBYUV_TRUE; // Success: Valid jpeg.
}
--i;
}
--i;
}
// ERROR: Invalid jpeg end code not found. Size sample_size
return LIBYUV_FALSE;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

12
third_party/libyuv/source/planar_functions.cc поставляемый
Просмотреть файл

@ -8,15 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/planar_functions.h"
#include "libyuv/planar_functions.h"
#include <string.h> // for memset()
#include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "libyuv/cpu_id.h"
#ifdef HAVE_JPEG
#include "third_party/libyuv/include/libyuv/mjpeg_decoder.h"
#include "libyuv/mjpeg_decoder.h"
#endif
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
@ -37,6 +37,10 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
height = 1;
src_stride_y = dst_stride_y = 0;
}
// Nothing to do.
if (src_y == dst_y && src_stride_y == dst_stride_y) {
return;
}
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
CopyRow = CopyRow_X86;

1301
third_party/libyuv/source/rotate.cc поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

209
third_party/libyuv/source/rotate_argb.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,209 @@
/*
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/rotate.h"
#include "libyuv/cpu_id.h"
#include "libyuv/convert.h"
#include "libyuv/planar_functions.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// ARGBScale has a function to copy pixels to a row, striding each source
// pixel by a constant.
#if !defined(LIBYUV_DISABLE_X86) && \
(defined(_M_IX86) || \
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
#endif
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_SCALEARGBROWDOWNEVEN_NEON
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
int src_stepx,
uint8* dst_ptr, int dst_width);
#endif
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
int src_stepx,
uint8* dst_ptr, int dst_width);
static void ARGBTranspose(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
int i;
int src_pixel_step = src_stride >> 2;
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
IS_ALIGNED(src, 4)) {
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
}
#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
dst += dst_stride;
src += 4;
}
}
void ARGBRotate90(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Rotate by 90 is a ARGBTranspose with the source read
// from bottom to top. So set the source pointer to the end
// of the buffer and flip the sign of the source stride.
src += src_stride * (height - 1);
src_stride = -src_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate270(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Rotate by 270 is a ARGBTranspose with the destination written
// from bottom to top. So set the destination pointer to the end
// of the buffer and flip the sign of the destination stride.
dst += dst_stride * (width - 1);
dst_stride = -dst_stride;
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
}
void ARGBRotate180(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width, int height) {
// Swap first and last row and mirror the content. Uses a temporary row.
align_buffer_64(row, width * 4);
const uint8* src_bot = src + src_stride * (height - 1);
uint8* dst_bot = dst + dst_stride * (height - 1);
int half_height = (height + 1) >> 1;
int y;
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
ARGBMirrorRow_C;
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
#if defined(HAS_ARGBMIRRORROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
}
#endif
#if defined(HAS_ARGBMIRRORROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
ARGBMirrorRow = ARGBMirrorRow_AVX2;
}
#endif
#if defined(HAS_ARGBMIRRORROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
ARGBMirrorRow = ARGBMirrorRow_NEON;
}
#endif
#if defined(HAS_COPYROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
CopyRow = CopyRow_NEON;
}
#endif
#if defined(HAS_COPYROW_X86)
if (TestCpuFlag(kCpuHasX86)) {
CopyRow = CopyRow_X86;
}
#endif
#if defined(HAS_COPYROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
CopyRow = CopyRow_SSE2;
}
#endif
#if defined(HAS_COPYROW_ERMS)
if (TestCpuFlag(kCpuHasERMS)) {
CopyRow = CopyRow_ERMS;
}
#endif
#if defined(HAS_COPYROW_MIPS)
if (TestCpuFlag(kCpuHasMIPS)) {
CopyRow = CopyRow_MIPS;
}
#endif
// Odd height will harmlessly mirror the middle row twice.
for (y = 0; y < half_height; ++y) {
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
src += src_stride;
dst += dst_stride;
src_bot -= src_stride;
dst_bot -= dst_stride;
}
free_aligned_buffer_64(row);
}
LIBYUV_API
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
uint8* dst_argb, int dst_stride_argb,
int width, int height,
enum RotationMode mode) {
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
return -1;
}
// Negative height means invert the image.
if (height < 0) {
height = -height;
src_argb = src_argb + (height - 1) * src_stride_argb;
src_stride_argb = -src_stride_argb;
}
switch (mode) {
case kRotate0:
// copy frame
return ARGBCopy(src_argb, src_stride_argb,
dst_argb, dst_stride_argb,
width, height);
case kRotate90:
ARGBRotate90(src_argb, src_stride_argb,
dst_argb, dst_stride_argb,
width, height);
return 0;
case kRotate270:
ARGBRotate270(src_argb, src_stride_argb,
dst_argb, dst_stride_argb,
width, height);
return 0;
case kRotate180:
ARGBRotate180(src_argb, src_stride_argb,
dst_argb, dst_stride_argb,
width, height);
return 0;
default:
break;
}
return -1;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

485
third_party/libyuv/source/rotate_mips.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,485 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n"
//dst + dst_stride word aligned
"1: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"sw $s0, 0(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"sw $s1, 4(%[dst]) \n"
"bnez %[width], 1b \n"
" addu %[dst], %[dst], %[dst_stride] \n"
"b 2f \n"
//dst + dst_stride unaligned
"11: \n"
"lbu $t0, 0(%[src]) \n"
"lbux $t1, %[src_stride](%[src]) \n"
"lbux $t8, $t2(%[src]) \n"
"lbux $t9, $t3(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s0, $t8, $t0 \n"
"lbux $t0, $t4(%[src]) \n"
"lbux $t1, $t5(%[src]) \n"
"lbux $t8, $t6(%[src]) \n"
"lbux $t9, $t7(%[src]) \n"
"sll $t1, $t1, 16 \n"
"sll $t9, $t9, 16 \n"
"or $t0, $t0, $t1 \n"
"or $t8, $t8, $t9 \n"
"precr.qb.ph $s1, $t8, $t0 \n"
"swr $s0, 0(%[dst]) \n"
"swl $s0, 3(%[dst]) \n"
"addiu %[width], -1 \n"
"addiu %[src], 1 \n"
"swr $s1, 4(%[dst]) \n"
"swl $s1, 7(%[dst]) \n"
"bnez %[width], 11b \n"
"addu %[dst], %[dst], %[dst_stride] \n"
"2: \n"
".set pop \n"
:[src] "+r" (src),
[dst] "+r" (dst),
[width] "+r" (width)
:[src_stride] "r" (src_stride),
[dst_stride] "r" (dst_stride)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1"
);
}
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
__asm__ __volatile__ (
".set noat \n"
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"srl $AT, %[width], 0x2 \n"
"andi $t0, %[dst], 0x3 \n"
"andi $t1, %[dst_stride], 0x3 \n"
"or $t0, $t0, $t1 \n"
"bnez $t0, 11f \n"
" subu $t7, $t9, %[src_stride] \n"
//dst + dst_stride word aligned
"1: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"sw $s4, 0(%[dst]) \n"
"sw $t0, 4(%[dst]) \n"
"sw $s6, 0($s0) \n"
"sw $t8, 4($s0) \n"
"sw $s5, 0($s1) \n"
"sw $t1, 4($s1) \n"
"sw $s7, 0($s2) \n"
"sw $t9, 4($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 1b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"b 2f \n"
//dst + dst_stride unaligned
"11: \n"
"lw $t0, 0(%[src]) \n"
"lwx $t1, %[src_stride](%[src]) \n"
"lwx $t8, $t2(%[src]) \n"
"lwx $t9, $t3(%[src]) \n"
// t0 = | 30 | 20 | 10 | 00 |
// t1 = | 31 | 21 | 11 | 01 |
// t8 = | 32 | 22 | 12 | 02 |
// t9 = | 33 | 23 | 13 | 03 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 21 | 01 | 20 | 00 |
// s1 = | 23 | 03 | 22 | 02 |
// s2 = | 31 | 11 | 30 | 10 |
// s3 = | 33 | 13 | 32 | 12 |
"precr.qb.ph $s4, $s1, $s0 \n"
"precrq.qb.ph $s5, $s1, $s0 \n"
"precr.qb.ph $s6, $s3, $s2 \n"
"precrq.qb.ph $s7, $s3, $s2 \n"
// s4 = | 03 | 02 | 01 | 00 |
// s5 = | 23 | 22 | 21 | 20 |
// s6 = | 13 | 12 | 11 | 10 |
// s7 = | 33 | 32 | 31 | 30 |
"lwx $t0, $t4(%[src]) \n"
"lwx $t1, $t5(%[src]) \n"
"lwx $t8, $t6(%[src]) \n"
"lwx $t9, $t7(%[src]) \n"
// t0 = | 34 | 24 | 14 | 04 |
// t1 = | 35 | 25 | 15 | 05 |
// t8 = | 36 | 26 | 16 | 06 |
// t9 = | 37 | 27 | 17 | 07 |
"precr.qb.ph $s0, $t1, $t0 \n"
"precr.qb.ph $s1, $t9, $t8 \n"
"precrq.qb.ph $s2, $t1, $t0 \n"
"precrq.qb.ph $s3, $t9, $t8 \n"
// s0 = | 25 | 05 | 24 | 04 |
// s1 = | 27 | 07 | 26 | 06 |
// s2 = | 35 | 15 | 34 | 14 |
// s3 = | 37 | 17 | 36 | 16 |
"precr.qb.ph $t0, $s1, $s0 \n"
"precrq.qb.ph $t1, $s1, $s0 \n"
"precr.qb.ph $t8, $s3, $s2 \n"
"precrq.qb.ph $t9, $s3, $s2 \n"
// t0 = | 07 | 06 | 05 | 04 |
// t1 = | 27 | 26 | 25 | 24 |
// t8 = | 17 | 16 | 15 | 14 |
// t9 = | 37 | 36 | 35 | 34 |
"addu $s0, %[dst], %[dst_stride] \n"
"addu $s1, $s0, %[dst_stride] \n"
"addu $s2, $s1, %[dst_stride] \n"
"swr $s4, 0(%[dst]) \n"
"swl $s4, 3(%[dst]) \n"
"swr $t0, 4(%[dst]) \n"
"swl $t0, 7(%[dst]) \n"
"swr $s6, 0($s0) \n"
"swl $s6, 3($s0) \n"
"swr $t8, 4($s0) \n"
"swl $t8, 7($s0) \n"
"swr $s5, 0($s1) \n"
"swl $s5, 3($s1) \n"
"swr $t1, 4($s1) \n"
"swl $t1, 7($s1) \n"
"swr $s7, 0($s2) \n"
"swl $s7, 3($s2) \n"
"swr $t9, 4($s2) \n"
"swl $t9, 7($s2) \n"
"addiu $AT, -1 \n"
"addiu %[src], 4 \n"
"bnez $AT, 11b \n"
" addu %[dst], $s2, %[dst_stride] \n"
"2: \n"
".set pop \n"
".set at \n"
:[src] "+r" (src),
[dst] "+r" (dst),
[width] "+r" (width)
:[src_stride] "r" (src_stride),
[dst_stride] "r" (dst_stride)
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
);
}
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
__asm__ __volatile__ (
".set push \n"
".set noreorder \n"
"beqz %[width], 2f \n"
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
"addu $t3, $t2, %[src_stride] \n"
"addu $t5, $t4, %[src_stride] \n"
"addu $t6, $t2, $t4 \n"
"subu $t7, $t9, %[src_stride] \n"
"srl $t1, %[width], 1 \n"
// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
"andi $t0, %[dst_a], 0x3 \n"
"andi $t8, %[dst_b], 0x3 \n"
"or $t0, $t0, $t8 \n"
"andi $t8, %[dst_stride_a], 0x3 \n"
"andi $s5, %[dst_stride_b], 0x3 \n"
"or $t8, $t8, $s5 \n"
"or $t0, $t0, $t8 \n"
"bnez $t0, 11f \n"
" nop \n"
// dst + dst_stride word aligned (both, a & b dst addresses)
"1: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"sw $s3, 0($s5) \n"
"sw $s4, 0($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"sw $s3, 0(%[dst_a]) \n"
"sw $s4, 0(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"sw $s3, 4($s5) \n"
"sw $s4, 4($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"sw $s3, 4(%[dst_a]) \n"
"sw $s4, 4(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 1b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"b 2f \n"
" nop \n"
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
"11: \n"
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
"addu $s5, %[dst_a], %[dst_stride_a] \n"
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
"addu $s6, %[dst_b], %[dst_stride_b] \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
"swr $s3, 0($s5) \n"
"swl $s3, 3($s5) \n"
"swr $s4, 0($s6) \n"
"swl $s4, 3($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
"swr $s3, 0(%[dst_a]) \n"
"swl $s3, 3(%[dst_a]) \n"
"swr $s4, 0(%[dst_b]) \n"
"swl $s4, 3(%[dst_b]) \n"
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
"sll $t0, $t0, 16 \n"
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
"sll $t9, $t9, 16 \n"
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
"swr $s3, 4($s5) \n"
"swl $s3, 7($s5) \n"
"swr $s4, 4($s6) \n"
"swl $s4, 7($s6) \n"
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
"addiu %[src], 4 \n"
"addiu $t1, -1 \n"
"sll $t0, %[dst_stride_a], 1 \n"
"sll $t8, %[dst_stride_b], 1 \n"
"swr $s3, 4(%[dst_a]) \n"
"swl $s3, 7(%[dst_a]) \n"
"swr $s4, 4(%[dst_b]) \n"
"swl $s4, 7(%[dst_b]) \n"
"addu %[dst_a], %[dst_a], $t0 \n"
"bnez $t1, 11b \n"
" addu %[dst_b], %[dst_b], $t8 \n"
"2: \n"
".set pop \n"
: [src] "+r" (src),
[dst_a] "+r" (dst_a),
[dst_b] "+r" (dst_b),
[width] "+r" (width),
[src_stride] "+r" (src_stride)
: [dst_stride_a] "r" (dst_stride_a),
[dst_stride_b] "r" (dst_stride_b)
: "t0", "t1", "t2", "t3", "t4", "t5",
"t6", "t7", "t8", "t9",
"s0", "s1", "s2", "s3",
"s4", "s5", "s6"
);
}
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

533
third_party/libyuv/source/rotate_neon.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,533 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/row.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
static uvec8 kVTbl4x4Transpose =
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
void TransposeWx8_NEON(const uint8* src, int src_stride,
uint8* dst, int dst_stride,
int width) {
const uint8* src_temp = NULL;
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %5, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d1}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d2}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d3}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d4}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d5}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d6}, [%0], %2 \n"
MEMACCESS(0)
"vld1.8 {d7}, [%0] \n"
"vtrn.8 d1, d0 \n"
"vtrn.8 d3, d2 \n"
"vtrn.8 d5, d4 \n"
"vtrn.8 d7, d6 \n"
"vtrn.16 d1, d3 \n"
"vtrn.16 d0, d2 \n"
"vtrn.16 d5, d7 \n"
"vtrn.16 d4, d6 \n"
"vtrn.32 d1, d5 \n"
"vtrn.32 d0, d4 \n"
"vtrn.32 d3, d7 \n"
"vtrn.32 d2, d6 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0] \n"
"add %1, #8 \n" // src += 8
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
"subs %5, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %5, #8 \n"
"beq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %5, #2 \n"
"blt 3f \n"
"cmp %5, #4 \n"
"blt 2f \n"
// 4x8 block
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d0[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d1[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d1[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d2[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d2[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d3[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.32 {d3[1]}, [%0] \n"
"mov %0, %3 \n"
MEMACCESS(6)
"vld1.8 {q3}, [%6] \n"
"vtbl.8 d4, {d0, d1}, d6 \n"
"vtbl.8 d5, {d0, d1}, d7 \n"
"vtbl.8 d0, {d2, d3}, d6 \n"
"vtbl.8 d1, {d2, d3}, d7 \n"
// TODO(frkoenig): Rework shuffle above to
// write out with 4 instead of 8 writes.
MEMACCESS(0)
"vst1.32 {d4[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d4[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d5[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d5[1]}, [%0] \n"
"add %0, %3, #4 \n"
MEMACCESS(0)
"vst1.32 {d0[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d0[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d1[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d1[1]}, [%0] \n"
"add %1, #4 \n" // src += 4
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
"subs %5, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %5, #2 \n"
"blt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.16 {d0[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d0[3]}, [%0], %2 \n"
MEMACCESS(0)
"vld1.16 {d1[3]}, [%0] \n"
"vtrn.8 d0, d1 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0] \n"
"add %1, #2 \n" // src += 2
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
"subs %5, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block
"3: \n"
MEMACCESS(1)
"vld1.8 {d0[0]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[1]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[2]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[3]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[4]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[5]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[6]}, [%1], %2 \n"
MEMACCESS(1)
"vld1.8 {d0[7]}, [%1] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst), // %3
"+r"(dst_stride), // %4
"+r"(width) // %5
: "r"(&kVTbl4x4Transpose) // %6
: "memory", "cc", "q0", "q1", "q2", "q3"
);
}
static uvec8 kVTbl4x4TransposeDi =
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
uint8* dst_a, int dst_stride_a,
uint8* dst_b, int dst_stride_b,
int width) {
const uint8* src_temp = NULL;
asm volatile (
// loops are on blocks of 8. loop will stop when
// counter gets to or below 0. starting the counter
// at w-8 allow for this
"sub %7, #8 \n"
// handle 8x8 blocks. this should be the majority of the plane
".p2align 2 \n"
"1: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.8 {d0, d1}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d2, d3}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d4, d5}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d6, d7}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d16, d17}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d18, d19}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d20, d21}, [%0], %2 \n"
MEMACCESS(0)
"vld2.8 {d22, d23}, [%0] \n"
"vtrn.8 q1, q0 \n"
"vtrn.8 q3, q2 \n"
"vtrn.8 q9, q8 \n"
"vtrn.8 q11, q10 \n"
"vtrn.16 q1, q3 \n"
"vtrn.16 q0, q2 \n"
"vtrn.16 q9, q11 \n"
"vtrn.16 q8, q10 \n"
"vtrn.32 q1, q9 \n"
"vtrn.32 q0, q8 \n"
"vtrn.32 q3, q11 \n"
"vtrn.32 q2, q10 \n"
"vrev16.8 q0, q0 \n"
"vrev16.8 q1, q1 \n"
"vrev16.8 q2, q2 \n"
"vrev16.8 q3, q3 \n"
"vrev16.8 q8, q8 \n"
"vrev16.8 q9, q9 \n"
"vrev16.8 q10, q10 \n"
"vrev16.8 q11, q11 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.8 {d2}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d6}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d4}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d18}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d16}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d22}, [%0], %4 \n"
MEMACCESS(0)
"vst1.8 {d20}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.8 {d3}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d1}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d7}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d5}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d19}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d17}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d23}, [%0], %6 \n"
MEMACCESS(0)
"vst1.8 {d21}, [%0] \n"
"add %1, #8*2 \n" // src += 8*2
"add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
"add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
"subs %7, #8 \n" // w -= 8
"bge 1b \n"
// add 8 back to counter. if the result is 0 there are
// no residuals.
"adds %7, #8 \n"
"beq 4f \n"
// some residual, so between 1 and 7 lines left to transpose
"cmp %7, #2 \n"
"blt 3f \n"
"cmp %7, #4 \n"
"blt 2f \n"
// TODO(frkoenig): Clean this up
// 4x8 block
"mov %0, %1 \n"
MEMACCESS(0)
"vld1.64 {d0}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d1}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d2}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d3}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d4}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d5}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d6}, [%0], %2 \n"
MEMACCESS(0)
"vld1.64 {d7}, [%0] \n"
MEMACCESS(8)
"vld1.8 {q15}, [%8] \n"
"vtrn.8 q0, q1 \n"
"vtrn.8 q2, q3 \n"
"vtbl.8 d16, {d0, d1}, d30 \n"
"vtbl.8 d17, {d0, d1}, d31 \n"
"vtbl.8 d18, {d2, d3}, d30 \n"
"vtbl.8 d19, {d2, d3}, d31 \n"
"vtbl.8 d20, {d4, d5}, d30 \n"
"vtbl.8 d21, {d4, d5}, d31 \n"
"vtbl.8 d22, {d6, d7}, d30 \n"
"vtbl.8 d23, {d6, d7}, d31 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.32 {d16[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d16[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d17[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d17[1]}, [%0], %4 \n"
"add %0, %3, #4 \n"
MEMACCESS(0)
"vst1.32 {d20[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d20[1]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d21[0]}, [%0], %4 \n"
MEMACCESS(0)
"vst1.32 {d21[1]}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.32 {d18[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d18[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d19[1]}, [%0], %6 \n"
"add %0, %5, #4 \n"
MEMACCESS(0)
"vst1.32 {d22[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d22[1]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[0]}, [%0], %6 \n"
MEMACCESS(0)
"vst1.32 {d23[1]}, [%0] \n"
"add %1, #4*2 \n" // src += 4 * 2
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
"subs %7, #4 \n" // w -= 4
"beq 4f \n"
// some residual, check to see if it includes a 2x8 block,
// or less
"cmp %7, #2 \n"
"blt 3f \n"
// 2x8 block
"2: \n"
"mov %0, %1 \n"
MEMACCESS(0)
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
MEMACCESS(0)
"vld2.16 {d1[3], d3[3]}, [%0] \n"
"vtrn.8 d0, d1 \n"
"vtrn.8 d2, d3 \n"
"mov %0, %3 \n"
MEMACCESS(0)
"vst1.64 {d0}, [%0], %4 \n"
MEMACCESS(0)
"vst1.64 {d2}, [%0] \n"
"mov %0, %5 \n"
MEMACCESS(0)
"vst1.64 {d1}, [%0], %6 \n"
MEMACCESS(0)
"vst1.64 {d3}, [%0] \n"
"add %1, #2*2 \n" // src += 2 * 2
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
"subs %7, #2 \n" // w -= 2
"beq 4f \n"
// 1x8 block
"3: \n"
MEMACCESS(1)
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
MEMACCESS(1)
"vld2.8 {d0[7], d1[7]}, [%1] \n"
MEMACCESS(3)
"vst1.64 {d0}, [%3] \n"
MEMACCESS(5)
"vst1.64 {d1}, [%5] \n"
"4: \n"
: "+r"(src_temp), // %0
"+r"(src), // %1
"+r"(src_stride), // %2
"+r"(dst_a), // %3
"+r"(dst_stride_a), // %4
"+r"(dst_b), // %5
"+r"(dst_stride_b), // %6
"+r"(width) // %7
: "r"(&kVTbl4x4TransposeDi) // %8
: "memory", "cc",
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
);
}
#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

12
third_party/libyuv/source/row_any.cc поставляемый
Просмотреть файл

@ -8,9 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#include "third_party/libyuv/include/libyuv/basic_types.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {
@ -35,10 +35,12 @@ extern "C" {
}
#ifdef HAS_I422TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
0, 4, 7)
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
1, 4, 7)
#endif // HAS_I422TOARGBROW_SSSE3
#ifdef HAS_I444TOARGBROW_SSSE3
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
0, 4, 7)
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
2, 4, 7)
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
@ -59,7 +61,7 @@ YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
#endif // HAS_I422TOARGBROW_SSSE3
#endif // HAS_I444TOARGBROW_SSSE3
#ifdef HAS_I422TOARGBROW_AVX2
YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
#endif // HAS_I422TOARGBROW_AVX2

4
third_party/libyuv/source/row_common.cc поставляемый
Просмотреть файл

@ -8,11 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#include <string.h> // For memcpy and memset.
#include "third_party/libyuv/include/libyuv/basic_types.h"
#include "libyuv/basic_types.h"
#ifdef __cplusplus
namespace libyuv {

9
third_party/libyuv/source/row_mips.cc поставляемый
Просмотреть файл

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
@ -16,7 +16,8 @@ extern "C" {
#endif
// The following are available on Mips platforms:
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
#ifdef HAS_COPYROW_MIPS
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
@ -376,7 +377,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
// MIPS DSPR2 functions
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
(__mips_dsp_rev >= 2)
(__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
int width) {
__asm__ __volatile__ (

306
third_party/libyuv/source/row_neon.cc поставляемый

Разница между файлами не показана из-за своего большого размера Загрузить разницу

3323
third_party/libyuv/source/row_neon64.cc поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

2
third_party/libyuv/source/row_posix.cc поставляемый
Просмотреть файл

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {

226
third_party/libyuv/source/row_win.cc поставляемый
Просмотреть файл

@ -8,15 +8,179 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#if defined (_M_X64)
#include <emmintrin.h>
#include <tmmintrin.h> // For _mm_maddubs_epi16
#endif
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
// This module is for Visual C x86.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
// This module is for Visual C.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
#define UB 127 /* min(127,(int8)(2.018 * 64)) */
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
#define UR 0
#define VB 0
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
// Bias
#define BB UB * 128 + VB * 128
#define BG UG * 128 + VG * 128
#define BR UR * 128 + VR * 128
static const vec8 kUVToB = {
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
};
static const vec8 kUVToR = {
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
};
static const vec8 kUVToG = {
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
};
static const vec8 kVUToB = {
VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
};
static const vec8 kVUToR = {
VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
};
static const vec8 kVUToG = {
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
};
static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
// 64 bit
#if defined(_M_X64)
// Aligned destination version.
__declspec(align(16))
void I422ToARGBRow_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
while (width > 0) {
xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
xmm1 = _mm_load_si128(&xmm0);
xmm2 = _mm_load_si128(&xmm0);
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
xmm0 = _mm_adds_epi16(xmm0, xmm3);
xmm1 = _mm_adds_epi16(xmm1, xmm3);
xmm2 = _mm_adds_epi16(xmm2, xmm3);
xmm0 = _mm_srai_epi16(xmm0, 6);
xmm1 = _mm_srai_epi16(xmm1, 6);
xmm2 = _mm_srai_epi16(xmm2, 6);
xmm0 = _mm_packus_epi16(xmm0, xmm0);
xmm1 = _mm_packus_epi16(xmm1, xmm1);
xmm2 = _mm_packus_epi16(xmm2, xmm2);
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
xmm1 = _mm_load_si128(&xmm0);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
_mm_store_si128((__m128i *)dst_argb, xmm0);
_mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
y_buf += 8;
u_buf += 4;
dst_argb += 32;
width -= 8;
}
}
// Unaligned destination version.
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
__m128i xmm0, xmm1, xmm2, xmm3;
const __m128i xmm5 = _mm_set1_epi8(-1);
const __m128i xmm4 = _mm_setzero_si128();
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
while (width > 0) {
xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
xmm1 = _mm_load_si128(&xmm0);
xmm2 = _mm_load_si128(&xmm0);
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
xmm0 = _mm_adds_epi16(xmm0, xmm3);
xmm1 = _mm_adds_epi16(xmm1, xmm3);
xmm2 = _mm_adds_epi16(xmm2, xmm3);
xmm0 = _mm_srai_epi16(xmm0, 6);
xmm1 = _mm_srai_epi16(xmm1, 6);
xmm2 = _mm_srai_epi16(xmm2, 6);
xmm0 = _mm_packus_epi16(xmm0, xmm0);
xmm1 = _mm_packus_epi16(xmm1, xmm1);
xmm2 = _mm_packus_epi16(xmm2, xmm2);
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
xmm1 = _mm_load_si128(&xmm0);
xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
_mm_storeu_si128((__m128i *)dst_argb, xmm0);
_mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
y_buf += 8;
u_buf += 4;
dst_argb += 32;
width -= 8;
}
}
// 32 bit
#else // defined(_M_X64)
#ifdef HAS_ARGBTOYROW_SSSE3
@ -2030,21 +2194,6 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
}
#endif // HAS_ARGBTOYROW_SSSE3
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
#define UB 127 /* min(63,(int8)(2.018 * 64)) */
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
#define UR 0
#define VB 0
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
// Bias
#define BB UB * 128 + VB * 128
#define BG UG * 128 + VG * 128
#define BR UR * 128 + VR * 128
#ifdef HAS_I422TOARGBROW_AVX2
static const lvec8 kUVToB_AVX = {
@ -2079,10 +2228,10 @@ static const lvec16 kUVBiasR_AVX = {
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
__declspec(naked) __declspec(align(16))
void I422ToARGBRow_AVX2(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
const uint8* u_buf,
const uint8* v_buf,
uint8* dst_argb,
int width) {
__asm {
push esi
push edi
@ -2150,36 +2299,6 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
#ifdef HAS_I422TOARGBROW_SSSE3
static const vec8 kUVToB = {
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
};
static const vec8 kUVToR = {
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
};
static const vec8 kUVToG = {
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
};
static const vec8 kVUToB = {
VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
};
static const vec8 kVUToR = {
VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
};
static const vec8 kVUToG = {
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
};
static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
// Read 8 UV from 444.
@ -7276,7 +7395,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
}
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
#endif // defined(_M_X64)
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
#ifdef __cplusplus
} // extern "C"

146
third_party/libyuv/source/row_x86.asm поставляемый Normal file
Просмотреть файл

@ -0,0 +1,146 @@
;
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
;
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01020000h
%error AVX2 is supported only by yasm 1.2.0 or later.
%endif
%endif
%include "x86inc.asm"
SECTION .text
; cglobal numeric constants are parameters, gpr regs, mm regs
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
%macro YUY2TOYROW 2-3
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
%ifidn %1,YUY2
pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
psrlw m2, m2, 8
%endif
ALIGN 4
.convertloop:
mov%2 m0, [src_yuy2q]
mov%2 m1, [src_yuy2q + mmsize]
lea src_yuy2q, [src_yuy2q + mmsize * 2]
%ifidn %1,YUY2
pand m0, m0, m2 ; YUY2 even bytes are Y
pand m1, m1, m2
%else
psrlw m0, m0, 8 ; UYVY odd bytes are Y
psrlw m1, m1, 8
%endif
packuswb m0, m0, m1
%if cpuflag(AVX2)
vpermq m0, m0, 0xd8
%endif
sub pixd, mmsize
mov%2 [dst_yq], m0
lea dst_yq, [dst_yq + mmsize]
jg .convertloop
REP_RET
%endmacro
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
INIT_MMX MMX
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_XMM SSE2
YUY2TOYROW YUY2,a,
YUY2TOYROW YUY2,u,_Unaligned
YUY2TOYROW UYVY,a,
YUY2TOYROW UYVY,u,_Unaligned
INIT_YMM AVX2
YUY2TOYROW YUY2,a,
YUY2TOYROW UYVY,a,
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
%macro SplitUVRow 1-2
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
psrlw m4, m4, 8
sub dst_vq, dst_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uvq]
mov%1 m1, [src_uvq + mmsize]
lea src_uvq, [src_uvq + mmsize * 2]
psrlw m2, m0, 8 ; odd bytes
psrlw m3, m1, 8
pand m0, m0, m4 ; even bytes
pand m1, m1, m4
packuswb m0, m0, m1
packuswb m2, m2, m3
%if cpuflag(AVX2)
vpermq m0, m0, 0xd8
vpermq m2, m2, 0xd8
%endif
mov%1 [dst_uq], m0
mov%1 [dst_uq + dst_vq], m2
lea dst_uq, [dst_uq + mmsize]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
INIT_MMX MMX
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_XMM SSE2
SplitUVRow a,
SplitUVRow u,_Unaligned
INIT_YMM AVX2
SplitUVRow a,
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
; int width);
%macro MergeUVRow_ 1-2
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
sub src_vq, src_uq
ALIGN 4
.convertloop:
mov%1 m0, [src_uq]
mov%1 m1, [src_vq]
lea src_uq, [src_uq + mmsize]
punpcklbw m2, m0, m1 // first 8 UV pairs
punpckhbw m0, m0, m1 // next 8 UV pairs
%if cpuflag(AVX2)
vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
mov%1 [dst_uvq], m1
mov%1 [dst_uvq + mmsize], m2
%else
mov%1 [dst_uvq], m2
mov%1 [dst_uvq + mmsize], m0
%endif
lea dst_uvq, [dst_uvq + mmsize * 2]
sub pixd, mmsize
jg .convertloop
REP_RET
%endmacro
INIT_MMX MMX
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_XMM SSE2
MergeUVRow_ a,
MergeUVRow_ u,_Unaligned
INIT_YMM AVX2
MergeUVRow_ a,

10
third_party/libyuv/source/scale.cc поставляемый
Просмотреть файл

@ -8,15 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/scale.h"
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyPlane
#include "third_party/libyuv/include/libyuv/row.h"
#include "third_party/libyuv/include/libyuv/scale_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyPlane
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {

809
third_party/libyuv/source/scale_argb.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,809 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down a ARGB to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) =
filtering == kFilterNone ? ScaleARGBRowDown2_C :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
ScaleARGBRowDown2Box_C);
assert(dx == 65536 * 2); // Test scale factor of 2.
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
// Advance to odd row, even column.
if (filtering == kFilterBilinear) {
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
} else {
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
}
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
ScaleARGBRowDown2Box_SSE2);
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
ScaleARGBRowDown2_NEON;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// ScaleARGB ARGB, 1/4
// This is an optimized version for scaling down a ARGB to 1/4 of
// its original size.
static void ScaleARGBDown4Box(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
int row_stride = src_stride * (dy >> 16);
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
// Advance to odd row, even column.
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
assert(dx == 65536 * 4); // Test scale factor of 4.
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
}
#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
row + kRowSize, dst_width * 2);
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
free_aligned_buffer_64(row);
}
// ScaleARGB ARGB Even
// This is an optimized version for scaling down a ARGB to even
// multiple of its original size.
static void ScaleARGBDownEven(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
int col_step = dx >> 16;
int row_stride = (dy >> 16) * src_stride;
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
int src_step, uint8* dst_argb, int dst_width) =
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
assert(IS_ALIGNED(src_width, 2));
assert(IS_ALIGNED(src_height, 2));
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
ScaleARGBRowDownEven_SSE2;
}
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
IS_ALIGNED(src_argb, 4)) {
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
ScaleARGBRowDownEven_NEON;
}
#endif
if (filtering == kFilterLinear) {
src_stride = 0;
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
src_argb += row_stride;
dst_argb += dst_stride;
}
}
// Scale ARGB down with bilinear interpolation.
static void ScaleARGBBilinearDown(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
int64 xlast = x + (int64)(dst_width - 1) * dx;
int64 xl = (dx >= 0) ? x : xlast;
int64 xr = (dx >= 0) ? xlast : x;
int clip_src_width;
xl = (xl >> 16) & ~3; // Left edge aligned.
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
if (xr > src_width) {
xr = src_width;
}
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
src_argb += xl * 4;
x -= (int)(xl << 16);
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(clip_src_width, 32)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(clip_src_width, 16)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
if (IS_ALIGNED(clip_src_width, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
}
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
// Allocate a row of ARGB.
{
align_buffer_64(row, clip_src_width * 4);
const int max_y = (src_height - 1) << 16;
if (y > max_y) {
y = max_y;
}
for (j = 0; j < dst_height; ++j) {
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
if (filtering == kFilterLinear) {
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(row, src, src_stride, clip_src_width, yf);
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
}
dst_argb += dst_stride;
y += dy;
if (y > max_y) {
y = max_y;
}
}
free_aligned_buffer_64(row);
}
}
// Scale ARGB up with bilinear interpolation.
static void ScaleARGBBilinearUp(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
const int max_y = (src_height - 1) << 16;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
#endif
if (src_width >= 32768) {
ScaleARGBFilterCols = filtering ?
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
}
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
if (y > max_y) {
y = max_y;
}
{
int yi = y >> 16;
const uint8* src = src_argb + yi * src_stride;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
if (src_height > 1) {
src += src_stride;
}
ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
src += src_stride;
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
if (y > max_y) {
y = max_y;
yi = y >> 16;
src = src_argb + yi * src_stride;
}
if (yi != lasty) {
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src += src_stride;
}
}
if (filtering == kFilterLinear) {
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
}
dst_argb += dst_stride;
y += dy;
}
free_aligned_buffer_64(row);
}
}
#ifdef YUVSCALEUP
// Scale YUV to ARGB up with bilinear interpolation.
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride_y,
int src_stride_u,
int src_stride_v,
int dst_stride_argb,
const uint8* src_y,
const uint8* src_u,
const uint8* src_v,
uint8* dst_argb,
int x, int dx, int y, int dy,
enum FilterMode filtering) {
int j;
void (*I422ToARGBRow)(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) = I422ToARGBRow_C;
#if defined(HAS_I422TOARGBROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
I422ToARGBRow = I422ToARGBRow_SSSE3;
}
}
}
#endif
#if defined(HAS_I422TOARGBROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
if (IS_ALIGNED(src_width, 16)) {
I422ToARGBRow = I422ToARGBRow_AVX2;
}
}
#endif
#if defined(HAS_I422TOARGBROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
I422ToARGBRow = I422ToARGBRow_Any_NEON;
if (IS_ALIGNED(src_width, 8)) {
I422ToARGBRow = I422ToARGBRow_NEON;
}
}
#endif
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
}
#endif
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
#if defined(HAS_INTERPOLATEROW_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSE2;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSE2;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSE2;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_SSSE3;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
InterpolateRow = InterpolateRow_SSSE3;
}
}
}
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
InterpolateRow = InterpolateRow_Any_AVX2;
if (IS_ALIGNED(dst_width, 8)) {
InterpolateRow = InterpolateRow_AVX2;
}
}
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
InterpolateRow = InterpolateRow_Any_NEON;
if (IS_ALIGNED(dst_width, 4)) {
InterpolateRow = InterpolateRow_NEON;
}
}
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
InterpolateRow = InterpolateRow_MIPS_DSPR2;
}
#endif
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
if (src_width >= 32768) {
ScaleARGBFilterCols = filtering ?
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
}
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
}
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
}
#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
const int max_y = (src_height - 1) << 16;
if (y > max_y) {
y = max_y;
}
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
int yi = y >> 16;
int uv_yi = yi >> kYShift;
const uint8* src_row_y = src_y + yi * src_stride_y;
const uint8* src_row_u = src_u + uv_yi * src_stride_u;
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
// Allocate 2 rows of ARGB.
const int kRowSize = (dst_width * 4 + 15) & ~15;
align_buffer_64(row, kRowSize * 2);
// Allocate 1 row of ARGB for source conversion.
align_buffer_64(argb_row, src_width * 4);
uint8* rowptr = row;
int rowstride = kRowSize;
int lasty = yi;
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
if (src_height > 1) {
src_row_y += src_stride_y;
if (yi & 1) {
src_row_u += src_stride_u;
src_row_v += src_stride_v;
}
}
ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
if (src_height > 2) {
src_row_y += src_stride_y;
if (!(yi & 1)) {
src_row_u += src_stride_u;
src_row_v += src_stride_v;
}
}
for (j = 0; j < dst_height; ++j) {
yi = y >> 16;
if (yi != lasty) {
if (y > max_y) {
y = max_y;
yi = y >> 16;
uv_yi = yi >> kYShift;
src_row_y = src_y + yi * src_stride_y;
src_row_u = src_u + uv_yi * src_stride_u;
src_row_v = src_v + uv_yi * src_stride_v;
}
if (yi != lasty) {
// TODO(fbarchard): Convert the clipped region of row.
I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
rowptr += rowstride;
rowstride = -rowstride;
lasty = yi;
src_row_y += src_stride_y;
if (yi & 1) {
src_row_u += src_stride_u;
src_row_v += src_stride_v;
}
}
}
if (filtering == kFilterLinear) {
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
} else {
int yf = (y >> 8) & 255;
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
}
dst_argb += dst_stride_argb;
y += dy;
}
free_aligned_buffer_64(row);
free_aligned_buffer_64(row_argb);
}
#endif
// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx is the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
static void ScaleARGBSimple(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_argb, uint8* dst_argb,
int x, int dx, int y, int dy) {
int j;
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
int dst_width, int x, int dx) =
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
#if defined(HAS_SCALEARGBCOLS_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
ScaleARGBCols = ScaleARGBCols_SSE2;
}
#endif
if (src_width * 2 == dst_width && x < 0x8000) {
ScaleARGBCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
}
#endif
}
for (j = 0; j < dst_height; ++j) {
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
dst_width, x, dx);
dst_argb += dst_stride;
y += dy;
}
}
// ScaleARGB a ARGB.
// This function in turn calls a scaling function
// suitable for handling the desired resolutions.
static void ScaleARGB(const uint8* src, int src_stride,
int src_width, int src_height,
uint8* dst, int dst_stride,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
int dx = 0;
int dy = 0;
// ARGB does not support box filter yet, but allow the user to pass it.
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
dst_width, dst_height,
filtering);
// Negative src_height means invert the image.
if (src_height < 0) {
src_height = -src_height;
src = src + (src_height - 1) * src_stride;
src_stride = -src_stride;
}
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
if (clip_x) {
int64 clipf = (int64)(clip_x) * dx;
x += (clipf & 0xffff);
src += (clipf >> 16) * 4;
dst += clip_x * 4;
}
if (clip_y) {
int64 clipf = (int64)(clip_y) * dy;
y += (clipf & 0xffff);
src += (clipf >> 16) * src_stride;
dst += clip_y * dst_stride;
}
// Special case for integer step values.
if (((dx | dy) & 0xffff) == 0) {
if (!dx || !dy) { // 1 pixel wide and/or tall.
filtering = kFilterNone;
} else {
// Optimized even scale down. ie 2, 4, 6, 8, 10x.
if (!(dx & 0x10000) && !(dy & 0x10000)) {
if (dx == 0x20000) {
// Optimized 1/2 downsample.
ScaleARGBDown2(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
if (dx == 0x40000 && filtering == kFilterBox) {
// Optimized 1/4 box downsample.
ScaleARGBDown4Box(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy);
return;
}
ScaleARGBDownEven(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
// Optimized odd scale down. ie 3, 5, 7, 9x.
if ((dx & 0x10000) && (dy & 0x10000)) {
filtering = kFilterNone;
if (dx == 0x10000 && dy == 0x10000) {
// Straight copy.
ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
dst, dst_stride, clip_width, clip_height);
return;
}
}
}
}
if (dx == 0x10000 && (x & 0xffff) == 0) {
// Arbitrary scale vertically, but unscaled vertically.
ScalePlaneVertical(src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, y, dy, 4, filtering);
return;
}
if (filtering && dy < 65536) {
ScaleARGBBilinearUp(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
if (filtering) {
ScaleARGBBilinearDown(src_width, src_height,
clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy, filtering);
return;
}
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
src_stride, dst_stride, src, dst,
x, dx, y, dy);
}
LIBYUV_API
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
int clip_x, int clip_y, int clip_width, int clip_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 ||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
clip_x < 0 || clip_y < 0 ||
(clip_x + clip_width) > dst_width ||
(clip_y + clip_height) > dst_height) {
return -1;
}
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
dst_argb, dst_stride_argb, dst_width, dst_height,
clip_x, clip_y, clip_width, clip_height, filtering);
return 0;
}
// Scale an ARGB image.
LIBYUV_API
int ARGBScale(const uint8* src_argb, int src_stride_argb,
int src_width, int src_height,
uint8* dst_argb, int dst_stride_argb,
int dst_width, int dst_height,
enum FilterMode filtering) {
if (!src_argb || src_width == 0 || src_height == 0 ||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
return -1;
}
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
dst_argb, dst_stride_argb, dst_width, dst_height,
0, 0, dst_width, dst_height, filtering);
return 0;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

10
third_party/libyuv/source/scale_common.cc поставляемый
Просмотреть файл

@ -8,15 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/scale.h"
#include "libyuv/scale.h"
#include <assert.h>
#include <string.h>
#include "third_party/libyuv/include/libyuv/cpu_id.h"
#include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyARGB
#include "third_party/libyuv/include/libyuv/row.h"
#include "third_party/libyuv/include/libyuv/scale_row.h"
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
namespace libyuv {

7
third_party/libyuv/source/scale_mips.cc поставляемый
Просмотреть файл

@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/basic_types.h"
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/basic_types.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
@ -18,7 +18,8 @@ extern "C" {
// This module is for GCC MIPS DSPR2
#if !defined(LIBYUV_DISABLE_MIPS) && \
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
(_MIPS_SIM == _MIPS_SIM_ABI32)
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst, int dst_width) {

128
third_party/libyuv/source/scale_neon.cc поставляемый
Просмотреть файл

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {
@ -28,8 +28,10 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
"vld2.8 {q0, q1}, [%0]! \n"
"subs %2, %2, #16 \n" // 16 processed per loop
MEMACCESS(1)
"vst1.8 {q1}, [%1]! \n" // store odd pixels
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -48,7 +50,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
MEMACCESS(1)
"vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
"subs %3, %3, #16 \n" // 16 processed per loop
"vpaddl.u8 q0, q0 \n" // row 1 add adjacent
@ -57,6 +61,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"vpadal.u8 q1, q3 \n"
"vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
"vrshrn.u16 d1, q1, #2 \n"
MEMACCESS(2)
"vst1.8 {q0}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -73,8 +78,10 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
"subs %2, %2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"vst1.8 {d2}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -87,16 +94,20 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
"add r4, %0, %3 \n"
"add r5, r4, %3 \n"
"add %3, r5, %3 \n"
const uint8* src_ptr1 = src_ptr + src_stride;
const uint8* src_ptr2 = src_ptr + src_stride * 2;
const uint8* src_ptr3 = src_ptr + src_stride * 3;
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {q0}, [%0]! \n" // load up 16x4
"vld1.8 {q1}, [r4]! \n"
"vld1.8 {q2}, [r5]! \n"
"vld1.8 {q3}, [%3]! \n"
MEMACCESS(3)
"vld1.8 {q1}, [%3]! \n"
MEMACCESS(4)
"vld1.8 {q2}, [%4]! \n"
MEMACCESS(5)
"vld1.8 {q3}, [%5]! \n"
"subs %2, %2, #4 \n"
"vpaddl.u8 q0, q0 \n"
"vpadal.u8 q0, q1 \n"
@ -105,13 +116,17 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"vpaddl.u16 q0, q0 \n"
"vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
"vmovn.u16 d0, q0 \n"
MEMACCESS(1)
"vst1.32 {d0[0]}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width) // %2
: "r"(src_stride) // %3
: "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(src_ptr1), // %3
"+r"(src_ptr2), // %4
"+r"(src_ptr3) // %5
:
: "q0", "q1", "q2", "q3", "memory", "cc"
);
}
@ -124,9 +139,11 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
asm volatile (
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
"subs %2, %2, #24 \n"
"vmov d2, d3 \n" // order d0, d1, d2
MEMACCESS(1)
"vst3.8 {d0, d1, d2}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -145,7 +162,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
MEMACCESS(3)
"vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
"subs %2, %2, #24 \n"
@ -182,6 +201,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
"vmlal.u8 q8, d3, d24 \n"
"vqrshrn.u16 d2, q8, #2 \n"
MEMACCESS(1)
"vst3.8 {d0, d1, d2}, [%1]! \n"
"bgt 1b \n"
@ -202,7 +222,9 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
MEMACCESS(3)
"vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
"subs %2, %2, #24 \n"
// average src line 0 with src line 1
@ -222,6 +244,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
"vmlal.u8 q3, d3, d24 \n"
"vqrshrn.u16 d2, q3, #2 \n"
MEMACCESS(1)
"vst3.8 {d0, d1, d2}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -250,14 +273,18 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
MEMACCESS(3)
"vld1.8 {q3}, [%3] \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
"subs %2, %2, #12 \n"
"vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
"vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
MEMACCESS(1)
"vst1.8 {d4}, [%1]! \n"
MEMACCESS(1)
"vst1.32 {d5[0]}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -272,11 +299,15 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
const uint8* src_ptr1 = src_ptr + src_stride * 2;
asm volatile (
"vld1.16 {q13}, [%4] \n"
"vld1.8 {q14}, [%5] \n"
"vld1.8 {q15}, [%6] \n"
"add r4, %0, %3, lsl #1 \n"
MEMACCESS(5)
"vld1.16 {q13}, [%5] \n"
MEMACCESS(6)
"vld1.8 {q14}, [%6] \n"
MEMACCESS(7)
"vld1.8 {q15}, [%7] \n"
"add %3, %0 \n"
".p2align 2 \n"
"1: \n"
@ -285,9 +316,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
// d1 = 10 50 11 51 12 52 13 53
// d2 = 20 60 21 61 22 62 23 63
// d3 = 30 70 31 71 32 72 33 73
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n"
MEMACCESS(3)
"vld4.8 {d4, d5, d6, d7}, [%3]! \n"
"vld4.8 {d16, d17, d18, d19}, [r4]! \n"
MEMACCESS(4)
"vld4.8 {d16, d17, d18, d19}, [%4]! \n"
"subs %2, %2, #12 \n"
// Shuffle the input data around to get align the data
@ -364,18 +398,20 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
"vtbl.u8 d3, {d0, d1, d2}, d28 \n"
"vtbl.u8 d4, {d0, d1, d2}, d29 \n"
MEMACCESS(1)
"vst1.8 {d3}, [%1]! \n"
MEMACCESS(1)
"vst1.32 {d4[0]}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
"+r"(dst_ptr), // %1
"+r"(dst_width), // %2
"+r"(src_stride) // %3
: "r"(&kMult38_Div6), // %4
"r"(&kShuf38_2), // %5
"r"(&kMult38_Div9) // %6
: "r4", "q0", "q1", "q2", "q3", "q8", "q9",
"q13", "q14", "q15", "memory", "cc"
"+r"(src_stride), // %3
"+r"(src_ptr1) // %4
: "r"(&kMult38_Div6), // %5
"r"(&kShuf38_2), // %6
"r"(&kMult38_Div9) // %7
: "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
);
}
@ -384,7 +420,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) {
asm volatile (
MEMACCESS(4)
"vld1.16 {q13}, [%4] \n"
MEMACCESS(5)
"vld1.8 {q14}, [%5] \n"
"add %3, %0 \n"
".p2align 2 \n"
@ -394,7 +432,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
// d1 = 10 50 11 51 12 52 13 53
// d2 = 20 60 21 61 22 62 23 63
// d3 = 30 70 31 71 32 72 33 73
MEMACCESS(0)
"vld4.8 {d0, d1, d2, d3}, [%0]! \n"
MEMACCESS(3)
"vld4.8 {d4, d5, d6, d7}, [%3]! \n"
"subs %2, %2, #12 \n"
@ -461,7 +501,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
"vtbl.u8 d3, {d0, d1, d2}, d28 \n"
"vtbl.u8 d4, {d0, d1, d2}, d29 \n"
MEMACCESS(1)
"vst1.8 {d3}, [%1]! \n"
MEMACCESS(1)
"vst1.32 {d4[0]}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -494,7 +536,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"vdup.8 d4, %4 \n"
// General purpose row blend.
"1: \n"
MEMACCESS(1)
"vld1.8 {q0}, [%1]! \n"
MEMACCESS(2)
"vld1.8 {q1}, [%2]! \n"
"subs %3, %3, #16 \n"
"vmull.u8 q13, d0, d4 \n"
@ -503,50 +547,63 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
"vmlal.u8 q14, d3, d5 \n"
"vrshrn.u16 d0, q13, #8 \n"
"vrshrn.u16 d1, q14, #8 \n"
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n"
"bgt 1b \n"
"b 99f \n"
// Blend 25 / 75.
"25: \n"
MEMACCESS(1)
"vld1.8 {q0}, [%1]! \n"
MEMACCESS(2)
"vld1.8 {q1}, [%2]! \n"
"subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n"
"vrhadd.u8 q0, q1 \n"
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n"
"bgt 25b \n"
"b 99f \n"
// Blend 50 / 50.
"50: \n"
MEMACCESS(1)
"vld1.8 {q0}, [%1]! \n"
MEMACCESS(2)
"vld1.8 {q1}, [%2]! \n"
"subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n"
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n"
"bgt 50b \n"
"b 99f \n"
// Blend 75 / 25.
"75: \n"
MEMACCESS(1)
"vld1.8 {q1}, [%1]! \n"
MEMACCESS(2)
"vld1.8 {q0}, [%2]! \n"
"subs %3, %3, #16 \n"
"vrhadd.u8 q0, q1 \n"
"vrhadd.u8 q0, q1 \n"
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n"
"bgt 75b \n"
"b 99f \n"
// Blend 100 / 0 - Copy row unchanged.
"100: \n"
MEMACCESS(1)
"vld1.8 {q0}, [%1]! \n"
"subs %3, %3, #16 \n"
MEMACCESS(0)
"vst1.8 {q0}, [%0]! \n"
"bgt 100b \n"
"99: \n"
MEMACCESS(0)
"vst1.8 {d1[7]}, [%0] \n"
: "+r"(dst_ptr), // %0
"+r"(src_ptr), // %1
@ -564,10 +621,14 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
".p2align 2 \n"
"1: \n"
// load even pixels into q0, odd into q1
MEMACCESS(0)
"vld2.32 {q0, q1}, [%0]! \n"
MEMACCESS(0)
"vld2.32 {q2, q3}, [%0]! \n"
"subs %2, %2, #8 \n" // 8 processed per loop
MEMACCESS(1)
"vst1.8 {q1}, [%1]! \n" // store odd pixels
MEMACCESS(1)
"vst1.8 {q3}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -585,14 +646,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
MEMACCESS(0)
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
MEMACCESS(1)
"vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
MEMACCESS(1)
"vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
"vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
"vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
@ -602,6 +667,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
"vrshrn.u16 d1, q1, #2 \n"
"vrshrn.u16 d2, q2, #2 \n"
"vrshrn.u16 d3, q3, #2 \n"
MEMACCESS(2)
"vst4.8 {d0, d1, d2, d3}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_ptr), // %0
@ -621,11 +687,16 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"mov r12, %3, lsl #2 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.32 {d0[0]}, [%0], r12 \n"
MEMACCESS(0)
"vld1.32 {d0[1]}, [%0], r12 \n"
MEMACCESS(0)
"vld1.32 {d1[0]}, [%0], r12 \n"
MEMACCESS(0)
"vld1.32 {d1[1]}, [%0], r12 \n"
"subs %2, %2, #4 \n" // 4 pixels per loop.
MEMACCESS(1)
"vst1.8 {q0}, [%1]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0
@ -646,13 +717,21 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"add %1, %1, %0 \n"
".p2align 2 \n"
"1: \n"
MEMACCESS(0)
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
MEMACCESS(1)
"vld1.8 {d1}, [%1], r12 \n"
MEMACCESS(0)
"vld1.8 {d2}, [%0], r12 \n"
MEMACCESS(1)
"vld1.8 {d3}, [%1], r12 \n"
MEMACCESS(0)
"vld1.8 {d4}, [%0], r12 \n"
MEMACCESS(1)
"vld1.8 {d5}, [%1], r12 \n"
MEMACCESS(0)
"vld1.8 {d6}, [%0], r12 \n"
MEMACCESS(1)
"vld1.8 {d7}, [%1], r12 \n"
"vaddl.u8 q0, d0, d1 \n"
"vaddl.u8 q1, d2, d3 \n"
@ -665,6 +744,7 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
"vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
"vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
"subs %3, %3, #4 \n" // 4 pixels per loop.
MEMACCESS(2)
"vst1.8 {q0}, [%2]! \n"
"bgt 1b \n"
: "+r"(src_argb), // %0

2
third_party/libyuv/source/scale_posix.cc поставляемый
Просмотреть файл

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {

2
third_party/libyuv/source/scale_win.cc поставляемый
Просмотреть файл

@ -8,7 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "third_party/libyuv/include/libyuv/row.h"
#include "libyuv/row.h"
#ifdef __cplusplus
namespace libyuv {

64
third_party/libyuv/source/video_common.cc поставляемый Normal file
Просмотреть файл

@ -0,0 +1,64 @@
/*
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "libyuv/video_common.h"
#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
struct FourCCAliasEntry {
uint32 alias;
uint32 canonical;
};
static const struct FourCCAliasEntry kFourCCAliases[] = {
{FOURCC_IYUV, FOURCC_I420},
{FOURCC_YU16, FOURCC_I422},
{FOURCC_YU24, FOURCC_I444},
{FOURCC_YUYV, FOURCC_YUY2},
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
{FOURCC_HDYC, FOURCC_UYVY},
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
{FOURCC_DMB1, FOURCC_MJPG},
{FOURCC_BA81, FOURCC_BGGR},
{FOURCC_RGB3, FOURCC_RAW },
{FOURCC_BGR3, FOURCC_24BG},
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
{FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
};
// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
LIBYUV_API
uint32 CanonicalFourCC(uint32 fourcc) {
int i;
for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
if (kFourCCAliases[i].alias == fourcc) {
return kFourCCAliases[i].canonical;
}
}
// Not an alias, so return it as-is.
return fourcc;
}
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif

1136
third_party/libyuv/source/x86inc.asm поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу