libyuv: update to r1041
Change-Id: I38dad398844ee424a7a92a745ab703645018d02b
This commit is contained in:
Родитель
ccddd5d0f9
Коммит
3a7d467da9
18
examples.mk
18
examples.mk
|
@ -9,8 +9,12 @@
|
|||
##
|
||||
|
||||
LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
|
||||
third_party/libyuv/include/libyuv/convert.h \
|
||||
third_party/libyuv/include/libyuv/convert_argb.h \
|
||||
third_party/libyuv/include/libyuv/convert_from.h \
|
||||
third_party/libyuv/include/libyuv/cpu_id.h \
|
||||
third_party/libyuv/include/libyuv/planar_functions.h \
|
||||
third_party/libyuv/include/libyuv/rotate.h \
|
||||
third_party/libyuv/include/libyuv/row.h \
|
||||
third_party/libyuv/include/libyuv/scale.h \
|
||||
third_party/libyuv/include/libyuv/scale_row.h \
|
||||
|
@ -20,14 +24,15 @@ LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \
|
|||
third_party/libyuv/source/row_common.cc \
|
||||
third_party/libyuv/source/row_mips.cc \
|
||||
third_party/libyuv/source/row_neon.cc \
|
||||
third_party/libyuv/source/row_neon64.cc \
|
||||
third_party/libyuv/source/row_posix.cc \
|
||||
third_party/libyuv/source/row_win.cc \
|
||||
third_party/libyuv/source/scale.cc \
|
||||
third_party/libyuv/source/scale.cc \
|
||||
third_party/libyuv/source/scale_common.cc \
|
||||
third_party/libyuv/source/scale_mips.cc \
|
||||
third_party/libyuv/source/scale_neon.cc \
|
||||
third_party/libyuv/source/scale_posix.cc \
|
||||
third_party/libyuv/source/scale_win.cc
|
||||
third_party/libyuv/source/scale_win.cc \
|
||||
|
||||
LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
|
||||
third_party/libwebm/mkvmuxerutil.cpp \
|
||||
|
@ -210,17 +215,18 @@ endif
|
|||
# from an installed tree or a version controlled tree. Determine
|
||||
# the proper paths.
|
||||
ifeq ($(HAVE_ALT_TREE_LAYOUT),yes)
|
||||
LIB_PATH := $(SRC_PATH_BARE)/../lib
|
||||
INC_PATH := $(SRC_PATH_BARE)/../include
|
||||
LIB_PATH-yes := $(SRC_PATH_BARE)/../lib
|
||||
INC_PATH-yes := $(SRC_PATH_BARE)/../include
|
||||
else
|
||||
LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.)
|
||||
INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8
|
||||
INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8
|
||||
INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9
|
||||
INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9
|
||||
LIB_PATH := $(call enabled,LIB_PATH)
|
||||
INC_PATH := $(call enabled,INC_PATH)
|
||||
endif
|
||||
INC_PATH-$(CONFIG_LIBYUV) += $(SRC_PATH_BARE)/third_party/libyuv/include
|
||||
LIB_PATH := $(call enabled,LIB_PATH)
|
||||
INC_PATH := $(call enabled,INC_PATH)
|
||||
INTERNAL_CFLAGS = $(addprefix -I,$(INC_PATH))
|
||||
INTERNAL_LDFLAGS += $(addprefix -L,$(LIB_PATH))
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1005
|
||||
Version: 1041
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
@ -13,5 +13,4 @@ which down-samples the original input video (f.g. 1280x720) a number of times
|
|||
in order to encode multiple resolution bit streams.
|
||||
|
||||
Local Modifications:
|
||||
Modified the original scaler code minimally with include file changes to fit
|
||||
in our current build system.
|
||||
None.
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_COMPARE_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_COMPARE_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Compute a hash for specified memory. Seed of 5381 recommended.
|
||||
LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
|
||||
|
||||
// Sum Square Error - used to compute Mean Square Error or PSNR.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a,
|
||||
const uint8* src_b, int count);
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
|
||||
static const int kMaxPsnr = 128;
|
||||
|
||||
LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_COMPARE_H_ NOLINT
|
|
@ -0,0 +1,254 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_CONVERT_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_CONVERT_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
// TODO(fbarchard): Remove the following headers includes.
|
||||
#include "libyuv/convert_from.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Convert I444 to I420.
|
||||
LIBYUV_API
|
||||
int I444ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert I422 to I420.
|
||||
LIBYUV_API
|
||||
int I422ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert I411 to I420.
|
||||
LIBYUV_API
|
||||
int I411ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Copy I420 to I420.
|
||||
#define I420ToI420 I420Copy
|
||||
LIBYUV_API
|
||||
int I420Copy(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert I400 (grey) to I420.
|
||||
LIBYUV_API
|
||||
int I400ToI420(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert NV12 to I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert NV21 to I420.
|
||||
LIBYUV_API
|
||||
int NV21ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert YUY2 to I420.
|
||||
LIBYUV_API
|
||||
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert UYVY to I420.
|
||||
LIBYUV_API
|
||||
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert M420 to I420.
|
||||
LIBYUV_API
|
||||
int M420ToI420(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert Q420 to I420.
|
||||
LIBYUV_API
|
||||
int Q420ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// ARGB little endian (bgra in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGB little endian (bgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RAWToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_width/height provided by capture.
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height);
|
||||
#endif
|
||||
|
||||
// Note Bayer formats (BGGR) To I420 are in format_conversion.h
|
||||
|
||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||
// "src_size" is needed to parse MJPG.
|
||||
// "dst_stride_y" number of bytes in a row of the dst_y plane.
|
||||
// Normally this would be the same as dst_width, with recommended alignment
|
||||
// to 16 bytes for better efficiency.
|
||||
// If rotation of 90 or 270 is used, stride is affected. The caller should
|
||||
// allocate the I420 buffer according to rotation.
|
||||
// "dst_stride_u" number of bytes in a row of the dst_u plane.
|
||||
// Normally this would be the same as (dst_width + 1) / 2, with
|
||||
// recommended alignment to 16 bytes for better efficiency.
|
||||
// If rotation of 90 or 270 is used, stride is affected.
|
||||
// "crop_x" and "crop_y" are starting position for cropping.
|
||||
// To center, crop_x = (src_width - dst_width) / 2
|
||||
// crop_y = (src_height - dst_height) / 2
|
||||
// "src_width" / "src_height" is size of src_frame in pixels.
|
||||
// "src_height" can be negative indicating a vertically flipped image source.
|
||||
// "crop_width" / "crop_height" is the size to crop the src to.
|
||||
// Must be less than or equal to src_width/src_height
|
||||
// Cropping parameters are pre-rotation.
|
||||
// "rotation" can be 0, 90, 180 or 270.
|
||||
// "format" is a fourcc. ie 'I420', 'YUY2'
|
||||
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_CONVERT_H_ NOLINT
|
|
@ -0,0 +1,225 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_CONVERT_ARGB_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
// TODO(fbarchard): Remove the following headers includes
|
||||
#include "libyuv/convert_from.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
// TODO(fbarchard): This set of functions should exactly match convert.h
|
||||
// Add missing Q420.
|
||||
// TODO(fbarchard): Add tests. Create random content of right size and convert
|
||||
// with C vs Opt and or to I420 and compare.
|
||||
// TODO(fbarchard): Some of these functions lack parameter setting.
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Alias.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I420 to ARGB.
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I444 to ARGB.
|
||||
LIBYUV_API
|
||||
int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I411 to ARGB.
|
||||
LIBYUV_API
|
||||
int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert I400 (grey) to ARGB.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Alias.
|
||||
#define YToARGB I400ToARGB_Reference
|
||||
|
||||
// Convert I400 to ARGB. Reverse of ARGBToI400.
|
||||
LIBYUV_API
|
||||
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert NV12 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert NV21 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert M420 to ARGB.
|
||||
LIBYUV_API
|
||||
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// TODO(fbarchard): Convert Q420 to ARGB.
|
||||
// LIBYUV_API
|
||||
// int Q420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
// const uint8* src_yuy2, int src_stride_yuy2,
|
||||
// uint8* dst_argb, int dst_stride_argb,
|
||||
// int width, int height);
|
||||
|
||||
// Convert YUY2 to ARGB.
|
||||
LIBYUV_API
|
||||
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert UYVY to ARGB.
|
||||
LIBYUV_API
|
||||
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Deprecated function name.
|
||||
#define BG24ToARGB RGB24ToARGB
|
||||
|
||||
// RGB little endian (bgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_width/height provided by capture
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
#endif
|
||||
|
||||
// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
|
||||
|
||||
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
|
||||
// "src_size" is needed to parse MJPG.
|
||||
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
|
||||
// Normally this would be the same as dst_width, with recommended alignment
|
||||
// to 16 bytes for better efficiency.
|
||||
// If rotation of 90 or 270 is used, stride is affected. The caller should
|
||||
// allocate the I420 buffer according to rotation.
|
||||
// "dst_stride_u" number of bytes in a row of the dst_u plane.
|
||||
// Normally this would be the same as (dst_width + 1) / 2, with
|
||||
// recommended alignment to 16 bytes for better efficiency.
|
||||
// If rotation of 90 or 270 is used, stride is affected.
|
||||
// "crop_x" and "crop_y" are starting position for cropping.
|
||||
// To center, crop_x = (src_width - dst_width) / 2
|
||||
// crop_y = (src_height - dst_height) / 2
|
||||
// "src_width" / "src_height" is size of src_frame in pixels.
|
||||
// "src_height" can be negative indicating a vertically flipped image source.
|
||||
// "crop_width" / "crop_height" is the size to crop the src to.
|
||||
// Must be less than or equal to src_width/src_height
|
||||
// Cropping parameters are pre-rotation.
|
||||
// "rotation" can be 0, 90, 180 or 270.
|
||||
// "format" is a fourcc. ie 'I420', 'YUY2'
|
||||
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ NOLINT
|
|
@ -0,0 +1,173 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_CONVERT_FROM_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// See Also convert.h for conversions from formats to I420.
|
||||
|
||||
// I420Copy in convert to I420ToI420.
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI422(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI411(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
|
||||
// TODO(fbarchard): I420ToM420
|
||||
// TODO(fbarchard): I420ToQ420
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV12(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV21(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
// Note Bayer formats (BGGR) To I420 are in format_conversion.h.
|
||||
|
||||
// Convert I420 to specified format.
|
||||
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
|
||||
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
|
||||
LIBYUV_API
|
||||
int ConvertFromI420(const uint8* y, int y_stride,
|
||||
const uint8* u, int u_stride,
|
||||
const uint8* v, int v_stride,
|
||||
uint8* dst_sample, int dst_sample_stride,
|
||||
int width, int height,
|
||||
uint32 format);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ NOLINT
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To BGRA.
|
||||
LIBYUV_API
|
||||
int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To ABGR.
|
||||
LIBYUV_API
|
||||
int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RGBA.
|
||||
LIBYUV_API
|
||||
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
LIBYUV_API
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb, int dst_stride_rgb,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To RGB565.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To ARGB1555.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb1555, int dst_stride_argb1555,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To ARGB4444.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb4444, int dst_stride_argb4444,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I444.
|
||||
LIBYUV_API
|
||||
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I422.
|
||||
LIBYUV_API
|
||||
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I420. (also in convert.h)
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB to J420. (JPeg full range I420).
|
||||
LIBYUV_API
|
||||
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I411.
|
||||
LIBYUV_API
|
||||
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB to J400. (JPeg full range).
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB to I400.
|
||||
LIBYUV_API
|
||||
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To NV12.
|
||||
LIBYUV_API
|
||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To YUY2.
|
||||
LIBYUV_API
|
||||
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To UYVY.
|
||||
LIBYUV_API
|
||||
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ NOLINT
|
|
@ -11,7 +11,7 @@
|
|||
#ifndef INCLUDE_LIBYUV_CPU_ID_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_CPU_ID_H_
|
||||
|
||||
#include "basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,168 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Convert Bayer RGB formats to I420.
|
||||
LIBYUV_API
|
||||
int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Temporary API mapper.
|
||||
#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
|
||||
BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
|
||||
|
||||
LIBYUV_API
|
||||
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
uint32 src_fourcc_bayer);
|
||||
|
||||
// Convert I420 to Bayer RGB formats.
|
||||
LIBYUV_API
|
||||
int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
|
||||
// Temporary API mapper.
|
||||
#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
|
||||
I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height,
|
||||
uint32 dst_fourcc_bayer);
|
||||
|
||||
// Convert Bayer RGB formats to ARGB.
|
||||
LIBYUV_API
|
||||
int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
|
||||
// Temporary API mapper.
|
||||
#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
|
||||
|
||||
LIBYUV_API
|
||||
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height,
|
||||
uint32 src_fourcc_bayer);
|
||||
|
||||
// Converts ARGB to Bayer RGB formats.
|
||||
LIBYUV_API
|
||||
int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height);
|
||||
|
||||
// Temporary API mapper.
|
||||
#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(b, bs, a, as, w, h, f)
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height,
|
||||
uint32 dst_fourcc_bayer);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_FORMATCONVERSION_H_ NOLINT
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_MJPEG_DECODER_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
// NOTE: For a simplified public API use convert.h MJPGToI420().
|
||||
|
||||
struct jpeg_common_struct;
|
||||
struct jpeg_decompress_struct;
|
||||
struct jpeg_source_mgr;
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
static const uint32 kUnknownDataSize = 0xFFFFFFFF;
|
||||
|
||||
enum JpegSubsamplingType {
|
||||
kJpegYuv420,
|
||||
kJpegYuv422,
|
||||
kJpegYuv411,
|
||||
kJpegYuv444,
|
||||
kJpegYuv400,
|
||||
kJpegUnknown
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
const uint8* data;
|
||||
int len;
|
||||
};
|
||||
|
||||
struct BufferVector {
|
||||
Buffer* buffers;
|
||||
int len;
|
||||
int pos;
|
||||
};
|
||||
|
||||
struct SetJmpErrorMgr;
|
||||
|
||||
// MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are
|
||||
// simply independent JPEG images with a fixed huffman table (which is omitted).
|
||||
// It is rarely used in video transmission, but is common as a camera capture
|
||||
// format, especially in Logitech devices. This class implements a decoder for
|
||||
// MJPEG frames.
|
||||
//
|
||||
// See http://tools.ietf.org/html/rfc2435
|
||||
class LIBYUV_API MJpegDecoder {
|
||||
public:
|
||||
typedef void (*CallbackFunction)(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows);
|
||||
|
||||
static const int kColorSpaceUnknown;
|
||||
static const int kColorSpaceGrayscale;
|
||||
static const int kColorSpaceRgb;
|
||||
static const int kColorSpaceYCbCr;
|
||||
static const int kColorSpaceCMYK;
|
||||
static const int kColorSpaceYCCK;
|
||||
|
||||
MJpegDecoder();
|
||||
~MJpegDecoder();
|
||||
|
||||
// Loads a new frame, reads its headers, and determines the uncompressed
|
||||
// image format.
|
||||
// Returns LIBYUV_TRUE if image looks valid and format is supported.
|
||||
// If return value is LIBYUV_TRUE, then the values for all the following
|
||||
// getters are populated.
|
||||
// src_len is the size of the compressed mjpeg frame in bytes.
|
||||
LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
|
||||
|
||||
// Returns width of the last loaded frame in pixels.
|
||||
int GetWidth();
|
||||
|
||||
// Returns height of the last loaded frame in pixels.
|
||||
int GetHeight();
|
||||
|
||||
// Returns format of the last loaded frame. The return value is one of the
|
||||
// kColorSpace* constants.
|
||||
int GetColorSpace();
|
||||
|
||||
// Number of color components in the color space.
|
||||
int GetNumComponents();
|
||||
|
||||
// Sample factors of the n-th component.
|
||||
int GetHorizSampFactor(int component);
|
||||
|
||||
int GetVertSampFactor(int component);
|
||||
|
||||
int GetHorizSubSampFactor(int component);
|
||||
|
||||
int GetVertSubSampFactor(int component);
|
||||
|
||||
// Public for testability.
|
||||
int GetImageScanlinesPerImcuRow();
|
||||
|
||||
// Public for testability.
|
||||
int GetComponentScanlinesPerImcuRow(int component);
|
||||
|
||||
// Width of a component in bytes.
|
||||
int GetComponentWidth(int component);
|
||||
|
||||
// Height of a component.
|
||||
int GetComponentHeight(int component);
|
||||
|
||||
// Width of a component in bytes with padding for DCTSIZE. Public for testing.
|
||||
int GetComponentStride(int component);
|
||||
|
||||
// Size of a component in bytes.
|
||||
int GetComponentSize(int component);
|
||||
|
||||
// Call this after LoadFrame() if you decide you don't want to decode it
|
||||
// after all.
|
||||
LIBYUV_BOOL UnloadFrame();
|
||||
|
||||
// Decodes the entire image into a one-buffer-per-color-component format.
|
||||
// dst_width must match exactly. dst_height must be <= to image height; if
|
||||
// less, the image is cropped. "planes" must have size equal to at least
|
||||
// GetNumComponents() and they must point to non-overlapping buffers of size
|
||||
// at least GetComponentSize(i). The pointers in planes are incremented
|
||||
// to point to after the end of the written data.
|
||||
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
|
||||
LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
|
||||
|
||||
// Decodes the entire image and passes the data via repeated calls to a
|
||||
// callback function. Each call will get the data for a whole number of
|
||||
// image scanlines.
|
||||
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
|
||||
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height);
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
static JpegSubsamplingType JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components);
|
||||
|
||||
private:
|
||||
|
||||
void AllocOutputBuffers(int num_outbufs);
|
||||
void DestroyOutputBuffers();
|
||||
|
||||
LIBYUV_BOOL StartDecode();
|
||||
LIBYUV_BOOL FinishDecode();
|
||||
|
||||
void SetScanlinePointers(uint8** data);
|
||||
LIBYUV_BOOL DecodeImcuRow();
|
||||
|
||||
int GetComponentScanlinePadding(int component);
|
||||
|
||||
// A buffer holding the input data for a frame.
|
||||
Buffer buf_;
|
||||
BufferVector buf_vec_;
|
||||
|
||||
jpeg_decompress_struct* decompress_struct_;
|
||||
jpeg_source_mgr* source_mgr_;
|
||||
SetJmpErrorMgr* error_mgr_;
|
||||
|
||||
// LIBYUV_TRUE iff at least one component has scanline padding. (i.e.,
|
||||
// GetComponentScanlinePadding() != 0.)
|
||||
LIBYUV_BOOL has_scanline_padding_;
|
||||
|
||||
// Temporaries used to point to scanline outputs.
|
||||
int num_outbufs_; // Outermost size of all arrays below.
|
||||
uint8*** scanlines_;
|
||||
int* scanlines_sizes_;
|
||||
// Temporary buffer used for decoding when we can't decode directly to the
|
||||
// output buffers. Large enough for just one iMCU row.
|
||||
uint8** databuf_;
|
||||
int* databuf_strides_;
|
||||
};
|
||||
|
||||
} // namespace libyuv
|
||||
|
||||
#endif // __cplusplus
|
||||
#endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ NOLINT
|
|
@ -11,11 +11,11 @@
|
|||
#ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_
|
||||
|
||||
#include "basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
// TODO(fbarchard): Remove the following headers includes.
|
||||
// #include "convert.h"
|
||||
// #include "convert_argb.h"
|
||||
#include "libyuv/convert.h"
|
||||
#include "libyuv/convert_argb.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_ROTATE_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_ROTATE_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Supported rotation.
|
||||
typedef enum RotationMode {
|
||||
kRotate0 = 0, // No rotation.
|
||||
kRotate90 = 90, // Rotate 90 degrees clockwise.
|
||||
kRotate180 = 180, // Rotate 180 degrees.
|
||||
kRotate270 = 270, // Rotate 270 degrees clockwise.
|
||||
|
||||
// Deprecated.
|
||||
kRotateNone = 0,
|
||||
kRotateClockwise = 90,
|
||||
kRotateCounterClockwise = 270,
|
||||
} RotationModeEnum;
|
||||
|
||||
// Rotate I420 frame.
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
|
||||
// Rotate NV12 input and store in I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
|
||||
// Rotate a plane by 0, 90, 180, or 270.
|
||||
LIBYUV_API
|
||||
int RotatePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
|
||||
// Rotate planes by 90, 180, 270. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotatePlane90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV90(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
|
||||
// Rotations for when U and V are interleaved.
|
||||
// These functions take one input pointer and
|
||||
// split the data into two buffers while
|
||||
// rotating them. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotateUV180(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV270(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
|
||||
// The 90 and 270 functions are based on transposes.
|
||||
// Doing a transpose with reversing the read/write
|
||||
// order will result in a rotation by +- 90 degrees.
|
||||
// Deprecated.
|
||||
LIBYUV_API
|
||||
void TransposePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
void TransposeUV(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_ROTATE_H_ NOLINT
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_ROTATE_ARGB_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/rotate.h" // For RotationMode.
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Rotate ARGB frame
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ NOLINT
|
|
@ -13,7 +13,11 @@
|
|||
|
||||
#include <stdlib.h> // For malloc.
|
||||
|
||||
#include "basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#if defined(__native_client__)
|
||||
#include "ppapi/c/pp_macros.h" // For PPAPI_RELEASE
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -38,7 +42,8 @@ extern "C" {
|
|||
var = 0
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
|
||||
defined(TARGET_IPHONE_SIMULATOR)
|
||||
defined(TARGET_IPHONE_SIMULATOR) || \
|
||||
(defined(_MSC_VER) && defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
// True if compiling for SSSE3 as a requirement.
|
||||
|
@ -47,7 +52,12 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
// Enable for NaCL pepper 33 for bundle and AVX2 support.
|
||||
// #define NEW_BINUTILS
|
||||
#if defined(__native_client__) && PPAPI_RELEASE >= 33
|
||||
#define NEW_BINUTILS
|
||||
#endif
|
||||
#if defined(__native_client__) && defined(__arm__) && PPAPI_RELEASE < 37
|
||||
#define LIBYUV_DISABLE_NEON
|
||||
#endif
|
||||
|
||||
// The following are available on all x86 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
|
@ -152,6 +162,11 @@ extern "C" {
|
|||
#define HAS_YUY2TOYROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on x64 Visual C:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined (_M_X64)
|
||||
#define HAS_I422TOARGBROW_SSSE3
|
||||
#endif
|
||||
|
||||
// GCC >= 4.7.0 required for AVX2.
|
||||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
|
||||
|
@ -235,6 +250,10 @@ extern "C" {
|
|||
#define HAS_MIRRORROW_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available on arm64 platforms:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
#endif
|
||||
|
||||
// The following are available on Neon platforms:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
|
@ -330,7 +349,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
#define HAS_COPYROW_MIPS
|
||||
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
#define HAS_I422TOABGRROW_MIPS_DSPR2
|
||||
|
@ -426,7 +446,7 @@ typedef uint8 uvec8[16];
|
|||
"lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
|
||||
#opcode " (%%r15,%%r14),%" #arg "\n" \
|
||||
BUNDLEUNLOCK
|
||||
#else
|
||||
#else // defined(__native_client__) && defined(__x86_64__)
|
||||
#define BUNDLEALIGN "\n"
|
||||
#define MEMACCESS(base) "(%" #base ")"
|
||||
#define MEMACCESS2(offset, base) #offset "(%" #base ")"
|
||||
|
@ -443,6 +463,15 @@ typedef uint8 uvec8[16];
|
|||
#opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
|
||||
#define MEMOPARG(opcode, offset, base, index, scale, arg) \
|
||||
#opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
|
||||
#endif // defined(__native_client__) && defined(__x86_64__)
|
||||
|
||||
#if defined(__arm__)
|
||||
#undef MEMACCESS
|
||||
#if defined(__native_client__)
|
||||
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
|
||||
#else
|
||||
#define MEMACCESS(base) "\n"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void I444ToARGBRow_NEON(const uint8* src_y,
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
#ifndef INCLUDE_LIBYUV_SCALE_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_SCALE_H_
|
||||
|
||||
#include "basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_SCALE_ARGB_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/scale.h" // For FilterMode
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Clipped scale takes destination rectangle coordinates for clip values.
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// TODO(fbarchard): Implement this.
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ NOLINT
|
|
@ -11,7 +11,7 @@
|
|||
#ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_SCALE_ROW_H_
|
||||
|
||||
#include "basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1041
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_ NOLINT
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Common definitions for video, including fourcc and VideoFormat.
|
||||
|
||||
#ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ // NOLINT
|
||||
#define INCLUDE_LIBYUV_VIDEO_COMMON_H_
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Definition of FourCC codes
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Convert four characters to a FourCC code.
|
||||
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
|
||||
// constants are used in a switch.
|
||||
#ifdef __cplusplus
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
|
||||
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
|
||||
#else
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
|
||||
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
|
||||
#endif
|
||||
|
||||
// Some pages discussing FourCC codes:
|
||||
// http://www.fourcc.org/yuv.php
|
||||
// http://v4l2spec.bytesex.org/spec/book1.htm
|
||||
// http://developer.apple.com/quicktime/icefloe/dispatch020.html
|
||||
// http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12
|
||||
// http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt
|
||||
|
||||
// FourCC codes grouped according to implementation efficiency.
|
||||
// Primary formats should convert in 1 efficient step.
|
||||
// Secondary formats are converted in 2 steps.
|
||||
// Auxilliary formats call primary converters.
|
||||
enum FourCC {
|
||||
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
|
||||
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
|
||||
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
|
||||
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
|
||||
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
|
||||
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
|
||||
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
|
||||
|
||||
// 2 Secondary YUV formats: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
|
||||
|
||||
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
|
||||
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
|
||||
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
|
||||
|
||||
// 4 Secondary RGB formats: 4 Bayer Patterns.
|
||||
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
|
||||
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
|
||||
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
|
||||
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
|
||||
|
||||
// 1 Primary Compressed YUV format.
|
||||
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
|
||||
|
||||
// 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
|
||||
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
|
||||
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
|
||||
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
|
||||
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
|
||||
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
|
||||
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
|
||||
|
||||
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
|
||||
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
|
||||
FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422.
|
||||
FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444.
|
||||
FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
|
||||
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac.
|
||||
FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY.
|
||||
FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac.
|
||||
FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG.
|
||||
FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac.
|
||||
FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR.
|
||||
FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW.
|
||||
FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG.
|
||||
FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB
|
||||
FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB
|
||||
FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO.
|
||||
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
|
||||
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
|
||||
|
||||
// 1 Auxiliary compressed YUV format set aside for capturer.
|
||||
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
|
||||
|
||||
// Match any fourcc.
|
||||
FOURCC_ANY = -1,
|
||||
};
|
||||
|
||||
enum FourCCBpp {
|
||||
// Canonical fourcc codes used in our code.
|
||||
FOURCC_BPP_I420 = 12,
|
||||
FOURCC_BPP_I422 = 16,
|
||||
FOURCC_BPP_I444 = 24,
|
||||
FOURCC_BPP_I411 = 12,
|
||||
FOURCC_BPP_I400 = 8,
|
||||
FOURCC_BPP_NV21 = 12,
|
||||
FOURCC_BPP_NV12 = 12,
|
||||
FOURCC_BPP_YUY2 = 16,
|
||||
FOURCC_BPP_UYVY = 16,
|
||||
FOURCC_BPP_M420 = 12,
|
||||
FOURCC_BPP_Q420 = 12,
|
||||
FOURCC_BPP_ARGB = 32,
|
||||
FOURCC_BPP_BGRA = 32,
|
||||
FOURCC_BPP_ABGR = 32,
|
||||
FOURCC_BPP_RGBA = 32,
|
||||
FOURCC_BPP_24BG = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RGBP = 16,
|
||||
FOURCC_BPP_RGBO = 16,
|
||||
FOURCC_BPP_R444 = 16,
|
||||
FOURCC_BPP_RGGB = 8,
|
||||
FOURCC_BPP_BGGR = 8,
|
||||
FOURCC_BPP_GRBG = 8,
|
||||
FOURCC_BPP_GBRG = 8,
|
||||
FOURCC_BPP_YV12 = 12,
|
||||
FOURCC_BPP_YV16 = 16,
|
||||
FOURCC_BPP_YV24 = 24,
|
||||
FOURCC_BPP_YU12 = 12,
|
||||
FOURCC_BPP_J420 = 12,
|
||||
FOURCC_BPP_J400 = 8,
|
||||
FOURCC_BPP_MJPG = 0, // 0 means unknown.
|
||||
FOURCC_BPP_H264 = 0,
|
||||
FOURCC_BPP_IYUV = 12,
|
||||
FOURCC_BPP_YU16 = 16,
|
||||
FOURCC_BPP_YU24 = 24,
|
||||
FOURCC_BPP_YUYV = 16,
|
||||
FOURCC_BPP_YUVS = 16,
|
||||
FOURCC_BPP_HDYC = 16,
|
||||
FOURCC_BPP_2VUY = 16,
|
||||
FOURCC_BPP_JPEG = 1,
|
||||
FOURCC_BPP_DMB1 = 1,
|
||||
FOURCC_BPP_BA81 = 8,
|
||||
FOURCC_BPP_RGB3 = 24,
|
||||
FOURCC_BPP_BGR3 = 24,
|
||||
FOURCC_BPP_CM32 = 32,
|
||||
FOURCC_BPP_CM24 = 24,
|
||||
|
||||
// Match any fourcc.
|
||||
FOURCC_BPP_ANY = 0, // 0 means unknown.
|
||||
};
|
||||
|
||||
// Converts fourcc aliases into canonical ones.
|
||||
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ NOLINT
|
|
@ -0,0 +1,325 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/compare.h"
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#ifdef _OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
// Internal C version of HashDjb2 with int sized count for efficiency.
|
||||
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
|
||||
|
||||
// This module is for Visual C x86
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
|
||||
#define HAS_HASHDJB2_SSE41
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
|
||||
|
||||
#if _MSC_VER >= 1700
|
||||
#define HAS_HASHDJB2_AVX2
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
|
||||
#endif
|
||||
|
||||
#endif // HAS_HASHDJB2_SSE41
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
int remainder;
|
||||
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
|
||||
#if defined(HAS_HASHDJB2_SSE41)
|
||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||
HashDjb2_SSE = HashDjb2_SSE41;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_HASHDJB2_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
HashDjb2_SSE = HashDjb2_AVX2;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (count >= (uint64)(kBlockSize)) {
|
||||
seed = HashDjb2_SSE(src, kBlockSize, seed);
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
}
|
||||
remainder = (int)(count) & ~15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_SSE(src, remainder, seed);
|
||||
src += remainder;
|
||||
count -= remainder;
|
||||
}
|
||||
remainder = (int)(count) & 15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_C(src, remainder, seed);
|
||||
}
|
||||
return seed;
|
||||
}
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
|
||||
#define HAS_SUMSQUAREERROR_SSE2
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
|
||||
#endif
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && _MSC_VER >= 1700
|
||||
#define HAS_SUMSQUAREERROR_AVX2
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
||||
#endif
|
||||
|
||||
// TODO(fbarchard): Refactor into row function.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
int count) {
|
||||
// SumSquareError returns values 0 to 65535 for each squared difference.
|
||||
// Up to 65536 of those can be summed and remain within a uint32.
|
||||
// After each block of 65536 pixels, accumulate into a uint64.
|
||||
const int kBlockSize = 65536;
|
||||
int remainder = count & (kBlockSize - 1) & ~31;
|
||||
uint64 sse = 0;
|
||||
int i;
|
||||
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
||||
SumSquareError_C;
|
||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
SumSquareError = SumSquareError_NEON;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) &&
|
||||
IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
|
||||
// Note only used for multiples of 16 so count is not checked.
|
||||
SumSquareError = SumSquareError_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2)) {
|
||||
// Note only used for multiples of 32 so count is not checked.
|
||||
SumSquareError = SumSquareError_AVX2;
|
||||
}
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: sse)
|
||||
#endif
|
||||
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
|
||||
}
|
||||
src_a += count & ~(kBlockSize - 1);
|
||||
src_b += count & ~(kBlockSize - 1);
|
||||
if (remainder) {
|
||||
sse += SumSquareError(src_a, src_b, remainder);
|
||||
src_a += remainder;
|
||||
src_b += remainder;
|
||||
}
|
||||
remainder = count & 31;
|
||||
if (remainder) {
|
||||
sse += SumSquareError_C(src_a, src_b, remainder);
|
||||
}
|
||||
return sse;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
uint64 sse = 0;
|
||||
int h;
|
||||
// Coalesce rows.
|
||||
if (stride_a == width &&
|
||||
stride_b == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
stride_a = stride_b = 0;
|
||||
}
|
||||
for (h = 0; h < height; ++h) {
|
||||
sse += ComputeSumSquareError(src_a, src_b, width);
|
||||
src_a += stride_a;
|
||||
src_b += stride_b;
|
||||
}
|
||||
return sse;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
|
||||
double psnr;
|
||||
if (sse > 0) {
|
||||
double mse = (double)(count) / (double)(sse);
|
||||
psnr = 10.0 * log10(255.0 * 255.0 * mse);
|
||||
} else {
|
||||
psnr = kMaxPsnr; // Limit to prevent divide by 0
|
||||
}
|
||||
|
||||
if (psnr > kMaxPsnr)
|
||||
psnr = kMaxPsnr;
|
||||
|
||||
return psnr;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
const uint64 samples = width * height;
|
||||
const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
|
||||
src_b, stride_b,
|
||||
width, height);
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b,
|
||||
width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 samples = width * height + 2 * (width_uv * height_uv);
|
||||
const uint64 sse = sse_y + sse_u + sse_v;
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
|
||||
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
|
||||
|
||||
static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) {
|
||||
int64 sum_a = 0;
|
||||
int64 sum_b = 0;
|
||||
int64 sum_sq_a = 0;
|
||||
int64 sum_sq_b = 0;
|
||||
int64 sum_axb = 0;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
int j;
|
||||
for (j = 0; j < 8; ++j) {
|
||||
sum_a += src_a[j];
|
||||
sum_b += src_b[j];
|
||||
sum_sq_a += src_a[j] * src_a[j];
|
||||
sum_sq_b += src_b[j] * src_b[j];
|
||||
sum_axb += src_a[j] * src_b[j];
|
||||
}
|
||||
|
||||
src_a += stride_a;
|
||||
src_b += stride_b;
|
||||
}
|
||||
|
||||
{
|
||||
const int64 count = 64;
|
||||
// scale the constants by number of pixels
|
||||
const int64 c1 = (cc1 * count * count) >> 12;
|
||||
const int64 c2 = (cc2 * count * count) >> 12;
|
||||
|
||||
const int64 sum_a_x_sum_b = sum_a * sum_b;
|
||||
|
||||
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
|
||||
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
|
||||
|
||||
const int64 sum_a_sq = sum_a*sum_a;
|
||||
const int64 sum_b_sq = sum_b*sum_b;
|
||||
|
||||
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq +
|
||||
count * sum_sq_b - sum_b_sq + c2);
|
||||
|
||||
if (ssim_d == 0.0) {
|
||||
return DBL_MAX;
|
||||
}
|
||||
return ssim_n * 1.0 / ssim_d;
|
||||
}
|
||||
}
|
||||
|
||||
// We are using a 8x8 moving window with starting location of each 8x8 window
|
||||
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
|
||||
// block boundaries to penalize blocking artifacts.
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
int samples = 0;
|
||||
double ssim_total = 0;
|
||||
double (*Ssim8x8)(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) = Ssim8x8_C;
|
||||
|
||||
// sample point start with each 4x4 location
|
||||
int i;
|
||||
for (i = 0; i < height - 8; i += 4) {
|
||||
int j;
|
||||
for (j = 0; j < width - 8; j += 4) {
|
||||
ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b);
|
||||
samples++;
|
||||
}
|
||||
|
||||
src_a += stride_a * 4;
|
||||
src_b += stride_b * 4;
|
||||
}
|
||||
|
||||
ssim_total /= samples;
|
||||
return ssim_total;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b, width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse = 0u;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
int diff = src_a[i] - src_b[i];
|
||||
sse += (uint32)(diff * diff);
|
||||
}
|
||||
return sse;
|
||||
}
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
// Internal C version of HashDjb2 with int sized count for efficiency.
|
||||
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
|
||||
uint32 hash = seed;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
hash += (hash << 5) + src[i];
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
|
||||
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
"vmov.u8 q8, #0 \n"
|
||||
"vmov.u8 q10, #0 \n"
|
||||
"vmov.u8 q9, #0 \n"
|
||||
"vmov.u8 q11, #0 \n"
|
||||
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n"
|
||||
"subs %2, %2, #16 \n"
|
||||
"vsubl.u8 q2, d0, d2 \n"
|
||||
"vsubl.u8 q3, d1, d3 \n"
|
||||
"vmlal.s16 q8, d4, d4 \n"
|
||||
"vmlal.s16 q9, d6, d6 \n"
|
||||
"vmlal.s16 q10, d5, d5 \n"
|
||||
"vmlal.s16 q11, d7, d7 \n"
|
||||
"bgt 1b \n"
|
||||
|
||||
"vadd.u32 q8, q8, q9 \n"
|
||||
"vadd.u32 q10, q10, q11 \n"
|
||||
"vadd.u32 q11, q8, q10 \n"
|
||||
"vpaddl.u32 q1, q11 \n"
|
||||
"vadd.u64 d0, d2, d3 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(sse)
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
|
||||
return sse;
|
||||
}
|
||||
|
||||
#endif // __ARM_NEON__
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
|
||||
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse;
|
||||
asm volatile ( // NOLINT
|
||||
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"movdqa " MEMACCESS(1) ",%%xmm2 \n"
|
||||
"lea " MEMLEA(0x10, 1) ",%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm2,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=g"(sse) // %3
|
||||
:
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
#endif
|
||||
); // NOLINT
|
||||
return sse;
|
||||
}
|
||||
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
|
||||
#define HAS_HASHDJB2_SSE41
|
||||
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
static uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
static uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
static uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
static uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
uint32 hash;
|
||||
asm volatile ( // NOLINT
|
||||
"movd %2,%%xmm0 \n"
|
||||
"pxor %%xmm7,%%xmm7 \n"
|
||||
"movdqa %4,%%xmm6 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"pmulld %%xmm6,%%xmm0 \n"
|
||||
"movdqa %5,%%xmm5 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm7,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklwd %%xmm7,%%xmm3 \n"
|
||||
"pmulld %%xmm5,%%xmm3 \n"
|
||||
"movdqa %6,%%xmm5 \n"
|
||||
"movdqa %%xmm2,%%xmm4 \n"
|
||||
"punpckhwd %%xmm7,%%xmm4 \n"
|
||||
"pmulld %%xmm5,%%xmm4 \n"
|
||||
"movdqa %7,%%xmm5 \n"
|
||||
"punpckhbw %%xmm7,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklwd %%xmm7,%%xmm2 \n"
|
||||
"pmulld %%xmm5,%%xmm2 \n"
|
||||
"movdqa %8,%%xmm5 \n"
|
||||
"punpckhwd %%xmm7,%%xmm1 \n"
|
||||
"pmulld %%xmm5,%%xmm1 \n"
|
||||
"paddd %%xmm4,%%xmm3 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"sub $0x10,%1 \n"
|
||||
"paddd %%xmm3,%%xmm1 \n"
|
||||
"pshufd $0xe,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"pshufd $0x1,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"jg 1b \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(count), // %1
|
||||
"+rm"(seed), // %2
|
||||
"=g"(hash) // %3
|
||||
: "m"(kHash16x33), // %4
|
||||
"m"(kHashMul0), // %5
|
||||
"m"(kHashMul1), // %6
|
||||
"m"(kHashMul2), // %7
|
||||
"m"(kHashMul3) // %8
|
||||
: "memory", "cc"
|
||||
#if defined(__SSE2__)
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
#endif
|
||||
); // NOLINT
|
||||
return hash;
|
||||
}
|
||||
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
|
@ -0,0 +1,232 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm5, xmm5
|
||||
|
||||
align 4
|
||||
wloop:
|
||||
movdqa xmm1, [eax]
|
||||
lea eax, [eax + 16]
|
||||
movdqa xmm2, [edx]
|
||||
lea edx, [edx + 16]
|
||||
sub ecx, 16
|
||||
movdqa xmm3, xmm1 // abs trick
|
||||
psubusb xmm1, xmm2
|
||||
psubusb xmm2, xmm3
|
||||
por xmm1, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm1, xmm5
|
||||
punpckhbw xmm2, xmm5
|
||||
pmaddwd xmm1, xmm1
|
||||
pmaddwd xmm2, xmm2
|
||||
paddd xmm0, xmm1
|
||||
paddd xmm0, xmm2
|
||||
jg wloop
|
||||
|
||||
pshufd xmm1, xmm0, 0xee
|
||||
paddd xmm0, xmm1
|
||||
pshufd xmm1, xmm0, 0x01
|
||||
paddd xmm0, xmm1
|
||||
movd eax, xmm0
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
|
||||
#pragma warning(disable: 4752)
|
||||
__declspec(naked) __declspec(align(16))
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
vpxor ymm0, ymm0, ymm0 // sum
|
||||
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
|
||||
sub edx, eax
|
||||
|
||||
align 4
|
||||
wloop:
|
||||
vmovdqu ymm1, [eax]
|
||||
vmovdqu ymm2, [eax + edx]
|
||||
lea eax, [eax + 32]
|
||||
sub ecx, 32
|
||||
vpsubusb ymm3, ymm1, ymm2 // abs difference trick
|
||||
vpsubusb ymm2, ymm2, ymm1
|
||||
vpor ymm1, ymm2, ymm3
|
||||
vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order.
|
||||
vpunpckhbw ymm1, ymm1, ymm5
|
||||
vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32.
|
||||
vpmaddwd ymm1, ymm1, ymm1
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpaddd ymm0, ymm0, ymm2
|
||||
jg wloop
|
||||
|
||||
vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes.
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes.
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpermq ymm1, ymm0, 0x02 // high + low lane.
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vmovd eax, xmm0
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
#define HAS_HASHDJB2_SSE41
|
||||
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
static uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
static uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
static uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
static uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
// 27: 66 0F 38 40 C6 pmulld xmm0,xmm6
|
||||
// 44: 66 0F 38 40 DD pmulld xmm3,xmm5
|
||||
// 59: 66 0F 38 40 E5 pmulld xmm4,xmm5
|
||||
// 72: 66 0F 38 40 D5 pmulld xmm2,xmm5
|
||||
// 83: 66 0F 38 40 CD pmulld xmm1,xmm5
|
||||
#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
|
||||
_asm _emit 0x40 _asm _emit reg
|
||||
|
||||
__declspec(naked) __declspec(align(16))
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
movd xmm0, [esp + 12] // seed
|
||||
|
||||
pxor xmm7, xmm7 // constant 0 for unpck
|
||||
movdqa xmm6, kHash16x33
|
||||
|
||||
align 4
|
||||
wloop:
|
||||
movdqu xmm1, [eax] // src[0-15]
|
||||
lea eax, [eax + 16]
|
||||
pmulld(0xc6) // pmulld xmm0,xmm6 hash *= 33 ^ 16
|
||||
movdqa xmm5, kHashMul0
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm2, xmm7 // src[0-7]
|
||||
movdqa xmm3, xmm2
|
||||
punpcklwd xmm3, xmm7 // src[0-3]
|
||||
pmulld(0xdd) // pmulld xmm3, xmm5
|
||||
movdqa xmm5, kHashMul1
|
||||
movdqa xmm4, xmm2
|
||||
punpckhwd xmm4, xmm7 // src[4-7]
|
||||
pmulld(0xe5) // pmulld xmm4, xmm5
|
||||
movdqa xmm5, kHashMul2
|
||||
punpckhbw xmm1, xmm7 // src[8-15]
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm2, xmm7 // src[8-11]
|
||||
pmulld(0xd5) // pmulld xmm2, xmm5
|
||||
movdqa xmm5, kHashMul3
|
||||
punpckhwd xmm1, xmm7 // src[12-15]
|
||||
pmulld(0xcd) // pmulld xmm1, xmm5
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm1, xmm2
|
||||
sub ecx, 16
|
||||
paddd xmm1, xmm3
|
||||
|
||||
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
paddd xmm1, xmm2
|
||||
pshufd xmm2, xmm1, 0x01
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm0, xmm1
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
__declspec(naked) __declspec(align(16))
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
movd xmm0, [esp + 12] // seed
|
||||
movdqa xmm6, kHash16x33
|
||||
|
||||
align 4
|
||||
wloop:
|
||||
vpmovzxbd xmm3, dword ptr [eax] // src[0-3]
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
vpmovzxbd xmm4, dword ptr [eax + 4] // src[4-7]
|
||||
pmulld xmm3, kHashMul0
|
||||
vpmovzxbd xmm2, dword ptr [eax + 8] // src[8-11]
|
||||
pmulld xmm4, kHashMul1
|
||||
vpmovzxbd xmm1, dword ptr [eax + 12] // src[12-15]
|
||||
pmulld xmm2, kHashMul2
|
||||
lea eax, [eax + 16]
|
||||
pmulld xmm1, kHashMul3
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm1, xmm2
|
||||
sub ecx, 16
|
||||
paddd xmm1, xmm3
|
||||
pshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
paddd xmm1, xmm2
|
||||
pshufd xmm2, xmm1, 0x01
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm0, xmm1
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,938 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/convert_argb.h"
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/format_conversion.h"
|
||||
#ifdef HAVE_JPEG
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
#endif
|
||||
#include "libyuv/rotate_argb.h"
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Copy ARGB with optional flipping
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
if (!src_argb || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
|
||||
CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
|
||||
width * 4, height);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I444 to ARGB.
|
||||
LIBYUV_API
|
||||
int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I444ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I444ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u == width &&
|
||||
src_stride_v == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_I444TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I444ToARGBRow = I444ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I444TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I444ToARGBRow = I444ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I444ToARGBRow = I444ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 2 == width &&
|
||||
src_stride_v * 2 == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I411 to ARGB.
|
||||
LIBYUV_API
|
||||
int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I411ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I411ToARGBRow_C;
|
||||
if (!src_y || !src_u || !src_v ||
|
||||
!dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
src_stride_u * 4 == width &&
|
||||
src_stride_v * 4 == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_I411TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I411ToARGBRow = I411ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I411TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I411ToARGBRow = I411ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I411ToARGBRow = I411ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I400 to ARGB.
|
||||
LIBYUV_API
|
||||
int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*YToARGBRow)(const uint8* y_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = YToARGBRow_C;
|
||||
if (!src_y || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_YTOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
YToARGBRow = YToARGBRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
YToARGBRow = YToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_YTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
YToARGBRow = YToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
YToARGBRow = YToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
YToARGBRow(src_y, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I400 to ARGB.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*I400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
|
||||
I400ToARGBRow_C;
|
||||
if (!src_y || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_stride_y = -src_stride_y;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_y == width &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_y = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_I400TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
|
||||
I400ToARGBRow = I400ToARGBRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I400ToARGBRow = I400ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_I400TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I400ToARGBRow = I400ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I400ToARGBRow = I400ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
I400ToARGBRow(src_y, dst_argb, width);
|
||||
src_y += src_stride_y;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Shuffle table for converting BGRA to ARGB.
|
||||
static uvec8 kShuffleMaskBGRAToARGB = {
|
||||
3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u
|
||||
};
|
||||
|
||||
// Shuffle table for converting ABGR to ARGB.
|
||||
static uvec8 kShuffleMaskABGRToARGB = {
|
||||
2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u
|
||||
};
|
||||
|
||||
// Shuffle table for converting RGBA to ARGB.
|
||||
static uvec8 kShuffleMaskRGBAToARGB = {
|
||||
1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u
|
||||
};
|
||||
|
||||
// Convert BGRA to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_bgra, src_stride_bgra,
|
||||
dst_argb, dst_stride_argb,
|
||||
(const uint8*)(&kShuffleMaskBGRAToARGB),
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert ARGB to BGRA (same as BGRAToARGB).
|
||||
LIBYUV_API
|
||||
int ARGBToBGRA(const uint8* src_bgra, int src_stride_bgra,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_bgra, src_stride_bgra,
|
||||
dst_argb, dst_stride_argb,
|
||||
(const uint8*)(&kShuffleMaskBGRAToARGB),
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert ABGR to ARGB.
|
||||
LIBYUV_API
|
||||
int ABGRToARGB(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_abgr, src_stride_abgr,
|
||||
dst_argb, dst_stride_argb,
|
||||
(const uint8*)(&kShuffleMaskABGRToARGB),
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert ARGB to ABGR to (same as ABGRToARGB).
|
||||
LIBYUV_API
|
||||
int ARGBToABGR(const uint8* src_abgr, int src_stride_abgr,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_abgr, src_stride_abgr,
|
||||
dst_argb, dst_stride_argb,
|
||||
(const uint8*)(&kShuffleMaskABGRToARGB),
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert RGBA to ARGB.
|
||||
LIBYUV_API
|
||||
int RGBAToARGB(const uint8* src_rgba, int src_stride_rgba,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
return ARGBShuffle(src_rgba, src_stride_rgba,
|
||||
dst_argb, dst_stride_argb,
|
||||
(const uint8*)(&kShuffleMaskRGBAToARGB),
|
||||
width, height);
|
||||
}
|
||||
|
||||
// Convert RGB24 to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
RGB24ToARGBRow_C;
|
||||
if (!src_rgb24 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24;
|
||||
src_stride_rgb24 = -src_stride_rgb24;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_rgb24 == width * 3 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_rgb24 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_RGB24TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB24TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB24ToARGBRow = RGB24ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB24ToARGBRow(src_rgb24, dst_argb, width);
|
||||
src_rgb24 += src_stride_rgb24;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert RAW to ARGB.
|
||||
LIBYUV_API
|
||||
int RAWToARGB(const uint8* src_raw, int src_stride_raw,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
|
||||
RAWToARGBRow_C;
|
||||
if (!src_raw || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_raw = src_raw + (height - 1) * src_stride_raw;
|
||||
src_stride_raw = -src_stride_raw;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_raw == width * 3 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_raw = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_RAWTOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
RAWToARGBRow = RAWToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RAWTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RAWToARGBRow = RAWToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RAWToARGBRow = RAWToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RAWToARGBRow(src_raw, dst_argb, width);
|
||||
src_raw += src_stride_raw;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert RGB565 to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
|
||||
RGB565ToARGBRow_C;
|
||||
if (!src_rgb565 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565;
|
||||
src_stride_rgb565 = -src_stride_rgb565;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_rgb565 == width * 2 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_rgb565 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_RGB565TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_RGB565TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
RGB565ToARGBRow = RGB565ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
RGB565ToARGBRow(src_rgb565, dst_argb, width);
|
||||
src_rgb565 += src_stride_rgb565;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB1555 to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
|
||||
int pix) = ARGB1555ToARGBRow_C;
|
||||
if (!src_argb1555 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555;
|
||||
src_stride_argb1555 = -src_stride_argb1555;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb1555 == width * 2 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb1555 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_ARGB1555TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGB1555TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGB1555ToARGBRow(src_argb1555, dst_argb, width);
|
||||
src_argb1555 += src_stride_argb1555;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert ARGB4444 to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
|
||||
int pix) = ARGB4444ToARGBRow_C;
|
||||
if (!src_argb4444 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444;
|
||||
src_stride_argb4444 = -src_stride_argb4444;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_argb4444 == width * 2 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_argb4444 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_ARGB4444TOARGBROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGB4444TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGB4444ToARGBRow(src_argb4444, dst_argb, width);
|
||||
src_argb4444 += src_stride_argb4444;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert NV12 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_NV12TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_NV12TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV12ToARGBRow(src_y, src_uv, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_uv += src_stride_uv;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert NV21 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV21ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = NV21ToARGBRow_C;
|
||||
if (!src_y || !src_uv || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_NV21TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_NV21TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV21ToARGBRow = NV21ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
NV21ToARGBRow(src_y, src_uv, dst_argb, width);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_uv += src_stride_uv;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert M420 to ARGB.
|
||||
LIBYUV_API
|
||||
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*NV12ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* uv_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = NV12ToARGBRow_C;
|
||||
if (!src_m420 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
#if defined(HAS_NV12TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_NV12TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
NV12ToARGBRow = NV12ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
|
||||
NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
|
||||
dst_argb + dst_stride_argb, width);
|
||||
dst_argb += dst_stride_argb * 2;
|
||||
src_m420 += src_stride_m420 * 3;
|
||||
}
|
||||
if (height & 1) {
|
||||
NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert YUY2 to ARGB.
|
||||
LIBYUV_API
|
||||
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
|
||||
YUY2ToARGBRow_C;
|
||||
if (!src_yuy2 || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2;
|
||||
src_stride_yuy2 = -src_stride_yuy2;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_yuy2 == width * 2 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_yuy2 = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_YUY2TOARGBROW_SSSE3)
|
||||
// Posix is 16, Windows is 8.
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_YUY2TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
YUY2ToARGBRow = YUY2ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
YUY2ToARGBRow(src_yuy2, dst_argb, width);
|
||||
src_yuy2 += src_stride_yuy2;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert UYVY to ARGB.
|
||||
LIBYUV_API
|
||||
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height) {
|
||||
int y;
|
||||
void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
|
||||
UYVYToARGBRow_C;
|
||||
if (!src_uyvy || !dst_argb ||
|
||||
width <= 0 || height == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy;
|
||||
src_stride_uyvy = -src_stride_uyvy;
|
||||
}
|
||||
// Coalesce rows.
|
||||
if (src_stride_uyvy == width * 2 &&
|
||||
dst_stride_argb == width * 4) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
src_stride_uyvy = dst_stride_argb = 0;
|
||||
}
|
||||
#if defined(HAS_UYVYTOARGBROW_SSSE3)
|
||||
// Posix is 16, Windows is 8.
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_UYVYTOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
UYVYToARGBRow = UYVYToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
for (y = 0; y < height; ++y) {
|
||||
UYVYToARGBRow(src_uyvy, dst_argb, width);
|
||||
src_uyvy += src_stride_uyvy;
|
||||
dst_argb += dst_stride_argb;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,392 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/convert.h"
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
struct I420Buffers {
|
||||
uint8* y;
|
||||
int y_stride;
|
||||
uint8* u;
|
||||
int u_stride;
|
||||
uint8* v;
|
||||
int v_stride;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
static void JpegCopyI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I420Copy(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI422ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I422ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI444ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I444ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I411ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I400ToI420(data[0], strides[0],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height) {
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret) {
|
||||
*width = mjpeg_decoder.GetWidth();
|
||||
*height = mjpeg_decoder.GetHeight();
|
||||
}
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return ret ? 0 : -1; // -1 for runtime failure.
|
||||
}
|
||||
|
||||
// MJPG (Motion JPeg) to I420
|
||||
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
struct ARGBBuffers {
|
||||
uint8* argb;
|
||||
int argb_stride;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
static void JpegI420ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I420ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI422ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I422ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI444ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I444ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I411ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I400ToARGB(data[0], strides[0],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
// MJPG (Motion JPeg) to ARGB
|
||||
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* argb, int argb_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
ARGBBuffers bufs = { argb, argb_stride, dw, dh };
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,327 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/convert_argb.h"
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/format_conversion.h"
|
||||
#ifdef HAVE_JPEG
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
#endif
|
||||
#include "libyuv/rotate_argb.h"
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||
// src_width is used for source stride computation
|
||||
// src_height is used to compute location of planes, and indicate inversion
|
||||
// sample_size is measured in bytes and is the size of the frame.
|
||||
// With MJPEG it is the compressed size of the frame.
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* crop_argb, int argb_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
int aligned_src_width = (src_width + 1) & ~1;
|
||||
const uint8* src;
|
||||
const uint8* src_uv;
|
||||
int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
|
||||
// One pass rotation is available for some formats. For the rest, convert
|
||||
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
|
||||
// and then rotate the I420 to the final destination buffer.
|
||||
// For in-place conversion, if destination crop_argb is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
|
||||
crop_argb == sample;
|
||||
uint8* tmp_argb = crop_argb;
|
||||
int tmp_argb_stride = argb_stride;
|
||||
uint8* rotate_buffer = NULL;
|
||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
|
||||
if (crop_argb == NULL || sample == NULL ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
if (src_height < 0) {
|
||||
inv_crop_height = -inv_crop_height;
|
||||
}
|
||||
|
||||
if (need_buf) {
|
||||
int argb_size = crop_width * abs_crop_height * 4;
|
||||
rotate_buffer = (uint8*)malloc(argb_size);
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
crop_argb = rotate_buffer;
|
||||
argb_stride = crop_width;
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// TODO(fbarchard): Support cropping Bayer by odd numbers
|
||||
// by adjusting fourcc.
|
||||
case FOURCC_BGGR:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerBGGRToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
|
||||
case FOURCC_GBRG:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerGBRGToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
|
||||
case FOURCC_GRBG:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerGRBGToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
|
||||
case FOURCC_RGGB:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerRGGBToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV21ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// case FOURCC_Q420:
|
||||
// src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
|
||||
// src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
|
||||
// src_width + crop_x * 2;
|
||||
// r = Q420ToARGB(src, src_width * 3,
|
||||
// src_uv, src_width * 3,
|
||||
// crop_argb, argb_stride,
|
||||
// crop_width, inv_crop_height);
|
||||
// break;
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
case FOURCC_YU12:
|
||||
case FOURCC_YV12: {
|
||||
const uint8* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I422:
|
||||
case FOURCC_YV16: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
case FOURCC_YV24: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
if (format == FOURCC_YV24) {
|
||||
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
} else {
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToARGB(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToARGB(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToARGB(sample, sample_size,
|
||||
crop_argb, argb_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -1; // unknown fourcc - return failure code.
|
||||
}
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = ARGBRotate(crop_argb, argb_stride,
|
||||
tmp_argb, tmp_argb_stride,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,383 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "libyuv/convert.h"
|
||||
|
||||
#include "libyuv/format_conversion.h"
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||
// src_width is used for source stride computation
|
||||
// src_height is used to compute location of planes, and indicate inversion
|
||||
// sample_size is measured in bytes and is the size of the frame.
|
||||
// With MJPEG it is the compressed size of the frame.
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
int aligned_src_width = (src_width + 1) & ~1;
|
||||
const uint8* src;
|
||||
const uint8* src_uv;
|
||||
int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
|
||||
format != FOURCC_NV12 && format != FOURCC_NV21 &&
|
||||
format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
|
||||
uint8* tmp_y = y;
|
||||
uint8* tmp_u = u;
|
||||
uint8* tmp_v = v;
|
||||
int tmp_y_stride = y_stride;
|
||||
int tmp_u_stride = u_stride;
|
||||
int tmp_v_stride = v_stride;
|
||||
uint8* rotate_buffer = NULL;
|
||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
|
||||
if (!y || !u || !v || !sample ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
if (src_height < 0) {
|
||||
inv_crop_height = -inv_crop_height;
|
||||
}
|
||||
|
||||
// One pass rotation is available for some formats. For the rest, convert
|
||||
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
|
||||
// and then rotate the I420 to the final destination buffer.
|
||||
// For in-place conversion, if destination y is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
if (need_buf) {
|
||||
int y_size = crop_width * abs_crop_height;
|
||||
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
y = rotate_buffer;
|
||||
u = y + y_size;
|
||||
v = u + uv_size;
|
||||
y_stride = crop_width;
|
||||
u_stride = v_stride = ((crop_width + 1) / 2);
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// TODO(fbarchard): Support cropping Bayer by odd numbers
|
||||
// by adjusting fourcc.
|
||||
case FOURCC_BGGR:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerBGGRToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_GBRG:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerGBRGToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_GRBG:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerGRBGToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGGB:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
r = BayerRGGBToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
v, v_stride,
|
||||
u, u_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_Q420:
|
||||
src = sample + (src_width + aligned_src_width * 2) * crop_y + crop_x;
|
||||
src_uv = sample + (src_width + aligned_src_width * 2) * crop_y +
|
||||
src_width + crop_x * 2;
|
||||
r = Q420ToI420(src, src_width * 3,
|
||||
src_uv, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
case FOURCC_YU12:
|
||||
case FOURCC_YV12: {
|
||||
const uint8* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420Rotate(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I422:
|
||||
case FOURCC_YV16: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToI420(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
case FOURCC_YV24: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
if (format == FOURCC_YV24) {
|
||||
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
} else {
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToI420(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToI420(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToI420(sample, sample_size,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
r = -1; // unknown fourcc - return failure code.
|
||||
}
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = I420Rotate(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
tmp_y, tmp_y_stride,
|
||||
tmp_u, tmp_u_stride,
|
||||
tmp_v, tmp_v_stride,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -8,9 +8,9 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/cpu_id.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#include <intrin.h> // For __cpuidex()
|
||||
#endif
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
|
@ -27,7 +27,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/basic_types.h" // For CPU_X86
|
||||
#include "libyuv/basic_types.h" // For CPU_X86
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -48,7 +48,7 @@ extern "C" {
|
|||
defined(__i386__) || defined(__x86_64__))
|
||||
LIBYUV_API
|
||||
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if (_MSC_FULL_VER >= 160040219)
|
||||
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
|
||||
#elif defined(_M_IX86)
|
||||
|
@ -188,10 +188,14 @@ LIBYUV_API SAFEBUFFERS
|
|||
int InitCpuFlags(void) {
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
|
||||
|
||||
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
|
||||
CpuId(0, 0, cpu_info0);
|
||||
CpuId(1, 0, cpu_info1);
|
||||
CpuId(7, 0, cpu_info7);
|
||||
if (cpu_info0[0] >= 7) {
|
||||
CpuId(7, 0, cpu_info7);
|
||||
}
|
||||
cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
||||
|
@ -199,6 +203,7 @@ int InitCpuFlags(void) {
|
|||
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
|
||||
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
|
||||
kCpuHasX86;
|
||||
|
||||
#ifdef HAS_XGETBV
|
||||
if ((cpu_info1[2] & 0x18000000) == 0x18000000 && // AVX and OSSave
|
||||
TestOsSaveYmm()) { // Saves YMM.
|
||||
|
|
|
@ -0,0 +1,552 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/format_conversion.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/video_common.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// generate a selector mask useful for pshufb
|
||||
static uint32 GenerateSelector(int select0, int select1) {
|
||||
return (uint32)(select0) |
|
||||
(uint32)((select1 + 4) << 8) |
|
||||
(uint32)((select0 + 8) << 16) |
|
||||
(uint32)((select1 + 12) << 24);
|
||||
}
|
||||
|
||||
static int MakeSelectors(const int blue_index,
|
||||
const int green_index,
|
||||
const int red_index,
|
||||
uint32 dst_fourcc_bayer,
|
||||
uint32* index_map) {
|
||||
// Now build a lookup table containing the indices for the four pixels in each
|
||||
// 2x2 Bayer grid.
|
||||
switch (dst_fourcc_bayer) {
|
||||
case FOURCC_BGGR:
|
||||
index_map[0] = GenerateSelector(blue_index, green_index);
|
||||
index_map[1] = GenerateSelector(green_index, red_index);
|
||||
break;
|
||||
case FOURCC_GBRG:
|
||||
index_map[0] = GenerateSelector(green_index, blue_index);
|
||||
index_map[1] = GenerateSelector(red_index, green_index);
|
||||
break;
|
||||
case FOURCC_RGGB:
|
||||
index_map[0] = GenerateSelector(red_index, green_index);
|
||||
index_map[1] = GenerateSelector(green_index, blue_index);
|
||||
break;
|
||||
case FOURCC_GRBG:
|
||||
index_map[0] = GenerateSelector(green_index, red_index);
|
||||
index_map[1] = GenerateSelector(blue_index, green_index);
|
||||
break;
|
||||
default:
|
||||
return -1; // Bad FourCC
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts 32 bit ARGB to Bayer RGB formats.
|
||||
LIBYUV_API
|
||||
int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height,
|
||||
uint32 dst_fourcc_bayer) {
|
||||
int y;
|
||||
const int blue_index = 0; // Offsets for ARGB format
|
||||
const int green_index = 1;
|
||||
const int red_index = 2;
|
||||
uint32 index_map[2];
|
||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (MakeSelectors(blue_index, green_index, red_index,
|
||||
dst_fourcc_bayer, index_map)) {
|
||||
return -1; // Bad FourCC
|
||||
}
|
||||
|
||||
for (y = 0; y < height; ++y) {
|
||||
ARGBToBayerRow(src_argb, dst_bayer, index_map[y & 1], width);
|
||||
src_argb += src_stride_argb;
|
||||
dst_bayer += dst_stride_bayer;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define AVG(a, b) (((a) + (b)) >> 1)
|
||||
|
||||
static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix) {
|
||||
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
|
||||
uint8 g = src_bayer0[1];
|
||||
uint8 r = src_bayer1[1];
|
||||
int x;
|
||||
for (x = 0; x < pix - 2; x += 2) {
|
||||
dst_argb[0] = src_bayer0[0];
|
||||
dst_argb[1] = AVG(g, src_bayer0[1]);
|
||||
dst_argb[2] = AVG(r, src_bayer1[1]);
|
||||
dst_argb[3] = 255U;
|
||||
dst_argb[4] = AVG(src_bayer0[0], src_bayer0[2]);
|
||||
dst_argb[5] = src_bayer0[1];
|
||||
dst_argb[6] = src_bayer1[1];
|
||||
dst_argb[7] = 255U;
|
||||
g = src_bayer0[1];
|
||||
r = src_bayer1[1];
|
||||
src_bayer0 += 2;
|
||||
src_bayer1 += 2;
|
||||
dst_argb += 8;
|
||||
}
|
||||
dst_argb[0] = src_bayer0[0];
|
||||
dst_argb[1] = AVG(g, src_bayer0[1]);
|
||||
dst_argb[2] = AVG(r, src_bayer1[1]);
|
||||
dst_argb[3] = 255U;
|
||||
if (!(pix & 1)) {
|
||||
dst_argb[4] = src_bayer0[0];
|
||||
dst_argb[5] = src_bayer0[1];
|
||||
dst_argb[6] = src_bayer1[1];
|
||||
dst_argb[7] = 255U;
|
||||
}
|
||||
}
|
||||
|
||||
static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix) {
|
||||
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
|
||||
uint8 g = src_bayer0[1];
|
||||
uint8 b = src_bayer1[1];
|
||||
int x;
|
||||
for (x = 0; x < pix - 2; x += 2) {
|
||||
dst_argb[0] = AVG(b, src_bayer1[1]);
|
||||
dst_argb[1] = AVG(g, src_bayer0[1]);
|
||||
dst_argb[2] = src_bayer0[0];
|
||||
dst_argb[3] = 255U;
|
||||
dst_argb[4] = src_bayer1[1];
|
||||
dst_argb[5] = src_bayer0[1];
|
||||
dst_argb[6] = AVG(src_bayer0[0], src_bayer0[2]);
|
||||
dst_argb[7] = 255U;
|
||||
g = src_bayer0[1];
|
||||
b = src_bayer1[1];
|
||||
src_bayer0 += 2;
|
||||
src_bayer1 += 2;
|
||||
dst_argb += 8;
|
||||
}
|
||||
dst_argb[0] = AVG(b, src_bayer1[1]);
|
||||
dst_argb[1] = AVG(g, src_bayer0[1]);
|
||||
dst_argb[2] = src_bayer0[0];
|
||||
dst_argb[3] = 255U;
|
||||
if (!(pix & 1)) {
|
||||
dst_argb[4] = src_bayer1[1];
|
||||
dst_argb[5] = src_bayer0[1];
|
||||
dst_argb[6] = src_bayer0[0];
|
||||
dst_argb[7] = 255U;
|
||||
}
|
||||
}
|
||||
|
||||
static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix) {
|
||||
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
|
||||
uint8 b = src_bayer0[1];
|
||||
int x;
|
||||
for (x = 0; x < pix - 2; x += 2) {
|
||||
dst_argb[0] = AVG(b, src_bayer0[1]);
|
||||
dst_argb[1] = src_bayer0[0];
|
||||
dst_argb[2] = src_bayer1[0];
|
||||
dst_argb[3] = 255U;
|
||||
dst_argb[4] = src_bayer0[1];
|
||||
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
|
||||
dst_argb[6] = AVG(src_bayer1[0], src_bayer1[2]);
|
||||
dst_argb[7] = 255U;
|
||||
b = src_bayer0[1];
|
||||
src_bayer0 += 2;
|
||||
src_bayer1 += 2;
|
||||
dst_argb += 8;
|
||||
}
|
||||
dst_argb[0] = AVG(b, src_bayer0[1]);
|
||||
dst_argb[1] = src_bayer0[0];
|
||||
dst_argb[2] = src_bayer1[0];
|
||||
dst_argb[3] = 255U;
|
||||
if (!(pix & 1)) {
|
||||
dst_argb[4] = src_bayer0[1];
|
||||
dst_argb[5] = src_bayer0[0];
|
||||
dst_argb[6] = src_bayer1[0];
|
||||
dst_argb[7] = 255U;
|
||||
}
|
||||
}
|
||||
|
||||
static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix) {
|
||||
const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
|
||||
uint8 r = src_bayer0[1];
|
||||
int x;
|
||||
for (x = 0; x < pix - 2; x += 2) {
|
||||
dst_argb[0] = src_bayer1[0];
|
||||
dst_argb[1] = src_bayer0[0];
|
||||
dst_argb[2] = AVG(r, src_bayer0[1]);
|
||||
dst_argb[3] = 255U;
|
||||
dst_argb[4] = AVG(src_bayer1[0], src_bayer1[2]);
|
||||
dst_argb[5] = AVG(src_bayer0[0], src_bayer0[2]);
|
||||
dst_argb[6] = src_bayer0[1];
|
||||
dst_argb[7] = 255U;
|
||||
r = src_bayer0[1];
|
||||
src_bayer0 += 2;
|
||||
src_bayer1 += 2;
|
||||
dst_argb += 8;
|
||||
}
|
||||
dst_argb[0] = src_bayer1[0];
|
||||
dst_argb[1] = src_bayer0[0];
|
||||
dst_argb[2] = AVG(r, src_bayer0[1]);
|
||||
dst_argb[3] = 255U;
|
||||
if (!(pix & 1)) {
|
||||
dst_argb[4] = src_bayer1[0];
|
||||
dst_argb[5] = src_bayer0[0];
|
||||
dst_argb[6] = src_bayer0[1];
|
||||
dst_argb[7] = 255U;
|
||||
}
|
||||
}
|
||||
|
||||
// Converts any Bayer RGB format to ARGB.
|
||||
LIBYUV_API
|
||||
int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height,
|
||||
uint32 src_fourcc_bayer) {
|
||||
int y;
|
||||
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix);
|
||||
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix);
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
dst_argb = dst_argb + (height - 1) * dst_stride_argb;
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
}
|
||||
switch (src_fourcc_bayer) {
|
||||
case FOURCC_BGGR:
|
||||
BayerRow0 = BayerRowBG;
|
||||
BayerRow1 = BayerRowGR;
|
||||
break;
|
||||
case FOURCC_GBRG:
|
||||
BayerRow0 = BayerRowGB;
|
||||
BayerRow1 = BayerRowRG;
|
||||
break;
|
||||
case FOURCC_GRBG:
|
||||
BayerRow0 = BayerRowGR;
|
||||
BayerRow1 = BayerRowBG;
|
||||
break;
|
||||
case FOURCC_RGGB:
|
||||
BayerRow0 = BayerRowRG;
|
||||
BayerRow1 = BayerRowGB;
|
||||
break;
|
||||
default:
|
||||
return -1; // Bad FourCC
|
||||
}
|
||||
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
|
||||
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
|
||||
dst_argb + dst_stride_argb, width);
|
||||
src_bayer += src_stride_bayer * 2;
|
||||
dst_argb += dst_stride_argb * 2;
|
||||
}
|
||||
if (height & 1) {
|
||||
BayerRow0(src_bayer, src_stride_bayer, dst_argb, width);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts any Bayer RGB format to ARGB.
|
||||
LIBYUV_API
|
||||
int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
uint32 src_fourcc_bayer) {
|
||||
void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix);
|
||||
void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
|
||||
uint8* dst_argb, int pix);
|
||||
|
||||
void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
|
||||
uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
|
||||
void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
|
||||
ARGBToYRow_C;
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
int halfheight;
|
||||
height = -height;
|
||||
halfheight = (height + 1) >> 1;
|
||||
dst_y = dst_y + (height - 1) * dst_stride_y;
|
||||
dst_u = dst_u + (halfheight - 1) * dst_stride_u;
|
||||
dst_v = dst_v + (halfheight - 1) * dst_stride_v;
|
||||
dst_stride_y = -dst_stride_y;
|
||||
dst_stride_u = -dst_stride_u;
|
||||
dst_stride_v = -dst_stride_v;
|
||||
}
|
||||
#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
|
||||
ARGBToYRow = ARGBToYRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
|
||||
ARGBToUVRow = ARGBToUVRow_SSSE3;
|
||||
if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
|
||||
ARGBToYRow = ARGBToYRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToYRow = ARGBToYRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToYRow = ARGBToYRow_NEON;
|
||||
}
|
||||
if (width >= 16) {
|
||||
ARGBToUVRow = ARGBToUVRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
ARGBToUVRow = ARGBToUVRow_NEON;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
switch (src_fourcc_bayer) {
|
||||
case FOURCC_BGGR:
|
||||
BayerRow0 = BayerRowBG;
|
||||
BayerRow1 = BayerRowGR;
|
||||
break;
|
||||
case FOURCC_GBRG:
|
||||
BayerRow0 = BayerRowGB;
|
||||
BayerRow1 = BayerRowRG;
|
||||
break;
|
||||
case FOURCC_GRBG:
|
||||
BayerRow0 = BayerRowGR;
|
||||
BayerRow1 = BayerRowBG;
|
||||
break;
|
||||
case FOURCC_RGGB:
|
||||
BayerRow0 = BayerRowRG;
|
||||
BayerRow1 = BayerRowGB;
|
||||
break;
|
||||
default:
|
||||
return -1; // Bad FourCC
|
||||
}
|
||||
|
||||
{
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
int y;
|
||||
for (y = 0; y < height - 1; y += 2) {
|
||||
BayerRow0(src_bayer, src_stride_bayer, row, width);
|
||||
BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
|
||||
row + kRowSize, width);
|
||||
ARGBToUVRow(row, kRowSize, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width);
|
||||
src_bayer += src_stride_bayer * 2;
|
||||
dst_y += dst_stride_y * 2;
|
||||
dst_u += dst_stride_u;
|
||||
dst_v += dst_stride_v;
|
||||
}
|
||||
if (height & 1) {
|
||||
BayerRow0(src_bayer, src_stride_bayer, row, width);
|
||||
ARGBToUVRow(row, 0, dst_u, dst_v, width);
|
||||
ARGBToYRow(row, dst_y, width);
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Convert I420 to Bayer.
|
||||
LIBYUV_API
|
||||
int I420ToBayer(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_bayer, int dst_stride_bayer,
|
||||
int width, int height,
|
||||
uint32 dst_fourcc_bayer) {
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer,
|
||||
uint32 selector, int pix) = ARGBToBayerRow_C;
|
||||
const int blue_index = 0; // Offsets for ARGB format
|
||||
const int green_index = 1;
|
||||
const int red_index = 2;
|
||||
uint32 index_map[2];
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
int halfheight;
|
||||
height = -height;
|
||||
halfheight = (height + 1) >> 1;
|
||||
src_y = src_y + (height - 1) * src_stride_y;
|
||||
src_u = src_u + (halfheight - 1) * src_stride_u;
|
||||
src_v = src_v + (halfheight - 1) * src_stride_v;
|
||||
src_stride_y = -src_stride_y;
|
||||
src_stride_u = -src_stride_u;
|
||||
src_stride_v = -src_stride_v;
|
||||
}
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_ARGBTOBAYERROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_SSSE3;
|
||||
}
|
||||
}
|
||||
#elif defined(HAS_ARGBTOBAYERROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_Any_NEON;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
ARGBToBayerRow = ARGBToBayerRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (MakeSelectors(blue_index, green_index, red_index,
|
||||
dst_fourcc_bayer, index_map)) {
|
||||
return -1; // Bad FourCC
|
||||
}
|
||||
{
|
||||
// Allocate a row of ARGB.
|
||||
align_buffer_64(row, width * 4);
|
||||
int y;
|
||||
for (y = 0; y < height; ++y) {
|
||||
I422ToARGBRow(src_y, src_u, src_v, row, width);
|
||||
ARGBToBayerRow(row, dst_bayer, index_map[y & 1], width);
|
||||
dst_bayer += dst_stride_bayer;
|
||||
src_y += src_stride_y;
|
||||
if (y & 1) {
|
||||
src_u += src_stride_u;
|
||||
src_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAKEBAYERFOURCC(BAYER) \
|
||||
LIBYUV_API \
|
||||
int Bayer##BAYER##ToI420(const uint8* src_bayer, int src_stride_bayer, \
|
||||
uint8* dst_y, int dst_stride_y, \
|
||||
uint8* dst_u, int dst_stride_u, \
|
||||
uint8* dst_v, int dst_stride_v, \
|
||||
int width, int height) { \
|
||||
return BayerToI420(src_bayer, src_stride_bayer, \
|
||||
dst_y, dst_stride_y, \
|
||||
dst_u, dst_stride_u, \
|
||||
dst_v, dst_stride_v, \
|
||||
width, height, \
|
||||
FOURCC_##BAYER); \
|
||||
} \
|
||||
\
|
||||
LIBYUV_API \
|
||||
int I420ToBayer##BAYER(const uint8* src_y, int src_stride_y, \
|
||||
const uint8* src_u, int src_stride_u, \
|
||||
const uint8* src_v, int src_stride_v, \
|
||||
uint8* dst_bayer, int dst_stride_bayer, \
|
||||
int width, int height) { \
|
||||
return I420ToBayer(src_y, src_stride_y, \
|
||||
src_u, src_stride_u, \
|
||||
src_v, src_stride_v, \
|
||||
dst_bayer, dst_stride_bayer, \
|
||||
width, height, \
|
||||
FOURCC_##BAYER); \
|
||||
} \
|
||||
\
|
||||
LIBYUV_API \
|
||||
int ARGBToBayer##BAYER(const uint8* src_argb, int src_stride_argb, \
|
||||
uint8* dst_bayer, int dst_stride_bayer, \
|
||||
int width, int height) { \
|
||||
return ARGBToBayer(src_argb, src_stride_argb, \
|
||||
dst_bayer, dst_stride_bayer, \
|
||||
width, height, \
|
||||
FOURCC_##BAYER); \
|
||||
} \
|
||||
\
|
||||
LIBYUV_API \
|
||||
int Bayer##BAYER##ToARGB(const uint8* src_bayer, int src_stride_bayer, \
|
||||
uint8* dst_argb, int dst_stride_argb, \
|
||||
int width, int height) { \
|
||||
return BayerToARGB(src_bayer, src_stride_bayer, \
|
||||
dst_argb, dst_stride_argb, \
|
||||
width, height, \
|
||||
FOURCC_##BAYER); \
|
||||
}
|
||||
|
||||
MAKEBAYERFOURCC(BGGR)
|
||||
MAKEBAYERFOURCC(GBRG)
|
||||
MAKEBAYERFOURCC(GRBG)
|
||||
MAKEBAYERFOURCC(RGGB)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,566 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
#include <assert.h>
|
||||
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && !defined(COVERAGE_ENABLED) &&\
|
||||
!defined(TARGET_IPHONE_SIMULATOR)
|
||||
// Must be included before jpeglib.
|
||||
#include <setjmp.h>
|
||||
#define HAVE_SETJMP
|
||||
#endif
|
||||
struct FILE; // For jpeglib.h.
|
||||
|
||||
// C++ build requires extern C for jpeg internals.
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <jpeglib.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#include "libyuv/planar_functions.h" // For CopyPlane().
|
||||
|
||||
namespace libyuv {
|
||||
|
||||
#ifdef HAVE_SETJMP
|
||||
struct SetJmpErrorMgr {
|
||||
jpeg_error_mgr base; // Must be at the top
|
||||
jmp_buf setjmp_buffer;
|
||||
};
|
||||
#endif
|
||||
|
||||
const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
|
||||
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
|
||||
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
|
||||
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
|
||||
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
|
||||
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
|
||||
|
||||
// Methods that are passed to jpeglib.
|
||||
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
|
||||
void init_source(jpeg_decompress_struct* cinfo);
|
||||
void skip_input_data(jpeg_decompress_struct* cinfo,
|
||||
long num_bytes); // NOLINT
|
||||
void term_source(jpeg_decompress_struct* cinfo);
|
||||
void ErrorHandler(jpeg_common_struct* cinfo);
|
||||
|
||||
MJpegDecoder::MJpegDecoder()
|
||||
: has_scanline_padding_(LIBYUV_FALSE),
|
||||
num_outbufs_(0),
|
||||
scanlines_(NULL),
|
||||
scanlines_sizes_(NULL),
|
||||
databuf_(NULL),
|
||||
databuf_strides_(NULL) {
|
||||
decompress_struct_ = new jpeg_decompress_struct;
|
||||
source_mgr_ = new jpeg_source_mgr;
|
||||
#ifdef HAVE_SETJMP
|
||||
error_mgr_ = new SetJmpErrorMgr;
|
||||
decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
|
||||
// Override standard exit()-based error handler.
|
||||
error_mgr_->base.error_exit = &ErrorHandler;
|
||||
#endif
|
||||
decompress_struct_->client_data = NULL;
|
||||
source_mgr_->init_source = &init_source;
|
||||
source_mgr_->fill_input_buffer = &fill_input_buffer;
|
||||
source_mgr_->skip_input_data = &skip_input_data;
|
||||
source_mgr_->resync_to_restart = &jpeg_resync_to_restart;
|
||||
source_mgr_->term_source = &term_source;
|
||||
jpeg_create_decompress(decompress_struct_);
|
||||
decompress_struct_->src = source_mgr_;
|
||||
buf_vec_.buffers = &buf_;
|
||||
buf_vec_.len = 1;
|
||||
}
|
||||
|
||||
MJpegDecoder::~MJpegDecoder() {
|
||||
jpeg_destroy_decompress(decompress_struct_);
|
||||
delete decompress_struct_;
|
||||
delete source_mgr_;
|
||||
#ifdef HAVE_SETJMP
|
||||
delete error_mgr_;
|
||||
#endif
|
||||
DestroyOutputBuffers();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
if (!ValidateJpeg(src, src_len)) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
|
||||
buf_.data = src;
|
||||
buf_.len = (int)(src_len);
|
||||
buf_vec_.pos = 0;
|
||||
decompress_struct_->client_data = &buf_vec_;
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called jpeg_read_header, it experienced an error, and we called
|
||||
// longjmp() and rewound the stack to here. Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) {
|
||||
// ERROR: Bad MJPEG header
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
AllocOutputBuffers(GetNumComponents());
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_size = GetComponentScanlinesPerImcuRow(i);
|
||||
if (scanlines_sizes_[i] != scanlines_size) {
|
||||
if (scanlines_[i]) {
|
||||
delete scanlines_[i];
|
||||
}
|
||||
scanlines_[i] = new uint8* [scanlines_size];
|
||||
scanlines_sizes_[i] = scanlines_size;
|
||||
}
|
||||
|
||||
// We allocate padding for the final scanline to pad it up to DCTSIZE bytes
|
||||
// to avoid memory errors, since jpeglib only reads full MCUs blocks. For
|
||||
// the preceding scanlines, the padding is not needed/wanted because the
|
||||
// following addresses will already be valid (they are the initial bytes of
|
||||
// the next scanline) and will be overwritten when jpeglib writes out that
|
||||
// next scanline.
|
||||
int databuf_stride = GetComponentStride(i);
|
||||
int databuf_size = scanlines_size * databuf_stride;
|
||||
if (databuf_strides_[i] != databuf_stride) {
|
||||
if (databuf_[i]) {
|
||||
delete databuf_[i];
|
||||
}
|
||||
databuf_[i] = new uint8[databuf_size];
|
||||
databuf_strides_[i] = databuf_stride;
|
||||
}
|
||||
|
||||
if (GetComponentStride(i) != GetComponentWidth(i)) {
|
||||
has_scanline_padding_ = LIBYUV_TRUE;
|
||||
}
|
||||
}
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
static int DivideAndRoundUp(int numerator, int denominator) {
|
||||
return (numerator + denominator - 1) / denominator;
|
||||
}
|
||||
|
||||
static int DivideAndRoundDown(int numerator, int denominator) {
|
||||
return numerator / denominator;
|
||||
}
|
||||
|
||||
// Returns width of the last loaded frame.
|
||||
int MJpegDecoder::GetWidth() {
|
||||
return decompress_struct_->image_width;
|
||||
}
|
||||
|
||||
// Returns height of the last loaded frame.
|
||||
int MJpegDecoder::GetHeight() {
|
||||
return decompress_struct_->image_height;
|
||||
}
|
||||
|
||||
// Returns format of the last loaded frame. The return value is one of the
|
||||
// kColorSpace* constants.
|
||||
int MJpegDecoder::GetColorSpace() {
|
||||
return decompress_struct_->jpeg_color_space;
|
||||
}
|
||||
|
||||
// Number of color components in the color space.
|
||||
int MJpegDecoder::GetNumComponents() {
|
||||
return decompress_struct_->num_components;
|
||||
}
|
||||
|
||||
// Sample factors of the n-th component.
|
||||
int MJpegDecoder::GetHorizSampFactor(int component) {
|
||||
return decompress_struct_->comp_info[component].h_samp_factor;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSampFactor(int component) {
|
||||
return decompress_struct_->comp_info[component].v_samp_factor;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetHorizSubSampFactor(int component) {
|
||||
return decompress_struct_->max_h_samp_factor /
|
||||
GetHorizSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSubSampFactor(int component) {
|
||||
return decompress_struct_->max_v_samp_factor /
|
||||
GetVertSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
|
||||
return decompress_struct_->max_v_samp_factor * DCTSIZE;
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) {
|
||||
int vs = GetVertSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentWidth(int component) {
|
||||
int hs = GetHorizSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetWidth(), hs);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentHeight(int component) {
|
||||
int vs = GetVertSubSampFactor(component);
|
||||
return DivideAndRoundUp(GetHeight(), vs);
|
||||
}
|
||||
|
||||
// Get width in bytes padded out to a multiple of DCTSIZE
|
||||
int MJpegDecoder::GetComponentStride(int component) {
|
||||
return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetComponentSize(int component) {
|
||||
return GetComponentWidth(component) * GetComponentHeight(component);
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called jpeg_abort_decompress, it experienced an error, and we called
|
||||
// longjmp() and rewound the stack to here. Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
jpeg_abort_decompress(decompress_struct_);
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
uint8** planes, int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called into jpeglib, it experienced an error sometime during this
|
||||
// function call, and we called longjmp() and rewound the stack to here.
|
||||
// Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
if (!StartDecode()) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
SetScanlinePointers(databuf_);
|
||||
int lines_left = dst_height;
|
||||
// Compute amount of lines to skip to implement vertical crop.
|
||||
// TODO(fbarchard): Ensure skip is a multiple of maximum component
|
||||
// subsample. ie 2
|
||||
int skip = (GetHeight() - dst_height) / 2;
|
||||
if (skip > 0) {
|
||||
// There is no API to skip lines in the output data, so we read them
|
||||
// into the temp buffer.
|
||||
while (skip >= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
skip -= GetImageScanlinesPerImcuRow();
|
||||
}
|
||||
if (skip > 0) {
|
||||
// Have a partial iMCU row left over to skip. Must read it and then
|
||||
// copy the parts we want into the destination.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip =
|
||||
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
|
||||
rows_to_skip;
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
|
||||
}
|
||||
}
|
||||
|
||||
// Read full MCUs but cropped horizontally
|
||||
for (; lines_left > GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
|
||||
if (lines_left > 0) {
|
||||
// Have a partial iMCU row left over to decode.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy =
|
||||
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#ifdef HAVE_SETJMP
|
||||
if (setjmp(error_mgr_->setjmp_buffer)) {
|
||||
// We called into jpeglib, it experienced an error sometime during this
|
||||
// function call, and we called longjmp() and rewound the stack to here.
|
||||
// Return error.
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
if (!StartDecode()) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
SetScanlinePointers(databuf_);
|
||||
int lines_left = dst_height;
|
||||
// TODO(fbarchard): Compute amount of lines to skip to implement vertical crop
|
||||
int skip = (GetHeight() - dst_height) / 2;
|
||||
if (skip > 0) {
|
||||
while (skip >= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
skip -= GetImageScanlinesPerImcuRow();
|
||||
}
|
||||
if (skip > 0) {
|
||||
// Have a partial iMCU row left over to skip.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
// Change our own data buffer pointers so we can pass them to the
|
||||
// callback.
|
||||
databuf_[i] += data_to_skip;
|
||||
}
|
||||
int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip;
|
||||
(*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy);
|
||||
// Now change them back.
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
databuf_[i] -= data_to_skip;
|
||||
}
|
||||
lines_left -= scanlines_to_copy;
|
||||
}
|
||||
}
|
||||
// Read full MCUs until we get to the crop point.
|
||||
for (; lines_left >= GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
(*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow());
|
||||
}
|
||||
if (lines_left > 0) {
|
||||
// Have a partial iMCU row left over to decode.
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
(*fn)(opaque, databuf_, databuf_strides_, lines_left);
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
void init_source(j_decompress_ptr cinfo) {
|
||||
fill_input_buffer(cinfo);
|
||||
}
|
||||
|
||||
boolean fill_input_buffer(j_decompress_ptr cinfo) {
|
||||
BufferVector* buf_vec = (BufferVector*)(cinfo->client_data);
|
||||
if (buf_vec->pos >= buf_vec->len) {
|
||||
assert(0 && "No more data");
|
||||
// ERROR: No more data
|
||||
return FALSE;
|
||||
}
|
||||
cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data;
|
||||
cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len;
|
||||
++buf_vec->pos;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void skip_input_data(j_decompress_ptr cinfo,
|
||||
long num_bytes) { // NOLINT
|
||||
cinfo->src->next_input_byte += num_bytes;
|
||||
}
|
||||
|
||||
void term_source(j_decompress_ptr cinfo) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
#ifdef HAVE_SETJMP
|
||||
void ErrorHandler(j_common_ptr cinfo) {
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
#ifdef DEBUG
|
||||
char buf[JMSG_LENGTH_MAX];
|
||||
(*cinfo->err->format_message)(cinfo, buf);
|
||||
// ERROR: Error in jpeglib: buf
|
||||
#endif
|
||||
|
||||
SetJmpErrorMgr* mgr = (SetJmpErrorMgr*)(cinfo->err);
|
||||
// This rewinds the call stack to the point of the corresponding setjmp()
|
||||
// and causes it to return (for a second time) with value 1.
|
||||
longjmp(mgr->setjmp_buffer, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
if (num_outbufs != num_outbufs_) {
|
||||
// We could perhaps optimize this case to resize the output buffers without
|
||||
// necessarily having to delete and recreate each one, but it's not worth
|
||||
// it.
|
||||
DestroyOutputBuffers();
|
||||
|
||||
scanlines_ = new uint8** [num_outbufs];
|
||||
scanlines_sizes_ = new int[num_outbufs];
|
||||
databuf_ = new uint8* [num_outbufs];
|
||||
databuf_strides_ = new int[num_outbufs];
|
||||
|
||||
for (int i = 0; i < num_outbufs; ++i) {
|
||||
scanlines_[i] = NULL;
|
||||
scanlines_sizes_[i] = 0;
|
||||
databuf_[i] = NULL;
|
||||
databuf_strides_[i] = 0;
|
||||
}
|
||||
|
||||
num_outbufs_ = num_outbufs;
|
||||
}
|
||||
}
|
||||
|
||||
void MJpegDecoder::DestroyOutputBuffers() {
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
delete [] scanlines_[i];
|
||||
delete [] databuf_[i];
|
||||
}
|
||||
delete [] scanlines_;
|
||||
delete [] databuf_;
|
||||
delete [] scanlines_sizes_;
|
||||
delete [] databuf_strides_;
|
||||
scanlines_ = NULL;
|
||||
databuf_ = NULL;
|
||||
scanlines_sizes_ = NULL;
|
||||
databuf_strides_ = NULL;
|
||||
num_outbufs_ = 0;
|
||||
}
|
||||
|
||||
// JDCT_IFAST and do_block_smoothing improve performance substantially.
|
||||
LIBYUV_BOOL MJpegDecoder::StartDecode() {
|
||||
decompress_struct_->raw_data_out = TRUE;
|
||||
decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default
|
||||
decompress_struct_->dither_mode = JDITHER_NONE;
|
||||
// Not applicable to 'raw':
|
||||
decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE);
|
||||
// Only for buffered mode:
|
||||
decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE);
|
||||
// Blocky but fast:
|
||||
decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE);
|
||||
|
||||
if (!jpeg_start_decompress(decompress_struct_)) {
|
||||
// ERROR: Couldn't start JPEG decompressor";
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::FinishDecode() {
|
||||
// jpeglib considers it an error if we finish without decoding the whole
|
||||
// image, so we call "abort" rather than "finish".
|
||||
jpeg_abort_decompress(decompress_struct_);
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
void MJpegDecoder::SetScanlinePointers(uint8** data) {
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
uint8* data_i = data[i];
|
||||
for (int j = 0; j < scanlines_sizes_[i]; ++j) {
|
||||
scanlines_[i][j] = data_i;
|
||||
data_i += GetComponentStride(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
|
||||
return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
|
||||
jpeg_read_raw_data(decompress_struct_,
|
||||
scanlines_,
|
||||
GetImageScanlinesPerImcuRow());
|
||||
}
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components) {
|
||||
if (number_of_components == 3) { // Color images.
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 2 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 2) {
|
||||
return kJpegYuv420;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 1) {
|
||||
return kJpegYuv422;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 1 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 1 && subsample_y[2] == 1) {
|
||||
return kJpegYuv444;
|
||||
}
|
||||
} else if (number_of_components == 1) { // Grey-scale images.
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1) {
|
||||
return kJpegYuv400;
|
||||
}
|
||||
}
|
||||
return kJpegUnknown;
|
||||
}
|
||||
|
||||
} // namespace libyuv
|
||||
#endif // HAVE_JPEG
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Helper function to validate the jpeg appears intact.
|
||||
// TODO(fbarchard): Optimize case where SOI is found but EOI is not.
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
|
||||
size_t i;
|
||||
if (sample_size < 64) {
|
||||
// ERROR: Invalid jpeg size: sample_size
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
if (sample[0] != 0xff || sample[1] != 0xd8) { // Start Of Image
|
||||
// ERROR: Invalid jpeg initial start code
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (i = sample_size - 2; i > 1;) {
|
||||
if (sample[i] != 0xd9) {
|
||||
if (sample[i] == 0xff && sample[i + 1] == 0xd9) { // End Of Image
|
||||
return LIBYUV_TRUE; // Success: Valid jpeg.
|
||||
}
|
||||
--i;
|
||||
}
|
||||
--i;
|
||||
}
|
||||
// ERROR: Invalid jpeg end code not found. Size sample_size
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
|
@ -8,15 +8,15 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/planar_functions.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
|
||||
#include <string.h> // for memset()
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/cpu_id.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#ifdef HAVE_JPEG
|
||||
#include "third_party/libyuv/include/libyuv/mjpeg_decoder.h"
|
||||
#include "libyuv/mjpeg_decoder.h"
|
||||
#endif
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -37,6 +37,10 @@ void CopyPlane(const uint8* src_y, int src_stride_y,
|
|||
height = 1;
|
||||
src_stride_y = dst_stride_y = 0;
|
||||
}
|
||||
// Nothing to do.
|
||||
if (src_y == dst_y && src_stride_y == dst_stride_y) {
|
||||
return;
|
||||
}
|
||||
#if defined(HAS_COPYROW_X86)
|
||||
if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
|
||||
CopyRow = CopyRow_X86;
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/convert.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ARGBScale has a function to copy pixels to a row, striding each source
|
||||
// pixel by a constant.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
#endif
|
||||
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
|
||||
int src_stepx,
|
||||
uint8* dst_ptr, int dst_width);
|
||||
|
||||
static void ARGBTranspose(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
int i;
|
||||
int src_pixel_step = src_stride >> 2;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
|
||||
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4) && // Width of dest.
|
||||
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4) && // Width of dest.
|
||||
IS_ALIGNED(src, 4)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < width; ++i) { // column of source to row of dest.
|
||||
ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
|
||||
dst += dst_stride;
|
||||
src += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBRotate90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
// Rotate by 90 is a ARGBTranspose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
src += src_stride * (height - 1);
|
||||
src_stride = -src_stride;
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
void ARGBRotate270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
// Rotate by 270 is a ARGBTranspose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
dst += dst_stride * (width - 1);
|
||||
dst_stride = -dst_stride;
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
}
|
||||
|
||||
void ARGBRotate180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width * 4);
|
||||
const uint8* src_bot = src + src_stride * (height - 1);
|
||||
uint8* dst_bot = dst + dst_stride * (height - 1);
|
||||
int half_height = (height + 1) >> 1;
|
||||
int y;
|
||||
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
|
||||
ARGBMirrorRow_C;
|
||||
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
|
||||
#if defined(HAS_ARGBMIRRORROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_SSSE3;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBMIRRORROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_AVX2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBMIRRORROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_NEON;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width * 4, 32)) {
|
||||
CopyRow = CopyRow_NEON;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_X86)
|
||||
if (TestCpuFlag(kCpuHasX86)) {
|
||||
CopyRow = CopyRow_X86;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width * 4, 32) &&
|
||||
IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
CopyRow = CopyRow_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_ERMS)
|
||||
if (TestCpuFlag(kCpuHasERMS)) {
|
||||
CopyRow = CopyRow_ERMS;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_MIPS)
|
||||
if (TestCpuFlag(kCpuHasMIPS)) {
|
||||
CopyRow = CopyRow_MIPS;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
|
||||
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
src_bot -= src_stride;
|
||||
dst_bot -= dst_stride;
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Negative height means invert the image.
|
||||
if (height < 0) {
|
||||
height = -height;
|
||||
src_argb = src_argb + (height - 1) * src_stride_argb;
|
||||
src_stride_argb = -src_stride_argb;
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return ARGBCopy(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
case kRotate90:
|
||||
ARGBRotate90(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
ARGBRotate270(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
ARGBRotate180(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,485 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
"andi $t0, %[dst], 0x3 \n"
|
||||
"andi $t1, %[dst_stride], 0x3 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
"lbux $t9, $t3(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s0, $t8, $t0 \n"
|
||||
"lbux $t0, $t4(%[src]) \n"
|
||||
"lbux $t1, $t5(%[src]) \n"
|
||||
"lbux $t8, $t6(%[src]) \n"
|
||||
"lbux $t9, $t7(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s1, $t8, $t0 \n"
|
||||
"sw $s0, 0(%[dst]) \n"
|
||||
"addiu %[width], -1 \n"
|
||||
"addiu %[src], 1 \n"
|
||||
"sw $s1, 4(%[dst]) \n"
|
||||
"bnez %[width], 1b \n"
|
||||
" addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
"lbux $t9, $t3(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s0, $t8, $t0 \n"
|
||||
"lbux $t0, $t4(%[src]) \n"
|
||||
"lbux $t1, $t5(%[src]) \n"
|
||||
"lbux $t8, $t6(%[src]) \n"
|
||||
"lbux $t9, $t7(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s1, $t8, $t0 \n"
|
||||
"swr $s0, 0(%[dst]) \n"
|
||||
"swl $s0, 3(%[dst]) \n"
|
||||
"addiu %[width], -1 \n"
|
||||
"addiu %[src], 1 \n"
|
||||
"swr $s1, 4(%[dst]) \n"
|
||||
"swl $s1, 7(%[dst]) \n"
|
||||
"bnez %[width], 11b \n"
|
||||
"addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1"
|
||||
);
|
||||
}
|
||||
|
||||
void TransposeWx8_FAST_MIPS_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set noat \n"
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
|
||||
"srl $AT, %[width], 0x2 \n"
|
||||
"andi $t0, %[dst], 0x3 \n"
|
||||
"andi $t1, %[dst_stride], 0x3 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
"addu $s2, $s1, %[dst_stride] \n"
|
||||
|
||||
"sw $s4, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $s6, 0($s0) \n"
|
||||
"sw $t8, 4($s0) \n"
|
||||
"sw $s5, 0($s1) \n"
|
||||
"sw $t1, 4($s1) \n"
|
||||
"sw $s7, 0($s2) \n"
|
||||
"sw $t9, 4($s2) \n"
|
||||
|
||||
"addiu $AT, -1 \n"
|
||||
"addiu %[src], 4 \n"
|
||||
|
||||
"bnez $AT, 1b \n"
|
||||
" addu %[dst], $s2, %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
"addu $s2, $s1, %[dst_stride] \n"
|
||||
|
||||
"swr $s4, 0(%[dst]) \n"
|
||||
"swl $s4, 3(%[dst]) \n"
|
||||
"swr $t0, 4(%[dst]) \n"
|
||||
"swl $t0, 7(%[dst]) \n"
|
||||
"swr $s6, 0($s0) \n"
|
||||
"swl $s6, 3($s0) \n"
|
||||
"swr $t8, 4($s0) \n"
|
||||
"swl $t8, 7($s0) \n"
|
||||
"swr $s5, 0($s1) \n"
|
||||
"swl $s5, 3($s1) \n"
|
||||
"swr $t1, 4($s1) \n"
|
||||
"swl $t1, 7($s1) \n"
|
||||
"swr $s7, 0($s2) \n"
|
||||
"swl $s7, 3($s2) \n"
|
||||
"swr $t9, 4($s2) \n"
|
||||
"swl $t9, 7($s2) \n"
|
||||
|
||||
"addiu $AT, -1 \n"
|
||||
"addiu %[src], 4 \n"
|
||||
|
||||
"bnez $AT, 11b \n"
|
||||
" addu %[dst], $s2, %[dst_stride] \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
".set at \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
|
||||
);
|
||||
}
|
||||
|
||||
void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
"subu $t7, $t9, %[src_stride] \n"
|
||||
"srl $t1, %[width], 1 \n"
|
||||
|
||||
// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
|
||||
"andi $t0, %[dst_a], 0x3 \n"
|
||||
"andi $t8, %[dst_b], 0x3 \n"
|
||||
"or $t0, $t0, $t8 \n"
|
||||
"andi $t8, %[dst_stride_a], 0x3 \n"
|
||||
"andi $s5, %[dst_stride_b], 0x3 \n"
|
||||
"or $t8, $t8, $s5 \n"
|
||||
"or $t0, $t0, $t8 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" nop \n"
|
||||
// dst + dst_stride word aligned (both, a & b dst addresses)
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"sw $s3, 0($s5) \n"
|
||||
"sw $s4, 0($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"sw $s3, 0(%[dst_a]) \n"
|
||||
"sw $s4, 0(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
"sw $s3, 4($s5) \n"
|
||||
"sw $s4, 4($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
"sll $t0, %[dst_stride_a], 1 \n"
|
||||
"sll $t8, %[dst_stride_b], 1 \n"
|
||||
"sw $s3, 4(%[dst_a]) \n"
|
||||
"sw $s4, 4(%[dst_b]) \n"
|
||||
"addu %[dst_a], %[dst_a], $t0 \n"
|
||||
"bnez $t1, 1b \n"
|
||||
" addu %[dst_b], %[dst_b], $t8 \n"
|
||||
"b 2f \n"
|
||||
" nop \n"
|
||||
|
||||
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"swr $s3, 0($s5) \n"
|
||||
"swl $s3, 3($s5) \n"
|
||||
"swr $s4, 0($s6) \n"
|
||||
"swl $s4, 3($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"swr $s3, 0(%[dst_a]) \n"
|
||||
"swl $s3, 3(%[dst_a]) \n"
|
||||
"swr $s4, 0(%[dst_b]) \n"
|
||||
"swl $s4, 3(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
|
||||
"swr $s3, 4($s5) \n"
|
||||
"swl $s3, 7($s5) \n"
|
||||
"swr $s4, 4($s6) \n"
|
||||
"swl $s4, 7($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
"sll $t0, %[dst_stride_a], 1 \n"
|
||||
"sll $t8, %[dst_stride_b], 1 \n"
|
||||
"swr $s3, 4(%[dst_a]) \n"
|
||||
"swl $s3, 7(%[dst_a]) \n"
|
||||
"swr $s4, 4(%[dst_b]) \n"
|
||||
"swl $s4, 7(%[dst_b]) \n"
|
||||
"addu %[dst_a], %[dst_a], $t0 \n"
|
||||
"bnez $t1, 11b \n"
|
||||
" addu %[dst_b], %[dst_b], $t8 \n"
|
||||
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
: [src] "+r" (src),
|
||||
[dst_a] "+r" (dst_a),
|
||||
[dst_b] "+r" (dst_b),
|
||||
[width] "+r" (width),
|
||||
[src_stride] "+r" (src_stride)
|
||||
: [dst_stride_a] "r" (dst_stride_a),
|
||||
[dst_stride_b] "r" (dst_stride_b)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3",
|
||||
"s4", "s5", "s6"
|
||||
);
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -0,0 +1,533 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
|
||||
|
||||
static uvec8 kVTbl4x4Transpose =
|
||||
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
|
||||
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width) {
|
||||
const uint8* src_temp = NULL;
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %5, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d4}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d6}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d7}, [%0] \n"
|
||||
|
||||
"vtrn.8 d1, d0 \n"
|
||||
"vtrn.8 d3, d2 \n"
|
||||
"vtrn.8 d5, d4 \n"
|
||||
"vtrn.8 d7, d6 \n"
|
||||
|
||||
"vtrn.16 d1, d3 \n"
|
||||
"vtrn.16 d0, d2 \n"
|
||||
"vtrn.16 d5, d7 \n"
|
||||
"vtrn.16 d4, d6 \n"
|
||||
|
||||
"vtrn.32 d1, d5 \n"
|
||||
"vtrn.32 d0, d4 \n"
|
||||
"vtrn.32 d3, d7 \n"
|
||||
"vtrn.32 d2, d6 \n"
|
||||
|
||||
"vrev16.8 q0, q0 \n"
|
||||
"vrev16.8 q1, q1 \n"
|
||||
"vrev16.8 q2, q2 \n"
|
||||
"vrev16.8 q3, q3 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d1}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d3}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d2}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d5}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d4}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d7}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d6}, [%0] \n"
|
||||
|
||||
"add %1, #8 \n" // src += 8
|
||||
"add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride
|
||||
"subs %5, #8 \n" // w -= 8
|
||||
"bge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %5, #8 \n"
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
"cmp %5, #4 \n"
|
||||
"blt 2f \n"
|
||||
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d2[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d2[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d3[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d3[1]}, [%0] \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(6)
|
||||
"vld1.8 {q3}, [%6] \n"
|
||||
|
||||
"vtbl.8 d4, {d0, d1}, d6 \n"
|
||||
"vtbl.8 d5, {d0, d1}, d7 \n"
|
||||
"vtbl.8 d0, {d2, d3}, d6 \n"
|
||||
"vtbl.8 d1, {d2, d3}, d7 \n"
|
||||
|
||||
// TODO(frkoenig): Rework shuffle above to
|
||||
// write out with 4 instead of 8 writes.
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d4[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d4[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d5[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d5[1]}, [%0] \n"
|
||||
|
||||
"add %0, %3, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d0[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d0[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d1[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d1[1]}, [%0] \n"
|
||||
|
||||
"add %1, #4 \n" // src += 4
|
||||
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
|
||||
"subs %5, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[3]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[3]}, [%0] \n"
|
||||
|
||||
"vtrn.8 d0, d1 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d1}, [%0] \n"
|
||||
|
||||
"add %1, #2 \n" // src += 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
|
||||
"subs %5, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[0]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[1]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[2]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[3]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[4]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[5]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[6]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[7]}, [%1] \n"
|
||||
|
||||
MEMACCESS(3)
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "+r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst), // %3
|
||||
"+r"(dst_stride), // %4
|
||||
"+r"(width) // %5
|
||||
: "r"(&kVTbl4x4Transpose) // %6
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3"
|
||||
);
|
||||
}
|
||||
|
||||
static uvec8 kVTbl4x4TransposeDi =
|
||||
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
|
||||
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width) {
|
||||
const uint8* src_temp = NULL;
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %7, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d0, d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d2, d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d4, d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d6, d7}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d16, d17}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d18, d19}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d20, d21}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d22, d23}, [%0] \n"
|
||||
|
||||
"vtrn.8 q1, q0 \n"
|
||||
"vtrn.8 q3, q2 \n"
|
||||
"vtrn.8 q9, q8 \n"
|
||||
"vtrn.8 q11, q10 \n"
|
||||
|
||||
"vtrn.16 q1, q3 \n"
|
||||
"vtrn.16 q0, q2 \n"
|
||||
"vtrn.16 q9, q11 \n"
|
||||
"vtrn.16 q8, q10 \n"
|
||||
|
||||
"vtrn.32 q1, q9 \n"
|
||||
"vtrn.32 q0, q8 \n"
|
||||
"vtrn.32 q3, q11 \n"
|
||||
"vtrn.32 q2, q10 \n"
|
||||
|
||||
"vrev16.8 q0, q0 \n"
|
||||
"vrev16.8 q1, q1 \n"
|
||||
"vrev16.8 q2, q2 \n"
|
||||
"vrev16.8 q3, q3 \n"
|
||||
"vrev16.8 q8, q8 \n"
|
||||
"vrev16.8 q9, q9 \n"
|
||||
"vrev16.8 q10, q10 \n"
|
||||
"vrev16.8 q11, q11 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d2}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d6}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d4}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d18}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d16}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d22}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d20}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d3}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d1}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d7}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d5}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d19}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d17}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d23}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d21}, [%0] \n"
|
||||
|
||||
"add %1, #8*2 \n" // src += 8*2
|
||||
"add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a
|
||||
"add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b
|
||||
"subs %7, #8 \n" // w -= 8
|
||||
"bge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %7, #8 \n"
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
"cmp %7, #4 \n"
|
||||
"blt 2f \n"
|
||||
|
||||
// TODO(frkoenig): Clean this up
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d0}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d2}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d4}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d6}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d7}, [%0] \n"
|
||||
|
||||
MEMACCESS(8)
|
||||
"vld1.8 {q15}, [%8] \n"
|
||||
|
||||
"vtrn.8 q0, q1 \n"
|
||||
"vtrn.8 q2, q3 \n"
|
||||
|
||||
"vtbl.8 d16, {d0, d1}, d30 \n"
|
||||
"vtbl.8 d17, {d0, d1}, d31 \n"
|
||||
"vtbl.8 d18, {d2, d3}, d30 \n"
|
||||
"vtbl.8 d19, {d2, d3}, d31 \n"
|
||||
"vtbl.8 d20, {d4, d5}, d30 \n"
|
||||
"vtbl.8 d21, {d4, d5}, d31 \n"
|
||||
"vtbl.8 d22, {d6, d7}, d30 \n"
|
||||
"vtbl.8 d23, {d6, d7}, d31 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d16[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d16[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d17[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d17[1]}, [%0], %4 \n"
|
||||
|
||||
"add %0, %3, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d20[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d20[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d21[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d21[1]}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d18[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d18[1]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d19[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d19[1]}, [%0], %6 \n"
|
||||
|
||||
"add %0, %5, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d22[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d22[1]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d23[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d23[1]}, [%0] \n"
|
||||
|
||||
"add %1, #4*2 \n" // src += 4 * 2
|
||||
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
|
||||
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
|
||||
"subs %7, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[3], d3[3]}, [%0] \n"
|
||||
|
||||
"vtrn.8 d0, d1 \n"
|
||||
"vtrn.8 d2, d3 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d2}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d1}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d3}, [%0] \n"
|
||||
|
||||
"add %1, #2*2 \n" // src += 2 * 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
|
||||
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
|
||||
"subs %7, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[7], d1[7]}, [%1] \n"
|
||||
|
||||
MEMACCESS(3)
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
MEMACCESS(5)
|
||||
"vst1.64 {d1}, [%5] \n"
|
||||
|
||||
"4: \n"
|
||||
|
||||
: "+r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst_a), // %3
|
||||
"+r"(dst_stride_a), // %4
|
||||
"+r"(dst_b), // %5
|
||||
"+r"(dst_stride_b), // %6
|
||||
"+r"(width) // %7
|
||||
: "r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc",
|
||||
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -8,9 +8,9 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -35,10 +35,12 @@ extern "C" {
|
|||
}
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
|
||||
0, 4, 7)
|
||||
YANY(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_Unaligned_SSSE3, I422ToARGBRow_C,
|
||||
1, 4, 7)
|
||||
#endif // HAS_I422TOARGBROW_SSSE3
|
||||
#ifdef HAS_I444TOARGBROW_SSSE3
|
||||
YANY(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_Unaligned_SSSE3, I444ToARGBRow_C,
|
||||
0, 4, 7)
|
||||
YANY(I411ToARGBRow_Any_SSSE3, I411ToARGBRow_Unaligned_SSSE3, I411ToARGBRow_C,
|
||||
2, 4, 7)
|
||||
YANY(I422ToBGRARow_Any_SSSE3, I422ToBGRARow_Unaligned_SSSE3, I422ToBGRARow_C,
|
||||
|
@ -59,7 +61,7 @@ YANY(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, I422ToRGB24Row_C, 1, 3, 7)
|
|||
YANY(I422ToRAWRow_Any_SSSE3, I422ToRAWRow_SSSE3, I422ToRAWRow_C, 1, 3, 7)
|
||||
YANY(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, I422ToYUY2Row_C, 1, 2, 15)
|
||||
YANY(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, I422ToUYVYRow_C, 1, 2, 15)
|
||||
#endif // HAS_I422TOARGBROW_SSSE3
|
||||
#endif // HAS_I444TOARGBROW_SSSE3
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
YANY(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, I422ToARGBRow_C, 1, 4, 15)
|
||||
#endif // HAS_I422TOARGBROW_AVX2
|
||||
|
|
|
@ -8,11 +8,11 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include <string.h> // For memcpy and memset.
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/basic_types.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -16,7 +16,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
#ifdef HAS_COPYROW_MIPS
|
||||
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
|
||||
|
@ -376,7 +377,9 @@ void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
|
|||
|
||||
// MIPS DSPR2 functions
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
|
||||
(__mips_dsp_rev >= 2)
|
||||
(__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -8,7 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -8,15 +8,179 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#if defined (_M_X64)
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h> // For _mm_maddubs_epi16
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for Visual C x86.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
// This module is for Visual C.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
|
||||
|
||||
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
|
||||
|
||||
#define UB 127 /* min(127,(int8)(2.018 * 64)) */
|
||||
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
|
||||
#define UR 0
|
||||
|
||||
#define VB 0
|
||||
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
|
||||
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
|
||||
|
||||
// Bias
|
||||
#define BB UB * 128 + VB * 128
|
||||
#define BG UG * 128 + VG * 128
|
||||
#define BR UR * 128 + VR * 128
|
||||
|
||||
static const vec8 kUVToB = {
|
||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
||||
};
|
||||
|
||||
static const vec8 kUVToR = {
|
||||
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
|
||||
};
|
||||
|
||||
static const vec8 kUVToG = {
|
||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
||||
};
|
||||
|
||||
static const vec8 kVUToB = {
|
||||
VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
|
||||
};
|
||||
|
||||
static const vec8 kVUToR = {
|
||||
VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
|
||||
};
|
||||
|
||||
static const vec8 kVUToG = {
|
||||
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
|
||||
};
|
||||
|
||||
static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
|
||||
static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
|
||||
static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
|
||||
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
|
||||
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
|
||||
|
||||
// 64 bit
|
||||
#if defined(_M_X64)
|
||||
|
||||
// Aligned destination version.
|
||||
__declspec(align(16))
|
||||
void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const __m128i xmm4 = _mm_setzero_si128();
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
|
||||
while (width > 0) {
|
||||
xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
|
||||
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
|
||||
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
|
||||
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
|
||||
xmm1 = _mm_load_si128(&xmm0);
|
||||
xmm2 = _mm_load_si128(&xmm0);
|
||||
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
|
||||
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
|
||||
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
|
||||
xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
|
||||
xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
|
||||
xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
|
||||
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
|
||||
xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
|
||||
xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
|
||||
xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
|
||||
xmm0 = _mm_adds_epi16(xmm0, xmm3);
|
||||
xmm1 = _mm_adds_epi16(xmm1, xmm3);
|
||||
xmm2 = _mm_adds_epi16(xmm2, xmm3);
|
||||
xmm0 = _mm_srai_epi16(xmm0, 6);
|
||||
xmm1 = _mm_srai_epi16(xmm1, 6);
|
||||
xmm2 = _mm_srai_epi16(xmm2, 6);
|
||||
xmm0 = _mm_packus_epi16(xmm0, xmm0);
|
||||
xmm1 = _mm_packus_epi16(xmm1, xmm1);
|
||||
xmm2 = _mm_packus_epi16(xmm2, xmm2);
|
||||
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
|
||||
xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
|
||||
xmm1 = _mm_load_si128(&xmm0);
|
||||
xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
|
||||
xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
|
||||
|
||||
_mm_store_si128((__m128i *)dst_argb, xmm0);
|
||||
_mm_store_si128((__m128i *)(dst_argb + 16), xmm1);
|
||||
|
||||
y_buf += 8;
|
||||
u_buf += 4;
|
||||
dst_argb += 32;
|
||||
width -= 8;
|
||||
}
|
||||
}
|
||||
|
||||
// Unaligned destination version.
|
||||
void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
|
||||
__m128i xmm0, xmm1, xmm2, xmm3;
|
||||
const __m128i xmm5 = _mm_set1_epi8(-1);
|
||||
const __m128i xmm4 = _mm_setzero_si128();
|
||||
const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
||||
|
||||
while (width > 0) {
|
||||
xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
|
||||
xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
|
||||
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
|
||||
xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
|
||||
xmm1 = _mm_load_si128(&xmm0);
|
||||
xmm2 = _mm_load_si128(&xmm0);
|
||||
xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
|
||||
xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
|
||||
xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
|
||||
xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
|
||||
xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
|
||||
xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
|
||||
xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
|
||||
xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
|
||||
xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
|
||||
xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
|
||||
xmm0 = _mm_adds_epi16(xmm0, xmm3);
|
||||
xmm1 = _mm_adds_epi16(xmm1, xmm3);
|
||||
xmm2 = _mm_adds_epi16(xmm2, xmm3);
|
||||
xmm0 = _mm_srai_epi16(xmm0, 6);
|
||||
xmm1 = _mm_srai_epi16(xmm1, 6);
|
||||
xmm2 = _mm_srai_epi16(xmm2, 6);
|
||||
xmm0 = _mm_packus_epi16(xmm0, xmm0);
|
||||
xmm1 = _mm_packus_epi16(xmm1, xmm1);
|
||||
xmm2 = _mm_packus_epi16(xmm2, xmm2);
|
||||
xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
|
||||
xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
|
||||
xmm1 = _mm_load_si128(&xmm0);
|
||||
xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
|
||||
xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
|
||||
|
||||
_mm_storeu_si128((__m128i *)dst_argb, xmm0);
|
||||
_mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1);
|
||||
|
||||
y_buf += 8;
|
||||
u_buf += 4;
|
||||
dst_argb += 32;
|
||||
width -= 8;
|
||||
}
|
||||
}
|
||||
// 32 bit
|
||||
#else // defined(_M_X64)
|
||||
|
||||
#ifdef HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
|
@ -2030,21 +2194,6 @@ void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
|||
}
|
||||
#endif // HAS_ARGBTOYROW_SSSE3
|
||||
|
||||
#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
|
||||
|
||||
#define UB 127 /* min(63,(int8)(2.018 * 64)) */
|
||||
#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
|
||||
#define UR 0
|
||||
|
||||
#define VB 0
|
||||
#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
|
||||
#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
|
||||
|
||||
// Bias
|
||||
#define BB UB * 128 + VB * 128
|
||||
#define BG UG * 128 + VG * 128
|
||||
#define BR UR * 128 + VR * 128
|
||||
|
||||
#ifdef HAS_I422TOARGBROW_AVX2
|
||||
|
||||
static const lvec8 kUVToB_AVX = {
|
||||
|
@ -2079,10 +2228,10 @@ static const lvec16 kUVBiasR_AVX = {
|
|||
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
||||
__declspec(naked) __declspec(align(16))
|
||||
void I422ToARGBRow_AVX2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* dst_argb,
|
||||
int width) {
|
||||
__asm {
|
||||
push esi
|
||||
push edi
|
||||
|
@ -2150,36 +2299,6 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
|
|||
|
||||
#ifdef HAS_I422TOARGBROW_SSSE3
|
||||
|
||||
static const vec8 kUVToB = {
|
||||
UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
|
||||
};
|
||||
|
||||
static const vec8 kUVToR = {
|
||||
UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
|
||||
};
|
||||
|
||||
static const vec8 kUVToG = {
|
||||
UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
|
||||
};
|
||||
|
||||
static const vec8 kVUToB = {
|
||||
VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
|
||||
};
|
||||
|
||||
static const vec8 kVUToR = {
|
||||
VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
|
||||
};
|
||||
|
||||
static const vec8 kVUToG = {
|
||||
VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
|
||||
};
|
||||
|
||||
static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
|
||||
static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
|
||||
static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
|
||||
static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
|
||||
static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
|
||||
|
||||
// TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
|
||||
|
||||
// Read 8 UV from 444.
|
||||
|
@ -7276,7 +7395,8 @@ void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
|||
}
|
||||
#endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
#endif // defined(_M_X64)
|
||||
#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
;
|
||||
; Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
%ifdef __YASM_VERSION_ID__
|
||||
%if __YASM_VERSION_ID__ < 01020000h
|
||||
%error AVX2 is supported only by yasm 1.2.0 or later.
|
||||
%endif
|
||||
%endif
|
||||
%include "x86inc.asm"
|
||||
|
||||
SECTION .text
|
||||
|
||||
; cglobal numeric constants are parameters, gpr regs, mm regs
|
||||
|
||||
; void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix)
|
||||
|
||||
%macro YUY2TOYROW 2-3
|
||||
cglobal %1ToYRow%3, 3, 3, 3, src_yuy2, dst_y, pix
|
||||
%ifidn %1,YUY2
|
||||
pcmpeqb m2, m2, m2 ; generate mask 0x00ff00ff
|
||||
psrlw m2, m2, 8
|
||||
%endif
|
||||
|
||||
ALIGN 4
|
||||
.convertloop:
|
||||
mov%2 m0, [src_yuy2q]
|
||||
mov%2 m1, [src_yuy2q + mmsize]
|
||||
lea src_yuy2q, [src_yuy2q + mmsize * 2]
|
||||
%ifidn %1,YUY2
|
||||
pand m0, m0, m2 ; YUY2 even bytes are Y
|
||||
pand m1, m1, m2
|
||||
%else
|
||||
psrlw m0, m0, 8 ; UYVY odd bytes are Y
|
||||
psrlw m1, m1, 8
|
||||
%endif
|
||||
packuswb m0, m0, m1
|
||||
%if cpuflag(AVX2)
|
||||
vpermq m0, m0, 0xd8
|
||||
%endif
|
||||
sub pixd, mmsize
|
||||
mov%2 [dst_yq], m0
|
||||
lea dst_yq, [dst_yq + mmsize]
|
||||
jg .convertloop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
; TODO(fbarchard): Remove MMX. Add SSSE3 pshufb version.
|
||||
INIT_MMX MMX
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW YUY2,u,_Unaligned
|
||||
YUY2TOYROW UYVY,a,
|
||||
YUY2TOYROW UYVY,u,_Unaligned
|
||||
INIT_XMM SSE2
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW YUY2,u,_Unaligned
|
||||
YUY2TOYROW UYVY,a,
|
||||
YUY2TOYROW UYVY,u,_Unaligned
|
||||
INIT_YMM AVX2
|
||||
YUY2TOYROW YUY2,a,
|
||||
YUY2TOYROW UYVY,a,
|
||||
|
||||
; void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix)
|
||||
|
||||
%macro SplitUVRow 1-2
|
||||
cglobal SplitUVRow%2, 4, 4, 5, src_uv, dst_u, dst_v, pix
|
||||
pcmpeqb m4, m4, m4 ; generate mask 0x00ff00ff
|
||||
psrlw m4, m4, 8
|
||||
sub dst_vq, dst_uq
|
||||
|
||||
ALIGN 4
|
||||
.convertloop:
|
||||
mov%1 m0, [src_uvq]
|
||||
mov%1 m1, [src_uvq + mmsize]
|
||||
lea src_uvq, [src_uvq + mmsize * 2]
|
||||
psrlw m2, m0, 8 ; odd bytes
|
||||
psrlw m3, m1, 8
|
||||
pand m0, m0, m4 ; even bytes
|
||||
pand m1, m1, m4
|
||||
packuswb m0, m0, m1
|
||||
packuswb m2, m2, m3
|
||||
%if cpuflag(AVX2)
|
||||
vpermq m0, m0, 0xd8
|
||||
vpermq m2, m2, 0xd8
|
||||
%endif
|
||||
mov%1 [dst_uq], m0
|
||||
mov%1 [dst_uq + dst_vq], m2
|
||||
lea dst_uq, [dst_uq + mmsize]
|
||||
sub pixd, mmsize
|
||||
jg .convertloop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX MMX
|
||||
SplitUVRow a,
|
||||
SplitUVRow u,_Unaligned
|
||||
INIT_XMM SSE2
|
||||
SplitUVRow a,
|
||||
SplitUVRow u,_Unaligned
|
||||
INIT_YMM AVX2
|
||||
SplitUVRow a,
|
||||
|
||||
; void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
|
||||
; int width);
|
||||
|
||||
%macro MergeUVRow_ 1-2
|
||||
cglobal MergeUVRow_%2, 4, 4, 3, src_u, src_v, dst_uv, pix
|
||||
sub src_vq, src_uq
|
||||
|
||||
ALIGN 4
|
||||
.convertloop:
|
||||
mov%1 m0, [src_uq]
|
||||
mov%1 m1, [src_vq]
|
||||
lea src_uq, [src_uq + mmsize]
|
||||
punpcklbw m2, m0, m1 // first 8 UV pairs
|
||||
punpckhbw m0, m0, m1 // next 8 UV pairs
|
||||
%if cpuflag(AVX2)
|
||||
vperm2i128 m1, m2, m0, 0x20 // low 128 of ymm2 and low 128 of ymm0
|
||||
vperm2i128 m2, m2, m0, 0x31 // high 128 of ymm2 and high 128 of ymm0
|
||||
mov%1 [dst_uvq], m1
|
||||
mov%1 [dst_uvq + mmsize], m2
|
||||
%else
|
||||
mov%1 [dst_uvq], m2
|
||||
mov%1 [dst_uvq + mmsize], m0
|
||||
%endif
|
||||
lea dst_uvq, [dst_uvq + mmsize * 2]
|
||||
sub pixd, mmsize
|
||||
jg .convertloop
|
||||
REP_RET
|
||||
%endmacro
|
||||
|
||||
INIT_MMX MMX
|
||||
MergeUVRow_ a,
|
||||
MergeUVRow_ u,_Unaligned
|
||||
INIT_XMM SSE2
|
||||
MergeUVRow_ a,
|
||||
MergeUVRow_ u,_Unaligned
|
||||
INIT_YMM AVX2
|
||||
MergeUVRow_ a,
|
||||
|
|
@ -8,15 +8,15 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/scale.h"
|
||||
#include "libyuv/scale.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/cpu_id.h"
|
||||
#include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyPlane
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "third_party/libyuv/include/libyuv/scale_row.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h" // For CopyPlane
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,809 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/scale.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h" // For CopyARGB
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
static __inline int Abs(int v) {
|
||||
return v >= 0 ? v : -v;
|
||||
}
|
||||
|
||||
// ScaleARGB ARGB, 1/2
|
||||
// This is an optimized version for scaling down a ARGB to 1/2 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown2(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) =
|
||||
filtering == kFilterNone ? ScaleARGBRowDown2_C :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
|
||||
ScaleARGBRowDown2Box_C);
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row, even column.
|
||||
if (filtering == kFilterBilinear) {
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
} else {
|
||||
src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
|
||||
}
|
||||
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
|
||||
ScaleARGBRowDown2Box_SSE2);
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
|
||||
ScaleARGBRowDown2 = filtering ? ScaleARGBRowDown2Box_NEON :
|
||||
ScaleARGBRowDown2_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
dst_argb += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
// ScaleARGB ARGB, 1/4
|
||||
// This is an optimized version for scaling down a ARGB to 1/4 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
int j;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 2 * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
|
||||
// Advance to odd row, even column.
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(row_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2;
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(row_stride, 4)) {
|
||||
ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON;
|
||||
}
|
||||
#endif
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
|
||||
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
|
||||
row + kRowSize, dst_width * 2);
|
||||
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
dst_argb += dst_stride;
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
|
||||
// ScaleARGB ARGB Even
|
||||
// This is an optimized version for scaling down a ARGB to even
|
||||
// multiple of its original size.
|
||||
static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * src_stride;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_step, uint8* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
|
||||
assert(IS_ALIGNED(src_width, 2));
|
||||
assert(IS_ALIGNED(src_height, 2));
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
|
||||
ScaleARGBRowDownEven_SSE2;
|
||||
}
|
||||
#elif defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 4) &&
|
||||
IS_ALIGNED(src_argb, 4)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
|
||||
ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (filtering == kFilterLinear) {
|
||||
src_stride = 0;
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
dst_argb += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
// Scale ARGB down with bilinear interpolation.
|
||||
static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
|
||||
int64 xlast = x + (int64)(dst_width - 1) * dx;
|
||||
int64 xl = (dx >= 0) ? x : xlast;
|
||||
int64 xr = (dx >= 0) ? xlast : x;
|
||||
int clip_src_width;
|
||||
xl = (xl >> 16) & ~3; // Left edge aligned.
|
||||
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
|
||||
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
|
||||
if (xr > src_width) {
|
||||
xr = src_width;
|
||||
}
|
||||
clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4.
|
||||
src_argb += xl * 4;
|
||||
x -= (int)(xl << 16);
|
||||
#if defined(HAS_INTERPOLATEROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && clip_src_width >= 16) {
|
||||
InterpolateRow = InterpolateRow_Any_SSE2;
|
||||
if (IS_ALIGNED(clip_src_width, 16)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && clip_src_width >= 16) {
|
||||
InterpolateRow = InterpolateRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(clip_src_width, 16)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && clip_src_width >= 32) {
|
||||
InterpolateRow = InterpolateRow_Any_AVX2;
|
||||
if (IS_ALIGNED(clip_src_width, 32)) {
|
||||
InterpolateRow = InterpolateRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && clip_src_width >= 16) {
|
||||
InterpolateRow = InterpolateRow_Any_NEON;
|
||||
if (IS_ALIGNED(clip_src_width, 16)) {
|
||||
InterpolateRow = InterpolateRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && clip_src_width >= 4 &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
|
||||
if (IS_ALIGNED(clip_src_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_MIPS_DSPR2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||
}
|
||||
#endif
|
||||
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
|
||||
// Allocate a row of ARGB.
|
||||
{
|
||||
align_buffer_64(row, clip_src_width * 4);
|
||||
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int yi = y >> 16;
|
||||
const uint8* src = src_argb + yi * src_stride;
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(row, src, src_stride, clip_src_width, yf);
|
||||
ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx);
|
||||
}
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
}
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
}
|
||||
|
||||
// Scale ARGB up with bilinear interpolation.
|
||||
static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
#if defined(HAS_INTERPOLATEROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_SSE2;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
|
||||
InterpolateRow = InterpolateRow_Any_AVX2;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
InterpolateRow = InterpolateRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_NEON;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
|
||||
}
|
||||
#endif
|
||||
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
}
|
||||
|
||||
{
|
||||
int yi = y >> 16;
|
||||
const uint8* src = src_argb + yi * src_stride;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
|
||||
uint8* rowptr = row;
|
||||
int rowstride = kRowSize;
|
||||
int lasty = yi;
|
||||
|
||||
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
|
||||
if (src_height > 1) {
|
||||
src += src_stride;
|
||||
}
|
||||
ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx);
|
||||
src += src_stride;
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
yi = y >> 16;
|
||||
if (yi != lasty) {
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
yi = y >> 16;
|
||||
src = src_argb + yi * src_stride;
|
||||
}
|
||||
if (yi != lasty) {
|
||||
ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
|
||||
rowptr += rowstride;
|
||||
rowstride = -rowstride;
|
||||
lasty = yi;
|
||||
src += src_stride;
|
||||
}
|
||||
}
|
||||
if (filtering == kFilterLinear) {
|
||||
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
}
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef YUVSCALEUP
|
||||
// Scale YUV to ARGB up with bilinear interpolation.
|
||||
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride_y,
|
||||
int src_stride_u,
|
||||
int src_stride_v,
|
||||
int dst_stride_argb,
|
||||
const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(src_width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && src_width >= 16) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_AVX2;
|
||||
if (IS_ALIGNED(src_width, 16)) {
|
||||
I422ToARGBRow = I422ToARGBRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && src_width >= 8) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_NEON;
|
||||
if (IS_ALIGNED(src_width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
#if defined(HAS_INTERPOLATEROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_SSE2;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSE2;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSE2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_SSSE3;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_Unaligned_SSSE3;
|
||||
if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
|
||||
InterpolateRow = InterpolateRow_SSSE3;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_AVX2)
|
||||
if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 8) {
|
||||
InterpolateRow = InterpolateRow_Any_AVX2;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
InterpolateRow = InterpolateRow_AVX2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && dst_width >= 4) {
|
||||
InterpolateRow = InterpolateRow_Any_NEON;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 1 &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
InterpolateRow = InterpolateRow_MIPS_DSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
|
||||
}
|
||||
#endif
|
||||
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
}
|
||||
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
|
||||
int yi = y >> 16;
|
||||
int uv_yi = yi >> kYShift;
|
||||
const uint8* src_row_y = src_y + yi * src_stride_y;
|
||||
const uint8* src_row_u = src_u + uv_yi * src_stride_u;
|
||||
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 4 + 15) & ~15;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
|
||||
// Allocate 1 row of ARGB for source conversion.
|
||||
align_buffer_64(argb_row, src_width * 4);
|
||||
|
||||
uint8* rowptr = row;
|
||||
int rowstride = kRowSize;
|
||||
int lasty = yi;
|
||||
|
||||
// TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
|
||||
ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
|
||||
if (src_height > 1) {
|
||||
src_row_y += src_stride_y;
|
||||
if (yi & 1) {
|
||||
src_row_u += src_stride_u;
|
||||
src_row_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
|
||||
if (src_height > 2) {
|
||||
src_row_y += src_stride_y;
|
||||
if (!(yi & 1)) {
|
||||
src_row_u += src_stride_u;
|
||||
src_row_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
yi = y >> 16;
|
||||
if (yi != lasty) {
|
||||
if (y > max_y) {
|
||||
y = max_y;
|
||||
yi = y >> 16;
|
||||
uv_yi = yi >> kYShift;
|
||||
src_row_y = src_y + yi * src_stride_y;
|
||||
src_row_u = src_u + uv_yi * src_stride_u;
|
||||
src_row_v = src_v + uv_yi * src_stride_v;
|
||||
}
|
||||
if (yi != lasty) {
|
||||
// TODO(fbarchard): Convert the clipped region of row.
|
||||
I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
|
||||
ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
|
||||
rowptr += rowstride;
|
||||
rowstride = -rowstride;
|
||||
lasty = yi;
|
||||
src_row_y += src_stride_y;
|
||||
if (yi & 1) {
|
||||
src_row_u += src_stride_u;
|
||||
src_row_v += src_stride_v;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (filtering == kFilterLinear) {
|
||||
InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
|
||||
} else {
|
||||
int yf = (y >> 8) & 255;
|
||||
InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
|
||||
}
|
||||
dst_argb += dst_stride_argb;
|
||||
y += dy;
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
free_aligned_buffer_64(row_argb);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Scale ARGB to/from any dimensions, without interpolation.
|
||||
// Fixed point math is used for performance: The upper 16 bits
|
||||
// of x and dx is the integer part of the source position and
|
||||
// the lower 16 bits are the fixed decimal part.
|
||||
|
||||
static void ScaleARGBSimple(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
int j;
|
||||
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBCols = ScaleARGBCols_SSE2;
|
||||
}
|
||||
#endif
|
||||
if (src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBCols = ScaleARGBColsUp2_C;
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
|
||||
IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
|
||||
IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
|
||||
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
|
||||
dst_width, x, dx);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
}
|
||||
|
||||
// ScaleARGB a ARGB.
|
||||
// This function in turn calls a scaling function
|
||||
// suitable for handling the desired resolutions.
|
||||
static void ScaleARGB(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint8* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
int dx = 0;
|
||||
int dy = 0;
|
||||
// ARGB does not support box filter yet, but allow the user to pass it.
|
||||
// Simplify filtering when possible.
|
||||
filtering = ScaleFilterReduce(src_width, src_height,
|
||||
dst_width, dst_height,
|
||||
filtering);
|
||||
|
||||
// Negative src_height means invert the image.
|
||||
if (src_height < 0) {
|
||||
src_height = -src_height;
|
||||
src = src + (src_height - 1) * src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
|
||||
&x, &y, &dx, &dy);
|
||||
src_width = Abs(src_width);
|
||||
if (clip_x) {
|
||||
int64 clipf = (int64)(clip_x) * dx;
|
||||
x += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * 4;
|
||||
dst += clip_x * 4;
|
||||
}
|
||||
if (clip_y) {
|
||||
int64 clipf = (int64)(clip_y) * dy;
|
||||
y += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * src_stride;
|
||||
dst += clip_y * dst_stride;
|
||||
}
|
||||
|
||||
// Special case for integer step values.
|
||||
if (((dx | dy) & 0xffff) == 0) {
|
||||
if (!dx || !dy) { // 1 pixel wide and/or tall.
|
||||
filtering = kFilterNone;
|
||||
} else {
|
||||
// Optimized even scale down. ie 2, 4, 6, 8, 10x.
|
||||
if (!(dx & 0x10000) && !(dy & 0x10000)) {
|
||||
if (dx == 0x20000) {
|
||||
// Optimized 1/2 downsample.
|
||||
ScaleARGBDown2(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
if (dx == 0x40000 && filtering == kFilterBox) {
|
||||
// Optimized 1/4 box downsample.
|
||||
ScaleARGBDown4Box(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
return;
|
||||
}
|
||||
ScaleARGBDownEven(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
// Optimized odd scale down. ie 3, 5, 7, 9x.
|
||||
if ((dx & 0x10000) && (dy & 0x10000)) {
|
||||
filtering = kFilterNone;
|
||||
if (dx == 0x10000 && dy == 0x10000) {
|
||||
// Straight copy.
|
||||
ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
|
||||
dst, dst_stride, clip_width, clip_height);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (dx == 0x10000 && (x & 0xffff) == 0) {
|
||||
// Arbitrary scale vertically, but unscaled vertically.
|
||||
ScalePlaneVertical(src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, y, dy, 4, filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering && dy < 65536) {
|
||||
ScaleARGBBilinearUp(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering) {
|
||||
ScaleARGBBilinearDown(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
return;
|
||||
}
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
|
||||
clip_x < 0 || clip_y < 0 ||
|
||||
(clip_x + clip_width) > dst_width ||
|
||||
(clip_y + clip_height) > dst_height) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height, filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale an ARGB image.
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
0, 0, dst_width, dst_height, filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
|
@ -8,15 +8,15 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/scale.h"
|
||||
#include "libyuv/scale.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/cpu_id.h"
|
||||
#include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyARGB
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "third_party/libyuv/include/libyuv/scale_row.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h" // For CopyARGB
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -8,8 +8,8 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/basic_types.h"
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -18,7 +18,8 @@ extern "C" {
|
|||
|
||||
// This module is for GCC MIPS DSPR2
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
@ -28,8 +28,10 @@ void ScaleRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {q0, q1}, [%0]! \n"
|
||||
"subs %2, %2, #16 \n" // 16 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q1}, [%1]! \n" // store odd pixels
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -48,7 +50,9 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
"add %1, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc
|
||||
"subs %3, %3, #16 \n" // 16 processed per loop
|
||||
"vpaddl.u8 q0, q0 \n" // row 1 add adjacent
|
||||
|
@ -57,6 +61,7 @@ void ScaleRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
"vpadal.u8 q1, q3 \n"
|
||||
"vrshrn.u16 d0, q0, #2 \n" // downshift, round and pack
|
||||
"vrshrn.u16 d1, q1, #2 \n"
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -73,8 +78,10 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d2}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -87,16 +94,20 @@ void ScaleRowDown4_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
|
||||
void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
"add r4, %0, %3 \n"
|
||||
"add r5, r4, %3 \n"
|
||||
"add %3, r5, %3 \n"
|
||||
const uint8* src_ptr1 = src_ptr + src_stride;
|
||||
const uint8* src_ptr2 = src_ptr + src_stride * 2;
|
||||
const uint8* src_ptr3 = src_ptr + src_stride * 3;
|
||||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n" // load up 16x4
|
||||
"vld1.8 {q1}, [r4]! \n"
|
||||
"vld1.8 {q2}, [r5]! \n"
|
||||
"vld1.8 {q3}, [%3]! \n"
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q1}, [%3]! \n"
|
||||
MEMACCESS(4)
|
||||
"vld1.8 {q2}, [%4]! \n"
|
||||
MEMACCESS(5)
|
||||
"vld1.8 {q3}, [%5]! \n"
|
||||
"subs %2, %2, #4 \n"
|
||||
"vpaddl.u8 q0, q0 \n"
|
||||
"vpadal.u8 q0, q1 \n"
|
||||
|
@ -105,13 +116,17 @@ void ScaleRowDown4Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
"vpaddl.u16 q0, q0 \n"
|
||||
"vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding
|
||||
"vmovn.u16 d0, q0 \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.32 {d0[0]}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width) // %2
|
||||
: "r"(src_stride) // %3
|
||||
: "r4", "r5", "q0", "q1", "q2", "q3", "memory", "cc"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width), // %2
|
||||
"+r"(src_ptr1), // %3
|
||||
"+r"(src_ptr2), // %4
|
||||
"+r"(src_ptr3) // %5
|
||||
:
|
||||
: "q0", "q1", "q2", "q3", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -124,9 +139,11 @@ void ScaleRowDown34_NEON(const uint8* src_ptr,
|
|||
asm volatile (
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
|
||||
"subs %2, %2, #24 \n"
|
||||
"vmov d2, d3 \n" // order d0, d1, d2
|
||||
MEMACCESS(1)
|
||||
"vst3.8 {d0, d1, d2}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -145,7 +162,9 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
|||
"add %3, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
|
||||
MEMACCESS(3)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
|
||||
"subs %2, %2, #24 \n"
|
||||
|
||||
|
@ -182,6 +201,7 @@ void ScaleRowDown34_0_Box_NEON(const uint8* src_ptr,
|
|||
"vmlal.u8 q8, d3, d24 \n"
|
||||
"vqrshrn.u16 d2, q8, #2 \n"
|
||||
|
||||
MEMACCESS(1)
|
||||
"vst3.8 {d0, d1, d2}, [%1]! \n"
|
||||
|
||||
"bgt 1b \n"
|
||||
|
@ -202,7 +222,9 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
|||
"add %3, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0
|
||||
MEMACCESS(3)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1
|
||||
"subs %2, %2, #24 \n"
|
||||
// average src line 0 with src line 1
|
||||
|
@ -222,6 +244,7 @@ void ScaleRowDown34_1_Box_NEON(const uint8* src_ptr,
|
|||
"vmlal.u8 q3, d3, d24 \n"
|
||||
"vqrshrn.u16 d2, q3, #2 \n"
|
||||
|
||||
MEMACCESS(1)
|
||||
"vst3.8 {d0, d1, d2}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -250,14 +273,18 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
|
|||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
MEMACCESS(3)
|
||||
"vld1.8 {q3}, [%3] \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0, d1, d2, d3}, [%0]! \n"
|
||||
"subs %2, %2, #12 \n"
|
||||
"vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n"
|
||||
"vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d4}, [%1]! \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.32 {d5[0]}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -272,11 +299,15 @@ void ScaleRowDown38_NEON(const uint8* src_ptr,
|
|||
void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
const uint8* src_ptr1 = src_ptr + src_stride * 2;
|
||||
|
||||
asm volatile (
|
||||
"vld1.16 {q13}, [%4] \n"
|
||||
"vld1.8 {q14}, [%5] \n"
|
||||
"vld1.8 {q15}, [%6] \n"
|
||||
"add r4, %0, %3, lsl #1 \n"
|
||||
MEMACCESS(5)
|
||||
"vld1.16 {q13}, [%5] \n"
|
||||
MEMACCESS(6)
|
||||
"vld1.8 {q14}, [%6] \n"
|
||||
MEMACCESS(7)
|
||||
"vld1.8 {q15}, [%7] \n"
|
||||
"add %3, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
|
@ -285,9 +316,12 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
|||
// d1 = 10 50 11 51 12 52 13 53
|
||||
// d2 = 20 60 21 61 22 62 23 63
|
||||
// d3 = 30 70 31 71 32 72 33 73
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n"
|
||||
MEMACCESS(3)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%3]! \n"
|
||||
"vld4.8 {d16, d17, d18, d19}, [r4]! \n"
|
||||
MEMACCESS(4)
|
||||
"vld4.8 {d16, d17, d18, d19}, [%4]! \n"
|
||||
"subs %2, %2, #12 \n"
|
||||
|
||||
// Shuffle the input data around to get align the data
|
||||
|
@ -364,18 +398,20 @@ void OMITFP ScaleRowDown38_3_Box_NEON(const uint8* src_ptr,
|
|||
"vtbl.u8 d3, {d0, d1, d2}, d28 \n"
|
||||
"vtbl.u8 d4, {d0, d1, d2}, d29 \n"
|
||||
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d3}, [%1]! \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.32 {d4[0]}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
"+r"(dst_ptr), // %1
|
||||
"+r"(dst_width), // %2
|
||||
"+r"(src_stride) // %3
|
||||
: "r"(&kMult38_Div6), // %4
|
||||
"r"(&kShuf38_2), // %5
|
||||
"r"(&kMult38_Div9) // %6
|
||||
: "r4", "q0", "q1", "q2", "q3", "q8", "q9",
|
||||
"q13", "q14", "q15", "memory", "cc"
|
||||
"+r"(src_stride), // %3
|
||||
"+r"(src_ptr1) // %4
|
||||
: "r"(&kMult38_Div6), // %5
|
||||
"r"(&kShuf38_2), // %6
|
||||
"r"(&kMult38_Div9) // %7
|
||||
: "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -384,7 +420,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
asm volatile (
|
||||
MEMACCESS(4)
|
||||
"vld1.16 {q13}, [%4] \n"
|
||||
MEMACCESS(5)
|
||||
"vld1.8 {q14}, [%5] \n"
|
||||
"add %3, %0 \n"
|
||||
".p2align 2 \n"
|
||||
|
@ -394,7 +432,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||
// d1 = 10 50 11 51 12 52 13 53
|
||||
// d2 = 20 60 21 61 22 62 23 63
|
||||
// d3 = 30 70 31 71 32 72 33 73
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d1, d2, d3}, [%0]! \n"
|
||||
MEMACCESS(3)
|
||||
"vld4.8 {d4, d5, d6, d7}, [%3]! \n"
|
||||
"subs %2, %2, #12 \n"
|
||||
|
||||
|
@ -461,7 +501,9 @@ void ScaleRowDown38_2_Box_NEON(const uint8* src_ptr,
|
|||
"vtbl.u8 d3, {d0, d1, d2}, d28 \n"
|
||||
"vtbl.u8 d4, {d0, d1, d2}, d29 \n"
|
||||
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {d3}, [%1]! \n"
|
||||
MEMACCESS(1)
|
||||
"vst1.32 {d4[0]}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -494,7 +536,9 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
|||
"vdup.8 d4, %4 \n"
|
||||
// General purpose row blend.
|
||||
"1: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q0}, [%1]! \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {q1}, [%2]! \n"
|
||||
"subs %3, %3, #16 \n"
|
||||
"vmull.u8 q13, d0, d4 \n"
|
||||
|
@ -503,50 +547,63 @@ void ScaleFilterRows_NEON(uint8* dst_ptr,
|
|||
"vmlal.u8 q14, d3, d5 \n"
|
||||
"vrshrn.u16 d0, q13, #8 \n"
|
||||
"vrshrn.u16 d1, q14, #8 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n"
|
||||
"bgt 1b \n"
|
||||
"b 99f \n"
|
||||
|
||||
// Blend 25 / 75.
|
||||
"25: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q0}, [%1]! \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {q1}, [%2]! \n"
|
||||
"subs %3, %3, #16 \n"
|
||||
"vrhadd.u8 q0, q1 \n"
|
||||
"vrhadd.u8 q0, q1 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n"
|
||||
"bgt 25b \n"
|
||||
"b 99f \n"
|
||||
|
||||
// Blend 50 / 50.
|
||||
"50: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q0}, [%1]! \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {q1}, [%2]! \n"
|
||||
"subs %3, %3, #16 \n"
|
||||
"vrhadd.u8 q0, q1 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n"
|
||||
"bgt 50b \n"
|
||||
"b 99f \n"
|
||||
|
||||
// Blend 75 / 25.
|
||||
"75: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n"
|
||||
MEMACCESS(2)
|
||||
"vld1.8 {q0}, [%2]! \n"
|
||||
"subs %3, %3, #16 \n"
|
||||
"vrhadd.u8 q0, q1 \n"
|
||||
"vrhadd.u8 q0, q1 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n"
|
||||
"bgt 75b \n"
|
||||
"b 99f \n"
|
||||
|
||||
// Blend 100 / 0 - Copy row unchanged.
|
||||
"100: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q0}, [%1]! \n"
|
||||
"subs %3, %3, #16 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {q0}, [%0]! \n"
|
||||
"bgt 100b \n"
|
||||
|
||||
"99: \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d1[7]}, [%0] \n"
|
||||
: "+r"(dst_ptr), // %0
|
||||
"+r"(src_ptr), // %1
|
||||
|
@ -564,10 +621,14 @@ void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
// load even pixels into q0, odd into q1
|
||||
MEMACCESS(0)
|
||||
"vld2.32 {q0, q1}, [%0]! \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.32 {q2, q3}, [%0]! \n"
|
||||
"subs %2, %2, #8 \n" // 8 processed per loop
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q1}, [%1]! \n" // store odd pixels
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q3}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -585,14 +646,18 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
"add %1, %1, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
|
||||
MEMACCESS(0)
|
||||
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
|
||||
"subs %3, %3, #8 \n" // 8 processed per loop.
|
||||
"vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
|
||||
"vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB pixels.
|
||||
MEMACCESS(1)
|
||||
"vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB pixels.
|
||||
"vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts.
|
||||
"vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts.
|
||||
|
@ -602,6 +667,7 @@ void ScaleARGBRowDown2Box_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
|
|||
"vrshrn.u16 d1, q1, #2 \n"
|
||||
"vrshrn.u16 d2, q2, #2 \n"
|
||||
"vrshrn.u16 d3, q3, #2 \n"
|
||||
MEMACCESS(2)
|
||||
"vst4.8 {d0, d1, d2, d3}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_ptr), // %0
|
||||
|
@ -621,11 +687,16 @@ void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
|||
"mov r12, %3, lsl #2 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[0]}, [%0], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[1]}, [%0], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[0]}, [%0], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[1]}, [%0], r12 \n"
|
||||
"subs %2, %2, #4 \n" // 4 pixels per loop.
|
||||
MEMACCESS(1)
|
||||
"vst1.8 {q0}, [%1]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
|
@ -646,13 +717,21 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
|||
"add %1, %1, %0 \n"
|
||||
".p2align 2 \n"
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0], r12 \n" // Read 4 2x2 blocks -> 2x1
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d1}, [%1], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0], r12 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d3}, [%1], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d4}, [%0], r12 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d5}, [%1], r12 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d6}, [%0], r12 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d7}, [%1], r12 \n"
|
||||
"vaddl.u8 q0, d0, d1 \n"
|
||||
"vaddl.u8 q1, d2, d3 \n"
|
||||
|
@ -665,6 +744,7 @@ void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
|
|||
"vrshrn.u16 d0, q0, #2 \n" // first 2 pixels.
|
||||
"vrshrn.u16 d1, q2, #2 \n" // next 2 pixels.
|
||||
"subs %3, %3, #4 \n" // 4 pixels per loop.
|
||||
MEMACCESS(2)
|
||||
"vst1.8 {q0}, [%2]! \n"
|
||||
"bgt 1b \n"
|
||||
: "+r"(src_argb), // %0
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/libyuv/include/libyuv/row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#include "libyuv/video_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof(x[0]))
|
||||
|
||||
struct FourCCAliasEntry {
|
||||
uint32 alias;
|
||||
uint32 canonical;
|
||||
};
|
||||
|
||||
static const struct FourCCAliasEntry kFourCCAliases[] = {
|
||||
{FOURCC_IYUV, FOURCC_I420},
|
||||
{FOURCC_YU16, FOURCC_I422},
|
||||
{FOURCC_YU24, FOURCC_I444},
|
||||
{FOURCC_YUYV, FOURCC_YUY2},
|
||||
{FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs
|
||||
{FOURCC_HDYC, FOURCC_UYVY},
|
||||
{FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8
|
||||
{FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not.
|
||||
{FOURCC_DMB1, FOURCC_MJPG},
|
||||
{FOURCC_BA81, FOURCC_BGGR},
|
||||
{FOURCC_RGB3, FOURCC_RAW },
|
||||
{FOURCC_BGR3, FOURCC_24BG},
|
||||
{FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB
|
||||
{FOURCC_CM24, FOURCC_RAW }, // kCMPixelFormat_24RGB
|
||||
{FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555
|
||||
{FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565
|
||||
{FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551
|
||||
};
|
||||
// TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB.
|
||||
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA
|
||||
|
||||
LIBYUV_API
|
||||
uint32 CanonicalFourCC(uint32 fourcc) {
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(kFourCCAliases); ++i) {
|
||||
if (kFourCCAliases[i].alias == fourcc) {
|
||||
return kFourCCAliases[i].canonical;
|
||||
}
|
||||
}
|
||||
// Not an alias, so return it as-is.
|
||||
return fourcc;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче