Bug 1831930 - Update libvpx to 19ec57e14938bcb12d87123b7c369212f19792eb r=webrtc-reviewers,ng

Update to 19ec57e14938bcb12d87123b7c369212f19792eb by running `./mach
vendor media/libvpx/moz.yaml --patch-mode=none`

Differential Revision: https://phabricator.services.mozilla.com/D177513
This commit is contained in:
Chun-Min Chang 2023-05-12 01:08:58 +00:00
Родитель b50359b886
Коммит 69a22f30f5
151 изменённых файлов: 6008 добавлений и 1468 удалений

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
/* Incomplete struct/union tags: the prototypes in this header only take
 * pointers to these types, so no full definition is needed here. */
struct macroblock;
/* NOTE(review): diff +/- markers are missing in this view. The hunk header
 * reports 7 lines on both sides, so vp9_variance_vtable is presumably the
 * removed tag and vp9_sad_table the one added in its place -- confirm
 * against the raw diff. */
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -40,7 +40,7 @@ int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, in
int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/* Only a _c variant is declared in this configuration, so the generic name
 * is bound to it at compile time via a macro. */
#define vp9_block_error_fp vp9_block_error_fp_c
/* NOTE(review): the next two lines declare the same function with different
 * parameter lists and cannot coexist in one header; with the diff markers
 * stripped, the first is presumably the removed prototype and the second its
 * replacement. The second form inserts `uint32_t start_mv_sad` after ref_mv
 * and takes `const struct vp9_sad_table *sad_fn_ptr` where the first takes
 * `const struct vp9_variance_vtable *fn_ptr`. */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
#define vp9_diamond_search_sad vp9_diamond_search_sad_c
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -2,6 +2,7 @@
@ using the ads2gas.pl script.
.syntax unified
@ Architecture flags exported to assembly sources; every flag visible here
@ is 0 in this configuration.
.equ VPX_ARCH_ARM , 0
@ NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
@ presumably the symbol added by this update -- confirm against the raw diff.
.equ VPX_ARCH_AARCH64 , 0
.equ VPX_ARCH_MIPS , 0
.equ VPX_ARCH_X86 , 0
.equ VPX_ARCH_X86_64 , 0

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing and INLINE to the `inline` keyword. */
#define RESTRICT
#define INLINE inline
/* Architecture flags; every flag visible here is 0 in this configuration. */
#define VPX_ARCH_ARM 0
/* NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
 * presumably the macro added by this update -- confirm against the raw diff. */
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -352,7 +352,7 @@ unsigned int vpx_mse8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t
void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vpx_quantize_b vpx_quantize_b_c
/* NOTE(review): the two vpx_quantize_b_32x32_c prototypes below are
 * presumably the before/after sides of the hunk (diff markers are missing).
 * They differ only in the top-level `const` on the mb_plane and scan_order
 * pointer parameters; the pointed-to types stay const-qualified in both.
 * Top-level parameter qualifiers do not change the function type in C. */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
#define vpx_quantize_b_32x32 vpx_quantize_b_32x32_c
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -472,6 +472,84 @@ unsigned int vpx_sad8x8_avg_c(const uint8_t *src_ptr, int src_stride, const uint
void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_c
/* vpx_sad_skip_<W>x<H> family (13 block sizes from 4x4 to 64x64).
 *
 * Each size has two entry points:
 *   - vpx_sad_skip_WxH_c:    one source block against one reference block,
 *                            returning an unsigned int.
 *   - vpx_sad_skip_WxHx4d_c: one source block against the four reference
 *                            pointers in ref_array; returns void and takes a
 *                            four-element sad_array (presumably the output).
 *
 * Only _c variants are declared in this configuration, so each generic name
 * is bound to its _c function at compile time via a macro.
 *
 * NOTE(review): the hunk header reports 78 more lines on the new side, so
 * these declarations are presumably all additions. "skip" presumably means
 * the SAD is computed on a subset of rows -- confirm against the libvpx
 * implementation. */
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_c
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_c
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_c
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_c
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_c
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_c
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x16 vpx_sad_skip_32x16_c
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x16x4d vpx_sad_skip_32x16x4d_c
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x32 vpx_sad_skip_32x32_c
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x32x4d vpx_sad_skip_32x32x4d_c
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x64 vpx_sad_skip_32x64_c
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x64x4d vpx_sad_skip_32x64x4d_c
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_c
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_c
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x32 vpx_sad_skip_64x32_c
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x32x4d vpx_sad_skip_64x32x4d_c
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x64 vpx_sad_skip_64x64_c
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x64x4d vpx_sad_skip_64x64x4d_c
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_c
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_c
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_c
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_c
int vpx_satd_c(const int16_t *coeff, int length);
#define vpx_satd vpx_satd_c

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
/* Incomplete struct/union tags: the prototypes in this header only take
 * pointers to these types, so no full definition is needed here. */
struct macroblock;
/* NOTE(review): diff +/- markers are missing in this view. The hunk header
 * reports 7 lines on both sides, so vp9_variance_vtable is presumably the
 * removed tag and vp9_sad_table the one added in its place -- confirm
 * against the raw diff. */
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -39,9 +39,9 @@ int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
int64_t vp9_block_error_fp_neon(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/* In this configuration the generic name is a function pointer declared
 * through RTCD_EXTERN (macro defined outside this view) rather than a
 * compile-time #define. */
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/* NOTE(review): two groups of three declarations follow for the same names
 * (_c, _neon, and the dispatch pointer) with different parameter lists; they
 * cannot coexist in one header. With the diff markers stripped, the first
 * group is presumably the removed side and the second its replacement. The
 * second form inserts `uint32_t start_mv_sad` after ref_mv and takes
 * `const struct vp9_sad_table *sad_fn_ptr` where the first takes
 * `const struct vp9_variance_vtable *fn_ptr`. */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_neon(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -2,6 +2,7 @@
@ using the ads2gas.pl script.
.syntax unified
@ Architecture flags exported to assembly sources; only VPX_ARCH_ARM is 1
@ in this configuration.
.equ VPX_ARCH_ARM , 1
@ NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
@ presumably the symbol added by this update -- confirm against the raw diff.
.equ VPX_ARCH_AARCH64 , 0
.equ VPX_ARCH_MIPS , 0
.equ VPX_ARCH_X86 , 0
.equ VPX_ARCH_X86_64 , 0

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing and INLINE to the `inline` keyword. */
#define RESTRICT
#define INLINE inline
/* Architecture flags; only VPX_ARCH_ARM is 1 in this configuration. */
#define VPX_ARCH_ARM 1
/* NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
 * presumably the macro added by this update -- confirm against the raw diff. */
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -455,9 +455,9 @@ void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int1
void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
/* Dispatch pointer for vpx_quantize_b, declared through RTCD_EXTERN (macro
 * defined outside this view). */
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
/* NOTE(review): the two groups of vpx_quantize_b_32x32 declarations below
 * (_c, _neon, dispatch pointer) are presumably the before/after sides of the
 * hunk (diff markers are missing). They differ only in the top-level `const`
 * on the mb_plane and scan_order pointer parameters; the pointed-to types
 * stay const-qualified in both. Top-level parameter qualifiers do not change
 * the function type in C. */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -615,6 +615,110 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* vpx_sad_skip_<W>x<H> family (13 block sizes from 4x4 to 64x64), runtime
 * dispatched.
 *
 * Each entry point comes as a group of three declarations: the _c function,
 * the _neon function, and an RTCD_EXTERN function pointer carrying the
 * generic name. setup_rtcd_internal() in this header assigns each pointer
 * the _c function and then the _neon function when HAS_NEON is set in flags.
 *
 *   - vpx_sad_skip_WxH:    one source block against one reference block,
 *                          returning an unsigned int.
 *   - vpx_sad_skip_WxHx4d: one source block against the four reference
 *                          pointers in ref_array; returns void and takes a
 *                          four-element sad_array (presumably the output).
 *
 * NOTE(review): the hunk header reports 104 more lines on the new side, so
 * these declarations are presumably all additions. "skip" presumably means
 * the SAD is computed on a subset of rows -- confirm against the libvpx
 * implementation. */
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_4x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_4x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x4)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x4x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_neon(const int16_t *coeff, int length);
RTCD_EXTERN int (*vpx_satd)(const int16_t *coeff, int length);
@ -1139,6 +1243,58 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) vpx_sad8x8_avg = vpx_sad8x8_avg_neon;
vpx_sad8x8x4d = vpx_sad8x8x4d_c;
if (flags & HAS_NEON) vpx_sad8x8x4d = vpx_sad8x8x4d_neon;
vpx_sad_skip_16x16 = vpx_sad_skip_16x16_c;
if (flags & HAS_NEON) vpx_sad_skip_16x16 = vpx_sad_skip_16x16_neon;
vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_neon;
vpx_sad_skip_16x32 = vpx_sad_skip_16x32_c;
if (flags & HAS_NEON) vpx_sad_skip_16x32 = vpx_sad_skip_16x32_neon;
vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_neon;
vpx_sad_skip_16x8 = vpx_sad_skip_16x8_c;
if (flags & HAS_NEON) vpx_sad_skip_16x8 = vpx_sad_skip_16x8_neon;
vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_neon;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_c;
if (flags & HAS_NEON) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_neon;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_neon;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_c;
if (flags & HAS_NEON) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_neon;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_neon;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_c;
if (flags & HAS_NEON) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_neon;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_neon;
vpx_sad_skip_4x4 = vpx_sad_skip_4x4_c;
if (flags & HAS_NEON) vpx_sad_skip_4x4 = vpx_sad_skip_4x4_neon;
vpx_sad_skip_4x4x4d = vpx_sad_skip_4x4x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_4x4x4d = vpx_sad_skip_4x4x4d_neon;
vpx_sad_skip_4x8 = vpx_sad_skip_4x8_c;
if (flags & HAS_NEON) vpx_sad_skip_4x8 = vpx_sad_skip_4x8_neon;
vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_neon;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_c;
if (flags & HAS_NEON) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_neon;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_neon;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_c;
if (flags & HAS_NEON) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_neon;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_neon;
vpx_sad_skip_8x16 = vpx_sad_skip_8x16_c;
if (flags & HAS_NEON) vpx_sad_skip_8x16 = vpx_sad_skip_8x16_neon;
vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_neon;
vpx_sad_skip_8x4 = vpx_sad_skip_8x4_c;
if (flags & HAS_NEON) vpx_sad_skip_8x4 = vpx_sad_skip_8x4_neon;
vpx_sad_skip_8x4x4d = vpx_sad_skip_8x4x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_8x4x4d = vpx_sad_skip_8x4x4d_neon;
vpx_sad_skip_8x8 = vpx_sad_skip_8x8_c;
if (flags & HAS_NEON) vpx_sad_skip_8x8 = vpx_sad_skip_8x8_neon;
vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_c;
if (flags & HAS_NEON) vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_neon;
vpx_satd = vpx_satd_c;
if (flags & HAS_NEON) vpx_satd = vpx_satd_neon;
vpx_scaled_2d = vpx_scaled_2d_c;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
/* Incomplete struct/union tags: the prototypes in this header only take
 * pointers to these types, so no full definition is needed here. */
struct macroblock;
/* NOTE(review): diff +/- markers are missing in this view. The hunk header
 * reports 7 lines on both sides, so vp9_variance_vtable is presumably the
 * removed tag and vp9_sad_table the one added in its place -- confirm
 * against the raw diff. */
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -39,8 +39,8 @@ int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
int64_t vp9_block_error_fp_neon(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/* In this configuration the generic name is bound to the _neon function at
 * compile time via a macro (no runtime dispatch pointer). */
#define vp9_block_error_fp vp9_block_error_fp_neon
/* NOTE(review): two pairs of declarations follow for the same names (_c and
 * _neon) with different parameter lists; they cannot coexist in one header.
 * With the diff markers stripped, the first pair is presumably the removed
 * side and the second its replacement. The second form inserts
 * `uint32_t start_mv_sad` after ref_mv and takes
 * `const struct vp9_sad_table *sad_fn_ptr` where the first takes
 * `const struct vp9_variance_vtable *fn_ptr`. */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
#define vp9_diamond_search_sad vp9_diamond_search_sad_neon
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -2,6 +2,7 @@
@ using the ads2gas.pl script.
.syntax unified
@ Architecture flags exported to assembly sources; VPX_ARCH_ARM and
@ VPX_ARCH_AARCH64 are both 1 in this configuration.
.equ VPX_ARCH_ARM , 1
@ NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
@ presumably the symbol added by this update -- confirm against the raw diff.
.equ VPX_ARCH_AARCH64 , 1
.equ VPX_ARCH_MIPS , 0
.equ VPX_ARCH_X86 , 0
.equ VPX_ARCH_X86_64 , 0

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing and INLINE to the `inline` keyword. */
#define RESTRICT
#define INLINE inline
/* Architecture flags; VPX_ARCH_ARM and VPX_ARCH_AARCH64 are both 1 in this
 * configuration. */
#define VPX_ARCH_ARM 1
/* NOTE(review): the hunk grows from 6 to 7 lines, so VPX_ARCH_AARCH64 is
 * presumably the macro added by this update -- confirm against the raw diff. */
#define VPX_ARCH_AARCH64 1
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -455,8 +455,8 @@ void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int1
void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
/* The generic name is bound to the _neon function at compile time. */
#define vpx_quantize_b vpx_quantize_b_neon
/* NOTE(review): the two pairs of vpx_quantize_b_32x32 declarations below
 * (_c and _neon) are presumably the before/after sides of the hunk (diff
 * markers are missing). They differ only in the top-level `const` on the
 * mb_plane and scan_order pointer parameters; the pointed-to types stay
 * const-qualified in both. Top-level parameter qualifiers do not change the
 * function type in C. */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
#define vpx_quantize_b_32x32 vpx_quantize_b_32x32_neon
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -615,6 +615,110 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_neon
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_neon
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_neon
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_neon
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_neon
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_neon
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_neon
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x16 vpx_sad_skip_32x16_neon
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x16x4d vpx_sad_skip_32x16x4d_neon
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x32 vpx_sad_skip_32x32_neon
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x32x4d vpx_sad_skip_32x32x4d_neon
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x64 vpx_sad_skip_32x64_neon
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x64x4d vpx_sad_skip_32x64x4d_neon
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_neon
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_neon
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_neon
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_neon
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x32 vpx_sad_skip_64x32_neon
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x32x4d vpx_sad_skip_64x32x4d_neon
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x64 vpx_sad_skip_64x64_neon
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x64x4d vpx_sad_skip_64x64x4d_neon
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_neon
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_neon
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_neon
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_neon
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_neon
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_neon
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_neon(const int16_t *coeff, int length);
#define vpx_satd vpx_satd_neon

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 1
%define VPX_ARCH_X86_64 0

Просмотреть файл

@ -11,6 +11,7 @@
#define RESTRICT
#define INLINE inline
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 1
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -241,6 +241,7 @@ RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t stride, con
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
@ -336,6 +337,7 @@ RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest,
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -344,11 +346,13 @@ RTCD_EXTERN void (*vpx_idct16x16_38_add)(const tran_low_t *input, uint8_t *dest,
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -491,11 +495,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -665,6 +669,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -1044,6 +1158,7 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2;
vpx_fdct16x16 = vpx_fdct16x16_c;
if (flags & HAS_SSE2) vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct16x16_1 = vpx_fdct16x16_1_c;
if (flags & HAS_SSE2) vpx_fdct16x16_1 = vpx_fdct16x16_1_sse2;
vpx_fdct32x32 = vpx_fdct32x32_c;
@ -1091,13 +1206,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct16x16_38_add = vpx_idct16x16_38_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_38_add = vpx_idct16x16_38_add_sse2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
@ -1265,6 +1383,60 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2;
vpx_sad8x8x4d = vpx_sad8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad_skip_16x16 = vpx_sad_skip_16x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16 = vpx_sad_skip_16x16_sse2;
vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_sse2;
vpx_sad_skip_16x32 = vpx_sad_skip_16x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32 = vpx_sad_skip_16x32_sse2;
vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_sse2;
vpx_sad_skip_16x8 = vpx_sad_skip_16x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8 = vpx_sad_skip_16x8_sse2;
vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_sse2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_4x8 = vpx_sad_skip_4x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8 = vpx_sad_skip_4x8_sse2;
vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_sse2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_sad_skip_8x16 = vpx_sad_skip_8x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16 = vpx_sad_skip_8x16_sse2;
vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_sse2;
vpx_sad_skip_8x8 = vpx_sad_skip_8x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8 = vpx_sad_skip_8x8_sse2;
vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_sse2;
vpx_satd = vpx_satd_c;
if (flags & HAS_SSE2) vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 0
%define VPX_ARCH_X86_64 1

Просмотреть файл

@ -11,6 +11,7 @@
#define RESTRICT
#define INLINE inline
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 1

Просмотреть файл

@ -241,7 +241,8 @@ void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t stride, const uint8_t
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
#define vpx_fdct16x16 vpx_fdct16x16_sse2
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
@ -338,7 +339,8 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
#define vpx_idct16x16_256_add vpx_idct16x16_256_add_sse2
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
@ -346,11 +348,13 @@ void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int strid
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_sse2
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -493,11 +497,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -667,6 +671,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_sse2
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_sse2
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_sse2
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_sse2
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_sse2
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_sse2
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_sse2
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_sse2
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_sse2
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_sse2
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_sse2
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_sse2
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -990,6 +1104,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3;
vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c;
if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3;
vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct32x32 = vpx_fdct32x32_sse2;
if (flags & HAS_AVX2) vpx_fdct32x32 = vpx_fdct32x32_avx2;
vpx_fdct32x32_rd = vpx_fdct32x32_rd_sse2;
@ -1004,8 +1120,13 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_hadamard_32x32 = vpx_hadamard_32x32_avx2;
vpx_hadamard_8x8 = vpx_hadamard_8x8_sse2;
if (flags & HAS_SSSE3) vpx_hadamard_8x8 = vpx_hadamard_8x8_ssse3;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_34_add = vpx_idct32x32_34_add_ssse3;
vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2;
@ -1050,6 +1171,26 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2;
vpx_sad64x64x4d = vpx_sad64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;
vpx_scaled_2d = vpx_scaled_2d_c;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 1
%define VPX_ARCH_X86_64 0

Просмотреть файл

@ -11,6 +11,7 @@
#define RESTRICT
#define INLINE inline
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 1
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -241,6 +241,7 @@ RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t stride, con
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
@ -336,6 +337,7 @@ RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest,
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -344,11 +346,13 @@ RTCD_EXTERN void (*vpx_idct16x16_38_add)(const tran_low_t *input, uint8_t *dest,
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -491,11 +495,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -665,6 +669,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
/*
 * vpx_sad_skip_<W>x<H> and vpx_sad_skip_<W>x<H>x4d declarations.
 *
 * For every block size the per-ISA implementations are listed first (_c,
 * usually _sse2, and _avx2 for the 32- and 64-wide sizes), followed by the
 * public symbol:
 *   - an RTCD_EXTERN function pointer when several implementations exist;
 *     setup_rtcd_internal() sets it to the _c version and then overrides it
 *     when `flags` has HAS_SSE2 / HAS_AVX2 set;
 *   - a plain #define alias to the _c version when that is the only one
 *     declared (4x4 and 8x4).
 *
 * The scalar form takes one reference pointer and returns unsigned int.  The
 * x4d form takes four reference pointers (ref_array[4]) and a four-entry
 * sad_array -- presumably one result per reference; confirm against the
 * definitions.
 */

/* 16-wide blocks: C and SSE2. */
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 32-wide blocks: C, SSE2 and AVX2. */
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 4-wide blocks: 4x4 has only the C version (compile-time alias); 4x8 adds SSE2. */
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 64-wide blocks: C, SSE2 and AVX2. */
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 8-wide blocks: 8x4 has only the C version (compile-time alias); 8x16 and 8x8 add SSE2. */
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -1044,6 +1158,7 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2;
vpx_fdct16x16 = vpx_fdct16x16_c;
if (flags & HAS_SSE2) vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct16x16_1 = vpx_fdct16x16_1_c;
if (flags & HAS_SSE2) vpx_fdct16x16_1 = vpx_fdct16x16_1_sse2;
vpx_fdct32x32 = vpx_fdct32x32_c;
@ -1091,13 +1206,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct16x16_38_add = vpx_idct16x16_38_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_38_add = vpx_idct16x16_38_add_sse2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
@ -1265,6 +1383,60 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2;
vpx_sad8x8x4d = vpx_sad8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad_skip_16x16 = vpx_sad_skip_16x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16 = vpx_sad_skip_16x16_sse2;
vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_sse2;
vpx_sad_skip_16x32 = vpx_sad_skip_16x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32 = vpx_sad_skip_16x32_sse2;
vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_sse2;
vpx_sad_skip_16x8 = vpx_sad_skip_16x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8 = vpx_sad_skip_16x8_sse2;
vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_sse2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_4x8 = vpx_sad_skip_4x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8 = vpx_sad_skip_4x8_sse2;
vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_sse2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_sad_skip_8x16 = vpx_sad_skip_8x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16 = vpx_sad_skip_8x16_sse2;
vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_sse2;
vpx_sad_skip_8x8 = vpx_sad_skip_8x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8 = vpx_sad_skip_8x8_sse2;
vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_sse2;
vpx_satd = vpx_satd_c;
if (flags & HAS_SSE2) vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/*
 * vp9_diamond_search_sad: C and AVX implementations plus the RTCD_EXTERN
 * function pointer that names the public entry point.
 *
 * Only the current signature is declared: it takes an extra
 * uint32_t start_mv_sad after ref_mv and a `const struct vp9_sad_table *`
 * (sad_fn_ptr) where the previous one took a
 * `const struct vp9_variance_vtable *` (fn_ptr).  The previous prototypes are
 * not kept alongside, because redeclaring the same identifiers with a
 * different parameter list is a conflicting-types error in C.
 */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
; Target-architecture selectors for the assembler sources.  Of the flags
; listed here only VPX_ARCH_X86_64 is set to 1.
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 0
%define VPX_ARCH_X86_64 1

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing and INLINE to the `inline` keyword in this configuration. */
#define RESTRICT
#define INLINE inline
/* Target-architecture selectors.  Of the flags listed here only VPX_ARCH_X86_64 is set to 1. */
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 1

Просмотреть файл

@ -241,7 +241,8 @@ void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t stride, const uint8_t
/*
 * vpx_fdct16x16: C, SSE2 and AVX2 implementations and the RTCD_EXTERN
 * function pointer that names the public entry point.
 * setup_rtcd_internal() sets the pointer to the _sse2 version and switches it
 * to _avx2 when `flags` has HAS_AVX2 set.
 *
 * There is deliberately no `#define vpx_fdct16x16 vpx_fdct16x16_sse2` here:
 * with such an alias in effect, the pointer declaration below and the
 * assignments in setup_rtcd_internal() would macro-expand onto
 * vpx_fdct16x16_sse2 and no longer compile.
 */
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
@ -338,7 +339,8 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride
/*
 * vpx_idct16x16_256_add: C, SSE2 and AVX2 implementations and the RTCD_EXTERN
 * function pointer that names the public entry point.
 * setup_rtcd_internal() sets the pointer to the _sse2 version and switches it
 * to _avx2 when `flags` has HAS_AVX2 set.
 *
 * There is deliberately no
 * `#define vpx_idct16x16_256_add vpx_idct16x16_256_add_sse2` here: with such
 * an alias in effect, the pointer declaration below and the assignments in
 * setup_rtcd_internal() would macro-expand onto the _sse2 function name and
 * no longer compile.
 */
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
@ -346,11 +348,13 @@ void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int strid
/*
 * vpx_idct32x32_1024_add: C, SSE2 and AVX2 implementations and the
 * RTCD_EXTERN function pointer that names the public entry point.
 * setup_rtcd_internal() sets the pointer to the _sse2 version and switches it
 * to _avx2 when `flags` has HAS_AVX2 set.
 *
 * There is deliberately no
 * `#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_sse2` here: with
 * such an alias in effect, the pointer declaration below and the assignments
 * in setup_rtcd_internal() would macro-expand onto the _sse2 function name
 * and no longer compile.
 */
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -493,11 +497,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
/*
 * vpx_quantize_b_32x32: C, SSSE3, AVX and AVX2 implementations and the
 * RTCD_EXTERN function pointer that names the public entry point.
 *
 * Each function is declared exactly once.  A second set of prototypes that
 * differed only by a top-level `const` on the mb_plane and scan_order
 * parameters was dropped: a qualifier on the parameter itself does not change
 * the function type, so that set was a redundant duplicate.
 */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -667,6 +671,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2
/*
 * vpx_sad_skip_<W>x<H> and vpx_sad_skip_<W>x<H>x4d declarations.
 *
 * How the public symbol is bound depends on which implementations exist:
 *   - _c and _sse2 only: #define alias to the _sse2 version (compile time);
 *   - _c, _sse2 and _avx2 (32- and 64-wide sizes): RTCD_EXTERN function
 *     pointer -- presumably assigned in setup_rtcd_internal(); that part of
 *     the function is not visible here, confirm;
 *   - _c only (4x4 and 8x4): #define alias to the _c version.
 *
 * The scalar form takes one reference pointer and returns unsigned int.  The
 * x4d form takes four reference pointers (ref_array[4]) and a four-entry
 * sad_array -- presumably one result per reference; confirm against the
 * definitions.
 */

/* 16-wide blocks: aliased to SSE2. */
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_sse2
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_sse2
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_sse2
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_sse2
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_sse2
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_sse2
/* 32-wide blocks: C, SSE2 and AVX2 behind a function pointer. */
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 4-wide blocks: 4x4 aliases the C version, 4x8 aliases SSE2. */
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_sse2
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_sse2
/* 64-wide blocks: C, SSE2 and AVX2 behind a function pointer. */
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 8-wide blocks: 8x4 aliases the C version, 8x16 and 8x8 alias SSE2. */
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_sse2
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_sse2
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_sse2
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_sse2
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -990,6 +1104,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3;
vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c;
if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3;
vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct32x32 = vpx_fdct32x32_sse2;
if (flags & HAS_AVX2) vpx_fdct32x32 = vpx_fdct32x32_avx2;
vpx_fdct32x32_rd = vpx_fdct32x32_rd_sse2;
@ -1004,8 +1120,13 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_hadamard_32x32 = vpx_hadamard_32x32_avx2;
vpx_hadamard_8x8 = vpx_hadamard_8x8_sse2;
if (flags & HAS_SSSE3) vpx_hadamard_8x8 = vpx_hadamard_8x8_ssse3;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_34_add = vpx_idct32x32_34_add_ssse3;
vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2;
@ -1050,6 +1171,26 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2;
vpx_sad64x64x4d = vpx_sad64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;
vpx_scaled_2d = vpx_scaled_2d_c;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls: incomplete struct/union tags so the prototypes in
 * this header can take pointers to them. */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -39,8 +39,8 @@ int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
int64_t vp9_block_error_fp_neon(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
#define vp9_block_error_fp vp9_block_error_fp_neon
/* NOTE(review): vp9_diamond_search_sad_c/_neon are each declared twice below
 * with different parameter lists. The second pair adds start_mv_sad and takes
 * a vp9_sad_table (sad_fn_ptr) instead of a vp9_variance_vtable (fn_ptr).
 * This looks like the removed/added sides of a diff shown without +/- markers;
 * confirm which pair is current, since both cannot coexist in one C header. */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_neon(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
/* The unsuffixed name is bound at compile time to the _neon variant. */
#define vp9_diamond_search_sad vp9_diamond_search_sad_neon
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -2,6 +2,7 @@
@ using the ads2gas.pl script.
.syntax unified
@ Target-architecture flags: ARM and AARCH64 are 1 here; the other flags
@ shown (MIPS, X86, X86_64) are 0.
.equ VPX_ARCH_ARM , 1
.equ VPX_ARCH_AARCH64 , 1
.equ VPX_ARCH_MIPS , 0
.equ VPX_ARCH_X86 , 0
.equ VPX_ARCH_X86_64 , 0

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing; INLINE expands to __inline. */
#define RESTRICT
#define INLINE __inline
/* Target-architecture flags: ARM and AARCH64 are 1 here; the other flags
 * shown (MIPS, X86, X86_64) are 0. */
#define VPX_ARCH_ARM 1
#define VPX_ARCH_AARCH64 1
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -455,8 +455,8 @@ void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int1
void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
#define vpx_quantize_b vpx_quantize_b_neon
/* NOTE(review): vpx_quantize_b_32x32_c/_neon appear twice below. The two
 * forms differ only in the top-level const on the mb_plane and scan_order
 * parameters, which does not change the function type in C. Likely the
 * removed/added sides of a diff shown without +/- markers - confirm. */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
/* The unsuffixed name is bound at compile time to the _neon variant. */
#define vpx_quantize_b_32x32 vpx_quantize_b_32x32_neon
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -615,6 +615,110 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_neon
/* vpx_sad_skip_* prototypes for this configuration. Every block size below
 * declares a _c and a _neon variant, and the unsuffixed name is #defined to
 * the _neon one. The *x4d forms take ref_array[4] and a sad_array[4]
 * parameter (presumably one result per reference - confirm against the
 * implementations). */
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_neon
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_neon
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_neon
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_neon
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_neon
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_neon
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x16 vpx_sad_skip_32x16_neon
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x16x4d vpx_sad_skip_32x16x4d_neon
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x32 vpx_sad_skip_32x32_neon
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x32x4d vpx_sad_skip_32x32x4d_neon
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_32x64 vpx_sad_skip_32x64_neon
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_32x64x4d vpx_sad_skip_32x64x4d_neon
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_neon
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_neon
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_neon
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_neon
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x32 vpx_sad_skip_64x32_neon
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x32x4d vpx_sad_skip_64x32x4d_neon
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_64x64 vpx_sad_skip_64x64_neon
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_64x64x4d vpx_sad_skip_64x64x4d_neon
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_neon
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_neon
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_neon
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_neon
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_neon
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_neon
/* vpx_satd follows the same pattern: _c and _neon variants, alias to _neon. */
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_neon(const int16_t *coeff, int length);
#define vpx_satd vpx_satd_neon

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls: incomplete struct/union tags so the prototypes in
 * this header can take pointers to them. */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
/* NOTE(review): vp9_diamond_search_sad_c/_avx and the vp9_diamond_search_sad
 * function pointer are each declared twice below with different parameter
 * lists. The second set adds start_mv_sad and takes a vp9_sad_table
 * (sad_fn_ptr) instead of a vp9_variance_vtable (fn_ptr). This looks like the
 * removed/added sides of a diff shown without +/- markers; confirm which set
 * is current, since both cannot coexist in one C header. */
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
; Target-architecture flags: of those shown, only VPX_ARCH_X86 is 1.
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 1
%define VPX_ARCH_X86_64 0

Просмотреть файл

@ -11,6 +11,7 @@
/* RESTRICT expands to nothing; INLINE expands to inline. */
#define RESTRICT
#define INLINE inline
/* Target-architecture flags: of those shown, only VPX_ARCH_X86 is 1. */
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 1
#define VPX_ARCH_X86_64 0

Просмотреть файл

@ -241,6 +241,7 @@ RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t stride, con
/* vpx_fdct16x16: _c, _sse2 and _avx2 variants share one signature; the
 * unsuffixed name is an RTCD_EXTERN function pointer of that same type. */
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
@ -336,6 +337,7 @@ RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest,
/* vpx_idct16x16_256_add: _c, _sse2 and _avx2 variants share one signature;
 * the unsuffixed name is an RTCD_EXTERN function pointer of that same type. */
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -344,11 +346,13 @@ RTCD_EXTERN void (*vpx_idct16x16_38_add)(const tran_low_t *input, uint8_t *dest,
/* vpx_idct32x32_1024_add: _c, _sse2 and _avx2 variants behind an RTCD_EXTERN
 * function pointer. */
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
/* vpx_idct32x32_135_add additionally declares an _ssse3 variant. */
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -491,11 +495,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
/* NOTE(review): the vpx_quantize_b_32x32 family (_c, _ssse3, _avx, _avx2 and
 * the RTCD_EXTERN pointer) appears twice below. The two forms differ only in
 * the top-level const on the mb_plane and scan_order parameters, which does
 * not change the function type in C. Likely the removed/added sides of a diff
 * shown without +/- markers - confirm. */
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -665,6 +669,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_16x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_16x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_4x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_4x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 8x4 members of the vpx_sad_skip family.
 * The single-reference form returns an unsigned int; the x4d form takes four
 * reference pointers and a four-entry uint32_t result array and returns void.
 * Only C versions are declared for this size, so both public names are
 * bound at compile time with macros instead of RTCD_EXTERN function
 * pointers.
 * NOTE(review): "skip" semantics are not visible in this header -- confirm
 * against the vpx_dsp implementation. */
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_8x8)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_8x8x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* vpx_satd implementations: C, SSE2 and AVX2, all taking a read-only int16_t
 * coefficient buffer plus its length and returning int.
 * setup_rtcd_internal() starts from the C version and overrides it with the
 * SSE2 and then the AVX2 version when the matching CPU flag is set. */
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -1044,6 +1158,7 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2;
vpx_fdct16x16 = vpx_fdct16x16_c;
if (flags & HAS_SSE2) vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct16x16_1 = vpx_fdct16x16_1_c;
if (flags & HAS_SSE2) vpx_fdct16x16_1 = vpx_fdct16x16_1_sse2;
vpx_fdct32x32 = vpx_fdct32x32_c;
@ -1091,13 +1206,16 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct16x16_38_add = vpx_idct16x16_38_add_c;
if (flags & HAS_SSE2) vpx_idct16x16_38_add = vpx_idct16x16_38_add_sse2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
@ -1265,6 +1383,60 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSE2) vpx_sad8x8_avg = vpx_sad8x8_avg_sse2;
vpx_sad8x8x4d = vpx_sad8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad8x8x4d = vpx_sad8x8x4d_sse2;
vpx_sad_skip_16x16 = vpx_sad_skip_16x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16 = vpx_sad_skip_16x16_sse2;
vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x16x4d = vpx_sad_skip_16x16x4d_sse2;
vpx_sad_skip_16x32 = vpx_sad_skip_16x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32 = vpx_sad_skip_16x32_sse2;
vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x32x4d = vpx_sad_skip_16x32x4d_sse2;
vpx_sad_skip_16x8 = vpx_sad_skip_16x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8 = vpx_sad_skip_16x8_sse2;
vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_16x8x4d = vpx_sad_skip_16x8x4d_sse2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_4x8 = vpx_sad_skip_4x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8 = vpx_sad_skip_4x8_sse2;
vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_4x8x4d = vpx_sad_skip_4x8x4d_sse2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_sad_skip_8x16 = vpx_sad_skip_8x16_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16 = vpx_sad_skip_8x16_sse2;
vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x16x4d = vpx_sad_skip_8x16x4d_sse2;
vpx_sad_skip_8x8 = vpx_sad_skip_8x8_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8 = vpx_sad_skip_8x8_sse2;
vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_c;
if (flags & HAS_SSE2) vpx_sad_skip_8x8x4d = vpx_sad_skip_8x8x4d_sse2;
vpx_satd = vpx_satd_c;
if (flags & HAS_SSE2) vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;

Просмотреть файл

@ -21,7 +21,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -45,9 +45,9 @@ int64_t vp9_block_error_fp_sse2(const tran_low_t *coeff, const tran_low_t *dqcoe
int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
RTCD_EXTERN int64_t (*vp9_block_error_fp)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_c(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
int vp9_diamond_search_sad_avx(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
RTCD_EXTERN int (*vp9_diamond_search_sad)(const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv);
void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type);
void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type);

Просмотреть файл

@ -1,4 +1,5 @@
; Target-architecture switches for the assembler sources.
; In this configuration only VPX_ARCH_X86_64 is 1; every other
; architecture listed here is 0.
%define VPX_ARCH_ARM 0
%define VPX_ARCH_AARCH64 0
%define VPX_ARCH_MIPS 0
%define VPX_ARCH_X86 0
%define VPX_ARCH_X86_64 1

Просмотреть файл

@ -11,6 +11,7 @@
/* Compiler keyword shims: RESTRICT expands to nothing and INLINE maps to
 * __inline in this configuration. */
#define RESTRICT
#define INLINE __inline
/* Target-architecture switches: only VPX_ARCH_X86_64 is 1 here; every other
 * architecture listed is 0. */
#define VPX_ARCH_ARM 0
#define VPX_ARCH_AARCH64 0
#define VPX_ARCH_MIPS 0
#define VPX_ARCH_X86 0
#define VPX_ARCH_X86_64 1

Просмотреть файл

@ -241,7 +241,8 @@ void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t stride, const uint8_t
void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_sse2(const int16_t *input, tran_low_t *output, int stride);
#define vpx_fdct16x16 vpx_fdct16x16_sse2
void vpx_fdct16x16_avx2(const int16_t *input, tran_low_t *output, int stride);
RTCD_EXTERN void (*vpx_fdct16x16)(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride);
void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output, int stride);
@ -338,7 +339,8 @@ void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
#define vpx_idct16x16_256_add vpx_idct16x16_256_add_sse2
void vpx_idct16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
@ -346,11 +348,13 @@ void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int strid
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_sse2
void vpx_idct32x32_1024_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_135_add_avx2(const tran_low_t *input, uint8_t *dest, int stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride);
@ -493,11 +497,11 @@ void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const in
void vpx_quantize_b_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
RTCD_EXTERN void (*vpx_quantize_b)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order);
void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
void vpx_quantize_b_32x32_avx2(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
RTCD_EXTERN void (*vpx_quantize_b_32x32)(const tran_low_t *coeff_ptr, const struct macroblock_plane * mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *scan_order);
unsigned int vpx_sad16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
@ -667,6 +671,116 @@ void vpx_sad8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * con
void vpx_sad8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad8x8x4d vpx_sad8x8x4d_sse2
/* 16x16 member of the vpx_sad_skip family (single reference, returns
 * unsigned int). Both a C and an SSE2 version are declared; in this
 * configuration the public name is bound directly to the SSE2 version at
 * compile time, so no runtime CPU-flag check is involved for this size.
 * NOTE(review): "skip" semantics are not visible in this header -- confirm
 * against the vpx_dsp implementation. */
unsigned int vpx_sad_skip_16x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x16 vpx_sad_skip_16x16_sse2
void vpx_sad_skip_16x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x16x4d vpx_sad_skip_16x16x4d_sse2
unsigned int vpx_sad_skip_16x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x32 vpx_sad_skip_16x32_sse2
void vpx_sad_skip_16x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x32x4d vpx_sad_skip_16x32x4d_sse2
unsigned int vpx_sad_skip_16x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_16x8 vpx_sad_skip_16x8_sse2
void vpx_sad_skip_16x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_16x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_16x8x4d vpx_sad_skip_16x8x4d_sse2
unsigned int vpx_sad_skip_32x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x16)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x16x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x16x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_32x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_32x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_32x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_32x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_32x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
/* 4x4 members of the vpx_sad_skip family.
 * The single-reference form returns an unsigned int; the x4d form takes four
 * reference pointers and a four-entry uint32_t result array and returns void.
 * Only C versions are declared for this size, so both public names are
 * bound at compile time with macros instead of RTCD_EXTERN function
 * pointers.
 * NOTE(review): "skip" semantics are not visible in this header -- confirm
 * against the vpx_dsp implementation. */
unsigned int vpx_sad_skip_4x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x4 vpx_sad_skip_4x4_c
void vpx_sad_skip_4x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x4x4d vpx_sad_skip_4x4x4d_c
unsigned int vpx_sad_skip_4x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_4x8 vpx_sad_skip_4x8_sse2
void vpx_sad_skip_4x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_4x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_4x8x4d vpx_sad_skip_4x8x4d_sse2
unsigned int vpx_sad_skip_64x32_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x32)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x32x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x32x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_64x64_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
RTCD_EXTERN unsigned int (*vpx_sad_skip_64x64)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
void vpx_sad_skip_64x64x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
RTCD_EXTERN void (*vpx_sad_skip_64x64x4d)(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
unsigned int vpx_sad_skip_8x16_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x16 vpx_sad_skip_8x16_sse2
void vpx_sad_skip_8x16x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x16x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x16x4d vpx_sad_skip_8x16x4d_sse2
unsigned int vpx_sad_skip_8x4_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x4 vpx_sad_skip_8x4_c
void vpx_sad_skip_8x4x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x4x4d vpx_sad_skip_8x4x4d_c
unsigned int vpx_sad_skip_8x8_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
unsigned int vpx_sad_skip_8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride);
#define vpx_sad_skip_8x8 vpx_sad_skip_8x8_sse2
void vpx_sad_skip_8x8x4d_c(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
void vpx_sad_skip_8x8x4d_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]);
#define vpx_sad_skip_8x8x4d vpx_sad_skip_8x8x4d_sse2
int vpx_satd_c(const int16_t *coeff, int length);
int vpx_satd_sse2(const int16_t *coeff, int length);
int vpx_satd_avx2(const int16_t *coeff, int length);
@ -990,6 +1104,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3;
vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c;
if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3;
vpx_fdct16x16 = vpx_fdct16x16_sse2;
if (flags & HAS_AVX2) vpx_fdct16x16 = vpx_fdct16x16_avx2;
vpx_fdct32x32 = vpx_fdct32x32_sse2;
if (flags & HAS_AVX2) vpx_fdct32x32 = vpx_fdct32x32_avx2;
vpx_fdct32x32_rd = vpx_fdct32x32_rd_sse2;
@ -1004,8 +1120,13 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_hadamard_32x32 = vpx_hadamard_32x32_avx2;
vpx_hadamard_8x8 = vpx_hadamard_8x8_sse2;
if (flags & HAS_SSSE3) vpx_hadamard_8x8 = vpx_hadamard_8x8_ssse3;
vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
if (flags & HAS_AVX2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_avx2;
vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
if (flags & HAS_AVX2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_avx2;
vpx_idct32x32_135_add = vpx_idct32x32_135_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_135_add = vpx_idct32x32_135_add_ssse3;
if (flags & HAS_AVX2) vpx_idct32x32_135_add = vpx_idct32x32_135_add_avx2;
vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2;
if (flags & HAS_SSSE3) vpx_idct32x32_34_add = vpx_idct32x32_34_add_ssse3;
vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2;
@ -1050,6 +1171,26 @@ static void setup_rtcd_internal(void)
if (flags & HAS_AVX2) vpx_sad64x64_avg = vpx_sad64x64_avg_avx2;
vpx_sad64x64x4d = vpx_sad64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad64x64x4d = vpx_sad64x64x4d_avx2;
vpx_sad_skip_32x16 = vpx_sad_skip_32x16_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16 = vpx_sad_skip_32x16_avx2;
vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x16x4d = vpx_sad_skip_32x16x4d_avx2;
vpx_sad_skip_32x32 = vpx_sad_skip_32x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32 = vpx_sad_skip_32x32_avx2;
vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x32x4d = vpx_sad_skip_32x32x4d_avx2;
vpx_sad_skip_32x64 = vpx_sad_skip_32x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64 = vpx_sad_skip_32x64_avx2;
vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_32x64x4d = vpx_sad_skip_32x64x4d_avx2;
vpx_sad_skip_64x32 = vpx_sad_skip_64x32_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32 = vpx_sad_skip_64x32_avx2;
vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x32x4d = vpx_sad_skip_64x32x4d_avx2;
vpx_sad_skip_64x64 = vpx_sad_skip_64x64_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64 = vpx_sad_skip_64x64_avx2;
vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_sse2;
if (flags & HAS_AVX2) vpx_sad_skip_64x64x4d = vpx_sad_skip_64x64x4d_avx2;
vpx_satd = vpx_satd_sse2;
if (flags & HAS_AVX2) vpx_satd = vpx_satd_avx2;
vpx_scaled_2d = vpx_scaled_2d_c;

Просмотреть файл

@ -64,6 +64,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
arm64-android-gcc
arm64-darwin-gcc
arm64-darwin20-gcc
arm64-darwin21-gcc
arm64-darwin22-gcc
arm64-linux-gcc
arm64-win64-gcc
arm64-win64-vs15
@ -77,6 +79,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
armv7-win32-vs15
armv7s-darwin-gcc
armv8-linux-gcc
loongarch32-linux-gcc
loongarch64-linux-gcc
mips32-linux-gcc
mips64-linux-gcc
ppc64le-linux-gcc
@ -117,6 +121,8 @@ COMPILING THE APPLICATIONS/LIBRARIES:
x86_64-darwin18-gcc
x86_64-darwin19-gcc
x86_64-darwin20-gcc
x86_64-darwin21-gcc
x86_64-darwin22-gcc
x86_64-iphonesimulator-gcc
x86_64-linux-gcc
x86_64-linux-icc

Просмотреть файл

@ -842,6 +842,10 @@ process_common_toolchain() {
# Enable the architecture family
case ${tgt_isa} in
arm64 | armv8)
enable_feature arm
enable_feature aarch64
;;
arm*)
enable_feature arm
;;
@ -1066,8 +1070,11 @@ EOF
enable_feature win_arm64_neon_h_workaround
else
# If a probe is not possible, assume this is the pure Windows
# SDK and so the workaround is necessary.
enable_feature win_arm64_neon_h_workaround
# SDK and so the workaround is necessary when using Visual
# Studio < 2019.
if [ ${tgt_cc##vs} -lt 16 ]; then
enable_feature win_arm64_neon_h_workaround
fi
fi
fi
fi

1
media/libvpx/libvpx/configure поставляемый
Просмотреть файл

@ -243,6 +243,7 @@ CODEC_FAMILIES="
ARCH_LIST="
arm
aarch64
mips
x86
x86_64

Просмотреть файл

@ -381,7 +381,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
vpx_codec_iface_t *iface,
vpx_codec_enc_cfg_t *enc_cfg) {
vpx_codec_err_t res;
int i, sl, tl;
int sl, tl;
SvcInternal_t *const si = get_svc_internal(svc_ctx);
if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL ||
enc_cfg == NULL) {
@ -433,7 +433,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
}
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
i = sl * svc_ctx->temporal_layers + tl;
const int i = sl * svc_ctx->temporal_layers + tl;
si->svc_params.max_quantizers[i] = MAX_QUANTIZER;
si->svc_params.min_quantizers[i] = 0;
if (enc_cfg->rc_end_usage == VPX_CBR &&
@ -503,7 +503,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) {
for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) {
i = sl * svc_ctx->temporal_layers + tl;
const int i = sl * svc_ctx->temporal_layers + tl;
if (enc_cfg->rc_end_usage == VPX_CBR &&
enc_cfg->g_pass == VPX_RC_ONE_PASS) {
si->svc_params.max_quantizers[i] = enc_cfg->rc_max_quantizer;

Просмотреть файл

@ -32,6 +32,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "./y4minput.h"
#define OUTPUT_FRAME_STATS 0
#define OUTPUT_RC_STATS 1
#define SIMULCAST_MODE 0
@ -880,7 +881,9 @@ int main(int argc, const char **argv) {
int pts = 0; /* PTS starts at 0 */
int frame_duration = 1; /* 1 timebase tick per frame */
int end_of_stream = 0;
#if OUTPUT_FRAME_STATS
int frames_received = 0;
#endif
#if OUTPUT_RC_STATS
VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL };
struct RateControlStats rc;
@ -1126,14 +1129,14 @@ int main(int argc, const char **argv) {
}
#endif
}
/*
#if OUTPUT_FRAME_STATS
printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
!!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
(int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
*/
++frames_received;
#endif
if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
++frames_received;
#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
(unsigned int)cx_pkt->data.frame.sz, NULL, 0))

Просмотреть файл

@ -545,7 +545,7 @@ testdata: $(LIBVPX_TEST_DATA)
echo "Checking test data:";\
for f in $(call enabled,LIBVPX_TEST_DATA); do\
grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
(cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
(cd "$(LIBVPX_TEST_DATA_PATH)"; $${sha1sum} -c);\
done; \
else\
echo "Skipping test data integrity check, sha1sum not found.";\
@ -631,8 +631,8 @@ test_rc_interface.$(VCPROJ_SFX): $(RC_INTERFACE_TEST_SRCS) vpx.$(VCPROJ_SFX) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
-L. -l$(CODEC_LIB) -l$(RC_RTC_LIB) -l$(GTEST_LIB) $^
endif # RC_INTERFACE_TEST
endif # CONFIG_VP9_ENCODER
endif
endif # CONFIG_ENCODERS
endif # CONFIG_MSVS
else
include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
@ -699,7 +699,7 @@ $(eval $(call linkerxx_template,$(SIMPLE_ENCODE_TEST_BIN), \
-L. -lsimple_encode -lvpx -lgtest $(extralibs) -lm))
endif # SIMPLE_ENCODE_TEST
endif # CONFIG_UNIT_TESTS
endif # CONFIG_EXTERNAL_BUILD
# Install test sources only if codec source is included
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
@ -724,7 +724,7 @@ NUM_SHARDS := 10
SHARDS := 0 1 2 3 4 5 6 7 8 9
$(foreach s,$(SHARDS),$(eval $(call test_shard_template,$(s),$(NUM_SHARDS))))
endif
endif # CONFIG_UNIT_TESTS
##
## documentation directives
@ -764,10 +764,10 @@ TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
endif
utiltest utiltest-no-data-check:
$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--test-data-path "$(LIBVPX_TEST_DATA_PATH)" \
--bin-path $(TEST_BIN_PATH)
$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--test-data-path "$(LIBVPX_TEST_DATA_PATH)" \
--bin-path $(TEST_BIN_PATH)
utiltest: testdata
else
@ -791,7 +791,7 @@ EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
endif
exampletest exampletest-no-data-check: examples
$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
--test-data-path $(LIBVPX_TEST_DATA_PATH) \
--test-data-path "$(LIBVPX_TEST_DATA_PATH)" \
--bin-path $(EXAMPLES_BIN_PATH)
exampletest: testdata
else

Просмотреть файл

@ -244,7 +244,7 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
// Vertical pass (transposed intermediate -> dst).
{
uint16_t *src_ptr = intermediate_buffer;
src_ptr = intermediate_buffer;
const int dst_next_row_stride = dst_stride - output_width;
unsigned int i, j;
for (i = 0; i < output_height; ++i) {

Просмотреть файл

@ -27,6 +27,7 @@
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h" // for round()
#include "vpx_ports/vpx_timer.h"
using libvpx_test::ACMRandom;
@ -548,12 +549,50 @@ class Trans16x16TestBase {
}
}
// Benchmark helper: runs the C forward 16x16 DCT (vpx_fdct16x16_c) and the
// transform under test (RunFwdTxfm) count_test_block times each on the same
// random input block, then prints both elapsed times as reported by
// vpx_usec_timer_elapsed() together with their ratio.
void RunSpeedTest() {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;

  DECLARE_ALIGNED(32, int16_t, input_block[kNumCoeffs]);
  DECLARE_ALIGNED(32, tran_low_t, output_ref_block[kNumCoeffs]);
  DECLARE_ALIGNED(32, tran_low_t, output_block[kNumCoeffs]);

  // Initialize a test block with input range [-mask_, mask_].
  for (int idx = 0; idx < kNumCoeffs; ++idx) {
    input_block[idx] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
  }

  // Time the C reference.
  vpx_usec_timer timer_c;
  vpx_usec_timer_start(&timer_c);
  for (int iter = 0; iter < count_test_block; ++iter) {
    vpx_fdct16x16_c(input_block, output_ref_block, pitch_);
  }
  vpx_usec_timer_mark(&timer_c);
  const int c_sum_time = static_cast<int>(vpx_usec_timer_elapsed(&timer_c));

  // Time the implementation selected by the test parameters.
  vpx_usec_timer timer_mod;
  vpx_usec_timer_start(&timer_mod);
  for (int iter = 0; iter < count_test_block; ++iter) {
    RunFwdTxfm(input_block, output_block, pitch_);
  }
  vpx_usec_timer_mark(&timer_mod);
  const int simd_sum_time =
      static_cast<int>(vpx_usec_timer_elapsed(&timer_mod));

  // Gain > 1 means the implementation under test was faster than the C code.
  const float gain =
      static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time);
  printf("c_time = %d \t simd_time = %d \t Gain = %4.2f \n", c_sum_time,
         simd_sum_time, gain);
}
void CompareInvReference(IdctFunc ref_txfm, int thresh) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
const int eob = 10;
const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
@ -604,6 +643,80 @@ class Trans16x16TestBase {
}
}
void RunInvTrans16x16SpeedTest(IdctFunc ref_txfm, int thresh) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
const int eob = 10;
const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
int64_t c_sum_time = 0;
int64_t simd_sum_time = 0;
DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int j = 0; j < kNumCoeffs; ++j) {
if (j < eob) {
// Random values less than the threshold, either positive or negative
coeff[scan[j]] = rnd(thresh);
} else {
coeff[scan[j]] = 0;
}
if (bit_depth_ == VPX_BITS_8) {
dst[j] = 0;
ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
} else {
dst16[j] = 0;
ref16[j] = 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
if (bit_depth_ == VPX_BITS_8) {
vpx_usec_timer timer_c;
vpx_usec_timer_start(&timer_c);
for (int i = 0; i < count_test_block; ++i) {
ref_txfm(coeff, ref, pitch_);
}
vpx_usec_timer_mark(&timer_c);
c_sum_time += vpx_usec_timer_elapsed(&timer_c);
vpx_usec_timer timer_mod;
vpx_usec_timer_start(&timer_mod);
for (int i = 0; i < count_test_block; ++i) {
RunInvTxfm(coeff, dst, pitch_);
}
vpx_usec_timer_mark(&timer_mod);
simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
} else {
#if CONFIG_VP9_HIGHBITDEPTH
vpx_usec_timer timer_c;
vpx_usec_timer_start(&timer_c);
for (int i = 0; i < count_test_block; ++i) {
ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
}
vpx_usec_timer_mark(&timer_c);
c_sum_time += vpx_usec_timer_elapsed(&timer_c);
vpx_usec_timer timer_mod;
vpx_usec_timer_start(&timer_mod);
for (int i = 0; i < count_test_block; ++i) {
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
}
vpx_usec_timer_mark(&timer_mod);
simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
printf(
"c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
c_sum_time, simd_sum_time,
(static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
}
int pitch_;
int tx_type_;
vpx_bit_depth_t bit_depth_;
@ -664,6 +777,8 @@ TEST_P(Trans16x16DCT, QuantCheck) {
TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }
TEST_P(Trans16x16DCT, DISABLED_Speed) { RunSpeedTest(); }
class Trans16x16HT : public Trans16x16TestBase,
public ::testing::TestWithParam<Ht16x16Param> {
public:
@ -714,7 +829,6 @@ TEST_P(Trans16x16HT, QuantCheck) {
RunQuantCheck(429, 729);
}
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
class InvTrans16x16DCT : public Trans16x16TestBase,
public ::testing::TestWithParam<Idct16x16Param> {
public:
@ -745,7 +859,10 @@ GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans16x16DCT);
TEST_P(InvTrans16x16DCT, CompareReference) {
CompareInvReference(ref_txfm_, thresh_);
}
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
TEST_P(InvTrans16x16DCT, DISABLED_Speed) {
RunInvTrans16x16SpeedTest(ref_txfm_, thresh_);
}
using std::make_tuple;
@ -787,6 +904,12 @@ INSTANTIATE_TEST_SUITE_P(
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(C, InvTrans16x16DCT,
::testing::Values(make_tuple(&vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_c,
6225, VPX_BITS_8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -821,8 +944,25 @@ INSTANTIATE_TEST_SUITE_P(
2, VPX_BITS_8),
make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
3, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(SSE2, InvTrans16x16DCT,
::testing::Values(make_tuple(
&vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_sse2, 6225, VPX_BITS_8)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
AVX2, Trans16x16DCT,
::testing::Values(make_tuple(&vpx_fdct16x16_avx2,
&vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(AVX2, InvTrans16x16DCT,
::testing::Values(make_tuple(
&vpx_idct16x16_256_add_c,
&vpx_idct16x16_256_add_avx2, 6225, VPX_BITS_8)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_SUITE_P(
SSE2, Trans16x16DCT,

Просмотреть файл

@ -24,10 +24,12 @@
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h" // for round()
#include "vpx_ports/vpx_timer.h"
using libvpx_test::ACMRandom;
@ -71,6 +73,9 @@ typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
Trans32x32Param;
typedef std::tuple<InvTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t, int, int>
InvTrans32x32Param;
#if CONFIG_VP9_HIGHBITDEPTH
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
@ -314,6 +319,174 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
}
}
class InvTrans32x32Test : public ::testing::TestWithParam<InvTrans32x32Param> {
public:
virtual ~InvTrans32x32Test() {}
// Unpacks the InvTrans32x32Param tuple into the fixture members. The tuple
// layout is <reference inverse transform, inverse transform under test,
// version, bit depth, eob, thresh>.
virtual void SetUp() {
  ref_txfm_ = GET_PARAM(0);
  inv_txfm_ = GET_PARAM(1);
  version_ = GET_PARAM(2);  // 0: high precision forward transform
                            // 1: low precision version for rd loop
  bit_depth_ = GET_PARAM(3);
  eob_ = GET_PARAM(4);
  // The threshold is the sixth tuple element. Reading index 4 here would make
  // thresh_ a copy of eob_ and leave the configured threshold unused.
  thresh_ = GET_PARAM(5);
  mask_ = (1 << bit_depth_) - 1;
  pitch_ = 32;  // The blocks handled by this fixture are 32 samples wide.
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
void RunRefTxfm(tran_low_t *out, uint8_t *dst, int stride) {
ref_txfm_(out, dst, stride);
}
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
int version_;
vpx_bit_depth_t bit_depth_;
int mask_;
int eob_;
int thresh_;
InvTxfmFunc ref_txfm_;
InvTxfmFunc inv_txfm_;
int pitch_;
void RunInvTrans32x32SpeedTest() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
int64_t c_sum_time = 0;
int64_t simd_sum_time = 0;
const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int j = 0; j < kNumCoeffs; ++j) {
if (j < eob_) {
// Random values less than the threshold, either positive or negative
coeff[scan[j]] = rnd(thresh_);
} else {
coeff[scan[j]] = 0;
}
if (bit_depth_ == VPX_BITS_8) {
dst[j] = 0;
ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
} else {
dst16[j] = 0;
ref16[j] = 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
if (bit_depth_ == VPX_BITS_8) {
vpx_usec_timer timer_c;
vpx_usec_timer_start(&timer_c);
for (int i = 0; i < count_test_block; ++i) {
RunRefTxfm(coeff, ref, pitch_);
}
vpx_usec_timer_mark(&timer_c);
c_sum_time += vpx_usec_timer_elapsed(&timer_c);
vpx_usec_timer timer_mod;
vpx_usec_timer_start(&timer_mod);
for (int i = 0; i < count_test_block; ++i) {
RunInvTxfm(coeff, dst, pitch_);
}
vpx_usec_timer_mark(&timer_mod);
simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
} else {
#if CONFIG_VP9_HIGHBITDEPTH
vpx_usec_timer timer_c;
vpx_usec_timer_start(&timer_c);
for (int i = 0; i < count_test_block; ++i) {
RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
}
vpx_usec_timer_mark(&timer_c);
c_sum_time += vpx_usec_timer_elapsed(&timer_c);
vpx_usec_timer timer_mod;
vpx_usec_timer_start(&timer_mod);
for (int i = 0; i < count_test_block; ++i) {
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_);
}
vpx_usec_timer_mark(&timer_mod);
simd_sum_time += vpx_usec_timer_elapsed(&timer_mod);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
printf(
"c_time = %" PRId64 " \t simd_time = %" PRId64 " \t Gain = %4.2f \n",
c_sum_time, simd_sum_time,
(static_cast<float>(c_sum_time) / static_cast<float>(simd_sum_time)));
}
void CompareInvReference32x32() {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
const int eob = 31;
const int16_t *scan = vp9_default_scan_orders[TX_32X32].scan;
DECLARE_ALIGNED(32, tran_low_t, coeff[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif // CONFIG_VP9_HIGHBITDEPTH
for (int i = 0; i < count_test_block; ++i) {
for (int j = 0; j < kNumCoeffs; ++j) {
if (j < eob) {
coeff[scan[j]] = rnd.Rand8Extremes();
} else {
coeff[scan[j]] = 0;
}
if (bit_depth_ == VPX_BITS_8) {
dst[j] = 0;
ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
} else {
dst16[j] = 0;
ref16[j] = 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
if (bit_depth_ == VPX_BITS_8) {
RunRefTxfm(coeff, ref, pitch_);
RunInvTxfm(coeff, dst, pitch_);
} else {
#if CONFIG_VP9_HIGHBITDEPTH
RunRefTxfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
ASM_REGISTER_STATE_CHECK(
RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
const uint32_t diff = dst[j] - ref[j];
#endif // CONFIG_VP9_HIGHBITDEPTH
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error) << "Error: 32x32 IDCT Comparison has error "
<< error << " at index " << j;
}
}
}
};
GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(InvTrans32x32Test);
TEST_P(InvTrans32x32Test, DISABLED_Speed) { RunInvTrans32x32SpeedTest(); }
TEST_P(InvTrans32x32Test, CompareReference) { CompareInvReference32x32(); }
using std::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
@ -334,6 +507,14 @@ INSTANTIATE_TEST_SUITE_P(
VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
1, VPX_BITS_8)));
// C-vs-C instantiation of the inverse 32x32 tests. Tuple layout:
// <reference txfm, txfm under test, version, bit depth, eob, thresh>.
// Both entries use the same threshold (6225) as the SIMD instantiations.
INSTANTIATE_TEST_SUITE_P(
    C, InvTrans32x32Test,
    ::testing::Values(
        (make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_c, 0,
                    VPX_BITS_8, 32, 6225)),
        make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_c, 0,
                   VPX_BITS_8, 16, 6225)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
@ -352,6 +533,14 @@ INSTANTIATE_TEST_SUITE_P(
&vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_sse2,
&vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
SSE2, InvTrans32x32Test,
::testing::Values(
(make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_sse2, 0,
VPX_BITS_8, 32, 6225)),
make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_sse2, 0,
VPX_BITS_8, 16, 6225)));
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@ -377,6 +566,14 @@ INSTANTIATE_TEST_SUITE_P(
&vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vpx_fdct32x32_rd_avx2,
&vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
INSTANTIATE_TEST_SUITE_P(
AVX2, InvTrans32x32Test,
::testing::Values(
(make_tuple(&vpx_idct32x32_1024_add_c, &vpx_idct32x32_1024_add_avx2, 0,
VPX_BITS_8, 32, 6225)),
make_tuple(&vpx_idct32x32_135_add_c, &vpx_idct32x32_135_add_avx2, 0,
VPX_BITS_8, 16, 6225)));
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

Просмотреть файл

@ -358,14 +358,6 @@ class TransTestBase : public ::testing::TestWithParam<DctParam> {
ASSERT_TRUE(in.Init());
Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
ASSERT_TRUE(coeff.Init());
Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
ASSERT_TRUE(dst.Init());
Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
ASSERT_TRUE(src.Init());
Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
ASSERT_TRUE(dst16.Init());
Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
ASSERT_TRUE(src16.Init());
for (int i = 0; i < count_test_block; ++i) {
InitMem();

Просмотреть файл

@ -13,9 +13,12 @@
#include <initializer_list>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/video_source.h"
#include "./vpx_config.h"
#include "test/video_source.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
@ -360,4 +363,85 @@ TEST(EncodeAPI, ConfigChangeThreadCount) {
}
}
#if CONFIG_VP9_ENCODER
class EncodeApiGetTplStatsTest
: public ::libvpx_test::EncoderTest,
public ::testing::TestWithParam<const libvpx_test::CodecFactory *> {
public:
EncodeApiGetTplStatsTest() : EncoderTest(GetParam()) {}
~EncodeApiGetTplStatsTest() override {}
protected:
void SetUp() override {
InitializeConfig();
SetMode(::libvpx_test::kTwoPassGood);
}
void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) override {
if (video->frame() == 0) {
encoder->Control(VP9E_SET_TPL, 1);
}
}
vpx_codec_err_t AllocateTplList(VpxTplGopStats *data) {
// Allocate MAX_ARF_GOP_SIZE (50) * sizeof(VpxTplFrameStats) that will be
// filled by VP9E_GET_TPL_STATS.
// MAX_ARF_GOP_SIZE is used here because the test doesn't know the size of
// each GOP before getting TPL stats from the encoder.
data->size = 50;
data->frame_stats_list =
static_cast<VpxTplFrameStats *>(calloc(50, sizeof(VpxTplFrameStats)));
if (data->frame_stats_list == nullptr) return VPX_CODEC_MEM_ERROR;
return VPX_CODEC_OK;
}
// Runs after each frame has been encoded. For every compressed-frame packet
// it allocates a TPL stats list, queries the encoder with VP9E_GET_TPL_STATS,
// and verifies that at least one entry was filled in with the configured
// frame dimensions, a positive block count and a non-null block list.
void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) override {
  ::libvpx_test::CxDataIterator iter = encoder->GetCxData();
  while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
    switch (pkt->kind) {
      case VPX_CODEC_CX_FRAME_PKT: {
        VpxTplGopStats tpl_stats;
        // Fatal on failure: a null frame_stats_list must not be passed to the
        // encoder or indexed below. Nothing has been allocated in that case,
        // so returning here leaks nothing.
        ASSERT_EQ(AllocateTplList(&tpl_stats), VPX_CODEC_OK);
        encoder->Control(VP9E_GET_TPL_STATS, &tpl_stats);
        bool stats_not_all_zero = false;
        VpxTplFrameStats *frame_stats_list = tpl_stats.frame_stats_list;
        for (int i = 0; i < tpl_stats.size; i++) {
          if (frame_stats_list[i].frame_width != 0) {
            // Non-fatal checks: a fatal ASSERT_* would return from this hook
            // before the list is freed. The test still fails on any mismatch.
            EXPECT_EQ(frame_stats_list[i].frame_width, width_);
            EXPECT_EQ(frame_stats_list[i].frame_height, height_);
            EXPECT_GT(frame_stats_list[i].num_blocks, 0);
            EXPECT_NE(frame_stats_list[i].block_stats_list, nullptr);
            stats_not_all_zero = true;
          }
        }
        // Free the memory right away now as this is only a test. This happens
        // before the final fatal assertion so the list is released on every
        // path.
        free(tpl_stats.frame_stats_list);
        ASSERT_TRUE(stats_not_all_zero);
        break;
      }
      default: break;
    }
  }
}
int width_;
int height_;
};
TEST_P(EncodeApiGetTplStatsTest, GetTplStats) {
cfg_.g_lag_in_frames = 25;
width_ = 352;
height_ = 288;
::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", width_,
height_, 30, 1, 0, 50);
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
INSTANTIATE_TEST_SUITE_P(
VP9, EncodeApiGetTplStatsTest,
::testing::Values(
static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)));
#endif // CONFIG_VP9_ENCODER
} // namespace

Просмотреть файл

@ -153,6 +153,11 @@ class Encoder {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
void Control(int ctrl_id, VpxTplGopStats *arg) {
const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
}
#endif // CONFIG_VP9_ENCODER
#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER

Просмотреть файл

@ -170,7 +170,7 @@ class FwdTrans8x8TestBase {
for (int j = 0; j < 64; ++j) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = kSignBiasMaxDiff255;
EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
<< "Error: 8x8 FDCT/FHT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-255, 255] at index " << j
@ -201,7 +201,7 @@ class FwdTrans8x8TestBase {
for (int j = 0; j < 64; ++j) {
const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
const int max_diff = kSignBiasMaxDiff15;
EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
ASSERT_LT(diff, max_diff << (bit_depth_ - 8))
<< "Error: 8x8 FDCT/FHT has a sign bias > "
<< 1. * max_diff / count_test_block * 100 << "%"
<< " for input range [-15, 15] at index " << j
@ -275,11 +275,11 @@ class FwdTrans8x8TestBase {
}
}
EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
<< " roundtrip error > 1";
EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
<< "error > 1/5 per block";
}
@ -360,17 +360,17 @@ class FwdTrans8x8TestBase {
total_coeff_error += abs(coeff_diff);
}
EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error)
ASSERT_GE(1 << 2 * (bit_depth_ - 8), max_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
<< "an individual roundtrip error > 1";
<< " an individual roundtrip error > 1";
EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
ASSERT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
<< "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
<< " roundtrip error > 1/5 per block";
EXPECT_EQ(0, total_coeff_error)
ASSERT_EQ(0, total_coeff_error)
<< "Error: Extremal 8x8 FDCT/FHT has"
<< "overflow issues in the intermediate steps > 1";
<< " overflow issues in the intermediate steps > 1";
}
}
@ -426,7 +426,7 @@ class FwdTrans8x8TestBase {
const int diff = dst[j] - src[j];
#endif
const uint32_t error = diff * diff;
EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
ASSERT_GE(1u << 2 * (bit_depth_ - 8), error)
<< "Error: 8x8 IDCT has error " << error << " at index " << j;
}
}
@ -456,7 +456,7 @@ class FwdTrans8x8TestBase {
for (int j = 0; j < kNumCoeffs; ++j) {
const int32_t diff = coeff[j] - coeff_r[j];
const uint32_t error = diff * diff;
EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
ASSERT_GE(9u << 2 * (bit_depth_ - 8), error)
<< "Error: 8x8 DCT has error " << error << " at index " << j;
}
}
@ -512,7 +512,7 @@ class FwdTrans8x8TestBase {
const int diff = dst[j] - ref[j];
#endif
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error)
ASSERT_EQ(0u, error)
<< "Error: 8x8 IDCT has error " << error << " at index " << j;
}
}

Просмотреть файл

@ -184,13 +184,13 @@ class RegisterStateCheckMMX {
uint16_t pre_fpu_env_[14];
};
#define API_REGISTER_STATE_CHECK(statement) \
do { \
{ \
libvpx_test::RegisterStateCheckMMX reg_check; \
ASM_REGISTER_STATE_CHECK(statement); \
} \
__asm__ volatile("" ::: "memory"); \
#define API_REGISTER_STATE_CHECK(statement) \
do { \
{ \
libvpx_test::RegisterStateCheckMMX reg_check_mmx; \
ASM_REGISTER_STATE_CHECK(statement); \
} \
__asm__ volatile("" ::: "memory"); \
} while (false)
} // namespace libvpx_test

Просмотреть файл

@ -42,6 +42,10 @@ typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride);
typedef TestParams<SadMxNFunc> SadMxNParam;
typedef unsigned int (*SadSkipMxNFunc)(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride);
typedef TestParams<SadSkipMxNFunc> SadSkipMxNParam;
typedef unsigned int (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
const uint8_t *second_pred);
@ -52,6 +56,11 @@ typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
unsigned int *sad_array);
typedef TestParams<SadMxNx4Func> SadMxNx4Param;
typedef void (*SadSkipMxNx4Func)(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_ptr[], int ref_stride,
unsigned int *sad_array);
typedef TestParams<SadSkipMxNx4Func> SadSkipMxNx4Param;
typedef void (*SadMxNx8Func)(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sad_array);
@ -170,6 +179,34 @@ class SADTestBase : public ::testing::TestWithParam<ParamType> {
return sad;
}
// Reference "skip" SAD. Only rows 0, 2, 4, ... of the source block and of the
// reference block located at ref_offset are compared: the absolute per-pixel
// differences on those rows are summed and the sum is doubled before being
// returned. The 16-bit buffers are used when use_high_bit_depth_ is set
// (only compiled in with CONFIG_VP9_HIGHBITDEPTH).
uint32_t ReferenceSADSkip(int ref_offset) const {
  const uint8_t *const reference8 = GetReferenceFromOffset(ref_offset);
  const uint8_t *const source8 = source_data_;
#if CONFIG_VP9_HIGHBITDEPTH
  const uint16_t *const reference16 =
      CONVERT_TO_SHORTPTR(GetReferenceFromOffset(ref_offset));
  const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  uint32_t total = 0;
  for (int row = 0; row < params_.height; row += 2) {
    // Start-of-row offsets into the source and reference buffers.
    const int src_row = row * source_stride_;
    const int ref_row = row * reference_stride_;
    for (int col = 0; col < params_.width; ++col) {
      if (!use_high_bit_depth_) {
        total += abs(source8[src_row + col] - reference8[ref_row + col]);
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        total += abs(source16[src_row + col] - reference16[ref_row + col]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }
    }
  }
  // Half of the rows were skipped, so the accumulated sum is doubled.
  return total * 2;
}
// Sum of Absolute Differences Average. Given two blocks, and a prediction
// calculate the absolute difference between one pixel and average of the
// corresponding and predicted pixels; accumulate.
@ -290,6 +327,32 @@ class SADx4Test : public SADTestBase<SadMxNx4Param> {
}
};
// Fixture for the "skip" SAD functions that evaluate four reference blocks in
// a single call. Results are validated against ReferenceSADSkip().
class SADSkipx4Test : public SADTestBase<SadMxNx4Param> {
 public:
  SADSkipx4Test() : SADTestBase(GetParam()) {}

 protected:
  // Invokes the function under test on reference blocks 0..3 and writes the
  // four SAD values to `results`.
  void SADs(unsigned int *results) const {
    const uint8_t *references[4];
    for (int block = 0; block < 4; ++block) {
      references[block] = GetReference(block);
    }

    ASM_REGISTER_STATE_CHECK(params_.func(
        source_data_, source_stride_, references, reference_stride_, results));
  }

  // Compares each of the four computed SADs with the reference value for the
  // corresponding block; mismatches are reported per block.
  void CheckSADs() const {
    DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]);
    SADs(exp_sad);

    for (int block = 0; block < 4; ++block) {
      const uint32_t reference_sad =
          ReferenceSADSkip(GetBlockRefOffset(block));
      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
    }
  }
};
class SADTest : public AbstractBench, public SADTestBase<SadMxNParam> {
public:
SADTest() : SADTestBase(GetParam()) {}
@ -317,6 +380,33 @@ class SADTest : public AbstractBench, public SADTestBase<SadMxNParam> {
}
};
// Fixture for single-reference "skip" SAD functions. It validates the function
// under test against ReferenceSADSkip() and, through AbstractBench, exposes
// Run() for timing.
class SADSkipTest : public AbstractBench, public SADTestBase<SadMxNParam> {
 public:
  SADSkipTest() : SADTestBase(GetParam()) {}

 protected:
  // Returns the SAD computed by the function under test between the source
  // block and reference block `block_idx`.
  unsigned int SAD(int block_idx) const {
    const uint8_t *const reference = GetReference(block_idx);
    unsigned int ret;
    ASM_REGISTER_STATE_CHECK(ret = params_.func(source_data_, source_stride_,
                                                reference, reference_stride_));
    return ret;
  }

  // Requires the computed SAD for block 0 to equal the reference value.
  void CheckSAD() const {
    const unsigned int reference_sad = ReferenceSADSkip(GetBlockRefOffset(0));
    const unsigned int exp_sad = SAD(0);

    ASSERT_EQ(reference_sad, exp_sad);
  }

  // Benchmark body: one call of the function under test; the result is
  // discarded.
  void Run() override {
    params_.func(source_data_, source_stride_, reference_data_,
                 reference_stride_);
  }
};
class SADavgTest : public AbstractBench, public SADTestBase<SadMxNAvgParam> {
public:
SADavgTest() : SADTestBase(GetParam()) {}
@ -397,6 +487,58 @@ TEST_P(SADTest, DISABLED_Speed) {
PrintMedian(title);
}
TEST_P(SADSkipTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(reference_data_, reference_stride_, mask_);
CheckSAD();
}
TEST_P(SADSkipTest, MaxSrc) {
FillConstant(source_data_, source_stride_, mask_);
FillConstant(reference_data_, reference_stride_, 0);
CheckSAD();
}
TEST_P(SADSkipTest, ShortRef) {
const int tmp_stride = reference_stride_;
reference_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
CheckSAD();
reference_stride_ = tmp_stride;
}
TEST_P(SADSkipTest, UnalignedRef) {
// The reference frame, but not the source frame, may be unaligned for
// certain types of searches.
const int tmp_stride = reference_stride_;
reference_stride_ -= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
CheckSAD();
reference_stride_ = tmp_stride;
}
TEST_P(SADSkipTest, ShortSrc) {
const int tmp_stride = source_stride_;
source_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
FillRandom(reference_data_, reference_stride_);
CheckSAD();
source_stride_ = tmp_stride;
}
TEST_P(SADSkipTest, DISABLED_Speed) {
const int kCountSpeedTestBlock = 50000000 / (params_.width * params_.height);
FillRandom(source_data_, source_stride_);
RunNTimes(kCountSpeedTestBlock);
char title[16];
snprintf(title, sizeof(title), "%dx%d", params_.width, params_.height);
PrintMedian(title);
}
TEST_P(SADavgTest, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(reference_data_, reference_stride_, mask_);
@ -554,6 +696,105 @@ TEST_P(SADx4Test, DISABLED_Speed) {
reference_stride_ = tmp_stride;
}
TEST_P(SADSkipx4Test, MaxRef) {
FillConstant(source_data_, source_stride_, 0);
FillConstant(GetReference(0), reference_stride_, mask_);
FillConstant(GetReference(1), reference_stride_, mask_);
FillConstant(GetReference(2), reference_stride_, mask_);
FillConstant(GetReference(3), reference_stride_, mask_);
CheckSADs();
}
TEST_P(SADSkipx4Test, MaxSrc) {
FillConstant(source_data_, source_stride_, mask_);
FillConstant(GetReference(0), reference_stride_, 0);
FillConstant(GetReference(1), reference_stride_, 0);
FillConstant(GetReference(2), reference_stride_, 0);
FillConstant(GetReference(3), reference_stride_, 0);
CheckSADs();
}
TEST_P(SADSkipx4Test, ShortRef) {
int tmp_stride = reference_stride_;
reference_stride_ >>= 1;
FillRandom(source_data_, source_stride_);
FillRandom(GetReference(0), reference_stride_);
FillRandom(GetReference(1), reference_stride_);
FillRandom(GetReference(2), reference_stride_);
FillRandom(GetReference(3), reference_stride_);
CheckSADs();
reference_stride_ = tmp_stride;
}
TEST_P(SADSkipx4Test, UnalignedRef) {
  // Certain types of searches may use an unaligned reference frame (the
  // source frame is never unaligned), so shorten the reference stride by one
  // for this check and restore it afterwards.
  const int saved_reference_stride = reference_stride_;
  --reference_stride_;

  FillRandom(source_data_, source_stride_);
  for (int ref = 0; ref < 4; ++ref) {
    FillRandom(GetReference(ref), reference_stride_);
  }
  CheckSADs();

  reference_stride_ = saved_reference_stride;
}
TEST_P(SADSkipx4Test, ShortSrc) {
  // Run the check with the source stride halved, restoring it afterwards.
  const int saved_source_stride = source_stride_;
  source_stride_ = source_stride_ >> 1;

  FillRandom(source_data_, source_stride_);
  for (int ref = 0; ref < 4; ++ref) {
    FillRandom(GetReference(ref), reference_stride_);
  }
  CheckSADs();

  source_stride_ = saved_source_stride;
}
TEST_P(SADSkipx4Test, SrcAlignedByWidth) {
  // Advance the source pointer by one block width for this check, then
  // restore the fixture's original pointer.
  uint8_t *const original_source = source_data_;
  source_data_ = original_source + params_.width;

  FillRandom(source_data_, source_stride_);
  for (int ref = 0; ref < 4; ++ref) {
    FillRandom(GetReference(ref), reference_stride_);
  }
  CheckSADs();

  source_data_ = original_source;
}
TEST_P(SADSkipx4Test, DISABLED_Speed) {
  // Time the function with the reference stride shortened by one; the stride
  // is restored at the end.
  const int saved_reference_stride = reference_stride_;
  --reference_stride_;

  FillRandom(source_data_, source_stride_);
  for (int ref = 0; ref < 4; ++ref) {
    FillRandom(GetReference(ref), reference_stride_);
  }

  // The iteration count shrinks as the block area grows.
  const int kCountSpeedTestBlock = 500000000 / (params_.width * params_.height);

  // Compute the expected value for each of the four blocks before timing.
  uint32_t reference_sad[4];
  for (int block = 0; block < 4; ++block) {
    reference_sad[block] = ReferenceSADSkip(GetBlockRefOffset(block));
  }

  // Timed region: repeated calls writing into the aligned output array.
  DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]);
  vpx_usec_timer timer;
  vpx_usec_timer_start(&timer);
  for (int iter = 0; iter < kCountSpeedTestBlock; ++iter) {
    SADs(exp_sad);
  }
  vpx_usec_timer_mark(&timer);
  // Timer value divided by 1000, reported below as milliseconds.
  const int elapsed_time =
      static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);

  // The last timed call's output must still match the reference values.
  for (int block = 0; block < 4; ++block) {
    EXPECT_EQ(reference_sad[block], exp_sad[block]) << "block " << block;
  }

  printf("sad%dx%dx4 (%2dbit) time: %5d ms\n", params_.width, params_.height,
         bit_depth_, elapsed_time);

  reference_stride_ = saved_reference_stride;
}
//------------------------------------------------------------------------------
// C functions
const SadMxNParam c_tests[] = {
@ -614,6 +855,56 @@ const SadMxNParam c_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(C, SADTest, ::testing::ValuesIn(c_tests));
// Parameter table for SADSkipTest covering the plain-C vpx_sad_skip_* functions.
// Each entry gives the block dimensions (matching the size in the function
// name) and the function under test. Entries inside CONFIG_VP9_HIGHBITDEPTH
// pass a further argument of 8, 10 or 12 -- presumably the bit depth; confirm
// against the SadSkipMxNParam definition.
const SadSkipMxNParam skip_c_tests[] = {
SadSkipMxNParam(64, 64, &vpx_sad_skip_64x64_c),
SadSkipMxNParam(64, 32, &vpx_sad_skip_64x32_c),
SadSkipMxNParam(32, 64, &vpx_sad_skip_32x64_c),
SadSkipMxNParam(32, 32, &vpx_sad_skip_32x32_c),
SadSkipMxNParam(32, 16, &vpx_sad_skip_32x16_c),
SadSkipMxNParam(16, 32, &vpx_sad_skip_16x32_c),
SadSkipMxNParam(16, 16, &vpx_sad_skip_16x16_c),
SadSkipMxNParam(16, 8, &vpx_sad_skip_16x8_c),
SadSkipMxNParam(8, 16, &vpx_sad_skip_8x16_c),
SadSkipMxNParam(8, 8, &vpx_sad_skip_8x8_c),
SadSkipMxNParam(4, 8, &vpx_sad_skip_4x8_c),
#if CONFIG_VP9_HIGHBITDEPTH
// The same eleven block sizes, repeated once per value (8, 10, 12) of the
// extra argument.
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_c, 8),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_c, 8),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_c, 8),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_c, 8),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_c, 8),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_c, 8),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_c, 8),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_c, 8),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_c, 8),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_c, 8),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_c, 8),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_c, 10),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_c, 10),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_c, 10),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_c, 10),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_c, 10),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_c, 10),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_c, 10),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_c, 10),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_c, 10),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_c, 10),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_c, 10),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_c, 12),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_c, 12),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_c, 12),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_c, 12),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_c, 12),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_c, 12),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_c, 12),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_c, 12),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_c, 12),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_c, 12),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_c, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipTest case once per table entry, under the "C" prefix.
INSTANTIATE_TEST_SUITE_P(C, SADSkipTest, ::testing::ValuesIn(skip_c_tests));
const SadMxNAvgParam avg_c_tests[] = {
SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_c),
SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_c),
@ -730,6 +1021,57 @@ const SadMxNx4Param x4d_c_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
// Parameter table for SADSkipx4Test covering the plain-C vpx_sad_skip_*x4d
// functions. Each entry gives the block dimensions (matching the size in the
// function name) and the function under test. Entries inside
// CONFIG_VP9_HIGHBITDEPTH pass a further argument of 8, 10 or 12 -- presumably
// the bit depth; confirm against the SadSkipMxNx4Param definition.
const SadSkipMxNx4Param skip_x4d_c_tests[] = {
SadSkipMxNx4Param(64, 64, &vpx_sad_skip_64x64x4d_c),
SadSkipMxNx4Param(64, 32, &vpx_sad_skip_64x32x4d_c),
SadSkipMxNx4Param(32, 64, &vpx_sad_skip_32x64x4d_c),
SadSkipMxNx4Param(32, 32, &vpx_sad_skip_32x32x4d_c),
SadSkipMxNx4Param(32, 16, &vpx_sad_skip_32x16x4d_c),
SadSkipMxNx4Param(16, 32, &vpx_sad_skip_16x32x4d_c),
SadSkipMxNx4Param(16, 16, &vpx_sad_skip_16x16x4d_c),
SadSkipMxNx4Param(16, 8, &vpx_sad_skip_16x8x4d_c),
SadSkipMxNx4Param(8, 16, &vpx_sad_skip_8x16x4d_c),
SadSkipMxNx4Param(8, 8, &vpx_sad_skip_8x8x4d_c),
SadSkipMxNx4Param(4, 8, &vpx_sad_skip_4x8x4d_c),
#if CONFIG_VP9_HIGHBITDEPTH
// The same eleven block sizes, repeated once per value (8, 10, 12) of the
// extra argument.
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_c, 8),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_c, 8),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_c, 8),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_c, 8),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_c, 8),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_c, 8),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_c, 8),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_c, 8),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_c, 8),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_c, 8),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_c, 8),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_c, 10),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_c, 10),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_c, 10),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_c, 10),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_c, 10),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_c, 10),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_c, 10),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_c, 10),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_c, 10),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_c, 10),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_c, 10),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_c, 12),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_c, 12),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_c, 12),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_c, 12),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_c, 12),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_c, 12),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_c, 12),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_c, 12),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_c, 12),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_c, 12),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_c, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipx4Test case once per table entry, under the "C" prefix.
INSTANTIATE_TEST_SUITE_P(C, SADSkipx4Test,
::testing::ValuesIn(skip_x4d_c_tests));
//------------------------------------------------------------------------------
// ARM functions
#if HAVE_NEON
@ -787,6 +1129,65 @@ const SadMxNParam neon_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));
// Parameter table for SADSkipTest covering the NEON vpx_sad_skip_* functions.
// Each entry gives the block dimensions (matching the size in the function
// name) and the function under test. Unlike the C table, this one also lists
// 8x4 and 4x4. Entries inside CONFIG_VP9_HIGHBITDEPTH pass a further argument
// of 8, 10 or 12 -- presumably the bit depth; confirm against the
// SadSkipMxNParam definition.
const SadSkipMxNParam skip_neon_tests[] = {
SadSkipMxNParam(64, 64, &vpx_sad_skip_64x64_neon),
SadSkipMxNParam(64, 32, &vpx_sad_skip_64x32_neon),
SadSkipMxNParam(32, 64, &vpx_sad_skip_32x64_neon),
SadSkipMxNParam(32, 32, &vpx_sad_skip_32x32_neon),
SadSkipMxNParam(32, 16, &vpx_sad_skip_32x16_neon),
SadSkipMxNParam(16, 32, &vpx_sad_skip_16x32_neon),
SadSkipMxNParam(16, 16, &vpx_sad_skip_16x16_neon),
SadSkipMxNParam(16, 8, &vpx_sad_skip_16x8_neon),
SadSkipMxNParam(8, 16, &vpx_sad_skip_8x16_neon),
SadSkipMxNParam(8, 8, &vpx_sad_skip_8x8_neon),
SadSkipMxNParam(8, 4, &vpx_sad_skip_8x4_neon),
SadSkipMxNParam(4, 8, &vpx_sad_skip_4x8_neon),
SadSkipMxNParam(4, 4, &vpx_sad_skip_4x4_neon),
#if CONFIG_VP9_HIGHBITDEPTH
// All thirteen block sizes, listed smallest first, repeated once per value
// (8, 10, 12) of the extra argument.
SadSkipMxNParam(4, 4, &vpx_highbd_sad_skip_4x4_neon, 8),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_neon, 8),
SadSkipMxNParam(8, 4, &vpx_highbd_sad_skip_8x4_neon, 8),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_neon, 8),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_neon, 8),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_neon, 8),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_neon, 8),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_neon, 8),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_neon, 8),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_neon, 8),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_neon, 8),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_neon, 8),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_neon, 8),
SadSkipMxNParam(4, 4, &vpx_highbd_sad_skip_4x4_neon, 10),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_neon, 10),
SadSkipMxNParam(8, 4, &vpx_highbd_sad_skip_8x4_neon, 10),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_neon, 10),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_neon, 10),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_neon, 10),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_neon, 10),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_neon, 10),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_neon, 10),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_neon, 10),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_neon, 10),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_neon, 10),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_neon, 10),
SadSkipMxNParam(4, 4, &vpx_highbd_sad_skip_4x4_neon, 12),
SadSkipMxNParam(4, 8, &vpx_highbd_sad_skip_4x8_neon, 12),
SadSkipMxNParam(8, 4, &vpx_highbd_sad_skip_8x4_neon, 12),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_neon, 12),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_neon, 12),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_neon, 12),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_neon, 12),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_neon, 12),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_neon, 12),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_neon, 12),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_neon, 12),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_neon, 12),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_neon, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipTest case once per table entry, under the "NEON" prefix.
INSTANTIATE_TEST_SUITE_P(NEON, SADSkipTest,
::testing::ValuesIn(skip_neon_tests));
const SadMxNAvgParam avg_neon_tests[] = {
SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_neon),
SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_neon),
@ -899,6 +1300,62 @@ const SadMxNx4Param x4d_neon_tests[] = {
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_SUITE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
// Parameter table for SADSkipx4Test covering the NEON vpx_sad_skip_*x4d
// functions. Each entry gives the block dimensions (matching the size in the
// function name) and the function under test. Entries inside
// CONFIG_VP9_HIGHBITDEPTH pass a further argument of 8, 10 or 12 -- presumably
// the bit depth; confirm against the SadSkipMxNx4Param definition.
//
// The high-bitdepth section lists the same thirteen block sizes as the
// low-bitdepth section. The 32x16 entries were previously absent from it even
// though every sibling skip table (C, SSE2 and AVX2 x4d, and the non-x4d NEON
// table) covers that size, so the 32x16x4d high-bitdepth kernel was never
// exercised by this suite.
const SadSkipMxNx4Param skip_x4d_neon_tests[] = {
  SadSkipMxNx4Param(64, 64, &vpx_sad_skip_64x64x4d_neon),
  SadSkipMxNx4Param(64, 32, &vpx_sad_skip_64x32x4d_neon),
  SadSkipMxNx4Param(32, 64, &vpx_sad_skip_32x64x4d_neon),
  SadSkipMxNx4Param(32, 32, &vpx_sad_skip_32x32x4d_neon),
  SadSkipMxNx4Param(32, 16, &vpx_sad_skip_32x16x4d_neon),
  SadSkipMxNx4Param(16, 32, &vpx_sad_skip_16x32x4d_neon),
  SadSkipMxNx4Param(16, 16, &vpx_sad_skip_16x16x4d_neon),
  SadSkipMxNx4Param(16, 8, &vpx_sad_skip_16x8x4d_neon),
  SadSkipMxNx4Param(8, 16, &vpx_sad_skip_8x16x4d_neon),
  SadSkipMxNx4Param(8, 8, &vpx_sad_skip_8x8x4d_neon),
  SadSkipMxNx4Param(8, 4, &vpx_sad_skip_8x4x4d_neon),
  SadSkipMxNx4Param(4, 8, &vpx_sad_skip_4x8x4d_neon),
  SadSkipMxNx4Param(4, 4, &vpx_sad_skip_4x4x4d_neon),
#if CONFIG_VP9_HIGHBITDEPTH
  SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 8),
  SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 8),
  SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 8),
  SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 8),
  SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 8),
  SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 8),
  SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 8),
  SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 8),
  SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_neon, 8),
  SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 8),
  SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 8),
  SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 8),
  SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 8),
  SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 10),
  SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 10),
  SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 10),
  SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 10),
  SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 10),
  SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 10),
  SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 10),
  SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 10),
  SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_neon, 10),
  SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 10),
  SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 10),
  SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 10),
  SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 10),
  SadSkipMxNx4Param(4, 4, &vpx_highbd_sad_skip_4x4x4d_neon, 12),
  SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_neon, 12),
  SadSkipMxNx4Param(8, 4, &vpx_highbd_sad_skip_8x4x4d_neon, 12),
  SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_neon, 12),
  SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_neon, 12),
  SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_neon, 12),
  SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_neon, 12),
  SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_neon, 12),
  SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_neon, 12),
  SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_neon, 12),
  SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_neon, 12),
  SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_neon, 12),
  SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_neon, 12),
#endif  // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipx4Test case once per table entry, under the "NEON" prefix.
INSTANTIATE_TEST_SUITE_P(NEON, SADSkipx4Test,
                         ::testing::ValuesIn(skip_x4d_neon_tests));
#endif // HAVE_NEON
//------------------------------------------------------------------------------
@ -956,6 +1413,54 @@ const SadMxNParam sse2_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));
// Parameter table for SADSkipTest covering the SSE2 vpx_sad_skip_* functions.
// Each entry gives the block dimensions (matching the size in the function
// name) and the function under test. Entries inside CONFIG_VP9_HIGHBITDEPTH
// pass a further argument of 8, 10 or 12 -- presumably the bit depth; confirm
// against the SadSkipMxNParam definition.
const SadSkipMxNParam skip_sse2_tests[] = {
SadSkipMxNParam(64, 64, &vpx_sad_skip_64x64_sse2),
SadSkipMxNParam(64, 32, &vpx_sad_skip_64x32_sse2),
SadSkipMxNParam(32, 64, &vpx_sad_skip_32x64_sse2),
SadSkipMxNParam(32, 32, &vpx_sad_skip_32x32_sse2),
SadSkipMxNParam(32, 16, &vpx_sad_skip_32x16_sse2),
SadSkipMxNParam(16, 32, &vpx_sad_skip_16x32_sse2),
SadSkipMxNParam(16, 16, &vpx_sad_skip_16x16_sse2),
SadSkipMxNParam(16, 8, &vpx_sad_skip_16x8_sse2),
SadSkipMxNParam(8, 16, &vpx_sad_skip_8x16_sse2),
SadSkipMxNParam(8, 8, &vpx_sad_skip_8x8_sse2),
SadSkipMxNParam(4, 8, &vpx_sad_skip_4x8_sse2),
#if CONFIG_VP9_HIGHBITDEPTH
// The high-bitdepth entries stop at 8x8; there is no 4x8 entry here, unlike
// the low-bitdepth entries above.
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_sse2, 8),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_sse2, 8),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_sse2, 8),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_sse2, 8),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_sse2, 8),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_sse2, 8),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_sse2, 8),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_sse2, 8),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_sse2, 8),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_sse2, 8),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_sse2, 10),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_sse2, 10),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_sse2, 10),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_sse2, 10),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_sse2, 10),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_sse2, 10),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_sse2, 10),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_sse2, 10),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_sse2, 10),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_sse2, 10),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_sse2, 12),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_sse2, 12),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_sse2, 12),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_sse2, 12),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_sse2, 12),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_sse2, 12),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_sse2, 12),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_sse2, 12),
SadSkipMxNParam(8, 16, &vpx_highbd_sad_skip_8x16_sse2, 12),
SadSkipMxNParam(8, 8, &vpx_highbd_sad_skip_8x8_sse2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipTest case once per table entry, under the "SSE2" prefix.
INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipTest,
::testing::ValuesIn(skip_sse2_tests));
const SadMxNAvgParam avg_sse2_tests[] = {
SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_sse2),
SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_sse2),
@ -1065,6 +1570,57 @@ const SadMxNx4Param x4d_sse2_tests[] = {
#endif // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_SUITE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests));
// Parameter table for SADSkipx4Test covering the SSE2 vpx_sad_skip_*x4d
// functions. Each entry gives the block dimensions (matching the size in the
// function name) and the function under test. Entries inside
// CONFIG_VP9_HIGHBITDEPTH pass a further argument of 8, 10 or 12 -- presumably
// the bit depth; confirm against the SadSkipMxNx4Param definition.
const SadSkipMxNx4Param skip_x4d_sse2_tests[] = {
SadSkipMxNx4Param(64, 64, &vpx_sad_skip_64x64x4d_sse2),
SadSkipMxNx4Param(64, 32, &vpx_sad_skip_64x32x4d_sse2),
SadSkipMxNx4Param(32, 64, &vpx_sad_skip_32x64x4d_sse2),
SadSkipMxNx4Param(32, 32, &vpx_sad_skip_32x32x4d_sse2),
SadSkipMxNx4Param(32, 16, &vpx_sad_skip_32x16x4d_sse2),
SadSkipMxNx4Param(16, 32, &vpx_sad_skip_16x32x4d_sse2),
SadSkipMxNx4Param(16, 16, &vpx_sad_skip_16x16x4d_sse2),
SadSkipMxNx4Param(16, 8, &vpx_sad_skip_16x8x4d_sse2),
SadSkipMxNx4Param(8, 16, &vpx_sad_skip_8x16x4d_sse2),
SadSkipMxNx4Param(8, 8, &vpx_sad_skip_8x8x4d_sse2),
SadSkipMxNx4Param(4, 8, &vpx_sad_skip_4x8x4d_sse2),
#if CONFIG_VP9_HIGHBITDEPTH
// The same eleven block sizes, repeated once per value (8, 10, 12) of the
// extra argument.
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_sse2, 8),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_sse2, 8),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_sse2, 8),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_sse2, 8),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_sse2, 8),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_sse2, 8),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_sse2, 8),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_sse2, 8),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_sse2, 8),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_sse2, 8),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_sse2, 8),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_sse2, 10),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_sse2, 10),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_sse2, 10),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_sse2, 10),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_sse2, 10),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_sse2, 10),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_sse2, 10),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_sse2, 10),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_sse2, 10),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_sse2, 10),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_sse2, 10),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_sse2, 12),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_sse2, 12),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_sse2, 12),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_sse2, 12),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_sse2, 12),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_sse2, 12),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_sse2, 12),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_sse2, 12),
SadSkipMxNx4Param(8, 16, &vpx_highbd_sad_skip_8x16x4d_sse2, 12),
SadSkipMxNx4Param(8, 8, &vpx_highbd_sad_skip_8x8x4d_sse2, 12),
SadSkipMxNx4Param(4, 8, &vpx_highbd_sad_skip_4x8x4d_sse2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipx4Test case once per table entry, under the "SSE2" prefix.
INSTANTIATE_TEST_SUITE_P(SSE2, SADSkipx4Test,
::testing::ValuesIn(skip_x4d_sse2_tests));
#endif // HAVE_SSE2
#if HAVE_SSE3
@ -1113,6 +1669,44 @@ const SadMxNParam avx2_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests));
// Parameter table for SADSkipTest covering the AVX2 vpx_sad_skip_* functions.
// Each entry gives the block dimensions (matching the size in the function
// name) and the function under test. The low-bitdepth entries cover only the
// block sizes of width 32 or 64. Entries inside CONFIG_VP9_HIGHBITDEPTH pass a
// further argument of 8, 10 or 12 -- presumably the bit depth; confirm against
// the SadSkipMxNParam definition.
const SadSkipMxNParam skip_avx2_tests[] = {
SadSkipMxNParam(64, 64, &vpx_sad_skip_64x64_avx2),
SadSkipMxNParam(64, 32, &vpx_sad_skip_64x32_avx2),
SadSkipMxNParam(32, 64, &vpx_sad_skip_32x64_avx2),
SadSkipMxNParam(32, 32, &vpx_sad_skip_32x32_avx2),
SadSkipMxNParam(32, 16, &vpx_sad_skip_32x16_avx2),
#if CONFIG_VP9_HIGHBITDEPTH
// The high-bitdepth entries additionally cover the width-16 block sizes.
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_avx2, 8),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_avx2, 8),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_avx2, 8),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_avx2, 8),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_avx2, 8),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_avx2, 8),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_avx2, 8),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_avx2, 8),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_avx2, 10),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_avx2, 10),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_avx2, 10),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_avx2, 10),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_avx2, 10),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_avx2, 10),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_avx2, 10),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_avx2, 10),
SadSkipMxNParam(64, 64, &vpx_highbd_sad_skip_64x64_avx2, 12),
SadSkipMxNParam(64, 32, &vpx_highbd_sad_skip_64x32_avx2, 12),
SadSkipMxNParam(32, 64, &vpx_highbd_sad_skip_32x64_avx2, 12),
SadSkipMxNParam(32, 32, &vpx_highbd_sad_skip_32x32_avx2, 12),
SadSkipMxNParam(32, 16, &vpx_highbd_sad_skip_32x16_avx2, 12),
SadSkipMxNParam(16, 32, &vpx_highbd_sad_skip_16x32_avx2, 12),
SadSkipMxNParam(16, 16, &vpx_highbd_sad_skip_16x16_avx2, 12),
SadSkipMxNParam(16, 8, &vpx_highbd_sad_skip_16x8_avx2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipTest case once per table entry, under the "AVX2" prefix.
INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipTest,
::testing::ValuesIn(skip_avx2_tests));
const SadMxNAvgParam avg_avx2_tests[] = {
SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_avx2),
SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_avx2),
@ -1180,6 +1774,42 @@ const SadMxNx4Param x4d_avx2_tests[] = {
};
INSTANTIATE_TEST_SUITE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
// Parameter table for SADSkipx4Test covering the AVX2 vpx_sad_skip_*x4d
// functions. Each entry gives the block dimensions (matching the size in the
// function name) and the function under test. The low-bitdepth entries cover
// only the block sizes of width 32 or 64. Entries inside
// CONFIG_VP9_HIGHBITDEPTH pass a further argument of 8, 10 or 12 -- presumably
// the bit depth; confirm against the SadSkipMxNx4Param definition.
const SadSkipMxNx4Param skip_x4d_avx2_tests[] = {
SadSkipMxNx4Param(64, 64, &vpx_sad_skip_64x64x4d_avx2),
SadSkipMxNx4Param(64, 32, &vpx_sad_skip_64x32x4d_avx2),
SadSkipMxNx4Param(32, 64, &vpx_sad_skip_32x64x4d_avx2),
SadSkipMxNx4Param(32, 32, &vpx_sad_skip_32x32x4d_avx2),
SadSkipMxNx4Param(32, 16, &vpx_sad_skip_32x16x4d_avx2),
#if CONFIG_VP9_HIGHBITDEPTH
// The high-bitdepth entries additionally cover the width-16 block sizes.
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_avx2, 8),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_avx2, 8),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_avx2, 8),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_avx2, 8),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_avx2, 8),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_avx2, 8),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_avx2, 8),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_avx2, 8),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_avx2, 10),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_avx2, 10),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_avx2, 10),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_avx2, 10),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_avx2, 10),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_avx2, 10),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_avx2, 10),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_avx2, 10),
SadSkipMxNx4Param(64, 64, &vpx_highbd_sad_skip_64x64x4d_avx2, 12),
SadSkipMxNx4Param(64, 32, &vpx_highbd_sad_skip_64x32x4d_avx2, 12),
SadSkipMxNx4Param(32, 64, &vpx_highbd_sad_skip_32x64x4d_avx2, 12),
SadSkipMxNx4Param(32, 32, &vpx_highbd_sad_skip_32x32x4d_avx2, 12),
SadSkipMxNx4Param(32, 16, &vpx_highbd_sad_skip_32x16x4d_avx2, 12),
SadSkipMxNx4Param(16, 32, &vpx_highbd_sad_skip_16x32x4d_avx2, 12),
SadSkipMxNx4Param(16, 16, &vpx_highbd_sad_skip_16x16x4d_avx2, 12),
SadSkipMxNx4Param(16, 8, &vpx_highbd_sad_skip_16x8x4d_avx2, 12),
#endif // CONFIG_VP9_HIGHBITDEPTH
};
// Run every SADSkipx4Test case once per table entry, under the "AVX2" prefix.
INSTANTIATE_TEST_SUITE_P(AVX2, SADSkipx4Test,
::testing::ValuesIn(skip_x4d_avx2_tests));
#endif // HAVE_AVX2
#if HAVE_AVX512

Просмотреть файл

@ -39,10 +39,10 @@ namespace {
const int number_of_iterations = 100;
typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
const macroblock_plane *const mb_plane,
const macroblock_plane *mb_plane,
tran_low_t *qcoeff, tran_low_t *dqcoeff,
const int16_t *dequant, uint16_t *eob,
const struct ScanOrder *const scan_order);
const struct ScanOrder *scan_order);
typedef std::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
int /*max_size*/, bool /*is_fp*/>
QuantizeParam;

Просмотреть файл

@ -35,101 +35,134 @@ static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = {
#define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_h0, filt_h1, \
filt_h2) \
({ \
v16i8 vec0_m, vec1_m, vec2_m; \
v8i16 hz_out_m; \
v16i8 _6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m; \
v8i16 _6tap_out_m; \
\
VSHF_B3_SB(src0, src1, src0, src1, src0, src1, mask0, mask1, mask2, \
vec0_m, vec1_m, vec2_m); \
hz_out_m = \
DPADD_SH3_SH(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); \
_6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m); \
_6tap_out_m = DPADD_SH3_SH(_6tap_vec0_m, _6tap_vec1_m, _6tap_vec2_m, \
filt_h0, filt_h1, filt_h2); \
\
hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \
hz_out_m = __msa_sat_s_h(hz_out_m, 7); \
_6tap_out_m = __msa_srari_h(_6tap_out_m, VP8_FILTER_SHIFT); \
_6tap_out_m = __msa_sat_s_h(_6tap_out_m, 7); \
\
hz_out_m; \
_6tap_out_m; \
})
#define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
mask2, filt0, filt1, filt2, out0, out1) \
{ \
v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m; \
v16i8 _6tap_4wid_vec0_m, _6tap_4wid_vec1_m, _6tap_4wid_vec2_m, \
_6tap_4wid_vec3_m, _6tap_4wid_vec4_m, _6tap_4wid_vec5_m; \
\
VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \
DPADD_SB2_SH(vec4_m, vec5_m, filt2, filt2, out0, out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, _6tap_4wid_vec0_m, \
_6tap_4wid_vec1_m); \
DOTP_SB2_SH(_6tap_4wid_vec0_m, _6tap_4wid_vec1_m, filt0, filt0, out0, \
out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, _6tap_4wid_vec2_m, \
_6tap_4wid_vec3_m); \
DPADD_SB2_SH(_6tap_4wid_vec2_m, _6tap_4wid_vec3_m, filt1, filt1, out0, \
out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, _6tap_4wid_vec4_m, \
_6tap_4wid_vec5_m); \
DPADD_SB2_SH(_6tap_4wid_vec4_m, _6tap_4wid_vec5_m, filt2, filt2, out0, \
out1); \
}
#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
mask2, filt0, filt1, filt2, out0, out1, \
out2, out3) \
{ \
v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \
\
VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
out0, out1, out2, out3); \
VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \
VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec4_m, vec5_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec6_m, vec7_m); \
DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \
out0, out1, out2, out3); \
DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt2, filt2, filt2, filt2, \
out0, out1, out2, out3); \
#define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
mask2, filt0, filt1, filt2, out0, out1, \
out2, out3) \
{ \
v16i8 _6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \
_6tap_8wid_vec3_m, _6tap_8wid_vec4_m, _6tap_8wid_vec5_m, \
_6tap_8wid_vec6_m, _6tap_8wid_vec7_m; \
\
VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, _6tap_8wid_vec0_m, \
_6tap_8wid_vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, _6tap_8wid_vec2_m, \
_6tap_8wid_vec3_m); \
DOTP_SB4_SH(_6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \
_6tap_8wid_vec3_m, filt0, filt0, filt0, filt0, out0, out1, \
out2, out3); \
VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, _6tap_8wid_vec0_m, \
_6tap_8wid_vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, _6tap_8wid_vec2_m, \
_6tap_8wid_vec3_m); \
VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, _6tap_8wid_vec4_m, \
_6tap_8wid_vec5_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, _6tap_8wid_vec6_m, \
_6tap_8wid_vec7_m); \
DPADD_SB4_SH(_6tap_8wid_vec0_m, _6tap_8wid_vec1_m, _6tap_8wid_vec2_m, \
_6tap_8wid_vec3_m, filt1, filt1, filt1, filt1, out0, out1, \
out2, out3); \
DPADD_SB4_SH(_6tap_8wid_vec4_m, _6tap_8wid_vec5_m, _6tap_8wid_vec6_m, \
_6tap_8wid_vec7_m, filt2, filt2, filt2, filt2, out0, out1, \
out2, out3); \
}
#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \
({ \
v8i16 tmp0; \
\
tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \
\
tmp0; \
})
#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \
#define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \
({ \
v16i8 vec0_m, vec1_m; \
v8i16 hz_out_m; \
v8i16 _4tap_dpadd_tmp0; \
\
VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0_m, vec1_m); \
hz_out_m = FILT_4TAP_DPADD_S_H(vec0_m, vec1_m, filt_h0, filt_h1); \
_4tap_dpadd_tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \
_4tap_dpadd_tmp0 = \
__msa_dpadd_s_h(_4tap_dpadd_tmp0, (v16i8)vec1, (v16i8)filt1); \
\
hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \
hz_out_m = __msa_sat_s_h(hz_out_m, 7); \
\
hz_out_m; \
_4tap_dpadd_tmp0; \
})
#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1) \
{ \
v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \
\
VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \
DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \
DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \
#define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \
({ \
v16i8 _4tap_vec0_m, _4tap_vec1_m; \
v8i16 _4tap_out_m; \
\
VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, _4tap_vec0_m, \
_4tap_vec1_m); \
_4tap_out_m = \
FILT_4TAP_DPADD_S_H(_4tap_vec0_m, _4tap_vec1_m, filt_h0, filt_h1); \
\
_4tap_out_m = __msa_srari_h(_4tap_out_m, VP8_FILTER_SHIFT); \
_4tap_out_m = __msa_sat_s_h(_4tap_out_m, 7); \
\
_4tap_out_m; \
})
/* Horizontal 4-tap filter over four source vectors, writing two accumulators
 * (out0, out1) that the caller supplies as lvalues.
 *   - First pass: VSHF_B2_SB pairs (src0, src1) and (src2, src3) using mask0,
 *     and DOTP_SB2_SH initialises out0/out1 from those vectors and filt0.
 *   - Second pass: the same source pairs are shuffled with mask1 and
 *     DPADD_SB2_SH adds their filt1 products onto out0/out1.
 * No rounding or saturation happens here; the macro only accumulates.
 * The helper macros (VSHF_B2_SB, DOTP_SB2_SH, DPADD_SB2_SH) are defined
 * elsewhere. */
#define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1) \
{ \
v16i8 _4tap_4wid_vec0_m, _4tap_4wid_vec1_m, _4tap_4wid_vec2_m, \
_4tap_4wid_vec3_m; \
\
VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, _4tap_4wid_vec0_m, \
_4tap_4wid_vec1_m); \
DOTP_SB2_SH(_4tap_4wid_vec0_m, _4tap_4wid_vec1_m, filt0, filt0, out0, \
out1); \
VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, _4tap_4wid_vec2_m, \
_4tap_4wid_vec3_m); \
DPADD_SB2_SH(_4tap_4wid_vec2_m, _4tap_4wid_vec3_m, filt1, filt1, out0, \
out1); \
}
#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1, out2, out3) \
{ \
v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \
\
VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \
DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \
out0, out1, out2, out3); \
VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \
DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \
out0, out1, out2, out3); \
/* Horizontal 4-tap filter over four source vectors, writing four accumulators
 * (out0..out3) that the caller supplies as lvalues. Unlike the 4-wide
 * variant, each source vector is shuffled with itself (srcN, srcN).
 *   - First pass: every source is shuffled with mask0 and DOTP_SB4_SH
 *     initialises out0..out3 with the filt0 products.
 *   - Second pass: the same four temporaries are overwritten with the mask1
 *     shuffles and DPADD_SB4_SH adds the filt1 products onto out0..out3.
 * No rounding or saturation happens here; the macro only accumulates.
 * The helper macros (VSHF_B2_SB, DOTP_SB4_SH, DPADD_SB4_SH) are defined
 * elsewhere. */
#define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \
filt0, filt1, out0, out1, out2, out3) \
{ \
v16i8 _4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \
_4tap_8wid_vec3_m; \
\
VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, _4tap_8wid_vec0_m, \
_4tap_8wid_vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, _4tap_8wid_vec2_m, \
_4tap_8wid_vec3_m); \
DOTP_SB4_SH(_4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \
_4tap_8wid_vec3_m, filt0, filt0, filt0, filt0, out0, out1, \
out2, out3); \
VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, _4tap_8wid_vec0_m, \
_4tap_8wid_vec1_m); \
VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, _4tap_8wid_vec2_m, \
_4tap_8wid_vec3_m); \
DPADD_SB4_SH(_4tap_8wid_vec0_m, _4tap_8wid_vec1_m, _4tap_8wid_vec2_m, \
_4tap_8wid_vec3_m, filt1, filt1, filt1, filt1, out0, out1, \
out2, out3); \
}
static void common_hz_6t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride,

Просмотреть файл

@ -135,27 +135,6 @@ int vp8_decode_frame(VP8D_COMP *pbi);
int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf);
int vp8_remove_decoder_instances(struct frame_buffers *fb);
/* CHECK_MEM_ERROR(lval, expr): assign `expr` to `lval` and report an
 * allocation failure when the result is null/zero.
 *
 * The macro is not hygienic: it reads `pbi->common.error` directly, so a
 * variable named `pbi` must be in scope wherever it is expanded.
 * It first asserts that pbi->common.error.setjmp is non-zero, then performs
 * the assignment, and on a falsy result calls vpx_internal_error() with
 * VPX_CODEC_MEM_ERROR and a message built from the stringified `lval`.
 * NOTE(review): vpx_internal_error() presumably does not return when the
 * setjmp flag is set - confirm in its definition. */
#if CONFIG_DEBUG
/* Debug builds: the message also carries __FILE__ and __LINE__. */
#define CHECK_MEM_ERROR(lval, expr) \
do { \
assert(pbi->common.error.setjmp); \
(lval) = (expr); \
if (!(lval)) \
vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate " #lval " at %s:%d", __FILE__, \
__LINE__); \
} while (0)
#else
/* Non-debug builds: same checks, message without the source location. */
#define CHECK_MEM_ERROR(lval, expr) \
do { \
assert(pbi->common.error.setjmp); \
(lval) = (expr); \
if (!(lval)) \
vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate " #lval); \
} while (0)
#endif
#ifdef __cplusplus
} // extern "C"
#endif

Просмотреть файл

@ -30,11 +30,13 @@
#include "error_concealment.h"
#endif
#define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n)))
#define CALLOC_ARRAY_ALIGNED(p, n, algn) \
do { \
CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \
memset((p), 0, (n) * sizeof(*(p))); \
/* Allocate an array for `p` through vpx_calloc() and route the result through
 * the three-argument CHECK_MEM_ERROR, reporting into pbi->common.error (so a
 * variable named `pbi` must be in scope at the expansion site).
 * NOTE(review): sizeof(*(p)) is passed as the first vpx_calloc() argument and
 * the count `n` as the second - confirm this matches vpx_calloc's parameter
 * order. */
#define CALLOC_ARRAY(p, n) \
CHECK_MEM_ERROR(&pbi->common.error, (p), vpx_calloc(sizeof(*(p)), (n)))
/* Aligned counterpart: allocate sizeof(*(p)) * (n) bytes with
 * vpx_memalign((algn), ...), check the result via CHECK_MEM_ERROR, then zero
 * the whole array with memset(). The size product is not checked for
 * overflow. The memset assumes the failure path of CHECK_MEM_ERROR does not
 * fall through - TODO confirm. */
#define CALLOC_ARRAY_ALIGNED(p, n, algn) \
do { \
CHECK_MEM_ERROR(&pbi->common.error, (p), \
vpx_memalign((algn), sizeof(*(p)) * (n))); \
memset((p), 0, (n) * sizeof(*(p))); \
} while (0)
static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd,
@ -754,7 +756,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
uv_width = width >> 1;
/* Allocate a vpx_atomic_int for each mb row. */
CHECK_MEM_ERROR(pbi->mt_current_mb_col,
CHECK_MEM_ERROR(&pc->error, pbi->mt_current_mb_col,
vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows));
for (i = 0; i < pc->mb_rows; ++i)
vpx_atomic_init(&pbi->mt_current_mb_col[i], 0);
@ -762,7 +764,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
/* Allocate memory for above_row buffers. */
CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) {
CHECK_MEM_ERROR(pbi->mt_yabove_row[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_yabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(width + (VP8BORDERINPIXELS << 1))));
vp8_zero_array(pbi->mt_yabove_row[i], width + (VP8BORDERINPIXELS << 1));
@ -770,7 +772,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) {
CHECK_MEM_ERROR(pbi->mt_uabove_row[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_uabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(uv_width + VP8BORDERINPIXELS)));
vp8_zero_array(pbi->mt_uabove_row[i], uv_width + VP8BORDERINPIXELS);
@ -778,7 +780,7 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i) {
CHECK_MEM_ERROR(pbi->mt_vabove_row[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_vabove_row[i],
vpx_memalign(16, sizeof(unsigned char) *
(uv_width + VP8BORDERINPIXELS)));
vp8_zero_array(pbi->mt_vabove_row[i], uv_width + VP8BORDERINPIXELS);
@ -787,17 +789,17 @@ void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) {
/* Allocate memory for left_col buffers. */
CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_yleft_col[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_yleft_col[i],
vpx_calloc(sizeof(unsigned char) * 16, 1));
CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_uleft_col[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_uleft_col[i],
vpx_calloc(sizeof(unsigned char) * 8, 1));
CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows);
for (i = 0; i < pc->mb_rows; ++i)
CHECK_MEM_ERROR(pbi->mt_vleft_col[i],
CHECK_MEM_ERROR(&pc->error, pbi->mt_vleft_col[i],
vpx_calloc(sizeof(unsigned char) * 8, 1));
}
}

Просмотреть файл

@ -28,11 +28,11 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
zig_zag1 = vld1q_u16(inv_zig_zag + 8);
int16x8_t x0, x1, sz0, sz1, y0, y1;
uint16x8_t eob0, eob1;
#ifndef __aarch64__
#if !VPX_ARCH_AARCH64
uint16x4_t eob_d16;
uint32x2_t eob_d32;
uint32x4_t eob_q32;
#endif // __arch64__
#endif // !VPX_ARCH_AARCH64
/* sign of z: z >> 15 */
sz0 = vshrq_n_s16(z0, 15);
@ -70,7 +70,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
/* select the largest value */
eob0 = vmaxq_u16(eob0, eob1);
#ifdef __aarch64__
#if VPX_ARCH_AARCH64
*d->eob = (int8_t)vmaxvq_u16(eob0);
#else
eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0));
@ -79,7 +79,7 @@ void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) {
eob_d32 = vpmax_u32(eob_d32, eob_d32);
vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0);
#endif // __aarch64__
#endif // VPX_ARCH_AARCH64
/* qcoeff = x */
vst1q_s16(d->qcoeff, x0);

Просмотреть файл

@ -92,8 +92,7 @@ typedef struct macroblock {
signed int last_act_zbin_adj;
int *mvcost[2];
/* MSVC generates code that thinks this is 16-byte aligned */
DECLARE_ALIGNED(16, int*, mvsadcost[2]);
int *mvsadcost[2];
int (*mbmode_cost)[MB_MODE_COUNT];
int (*intra_uv_mode_cost)[MB_MODE_COUNT];
int (*bmode_costs)[10][10];

Просмотреть файл

@ -123,7 +123,7 @@ static void calc_av_activity(VP8_COMP *cpi, int64_t activity_sum) {
unsigned int tmp;
/* Create a list to sort to */
CHECK_MEM_ERROR(sortlist,
CHECK_MEM_ERROR(&cpi->common.error, sortlist,
vpx_calloc(sizeof(unsigned int), cpi->common.MBs));
/* Copy map to sort list */

Просмотреть файл

@ -510,16 +510,16 @@ int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
if (th_count == 0) return 0;
CHECK_MEM_ERROR(cpi->h_encoding_thread,
CHECK_MEM_ERROR(&cpi->common.error, cpi->h_encoding_thread,
vpx_malloc(sizeof(pthread_t) * th_count));
CHECK_MEM_ERROR(cpi->h_event_start_encoding,
CHECK_MEM_ERROR(&cpi->common.error, cpi->h_event_start_encoding,
vpx_malloc(sizeof(sem_t) * th_count));
CHECK_MEM_ERROR(cpi->h_event_end_encoding,
CHECK_MEM_ERROR(&cpi->common.error, cpi->h_event_end_encoding,
vpx_malloc(sizeof(sem_t) * th_count));
CHECK_MEM_ERROR(cpi->mb_row_ei,
CHECK_MEM_ERROR(&cpi->common.error, cpi->mb_row_ei,
vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
CHECK_MEM_ERROR(cpi->en_thread_data,
CHECK_MEM_ERROR(&cpi->common.error, cpi->en_thread_data,
vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
vpx_atomic_store_release(&cpi->b_multi_threaded, 1);

Просмотреть файл

@ -49,7 +49,6 @@ void vp8_cal_low_res_mb_cols(VP8_COMP *cpi) {
void vp8_cal_dissimilarity(VP8_COMP *cpi) {
VP8_COMMON *cm = &cpi->common;
int i;
/* Note: The first row & first column in mip are outside the frame, which
* were initialized to all 0.(ref_frame, mode, mv...)
@ -67,6 +66,7 @@ void vp8_cal_dissimilarity(VP8_COMP *cpi) {
store_info->frame_type = cm->frame_type;
if (cm->frame_type != KEY_FRAME) {
int i;
store_info->is_frame_dropped = 0;
for (i = 1; i < MAX_REF_FRAMES; ++i)
store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i];

Просмотреть файл

@ -1169,7 +1169,8 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
#else
unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16;
#endif
CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok)));
CHECK_MEM_ERROR(&cpi->common.error, cpi->tok,
vpx_calloc(tokens, sizeof(*cpi->tok)));
}
/* Data used for real time vc mode to see if gf needs refreshing */
@ -1178,37 +1179,39 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
/* Structures used to monitor GF usage */
vpx_free(cpi->gf_active_flags);
CHECK_MEM_ERROR(
cpi->gf_active_flags,
&cpi->common.error, cpi->gf_active_flags,
vpx_calloc(sizeof(*cpi->gf_active_flags), cm->mb_rows * cm->mb_cols));
cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
vpx_free(cpi->mb_activity_map);
CHECK_MEM_ERROR(
cpi->mb_activity_map,
&cpi->common.error, cpi->mb_activity_map,
vpx_calloc(sizeof(*cpi->mb_activity_map), cm->mb_rows * cm->mb_cols));
/* allocate memory for storing last frame's MVs for MV prediction. */
vpx_free(cpi->lfmv);
CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2),
sizeof(*cpi->lfmv)));
CHECK_MEM_ERROR(
&cpi->common.error, cpi->lfmv,
vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2), sizeof(*cpi->lfmv)));
vpx_free(cpi->lf_ref_frame_sign_bias);
CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias,
CHECK_MEM_ERROR(&cpi->common.error, cpi->lf_ref_frame_sign_bias,
vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2),
sizeof(*cpi->lf_ref_frame_sign_bias)));
vpx_free(cpi->lf_ref_frame);
CHECK_MEM_ERROR(cpi->lf_ref_frame,
CHECK_MEM_ERROR(&cpi->common.error, cpi->lf_ref_frame,
vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2),
sizeof(*cpi->lf_ref_frame)));
/* Create the encoder segmentation map and set all entries to 0 */
vpx_free(cpi->segmentation_map);
CHECK_MEM_ERROR(
cpi->segmentation_map,
&cpi->common.error, cpi->segmentation_map,
vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->segmentation_map)));
cpi->cyclic_refresh_mode_index = 0;
vpx_free(cpi->active_map);
CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cm->mb_rows * cm->mb_cols,
sizeof(*cpi->active_map)));
CHECK_MEM_ERROR(
&cpi->common.error, cpi->active_map,
vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->active_map)));
memset(cpi->active_map, 1, (cm->mb_rows * cm->mb_cols));
#if CONFIG_MULTITHREAD
@ -1226,7 +1229,7 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
int i;
vpx_free(cpi->mt_current_mb_col);
CHECK_MEM_ERROR(cpi->mt_current_mb_col,
CHECK_MEM_ERROR(&cpi->common.error, cpi->mt_current_mb_col,
vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));
for (i = 0; i < cm->mb_rows; ++i)
vpx_atomic_init(&cpi->mt_current_mb_col[i], 0);
@ -1235,7 +1238,8 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi) {
#endif
vpx_free(cpi->tplist);
CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
CHECK_MEM_ERROR(&cpi->common.error, cpi->tplist,
vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows));
#if CONFIG_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
@ -1773,8 +1777,9 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
cpi->common.error.setjmp = 1;
CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site),
(MAX_MVSEARCH_STEPS * 8) + 1));
CHECK_MEM_ERROR(
&cpi->common.error, cpi->mb.ss,
vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
vp8_create_common(&cpi->common);
@ -1879,18 +1884,19 @@ struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) {
}
if (cpi->cyclic_refresh_mode_enabled) {
CHECK_MEM_ERROR(cpi->cyclic_refresh_map,
CHECK_MEM_ERROR(&cpi->common.error, cpi->cyclic_refresh_map,
vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
} else {
cpi->cyclic_refresh_map = (signed char *)NULL;
}
CHECK_MEM_ERROR(cpi->skin_map, vpx_calloc(cm->mb_rows * cm->mb_cols,
sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(
&cpi->common.error, cpi->skin_map,
vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cpi->consec_zero_last,
CHECK_MEM_ERROR(&cpi->common.error, cpi->consec_zero_last,
vpx_calloc(cm->mb_rows * cm->mb_cols, 1));
CHECK_MEM_ERROR(cpi->consec_zero_last_mvbias,
CHECK_MEM_ERROR(&cpi->common.error, cpi->consec_zero_last_mvbias,
vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1));
/*Initialize the feed-forward activity masking.*/
@ -2109,7 +2115,6 @@ void vp8_remove_compressor(VP8_COMP **comp) {
double time_encoded =
(cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) /
10000000.000;
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
if (cpi->b_calculate_psnr) {
if (cpi->oxcf.number_of_layers > 1) {
@ -2138,6 +2143,7 @@ void vp8_remove_compressor(VP8_COMP **comp) {
total_psnr2, total_ssim);
}
} else {
double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded;
double samples =
3.0 / 2 * cpi->count * cpi->common.Width * cpi->common.Height;
double total_psnr =
@ -3203,7 +3209,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
int frame_under_shoot_limit;
int Loop = 0;
int loop_count;
VP8_COMMON *cm = &cpi->common;
int active_worst_qchanged = 0;
@ -3769,8 +3774,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
vp8_save_coding_context(cpi);
loop_count = 0;
scale_and_extend_source(cpi->un_scaled_source, cpi);
#if CONFIG_TEMPORAL_DENOISING && CONFIG_POSTPROC
@ -3993,7 +3996,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
q_low = cpi->active_best_quality;
q_high = cpi->active_worst_quality;
loop_count++;
Loop = 1;
continue;
@ -4219,7 +4221,6 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size,
if (Loop == 1) {
vp8_restore_coding_context(cpi);
loop_count++;
#if CONFIG_INTERNAL_STATS
cpi->tot_recode_hits++;
#endif

Просмотреть файл

@ -731,26 +731,6 @@ void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **);
void vp8_set_speed_features(VP8_COMP *cpi);
#if CONFIG_DEBUG
#define CHECK_MEM_ERROR(lval, expr) \
do { \
assert(cpi->common.error.setjmp); \
(lval) = (expr); \
if (!(lval)) \
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate " #lval " at %s:%d", __FILE__, \
__LINE__); \
} while (0)
#else
#define CHECK_MEM_ERROR(lval, expr) \
do { \
assert(cpi->common.error.setjmp); \
(lval) = (expr); \
if (!(lval)) \
vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \
"Failed to allocate " #lval); \
} while (0)
#endif
#ifdef __cplusplus
} // extern "C"
#endif

Просмотреть файл

@ -911,12 +911,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
}
}
if (setjmp(ctx->cpi->common.error.jmp)) {
ctx->cpi->common.error.setjmp = 0;
vpx_clear_system_state();
return VPX_CODEC_CORRUPT_FRAME;
}
/* Initialize the encoder instance on the first frame*/
if (!res && ctx->cpi) {
unsigned int lib_flags;
@ -927,6 +921,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
unsigned char *cx_data_end;
int comp_data_state = 0;
if (setjmp(ctx->cpi->common.error.jmp)) {
ctx->cpi->common.error.setjmp = 0;
vpx_clear_system_state();
return VPX_CODEC_CORRUPT_FRAME;
}
ctx->cpi->common.error.setjmp = 1;
/* Set up internal flags */
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) {
((VP8_COMP *)ctx->cpi)->b_calculate_psnr = 1;
@ -947,19 +948,10 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
if (img != NULL) {
res = image2yuvconfig(img, &sd);
if (sd.y_width != ctx->cfg.g_w || sd.y_height != ctx->cfg.g_h) {
/* from vpx_encoder.h for g_w/g_h:
"Note that the frames passed as input to the encoder must have this
resolution"
*/
ctx->base.err_detail = "Invalid input frame resolution";
res = VPX_CODEC_INVALID_PARAM;
} else {
if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags,
&sd, dst_time_stamp, dst_end_time_stamp)) {
VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd,
dst_time_stamp, dst_end_time_stamp)) {
VP8_COMP *cpi = (VP8_COMP *)ctx->cpi;
res = update_error_state(ctx, &cpi->common.error);
}
/* reset for next frame */
@ -971,8 +963,6 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
cx_data_end = ctx->cx_data + cx_data_sz;
lib_flags = 0;
ctx->cpi->common.error.setjmp = 1;
while (cx_data_sz >= ctx->cx_data_sz / 2) {
comp_data_state = vp8_get_compressed_data(
ctx->cpi, &lib_flags, &size, cx_data, cx_data_end, &dst_time_stamp,
@ -1068,6 +1058,7 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx,
}
}
}
ctx->cpi->common.error.setjmp = 0;
}
return res;

Просмотреть файл

@ -310,6 +310,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0];
VP8_COMMON *const pc = &pbi->common;
if (setjmp(pbi->common.error.jmp)) {
pbi->common.error.setjmp = 0;
vp8_remove_decoder_instances(fb);
vp8_zero(fb->pbi);
vpx_clear_system_state();
@ -494,6 +495,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx,
/* get ready for the next series of fragments */
ctx->fragments.count = 0;
pbi->common.error.setjmp = 0;
}
return res;

Просмотреть файл

@ -294,6 +294,34 @@ void VP8RateControlRTC::ComputeQP(const VP8FrameParamsQpRTC &frame_params) {
int VP8RateControlRTC::GetQP() const { return q_; }
int VP8RateControlRTC::GetLoopfilterLevel() const {
VP8_COMMON *cm = &cpi_->common;
const double qp = q_;
// This model is from linear regression
if (cm->Width * cm->Height <= 320 * 240) {
cm->filter_level = static_cast<int>(0.352685 * qp + 2.957774);
} else if (cm->Width * cm->Height <= 640 * 480) {
cm->filter_level = static_cast<int>(0.485069 * qp - 0.534462);
} else {
cm->filter_level = static_cast<int>(0.314875 * qp + 7.959003);
}
int min_filter_level = 0;
// This logic is from get_min_filter_level() in picklpf.c
if (q_ > 6 && q_ <= 16) {
min_filter_level = 1;
} else {
min_filter_level = (q_ / 8);
}
const int max_filter_level = 63;
if (cm->filter_level < min_filter_level) cm->filter_level = min_filter_level;
if (cm->filter_level > max_filter_level) cm->filter_level = max_filter_level;
return cm->filter_level;
}
void VP8RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {
VP8_COMMON *const cm = &cpi_->common;
vpx_clear_system_state();

Просмотреть файл

@ -42,6 +42,9 @@ class VP8RateControlRTC {
bool UpdateRateControl(const VP8RateControlRtcConfig &rc_cfg);
// GetQP() needs to be called after ComputeQP() to get the latest QP
int GetQP() const;
// GetLoopfilterLevel() needs to be called after ComputeQP() since loopfilter
// level is calculated from frame qp.
int GetLoopfilterLevel() const;
// int GetLoopfilterLevel() const;
void ComputeQP(const VP8FrameParamsQpRTC &frame_params);
// Feedback to rate control with the size of current encoded frame

Просмотреть файл

@ -64,9 +64,9 @@ highbd_dct_const_round_shift_low_8(const int64x2x2_t *const in) {
#define highbd_iadst_half_butterfly(in, c, lane, out) \
do { \
int64x2x2_t t[2]; \
vmull_lane_s32_dual(in, c, lane, t); \
out = highbd_dct_const_round_shift_low_8(t); \
int64x2x2_t _t[2]; \
vmull_lane_s32_dual(in, c, lane, _t); \
out = highbd_dct_const_round_shift_low_8(_t); \
} while (0)
#define highbd_iadst_butterfly(in0, in1, c, lane0, lane1, s0, s1) \

Просмотреть файл

@ -46,27 +46,6 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
return num_values > 0 ? get_msb(num_values) + 1 : 0;
}
/* CHECK_MEM_ERROR(cm, lval, expr): assign `expr` to `lval` and report an
 * allocation failure through (cm)->error when the result is null/zero.
 *
 *   cm   - pointer to a structure with an `error` member (error-info record).
 *   lval - lvalue receiving the allocation result; also stringified into the
 *          failure message.
 *   expr - allocation expression, evaluated exactly once.
 *
 * The macro asserts that the `setjmp` flag of (cm)->error is set before doing
 * anything else. The assert tests the flag's VALUE: the earlier form,
 * `assert(&(cm)->error.setjmp)`, tested the flag's address, which is never
 * null, so the check could not fire. On failure vpx_internal_error() is called
 * with VPX_CODEC_MEM_ERROR; debug builds append __FILE__/__LINE__ to the
 * message. */
#if CONFIG_DEBUG
#define CHECK_MEM_ERROR(cm, lval, expr)                                     \
  do {                                                                      \
    assert((cm)->error.setjmp);                                             \
    (lval) = (expr);                                                        \
    if (!(lval))                                                            \
      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,                 \
                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
                         __LINE__);                                         \
  } while (0)
#else
#define CHECK_MEM_ERROR(cm, lval, expr)                     \
  do {                                                      \
    assert((cm)->error.setjmp);                             \
    (lval) = (expr);                                        \
    if (!(lval))                                            \
      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, \
                         "Failed to allocate " #lval);      \
  } while (0)
#endif
#define VP9_SYNC_CODE_0 0x49
#define VP9_SYNC_CODE_1 0x83
#define VP9_SYNC_CODE_2 0x42

Просмотреть файл

@ -150,6 +150,7 @@ void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
assert(((intptr_t)input) % 32 == 0);
/* The calculation can be simplified if there are not many non-zero dct
* coefficients. Use eobs to separate different cases. */
if (eob == 1) /* DC only DCT coefficient. */
@ -164,6 +165,7 @@ void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
assert(((intptr_t)input) % 32 == 0);
if (eob == 1)
vpx_idct32x32_1_add(input, dest, stride);
else if (eob <= 34)

Просмотреть файл

@ -23,7 +23,7 @@ struct macroblockd;
/* Encoder forward decls */
struct macroblock;
struct vp9_variance_vtable;
struct vp9_sad_table;
struct search_site_config;
struct mv;
union int_mv;
@ -171,7 +171,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
#
# Motion search
#
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad avx neon/;
#

Просмотреть файл

@ -283,7 +283,7 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
{
int i;
CHECK_MEM_ERROR(cm, lf_sync->mutex,
CHECK_MEM_ERROR(&cm->error, lf_sync->mutex,
vpx_malloc(sizeof(*lf_sync->mutex) * rows));
if (lf_sync->mutex) {
for (i = 0; i < rows; ++i) {
@ -291,7 +291,7 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
}
}
CHECK_MEM_ERROR(cm, lf_sync->cond,
CHECK_MEM_ERROR(&cm->error, lf_sync->cond,
vpx_malloc(sizeof(*lf_sync->cond) * rows));
if (lf_sync->cond) {
for (i = 0; i < rows; ++i) {
@ -299,11 +299,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
}
}
CHECK_MEM_ERROR(cm, lf_sync->lf_mutex,
CHECK_MEM_ERROR(&cm->error, lf_sync->lf_mutex,
vpx_malloc(sizeof(*lf_sync->lf_mutex)));
pthread_mutex_init(lf_sync->lf_mutex, NULL);
CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
CHECK_MEM_ERROR(&cm->error, lf_sync->recon_done_mutex,
vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
if (lf_sync->recon_done_mutex) {
for (i = 0; i < rows; ++i) {
@ -311,7 +311,7 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
}
}
CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
CHECK_MEM_ERROR(&cm->error, lf_sync->recon_done_cond,
vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
if (lf_sync->recon_done_cond) {
for (i = 0; i < rows; ++i) {
@ -321,15 +321,15 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
}
#endif // CONFIG_MULTITHREAD
CHECK_MEM_ERROR(cm, lf_sync->lfdata,
CHECK_MEM_ERROR(&cm->error, lf_sync->lfdata,
vpx_malloc(num_workers * sizeof(*lf_sync->lfdata)));
lf_sync->num_workers = num_workers;
lf_sync->num_active_workers = lf_sync->num_workers;
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
CHECK_MEM_ERROR(&cm->error, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
CHECK_MEM_ERROR(&cm->error, lf_sync->num_tiles_done,
vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
mi_cols_aligned_to_sb(cm->mi_rows) >>
MI_BLOCK_SIZE_LOG2));

Просмотреть файл

@ -1469,7 +1469,7 @@ static void resize_mv_buffer(VP9_COMMON *cm) {
vpx_free(cm->cur_frame->mvs);
cm->cur_frame->mi_rows = cm->mi_rows;
cm->cur_frame->mi_cols = cm->mi_cols;
CHECK_MEM_ERROR(cm, cm->cur_frame->mvs,
CHECK_MEM_ERROR(&cm->error, cm->cur_frame->mvs,
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
sizeof(*cm->cur_frame->mvs)));
}
@ -1776,7 +1776,8 @@ static void vp9_jobq_alloc(VP9Decoder *pbi) {
if (jobq_size > row_mt_worker_data->jobq_size) {
vpx_free(row_mt_worker_data->jobq_buf);
CHECK_MEM_ERROR(cm, row_mt_worker_data->jobq_buf, vpx_calloc(1, jobq_size));
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->jobq_buf,
vpx_calloc(1, jobq_size));
vp9_jobq_init(&row_mt_worker_data->jobq, row_mt_worker_data->jobq_buf,
jobq_size);
row_mt_worker_data->jobq_size = jobq_size;
@ -1923,7 +1924,7 @@ static int row_decode_worker_hook(void *arg1, void *arg2) {
const int is_last_row = sb_rows - 1 == cur_sb_row;
int mi_col_start, mi_col_end;
if (!tile_data_recon)
CHECK_MEM_ERROR(cm, tile_data_recon,
CHECK_MEM_ERROR(&cm->error, tile_data_recon,
vpx_memalign(32, sizeof(TileWorkerData)));
tile_data_recon->xd = pbi->mb;
@ -2025,7 +2026,7 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
if (cm->lf.filter_level && !cm->skip_loop_filter &&
pbi->lf_worker.data1 == NULL) {
CHECK_MEM_ERROR(cm, pbi->lf_worker.data1,
CHECK_MEM_ERROR(&cm->error, pbi->lf_worker.data1,
vpx_memalign(32, sizeof(LFWorkerData)));
pbi->lf_worker.hook = vp9_loop_filter_worker;
if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) {
@ -2192,8 +2193,6 @@ static int tile_worker_hook(void *arg1, void *arg2) {
volatile int mi_row = 0;
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
if (setjmp(tile_data->error_info.jmp)) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
@ -2206,6 +2205,7 @@ static int tile_worker_hook(void *arg1, void *arg2) {
}
return 0;
}
tile_data->error_info.setjmp = 1;
tile_data->xd.corrupted = 0;
@ -2285,7 +2285,7 @@ static INLINE void init_mt(VP9Decoder *pbi) {
if (pbi->num_tile_workers == 0) {
const int num_threads = pbi->max_threads;
CHECK_MEM_ERROR(cm, pbi->tile_workers,
CHECK_MEM_ERROR(&cm->error, pbi->tile_workers,
vpx_malloc(num_threads * sizeof(*pbi->tile_workers)));
for (n = 0; n < num_threads; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
@ -2824,7 +2824,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
const int num_jobs = sb_rows << cm->log2_tile_cols;
if (pbi->row_mt_worker_data == NULL) {
CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
CHECK_MEM_ERROR(&cm->error, pbi->row_mt_worker_data,
vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
#if CONFIG_MULTITHREAD
pthread_mutex_init(&pbi->row_mt_worker_data->recon_done_mutex, NULL);
@ -3006,7 +3006,8 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
// platforms without DECLARE_ALIGNED().
assert((sizeof(*pbi->tile_worker_data) % 16) == 0);
vpx_free(pbi->tile_worker_data);
CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size));
CHECK_MEM_ERROR(&cm->error, pbi->tile_worker_data,
vpx_memalign(32, twd_size));
pbi->total_tiles = tile_rows * tile_cols;
}

Просмотреть файл

@ -66,7 +66,7 @@ void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
{
int i;
CHECK_MEM_ERROR(
cm, row_mt_worker_data->recon_sync_mutex,
&cm->error, row_mt_worker_data->recon_sync_mutex,
vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs));
if (row_mt_worker_data->recon_sync_mutex) {
for (i = 0; i < num_jobs; ++i) {
@ -75,7 +75,7 @@ void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
}
CHECK_MEM_ERROR(
cm, row_mt_worker_data->recon_sync_cond,
&cm->error, row_mt_worker_data->recon_sync_cond,
vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs));
if (row_mt_worker_data->recon_sync_cond) {
for (i = 0; i < num_jobs; ++i) {
@ -86,24 +86,24 @@ void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
#endif
row_mt_worker_data->num_sbs = num_sbs;
for (plane = 0; plane < 3; ++plane) {
CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
vpx_memalign(16, dqcoeff_size));
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->dqcoeff[plane],
vpx_memalign(32, dqcoeff_size));
memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->eob[plane],
vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
sizeof(*row_mt_worker_data->eob[plane])));
}
CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->partition,
vpx_calloc(num_sbs * PARTITIONS_PER_SB,
sizeof(*row_mt_worker_data->partition)));
CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->recon_map,
vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));
// allocate memory for thread_data
if (row_mt_worker_data->thread_data == NULL) {
const size_t thread_size =
max_threads * sizeof(*row_mt_worker_data->thread_data);
CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data,
CHECK_MEM_ERROR(&cm->error, row_mt_worker_data->thread_data,
vpx_memalign(32, thread_size));
}
}
@ -181,9 +181,10 @@ VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
cm->error.setjmp = 1;
CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
CHECK_MEM_ERROR(&cm->error, cm->fc,
(FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
CHECK_MEM_ERROR(
cm, cm->frame_contexts,
&cm->error, cm->frame_contexts,
(FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
pbi->need_resync = 1;

Просмотреть файл

@ -54,7 +54,7 @@ typedef struct TileWorkerData {
VP9LfSync *lf_sync;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
DECLARE_ALIGNED(32, tran_low_t, dqcoeff[32 * 32]);
DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]);
struct vpx_internal_error_info error_info;
} TileWorkerData;

Просмотреть файл

@ -21,7 +21,7 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
return vaddlvq_s8(v_sum_diff_total);
#else
const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);

Просмотреть файл

@ -30,30 +30,6 @@ static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
return result;
}
static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
// This is simplified from the C implementation to utilise that
// x->nmvjointsadcost[1] == x->nmvjointsadcost[2] and
// x->nmvjointsadcost[1] == x->nmvjointsadcost[3]
return mv.as_int == 0 ? 0 : 1;
}
static INLINE int mv_cost(const int_mv mv, const int *joint_cost,
int *const comp_cost[2]) {
assert(mv.as_mv.row >= -MV_MAX && mv.as_mv.row < MV_MAX);
assert(mv.as_mv.col >= -MV_MAX && mv.as_mv.col < MV_MAX);
return joint_cost[get_mv_joint(mv)] + comp_cost[0][mv.as_mv.row] +
comp_cost[1][mv.as_mv.col];
}
static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
int sad_per_bit) {
const int_mv diff =
pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col);
return ROUND_POWER_OF_TWO(
(unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
VP9_PROB_COST_SHIFT);
}
/*****************************************************************************
* This function utilizes 3 properties of the cost function lookup tables, *
* constructed in using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in *
@ -71,8 +47,9 @@ static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
*****************************************************************************/
int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
const search_site_config *cfg, MV *ref_mv,
MV *best_mv, int search_param, int sad_per_bit,
int *num00, const vp9_variance_fn_ptr_t *fn_ptr,
uint32_t start_mv_sad, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_sad_fn_ptr_t *sad_fn_ptr,
const MV *center_mv) {
static const uint32_t data[4] = { 0, 1, 2, 3 };
const uint32x4_t v_idx_d = vld1q_u32((const uint32_t *)data);
@ -101,8 +78,8 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
pack_int_mv(center_mv->row >> 3, center_mv->col >> 3);
const int16x8_t vfcmv = vreinterpretq_s16_s32(vdupq_n_s32(fcenter_mv.as_int));
const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row);
const int ref_col = clamp(ref_mv->col, minmv.as_mv.col, maxmv.as_mv.col);
const int ref_row = ref_mv->row;
const int ref_col = ref_mv->col;
int_mv bmv = pack_int_mv(ref_row, ref_col);
int_mv new_bmv = bmv;
@ -117,12 +94,13 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Work out the start point for the search
const uint8_t *best_address = in_what;
const uint8_t *new_best_address = best_address;
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
int64x2_t v_ba_q = vdupq_n_s64((intptr_t)best_address);
#else
int32x4_t v_ba_d = vdupq_n_s32((intptr_t)best_address);
#endif
unsigned int best_sad = INT_MAX;
// Starting position
unsigned int best_sad = start_mv_sad;
int i, j, step;
// Check the prerequisite cost function properties that are easy to check
@ -131,10 +109,6 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[2]);
assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[3]);
// Check the starting position
best_sad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
best_sad += mvsad_err_cost(x, bmv, &fcenter_mv.as_mv, sad_per_bit);
*num00 = 0;
for (i = 0, step = 0; step < tot_steps; step++) {
@ -143,7 +117,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
int8x16_t v_inside_d;
uint32x4_t v_outside_d;
int32x4_t v_cost_d, v_sad_d;
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
int64x2_t v_blocka[2];
#else
int32x4_t v_blocka[1];
@ -164,7 +138,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
vreinterpretq_s32_s16(v_these_mv_w)));
// If none of them are inside, then move on
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
horiz_max = vmaxvq_u32(vreinterpretq_u32_s8(v_inside_d));
#else
horiz_max_0 = vmax_u32(vget_low_u32(vreinterpretq_u32_s8(v_inside_d)),
@ -193,7 +167,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Compute the SIMD pointer offsets.
{
#if defined(__aarch64__) // sizeof(intptr_t) == 8
#if VPX_ARCH_AARCH64 // sizeof(intptr_t) == 8
// Load the offsets
int64x2_t v_bo10_q = vld1q_s64((const int64_t *)&ss_os[i + 0]);
int64x2_t v_bo32_q = vld1q_s64((const int64_t *)&ss_os[i + 2]);
@ -214,8 +188,8 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
#endif
}
fn_ptr->sdx4df(what, what_stride, (const uint8_t **)&v_blocka[0],
in_what_stride, (uint32_t *)&v_sad_d);
sad_fn_ptr->sdx4df(what, what_stride, (const uint8_t **)&v_blocka[0],
in_what_stride, (uint32_t *)&v_sad_d);
// Look up the component cost of the residual motion vector
{
@ -260,7 +234,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
// Find the minimum value and index horizontally in v_sad_d
{
uint32_t local_best_sad;
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
local_best_sad = vminvq_u32(vreinterpretq_u32_s32(v_sad_d));
#else
uint32x2_t horiz_min_0 =
@ -282,7 +256,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
uint32x4_t v_mask_d = vandq_u32(v_sel_d, v_idx_d);
v_mask_d = vbslq_u32(v_sel_d, v_mask_d, vdupq_n_u32(0xffffffff));
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
local_best_idx = vminvq_u32(v_mask_d);
#else
horiz_min_0 =
@ -306,7 +280,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
best_address = new_best_address;
v_bmv_w = vreinterpretq_s16_s32(vdupq_n_s32(bmv.as_int));
#if defined(__aarch64__)
#if VPX_ARCH_AARCH64
v_ba_q = vdupq_n_s64((intptr_t)best_address);
#else
v_ba_d = vdupq_n_s32((intptr_t)best_address);

Просмотреть файл

@ -50,7 +50,7 @@ static VPX_FORCE_INLINE int16x8_t get_max_lane_eob(const int16_t *iscan_ptr,
}
static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
#ifdef __aarch64__
#if VPX_ARCH_AARCH64
return (uint16_t)vmaxvq_s16(v_eobmax);
#else
const int16x4_t v_eobmax_3210 =
@ -65,7 +65,7 @@ static VPX_FORCE_INLINE uint16_t get_max_eob(int16x8_t v_eobmax) {
vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3));
return (uint16_t)vget_lane_s16(v_eobmax_final, 0);
#endif // __aarch64__
#endif // VPX_ARCH_AARCH64
}
static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
@ -81,7 +81,7 @@ static VPX_FORCE_INLINE void load_fp_values(const int16_t *round_ptr,
static VPX_FORCE_INLINE void update_fp_values(int16x8_t *v_round,
int16x8_t *v_quant,
int16x8_t *v_dequant) {
#ifdef __aarch64__
#if VPX_ARCH_AARCH64
*v_round = vdupq_laneq_s16(*v_round, 1);
*v_quant = vdupq_laneq_s16(*v_quant, 1);
*v_dequant = vdupq_laneq_s16(*v_dequant, 1);

Просмотреть файл

@ -967,13 +967,13 @@ static void encode_tiles_buffer_alloc(VP9_COMP *const cpi) {
int i;
const size_t worker_data_size =
cpi->num_workers * sizeof(*cpi->vp9_bitstream_worker_data);
CHECK_MEM_ERROR(cm, cpi->vp9_bitstream_worker_data,
CHECK_MEM_ERROR(&cm->error, cpi->vp9_bitstream_worker_data,
vpx_memalign(16, worker_data_size));
memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size);
for (i = 1; i < cpi->num_workers; ++i) {
cpi->vp9_bitstream_worker_data[i].dest_size =
cpi->oxcf.width * cpi->oxcf.height;
CHECK_MEM_ERROR(cm, cpi->vp9_bitstream_worker_data[i].dest,
CHECK_MEM_ERROR(&cm->error, cpi->vp9_bitstream_worker_data[i].dest,
vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size));
}
}

Просмотреть файл

@ -25,16 +25,17 @@ static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
int i, k;
ctx->num_4x4_blk = num_blk;
CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t)));
CHECK_MEM_ERROR(&cm->error, ctx->zcoeff_blk,
vpx_calloc(num_blk, sizeof(uint8_t)));
for (i = 0; i < MAX_MB_PLANE; ++i) {
for (k = 0; k < 3; ++k) {
CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
CHECK_MEM_ERROR(&cm->error, ctx->coeff[i][k],
vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
CHECK_MEM_ERROR(&cm->error, ctx->qcoeff[i][k],
vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
CHECK_MEM_ERROR(&cm->error, ctx->dqcoeff[i][k],
vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k])));
CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
CHECK_MEM_ERROR(&cm->error, ctx->eobs[i][k],
vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k])));
ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
@ -100,10 +101,10 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) {
int nodes;
vpx_free(td->leaf_tree);
CHECK_MEM_ERROR(cm, td->leaf_tree,
CHECK_MEM_ERROR(&cm->error, td->leaf_tree,
vpx_calloc(leaf_nodes, sizeof(*td->leaf_tree)));
vpx_free(td->pc_tree);
CHECK_MEM_ERROR(cm, td->pc_tree,
CHECK_MEM_ERROR(&cm->error, td->pc_tree,
vpx_calloc(tree_nodes, sizeof(*td->pc_tree)));
this_pc = &td->pc_tree[0];

Просмотреть файл

@ -634,11 +634,11 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES;
init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES;
denoiser->num_layers = num_layers;
CHECK_MEM_ERROR(cm, denoiser->running_avg_y,
CHECK_MEM_ERROR(&cm->error, denoiser->running_avg_y,
vpx_calloc(denoiser->num_ref_frames * num_layers,
sizeof(denoiser->running_avg_y[0])));
CHECK_MEM_ERROR(
cm, denoiser->mc_running_avg_y,
&cm->error, denoiser->mc_running_avg_y,
vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0])));
for (layer = 0; layer < num_layers; ++layer) {

Просмотреть файл

@ -1545,7 +1545,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
if (low_res && threshold_4x4avg < INT64_MAX)
CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2)));
CHECK_MEM_ERROR(&cm->error, vt2, vpx_calloc(16, sizeof(*vt2)));
// Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
// for splits.
for (i = 0; i < 4; i++) {
@ -3710,7 +3710,6 @@ static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int row, col;
int dr = 0;
int count = 0;
double r0, rk, beta;
TplDepFrame *tpl_frame;
@ -3734,8 +3733,6 @@ static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
intra_cost += this_stats->intra_cost;
mc_dep_cost += this_stats->mc_dep_cost;
++count;
}
}
@ -5786,7 +5783,7 @@ static void source_var_based_partition_search_method(VP9_COMP *cpi) {
if (cm->last_width != cm->width || cm->last_height != cm->height) {
if (cpi->source_diff_var) vpx_free(cpi->source_diff_var);
CHECK_MEM_ERROR(cm, cpi->source_diff_var,
CHECK_MEM_ERROR(&cm->error, cpi->source_diff_var,
vpx_calloc(cm->MBs, sizeof(cpi->source_diff_var)));
}
@ -5826,7 +5823,7 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
CHECK_MEM_ERROR(
cm, cpi->tile_data,
&cm->error, cpi->tile_data,
vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
cpi->allocated_tiles = tile_cols * tile_rows;
@ -6185,7 +6182,6 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
int mi_row, mi_col;
int sum_delta = 0;
int map_index = 0;
int qdelta_index;
int segment_id;
@ -6195,7 +6191,6 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
segment_id = mi_8x8[0]->segment_id;
qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q);
sum_delta += qdelta_index;
map_index++;
}
mi_8x8_ptr += cm->mi_stride;
}

Просмотреть файл

@ -12,6 +12,7 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
@ -680,9 +681,10 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
}
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]) {
vpx_codec_err_t vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map,
unsigned int rows, unsigned int cols,
int delta_q[8], int delta_lf[8], int skip[8],
int ref_frame[8]) {
VP9_COMMON *cm = &cpi->common;
vpx_roi_map_t *roi = &cpi->roi;
const int range = 63;
@ -693,13 +695,13 @@ int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
// Check number of rows and columns match
if (frame_rows != (int)rows || frame_cols != (int)cols) {
return -1;
return VPX_CODEC_INVALID_PARAM;
}
if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
!check_seg_range(ref_frame, ref_frame_range) ||
!check_seg_range(skip, skip_range))
return -1;
return VPX_CODEC_INVALID_PARAM;
// Also disable segmentation if no deltas are specified.
if (!map ||
@ -713,14 +715,15 @@ int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
ref_frame[6] == -1 && ref_frame[7] == -1))) {
vp9_disable_segmentation(&cm->seg);
cpi->roi.enabled = 0;
return 0;
return VPX_CODEC_OK;
}
if (roi->roi_map) {
vpx_free(roi->roi_map);
roi->roi_map = NULL;
}
CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
roi->roi_map = vpx_malloc(rows * cols);
if (!roi->roi_map) return VPX_CODEC_MEM_ERROR;
// Copy to ROI structure in the compressor.
memcpy(roi->roi_map, map, rows * cols);
@ -732,7 +735,7 @@ int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
roi->rows = rows;
roi->cols = cols;
return 0;
return VPX_CODEC_OK;
}
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
@ -1373,7 +1376,7 @@ static void alloc_context_buffers_ext(VP9_COMP *cpi) {
VP9_COMMON *cm = &cpi->common;
int mi_size = cm->mi_cols * cm->mi_rows;
CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base,
CHECK_MEM_ERROR(&cm->error, cpi->mbmi_ext_base,
vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)));
}
@ -1392,14 +1395,14 @@ static void alloc_compressor_data(VP9_COMP *cpi) {
{
unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols);
CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0],
CHECK_MEM_ERROR(&cm->error, cpi->tile_tok[0][0],
vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0])));
}
sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
vpx_free(cpi->tplist[0][0]);
CHECK_MEM_ERROR(
cm, cpi->tplist[0][0],
&cm->error, cpi->tplist[0][0],
vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0])));
vp9_setup_pc_tree(&cpi->common, &cpi->td);
@ -1561,13 +1564,15 @@ void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
}
#if CONFIG_VP9_HIGHBITDEPTH
// Fills the entry cpi->fn_ptr[BT] with the functions passed as arguments.
// The expansion ends with a semicolon, so call sites are written without one,
// and a variable named `cpi` must be in scope wherever the macro is used.
// NOTE(review): this listing carries two definitions of HIGHBD_BFP. The first
// sets six fields; the second additionally sets sdsf and sdsx4df. Invocations
// that pass *_sad_skip_* arguments match the second form.
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
// Nine-argument form: same as above plus SDSF -> sdsf and SDSX4DF -> sdsx4df.
#define HIGHBD_BFP(BT, SDF, SDSF, SDAF, VF, SVF, SVAF, SDX4DF, SDSX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdsf = SDSF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
cpi->fn_ptr[BT].sdx4df = SDX4DF; \
cpi->fn_ptr[BT].sdsx4df = SDSX4DF;
#define MAKE_BFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
@ -1627,284 +1632,361 @@ void vp9_set_rc_buffer_sizes(VP9_COMP *cpi) {
}
// Instantiate the high-bit-depth SAD wrapper functions, one group of five
// invocations per block size (32x16, 16x32, 64x32, 32x64, 32x32, 64x64,
// 16x16, 16x8, 8x16, 8x8, 8x4, 4x8, 4x4): sad, sad_skip, sad_avg, sadx4d and
// sad_skip_x4d. MAKE_BFP_SAD_WRAPPER defines a `<name>_bits8` function; the
// `_bits10` / `_bits12` names referenced by highbd_set_var_fns presumably come
// from the same macros (their full bodies are not visible here -- confirm).
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_32x16)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_32x16x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_16x32)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_16x32x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_64x32)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_64x32x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_32x64)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_32x64x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_32x32)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_32x32x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_64x64)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_64x64x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_16x16)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_16x16x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_16x8)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_16x8x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_8x16)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_8x16x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_8x8)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_8x8x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_8x4)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_8x4x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_4x8)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_4x8x4d)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4)
MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad_skip_4x4)
MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d)
MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad_skip_4x4x4d)
static void highbd_set_var_fns(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
if (cm->use_highbitdepth) {
switch (cm->bit_depth) {
case VPX_BITS_8:
HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8,
vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16,
vpx_highbd_8_sub_pixel_variance32x16,
vpx_highbd_8_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits8)
HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8,
vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32,
vpx_highbd_8_sub_pixel_variance16x32,
vpx_highbd_8_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits8)
HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8,
vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32,
vpx_highbd_8_sub_pixel_variance64x32,
vpx_highbd_8_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits8)
HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8,
vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64,
vpx_highbd_8_sub_pixel_variance32x64,
vpx_highbd_8_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits8)
HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8,
vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32,
vpx_highbd_8_sub_pixel_variance32x32,
vpx_highbd_8_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits8)
HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8,
vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64,
vpx_highbd_8_sub_pixel_variance64x64,
vpx_highbd_8_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits8)
HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8,
vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16,
vpx_highbd_8_sub_pixel_variance16x16,
vpx_highbd_8_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits8)
HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8,
vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8,
vpx_highbd_8_sub_pixel_variance16x8,
vpx_highbd_8_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits8)
HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8,
vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16,
vpx_highbd_8_sub_pixel_variance8x16,
vpx_highbd_8_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits8)
HIGHBD_BFP(
BLOCK_32X16, vpx_highbd_sad32x16_bits8,
vpx_highbd_sad_skip_32x16_bits8, vpx_highbd_sad32x16_avg_bits8,
vpx_highbd_8_variance32x16, vpx_highbd_8_sub_pixel_variance32x16,
vpx_highbd_8_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits8, vpx_highbd_sad_skip_32x16x4d_bits8)
HIGHBD_BFP(
BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8)
BLOCK_16X32, vpx_highbd_sad16x32_bits8,
vpx_highbd_sad_skip_16x32_bits8, vpx_highbd_sad16x32_avg_bits8,
vpx_highbd_8_variance16x32, vpx_highbd_8_sub_pixel_variance16x32,
vpx_highbd_8_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits8, vpx_highbd_sad_skip_16x32x4d_bits8)
HIGHBD_BFP(
BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8)
BLOCK_64X32, vpx_highbd_sad64x32_bits8,
vpx_highbd_sad_skip_64x32_bits8, vpx_highbd_sad64x32_avg_bits8,
vpx_highbd_8_variance64x32, vpx_highbd_8_sub_pixel_variance64x32,
vpx_highbd_8_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits8, vpx_highbd_sad_skip_64x32x4d_bits8)
HIGHBD_BFP(
BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8)
BLOCK_32X64, vpx_highbd_sad32x64_bits8,
vpx_highbd_sad_skip_32x64_bits8, vpx_highbd_sad32x64_avg_bits8,
vpx_highbd_8_variance32x64, vpx_highbd_8_sub_pixel_variance32x64,
vpx_highbd_8_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits8, vpx_highbd_sad_skip_32x64x4d_bits8)
HIGHBD_BFP(
BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8)
BLOCK_32X32, vpx_highbd_sad32x32_bits8,
vpx_highbd_sad_skip_32x32_bits8, vpx_highbd_sad32x32_avg_bits8,
vpx_highbd_8_variance32x32, vpx_highbd_8_sub_pixel_variance32x32,
vpx_highbd_8_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits8, vpx_highbd_sad_skip_32x32x4d_bits8)
HIGHBD_BFP(
BLOCK_64X64, vpx_highbd_sad64x64_bits8,
vpx_highbd_sad_skip_64x64_bits8, vpx_highbd_sad64x64_avg_bits8,
vpx_highbd_8_variance64x64, vpx_highbd_8_sub_pixel_variance64x64,
vpx_highbd_8_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits8, vpx_highbd_sad_skip_64x64x4d_bits8)
HIGHBD_BFP(
BLOCK_16X16, vpx_highbd_sad16x16_bits8,
vpx_highbd_sad_skip_16x16_bits8, vpx_highbd_sad16x16_avg_bits8,
vpx_highbd_8_variance16x16, vpx_highbd_8_sub_pixel_variance16x16,
vpx_highbd_8_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits8, vpx_highbd_sad_skip_16x16x4d_bits8)
HIGHBD_BFP(
BLOCK_16X8, vpx_highbd_sad16x8_bits8,
vpx_highbd_sad_skip_16x8_bits8, vpx_highbd_sad16x8_avg_bits8,
vpx_highbd_8_variance16x8, vpx_highbd_8_sub_pixel_variance16x8,
vpx_highbd_8_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits8, vpx_highbd_sad_skip_16x8x4d_bits8)
HIGHBD_BFP(
BLOCK_8X16, vpx_highbd_sad8x16_bits8,
vpx_highbd_sad_skip_8x16_bits8, vpx_highbd_sad8x16_avg_bits8,
vpx_highbd_8_variance8x16, vpx_highbd_8_sub_pixel_variance8x16,
vpx_highbd_8_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits8, vpx_highbd_sad_skip_8x16x4d_bits8)
HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits8,
vpx_highbd_sad_skip_8x8_bits8, vpx_highbd_sad8x8_avg_bits8,
vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8,
vpx_highbd_8_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x4d_bits8, vpx_highbd_sad_skip_8x8x4d_bits8)
HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits8,
vpx_highbd_sad_skip_8x4_bits8, vpx_highbd_sad8x4_avg_bits8,
vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4,
vpx_highbd_8_sub_pixel_avg_variance8x4,
vpx_highbd_sad8x4x4d_bits8, vpx_highbd_sad_skip_8x4x4d_bits8)
HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits8,
vpx_highbd_sad_skip_4x8_bits8, vpx_highbd_sad4x8_avg_bits8,
vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8,
vpx_highbd_8_sub_pixel_avg_variance4x8,
vpx_highbd_sad4x8x4d_bits8, vpx_highbd_sad_skip_4x8x4d_bits8)
HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits8,
vpx_highbd_sad_skip_4x4_bits8, vpx_highbd_sad4x4_avg_bits8,
vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4,
vpx_highbd_8_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits8, vpx_highbd_sad_skip_4x4x4d_bits8)
break;
case VPX_BITS_10:
HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10,
vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16,
vpx_highbd_10_sub_pixel_variance32x16,
vpx_highbd_10_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits10)
HIGHBD_BFP(
BLOCK_32X16, vpx_highbd_sad32x16_bits10,
vpx_highbd_sad_skip_32x16_bits10, vpx_highbd_sad32x16_avg_bits10,
vpx_highbd_10_variance32x16, vpx_highbd_10_sub_pixel_variance32x16,
vpx_highbd_10_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits10, vpx_highbd_sad_skip_32x16x4d_bits10)
HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10,
vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32,
vpx_highbd_10_sub_pixel_variance16x32,
vpx_highbd_10_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits10)
HIGHBD_BFP(
BLOCK_16X32, vpx_highbd_sad16x32_bits10,
vpx_highbd_sad_skip_16x32_bits10, vpx_highbd_sad16x32_avg_bits10,
vpx_highbd_10_variance16x32, vpx_highbd_10_sub_pixel_variance16x32,
vpx_highbd_10_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits10, vpx_highbd_sad_skip_16x32x4d_bits10)
HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10,
vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32,
vpx_highbd_10_sub_pixel_variance64x32,
vpx_highbd_10_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits10)
HIGHBD_BFP(
BLOCK_64X32, vpx_highbd_sad64x32_bits10,
vpx_highbd_sad_skip_64x32_bits10, vpx_highbd_sad64x32_avg_bits10,
vpx_highbd_10_variance64x32, vpx_highbd_10_sub_pixel_variance64x32,
vpx_highbd_10_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits10, vpx_highbd_sad_skip_64x32x4d_bits10)
HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10,
vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64,
vpx_highbd_10_sub_pixel_variance32x64,
vpx_highbd_10_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits10)
HIGHBD_BFP(
BLOCK_32X64, vpx_highbd_sad32x64_bits10,
vpx_highbd_sad_skip_32x64_bits10, vpx_highbd_sad32x64_avg_bits10,
vpx_highbd_10_variance32x64, vpx_highbd_10_sub_pixel_variance32x64,
vpx_highbd_10_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits10, vpx_highbd_sad_skip_32x64x4d_bits10)
HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10,
vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32,
vpx_highbd_10_sub_pixel_variance32x32,
vpx_highbd_10_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits10)
HIGHBD_BFP(
BLOCK_32X32, vpx_highbd_sad32x32_bits10,
vpx_highbd_sad_skip_32x32_bits10, vpx_highbd_sad32x32_avg_bits10,
vpx_highbd_10_variance32x32, vpx_highbd_10_sub_pixel_variance32x32,
vpx_highbd_10_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits10, vpx_highbd_sad_skip_32x32x4d_bits10)
HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10,
vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64,
vpx_highbd_10_sub_pixel_variance64x64,
vpx_highbd_10_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits10)
HIGHBD_BFP(
BLOCK_64X64, vpx_highbd_sad64x64_bits10,
vpx_highbd_sad_skip_64x64_bits10, vpx_highbd_sad64x64_avg_bits10,
vpx_highbd_10_variance64x64, vpx_highbd_10_sub_pixel_variance64x64,
vpx_highbd_10_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits10, vpx_highbd_sad_skip_64x64x4d_bits10)
HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10,
vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16,
vpx_highbd_10_sub_pixel_variance16x16,
vpx_highbd_10_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits10)
HIGHBD_BFP(
BLOCK_16X16, vpx_highbd_sad16x16_bits10,
vpx_highbd_sad_skip_16x16_bits10, vpx_highbd_sad16x16_avg_bits10,
vpx_highbd_10_variance16x16, vpx_highbd_10_sub_pixel_variance16x16,
vpx_highbd_10_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits10, vpx_highbd_sad_skip_16x16x4d_bits10)
HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10,
vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8,
vpx_highbd_10_sub_pixel_variance16x8,
vpx_highbd_10_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits10)
HIGHBD_BFP(
BLOCK_16X8, vpx_highbd_sad16x8_bits10,
vpx_highbd_sad_skip_16x8_bits10, vpx_highbd_sad16x8_avg_bits10,
vpx_highbd_10_variance16x8, vpx_highbd_10_sub_pixel_variance16x8,
vpx_highbd_10_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits10, vpx_highbd_sad_skip_16x8x4d_bits10)
HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10,
vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16,
vpx_highbd_10_sub_pixel_variance8x16,
vpx_highbd_10_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits10)
HIGHBD_BFP(
BLOCK_8X16, vpx_highbd_sad8x16_bits10,
vpx_highbd_sad_skip_8x16_bits10, vpx_highbd_sad8x16_avg_bits10,
vpx_highbd_10_variance8x16, vpx_highbd_10_sub_pixel_variance8x16,
vpx_highbd_10_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits10, vpx_highbd_sad_skip_8x16x4d_bits10)
HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10,
vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
vpx_highbd_10_sub_pixel_variance8x8,
vpx_highbd_10_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x4d_bits10)
HIGHBD_BFP(
BLOCK_8X8, vpx_highbd_sad8x8_bits10, vpx_highbd_sad_skip_8x8_bits10,
vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8,
vpx_highbd_10_sub_pixel_variance8x8,
vpx_highbd_10_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x4d_bits10, vpx_highbd_sad_skip_8x8x4d_bits10)
HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10,
vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
vpx_highbd_10_sub_pixel_variance8x4,
vpx_highbd_10_sub_pixel_avg_variance8x4,
vpx_highbd_sad8x4x4d_bits10)
HIGHBD_BFP(
BLOCK_8X4, vpx_highbd_sad8x4_bits10, vpx_highbd_sad_skip_8x4_bits10,
vpx_highbd_sad8x4_avg_bits10, vpx_highbd_10_variance8x4,
vpx_highbd_10_sub_pixel_variance8x4,
vpx_highbd_10_sub_pixel_avg_variance8x4,
vpx_highbd_sad8x4x4d_bits10, vpx_highbd_sad_skip_8x4x4d_bits10)
HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10,
vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
vpx_highbd_10_sub_pixel_variance4x8,
vpx_highbd_10_sub_pixel_avg_variance4x8,
vpx_highbd_sad4x8x4d_bits10)
HIGHBD_BFP(
BLOCK_4X8, vpx_highbd_sad4x8_bits10, vpx_highbd_sad_skip_4x8_bits10,
vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8,
vpx_highbd_10_sub_pixel_variance4x8,
vpx_highbd_10_sub_pixel_avg_variance4x8,
vpx_highbd_sad4x8x4d_bits10, vpx_highbd_sad_skip_4x8x4d_bits10)
HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10,
vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
vpx_highbd_10_sub_pixel_variance4x4,
vpx_highbd_10_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits10)
HIGHBD_BFP(
BLOCK_4X4, vpx_highbd_sad4x4_bits10, vpx_highbd_sad_skip_4x4_bits10,
vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4,
vpx_highbd_10_sub_pixel_variance4x4,
vpx_highbd_10_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits10, vpx_highbd_sad_skip_4x4x4d_bits10)
break;
default:
assert(cm->bit_depth == VPX_BITS_12);
HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
vpx_highbd_12_sub_pixel_variance32x16,
vpx_highbd_12_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits12)
HIGHBD_BFP(
BLOCK_32X16, vpx_highbd_sad32x16_bits12,
vpx_highbd_sad_skip_32x16_bits12, vpx_highbd_sad32x16_avg_bits12,
vpx_highbd_12_variance32x16, vpx_highbd_12_sub_pixel_variance32x16,
vpx_highbd_12_sub_pixel_avg_variance32x16,
vpx_highbd_sad32x16x4d_bits12, vpx_highbd_sad_skip_32x16x4d_bits12)
HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12,
vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32,
vpx_highbd_12_sub_pixel_variance16x32,
vpx_highbd_12_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits12)
HIGHBD_BFP(
BLOCK_16X32, vpx_highbd_sad16x32_bits12,
vpx_highbd_sad_skip_16x32_bits12, vpx_highbd_sad16x32_avg_bits12,
vpx_highbd_12_variance16x32, vpx_highbd_12_sub_pixel_variance16x32,
vpx_highbd_12_sub_pixel_avg_variance16x32,
vpx_highbd_sad16x32x4d_bits12, vpx_highbd_sad_skip_16x32x4d_bits12)
HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12,
vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32,
vpx_highbd_12_sub_pixel_variance64x32,
vpx_highbd_12_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits12)
HIGHBD_BFP(
BLOCK_64X32, vpx_highbd_sad64x32_bits12,
vpx_highbd_sad_skip_64x32_bits12, vpx_highbd_sad64x32_avg_bits12,
vpx_highbd_12_variance64x32, vpx_highbd_12_sub_pixel_variance64x32,
vpx_highbd_12_sub_pixel_avg_variance64x32,
vpx_highbd_sad64x32x4d_bits12, vpx_highbd_sad_skip_64x32x4d_bits12)
HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12,
vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64,
vpx_highbd_12_sub_pixel_variance32x64,
vpx_highbd_12_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits12)
HIGHBD_BFP(
BLOCK_32X64, vpx_highbd_sad32x64_bits12,
vpx_highbd_sad_skip_32x64_bits12, vpx_highbd_sad32x64_avg_bits12,
vpx_highbd_12_variance32x64, vpx_highbd_12_sub_pixel_variance32x64,
vpx_highbd_12_sub_pixel_avg_variance32x64,
vpx_highbd_sad32x64x4d_bits12, vpx_highbd_sad_skip_32x64x4d_bits12)
HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12,
vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32,
vpx_highbd_12_sub_pixel_variance32x32,
vpx_highbd_12_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits12)
HIGHBD_BFP(
BLOCK_32X32, vpx_highbd_sad32x32_bits12,
vpx_highbd_sad_skip_32x32_bits12, vpx_highbd_sad32x32_avg_bits12,
vpx_highbd_12_variance32x32, vpx_highbd_12_sub_pixel_variance32x32,
vpx_highbd_12_sub_pixel_avg_variance32x32,
vpx_highbd_sad32x32x4d_bits12, vpx_highbd_sad_skip_32x32x4d_bits12)
HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12,
vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64,
vpx_highbd_12_sub_pixel_variance64x64,
vpx_highbd_12_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits12)
HIGHBD_BFP(
BLOCK_64X64, vpx_highbd_sad64x64_bits12,
vpx_highbd_sad_skip_64x64_bits12, vpx_highbd_sad64x64_avg_bits12,
vpx_highbd_12_variance64x64, vpx_highbd_12_sub_pixel_variance64x64,
vpx_highbd_12_sub_pixel_avg_variance64x64,
vpx_highbd_sad64x64x4d_bits12, vpx_highbd_sad_skip_64x64x4d_bits12)
HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12,
vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16,
vpx_highbd_12_sub_pixel_variance16x16,
vpx_highbd_12_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits12)
HIGHBD_BFP(
BLOCK_16X16, vpx_highbd_sad16x16_bits12,
vpx_highbd_sad_skip_16x16_bits12, vpx_highbd_sad16x16_avg_bits12,
vpx_highbd_12_variance16x16, vpx_highbd_12_sub_pixel_variance16x16,
vpx_highbd_12_sub_pixel_avg_variance16x16,
vpx_highbd_sad16x16x4d_bits12, vpx_highbd_sad_skip_16x16x4d_bits12)
HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12,
vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8,
vpx_highbd_12_sub_pixel_variance16x8,
vpx_highbd_12_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits12)
HIGHBD_BFP(
BLOCK_16X8, vpx_highbd_sad16x8_bits12,
vpx_highbd_sad_skip_16x8_bits12, vpx_highbd_sad16x8_avg_bits12,
vpx_highbd_12_variance16x8, vpx_highbd_12_sub_pixel_variance16x8,
vpx_highbd_12_sub_pixel_avg_variance16x8,
vpx_highbd_sad16x8x4d_bits12, vpx_highbd_sad_skip_16x8x4d_bits12)
HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12,
vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16,
vpx_highbd_12_sub_pixel_variance8x16,
vpx_highbd_12_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits12)
HIGHBD_BFP(
BLOCK_8X16, vpx_highbd_sad8x16_bits12,
vpx_highbd_sad_skip_8x16_bits12, vpx_highbd_sad8x16_avg_bits12,
vpx_highbd_12_variance8x16, vpx_highbd_12_sub_pixel_variance8x16,
vpx_highbd_12_sub_pixel_avg_variance8x16,
vpx_highbd_sad8x16x4d_bits12, vpx_highbd_sad_skip_8x16x4d_bits12)
HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12,
vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
vpx_highbd_12_sub_pixel_variance8x8,
vpx_highbd_12_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x4d_bits12)
HIGHBD_BFP(
BLOCK_8X8, vpx_highbd_sad8x8_bits12, vpx_highbd_sad_skip_8x8_bits12,
vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8,
vpx_highbd_12_sub_pixel_variance8x8,
vpx_highbd_12_sub_pixel_avg_variance8x8,
vpx_highbd_sad8x8x4d_bits12, vpx_highbd_sad_skip_8x8x4d_bits12)
HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12,
vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
vpx_highbd_12_sub_pixel_variance8x4,
vpx_highbd_12_sub_pixel_avg_variance8x4,
vpx_highbd_sad8x4x4d_bits12)
HIGHBD_BFP(
BLOCK_8X4, vpx_highbd_sad8x4_bits12, vpx_highbd_sad_skip_8x4_bits12,
vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4,
vpx_highbd_12_sub_pixel_variance8x4,
vpx_highbd_12_sub_pixel_avg_variance8x4,
vpx_highbd_sad8x4x4d_bits12, vpx_highbd_sad_skip_8x4x4d_bits12)
HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12,
vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
vpx_highbd_12_sub_pixel_variance4x8,
vpx_highbd_12_sub_pixel_avg_variance4x8,
vpx_highbd_sad4x8x4d_bits12)
HIGHBD_BFP(
BLOCK_4X8, vpx_highbd_sad4x8_bits12, vpx_highbd_sad_skip_4x8_bits12,
vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8,
vpx_highbd_12_sub_pixel_variance4x8,
vpx_highbd_12_sub_pixel_avg_variance4x8,
vpx_highbd_sad4x8x4d_bits12, vpx_highbd_sad_skip_4x8x4d_bits12)
HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12,
vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
vpx_highbd_12_sub_pixel_variance4x4,
vpx_highbd_12_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits12)
HIGHBD_BFP(
BLOCK_4X4, vpx_highbd_sad4x4_bits12, vpx_highbd_sad_skip_4x4_bits12,
vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4,
vpx_highbd_12_sub_pixel_variance4x4,
vpx_highbd_12_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits12, vpx_highbd_sad_skip_4x4x4d_bits12)
break;
}
}
@ -1916,48 +1998,48 @@ static void realloc_segmentation_maps(VP9_COMP *cpi) {
// Create the encoder segmentation map and set all entries to 0
vpx_free(cpi->segmentation_map);
CHECK_MEM_ERROR(cm, cpi->segmentation_map,
CHECK_MEM_ERROR(&cm->error, cpi->segmentation_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
// Create a map used for cyclic background refresh.
if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh);
CHECK_MEM_ERROR(cm, cpi->cyclic_refresh,
CHECK_MEM_ERROR(&cm->error, cpi->cyclic_refresh,
vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols));
// Create a map used to mark inactive areas.
vpx_free(cpi->active_map.map);
CHECK_MEM_ERROR(cm, cpi->active_map.map,
CHECK_MEM_ERROR(&cm->error, cpi->active_map.map,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
// Also allocate a placeholder segmentation-map copy in the coding context,
// for use if we want to save and restore it
vpx_free(cpi->coding_context.last_frame_seg_map_copy);
CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy,
CHECK_MEM_ERROR(&cm->error, cpi->coding_context.last_frame_seg_map_copy,
vpx_calloc(cm->mi_rows * cm->mi_cols, 1));
}
static void alloc_copy_partition_data(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
if (cpi->prev_partition == NULL) {
CHECK_MEM_ERROR(cm, cpi->prev_partition,
CHECK_MEM_ERROR(&cm->error, cpi->prev_partition,
(BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
sizeof(*cpi->prev_partition)));
}
if (cpi->prev_segment_id == NULL) {
CHECK_MEM_ERROR(
cm, cpi->prev_segment_id,
&cm->error, cpi->prev_segment_id,
(int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->prev_segment_id)));
}
if (cpi->prev_variance_low == NULL) {
CHECK_MEM_ERROR(cm, cpi->prev_variance_low,
CHECK_MEM_ERROR(&cm->error, cpi->prev_variance_low,
(uint8_t *)vpx_calloc(
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25,
sizeof(*cpi->prev_variance_low)));
}
if (cpi->copied_frame_cnt == NULL) {
CHECK_MEM_ERROR(
cm, cpi->copied_frame_cnt,
&cm->error, cpi->copied_frame_cnt,
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->copied_frame_cnt)));
}
@ -2290,9 +2372,10 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
cm->free_mi = vp9_enc_free_mi;
cm->setup_mi = vp9_enc_setup_mi;
CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
CHECK_MEM_ERROR(&cm->error, cm->fc,
(FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
CHECK_MEM_ERROR(
cm, cm->frame_contexts,
&cm->error, cm->frame_contexts,
(FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
cpi->compute_frame_low_motion_onepass = 1;
@ -2319,38 +2402,38 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
CHECK_MEM_ERROR(
cm, cpi->skin_map,
&cm->error, cpi->skin_map,
vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
#if !CONFIG_REALTIME_ONLY
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
CHECK_MEM_ERROR(&cm->error, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
#endif
CHECK_MEM_ERROR(
cm, cpi->consec_zero_mv,
&cm->error, cpi->consec_zero_mv,
vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv)));
CHECK_MEM_ERROR(cm, cpi->nmvcosts[0],
CHECK_MEM_ERROR(&cm->error, cpi->nmvcosts[0],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0])));
CHECK_MEM_ERROR(cm, cpi->nmvcosts[1],
CHECK_MEM_ERROR(&cm->error, cpi->nmvcosts[1],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1])));
CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0],
CHECK_MEM_ERROR(&cm->error, cpi->nmvcosts_hp[0],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0])));
CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1],
CHECK_MEM_ERROR(&cm->error, cpi->nmvcosts_hp[1],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1])));
CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0],
CHECK_MEM_ERROR(&cm->error, cpi->nmvsadcosts[0],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0])));
CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1],
CHECK_MEM_ERROR(&cm->error, cpi->nmvsadcosts[1],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1])));
CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0],
CHECK_MEM_ERROR(&cm->error, cpi->nmvsadcosts_hp[0],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0])));
CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1],
CHECK_MEM_ERROR(&cm->error, cpi->nmvsadcosts_hp[1],
vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1])));
for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]));
i++) {
CHECK_MEM_ERROR(
cm, cpi->mbgraph_stats[i].mb_stats,
&cm->error, cpi->mbgraph_stats[i].mb_stats,
vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
@ -2394,7 +2477,7 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
}
if (cpi->b_calculate_consistency) {
CHECK_MEM_ERROR(cm, cpi->ssim_vars,
CHECK_MEM_ERROR(&cm->error, cpi->ssim_vars,
vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
sizeof(*cpi->ssim_vars) * 4));
cpi->worst_consistency = 100.0;
@ -2479,7 +2562,7 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
vpx_free(lc->rc_twopass_stats_in.buf);
lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz;
CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf,
CHECK_MEM_ERROR(&cm->error, lc->rc_twopass_stats_in.buf,
vpx_malloc(lc->rc_twopass_stats_in.sz));
lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf;
lc->twopass.stats_in = lc->twopass.stats_in_start;
@ -2534,7 +2617,7 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
const int h = num_8x8_blocks_high_lookup[bsize];
const int num_cols = (cm->mi_cols + w - 1) / w;
const int num_rows = (cm->mi_rows + h - 1) / h;
CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors,
CHECK_MEM_ERROR(&cm->error, cpi->mi_ssim_rdmult_scaling_factors,
vpx_calloc(num_rows * num_cols,
sizeof(*cpi->mi_ssim_rdmult_scaling_factors)));
}
@ -2543,68 +2626,76 @@ VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf,
#if CONFIG_NON_GREEDY_MV
cpi->tpl_ready = 0;
#endif // CONFIG_NON_GREEDY_MV
for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) {
cpi->tpl_stats[i].tpl_stats_ptr = NULL;
}
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var,
CHECK_MEM_ERROR(&cm->error, cpi->source_diff_var,
vpx_calloc(cm->MBs, sizeof(cpi->source_diff_var)));
cpi->source_var_thresh = 0;
cpi->frames_till_next_var_check = 0;
// Fills the function-pointer entry cpi->fn_ptr[BT] for one block size.
// Arguments after BT are stored, in order, into the sdf, sdaf, vf, svf, svaf
// and sdx4df members. The invocations below pass vpx_sadWxH, vpx_sadWxH_avg,
// vpx_varianceWxH, vpx_sub_pixel_varianceWxH, vpx_sub_pixel_avg_varianceWxH
// and vpx_sadWxHx4d respectively.
// The macro expands to plain assignment statements that reference a variable
// named cpi, and the expansion already ends with a semicolon, so it is invoked
// without a trailing ';'.
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
cpi->fn_ptr[BT].sdx4df = SDX4DF;
// Fills the function-pointer entry cpi->fn_ptr[BT] for one block size.
// Arguments after BT are stored, in order, into the sdf, sdsf, sdaf, vf, svf,
// svaf, sdx4df and sdsx4df members. The invocations below pass vpx_sadWxH,
// vpx_sad_skip_WxH, vpx_sadWxH_avg, vpx_varianceWxH,
// vpx_sub_pixel_varianceWxH, vpx_sub_pixel_avg_varianceWxH, vpx_sadWxHx4d and
// vpx_sad_skip_WxHx4d respectively.
// The macro expands to plain assignment statements that reference a variable
// named cpi, and the expansion already ends with a semicolon, so it is invoked
// without a trailing ';'.
#define BFP(BT, SDF, SDSF, SDAF, VF, SVF, SVAF, SDX4DF, SDSX4DF) \
cpi->fn_ptr[BT].sdf = SDF; \
cpi->fn_ptr[BT].sdsf = SDSF; \
cpi->fn_ptr[BT].sdaf = SDAF; \
cpi->fn_ptr[BT].vf = VF; \
cpi->fn_ptr[BT].svf = SVF; \
cpi->fn_ptr[BT].svaf = SVAF; \
cpi->fn_ptr[BT].sdx4df = SDX4DF; \
cpi->fn_ptr[BT].sdsx4df = SDSX4DF;
BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16,
vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16,
vpx_sad32x16x4d)
BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad_skip_32x16, vpx_sad32x16_avg,
vpx_variance32x16, vpx_sub_pixel_variance32x16,
vpx_sub_pixel_avg_variance32x16, vpx_sad32x16x4d, vpx_sad_skip_32x16x4d)
BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32,
vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32,
vpx_sad16x32x4d)
BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad_skip_16x32, vpx_sad16x32_avg,
vpx_variance16x32, vpx_sub_pixel_variance16x32,
vpx_sub_pixel_avg_variance16x32, vpx_sad16x32x4d, vpx_sad_skip_16x32x4d)
BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32,
vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32,
vpx_sad64x32x4d)
BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad_skip_64x32, vpx_sad64x32_avg,
vpx_variance64x32, vpx_sub_pixel_variance64x32,
vpx_sub_pixel_avg_variance64x32, vpx_sad64x32x4d, vpx_sad_skip_64x32x4d)
BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64,
vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64,
vpx_sad32x64x4d)
BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad_skip_32x64, vpx_sad32x64_avg,
vpx_variance32x64, vpx_sub_pixel_variance32x64,
vpx_sub_pixel_avg_variance32x64, vpx_sad32x64x4d, vpx_sad_skip_32x64x4d)
BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32,
vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32,
vpx_sad32x32x4d)
BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad_skip_32x32, vpx_sad32x32_avg,
vpx_variance32x32, vpx_sub_pixel_variance32x32,
vpx_sub_pixel_avg_variance32x32, vpx_sad32x32x4d, vpx_sad_skip_32x32x4d)
BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64,
vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64,
vpx_sad64x64x4d)
BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad_skip_64x64, vpx_sad64x64_avg,
vpx_variance64x64, vpx_sub_pixel_variance64x64,
vpx_sub_pixel_avg_variance64x64, vpx_sad64x64x4d, vpx_sad_skip_64x64x4d)
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16,
vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16,
vpx_sad16x16x4d)
BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad_skip_16x16, vpx_sad16x16_avg,
vpx_variance16x16, vpx_sub_pixel_variance16x16,
vpx_sub_pixel_avg_variance16x16, vpx_sad16x16x4d, vpx_sad_skip_16x16x4d)
BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8,
vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8,
vpx_sad16x8x4d)
BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad_skip_16x8, vpx_sad16x8_avg,
vpx_variance16x8, vpx_sub_pixel_variance16x8,
vpx_sub_pixel_avg_variance16x8, vpx_sad16x8x4d, vpx_sad_skip_16x8x4d)
BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16,
vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16,
vpx_sad8x16x4d)
BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad_skip_8x16, vpx_sad8x16_avg,
vpx_variance8x16, vpx_sub_pixel_variance8x16,
vpx_sub_pixel_avg_variance8x16, vpx_sad8x16x4d, vpx_sad_skip_8x16x4d)
BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8,
vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d)
BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad_skip_8x8, vpx_sad8x8_avg, vpx_variance8x8,
vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d,
vpx_sad_skip_8x8x4d)
BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4,
vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d)
BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad_skip_8x4, vpx_sad8x4_avg, vpx_variance8x4,
vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d,
vpx_sad_skip_8x4x4d)
BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8,
vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d)
BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad_skip_4x8, vpx_sad4x8_avg, vpx_variance4x8,
vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d,
vpx_sad_skip_4x8x4d)
BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4,
vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d)
BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad_skip_4x4, vpx_sad4x4_avg, vpx_variance4x4,
vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d,
vpx_sad_skip_4x4x4d)
#if CONFIG_VP9_HIGHBITDEPTH
highbd_set_var_fns(cpi);
@ -2785,6 +2876,10 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vp9_extrc_delete(&cpi->ext_ratectrl);
// Help detect use after free of the error detail string.
memset(cm->error.detail, 'A', sizeof(cm->error.detail) - 1);
cm->error.detail[sizeof(cm->error.detail) - 1] = '\0';
vp9_remove_common(cm);
vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
@ -3659,7 +3754,7 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
case 6: l = 150; break;
}
if (!cpi->common.postproc_state.limits) {
CHECK_MEM_ERROR(cm, cpi->common.postproc_state.limits,
CHECK_MEM_ERROR(&cm->error, cpi->common.postproc_state.limits,
vpx_calloc(cpi->un_scaled_source->y_width,
sizeof(*cpi->common.postproc_state.limits)));
}
@ -4003,7 +4098,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
svc->spatial_layer_id == svc->number_spatial_layers - 2) {
if (svc->prev_partition_svc == NULL) {
CHECK_MEM_ERROR(
cm, svc->prev_partition_svc,
&cm->error, svc->prev_partition_svc,
(BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
sizeof(*svc->prev_partition_svc)));
}
@ -4355,10 +4450,13 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest
const int orig_rc_max_frame_bandwidth = rc->max_frame_bandwidth;
#if CONFIG_RATE_CTRL
const FRAME_UPDATE_TYPE update_type =
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
RATE_QSTEP_MODEL *rq_model;
{
const FRAME_UPDATE_TYPE update_type =
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
rq_model = &cpi->rq_model[frame_type];
}
init_rq_history(rq_history);
#endif // CONFIG_RATE_CTRL
@ -5202,7 +5300,7 @@ static void init_mb_wiener_var_buffer(VP9_COMP *cpi) {
cpi->mb_wiener_variance = NULL;
CHECK_MEM_ERROR(
cm, cpi->mb_wiener_variance,
&cm->error, cpi->mb_wiener_variance,
vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance)));
cpi->mb_wiener_var_rows = cm->mb_rows;
cpi->mb_wiener_var_cols = cm->mb_cols;
@ -6449,7 +6547,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
pthread_mutex_init(&cpi->kmeans_mutex, NULL);
#endif
CHECK_MEM_ERROR(
cm, cpi->kmeans_data_arr,
&cm->error, cpi->kmeans_data_arr,
vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr)));
cpi->kmeans_data_stride = mi_cols;
cpi->kmeans_data_arr_alloc = 1;

Просмотреть файл

@ -505,6 +505,7 @@ typedef struct EncFrameBuf {
} EncFrameBuf;
// Maximum operating frame buffer size needed for a GOP using ARF reference.
// This is used to allocate the memory for TPL stats for a GOP.
#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
#define MAX_KMEANS_GROUPS 8
@ -743,6 +744,8 @@ typedef struct VP9_COMP {
BLOCK_SIZE tpl_bsize;
TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
// Used to store TPL stats before propagation
VpxTplGopStats tpl_gop_stats;
YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
EncFrameBuf enc_frame_buf[REF_FRAMES];
#if CONFIG_MULTITHREAD
@ -1057,7 +1060,7 @@ static INLINE void partition_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height);
CHECK_MEM_ERROR(cm, cpi->partition_info,
CHECK_MEM_ERROR(&cm->error, cpi->partition_info,
(PARTITION_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(PARTITION_INFO)));
memset(cpi->partition_info, 0,
@ -1085,7 +1088,7 @@ static INLINE void motion_vector_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_4x4(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_4x4(cpi->frame_info.frame_height);
CHECK_MEM_ERROR(cm, cpi->motion_vector_info,
CHECK_MEM_ERROR(&cm->error, cpi->motion_vector_info,
(MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(MOTION_VECTOR_INFO)));
memset(cpi->motion_vector_info, 0,
@ -1104,7 +1107,7 @@ static INLINE void free_motion_vector_info(struct VP9_COMP *cpi) {
static INLINE void tpl_stats_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
CHECK_MEM_ERROR(
cm, cpi->tpl_stats_info,
&cm->error, cpi->tpl_stats_info,
(TplDepStats *)vpx_calloc(MAX_LAG_BUFFERS, sizeof(TplDepStats)));
memset(cpi->tpl_stats_info, 0, MAX_LAG_BUFFERS * sizeof(TplDepStats));
}
@ -1123,7 +1126,7 @@ static INLINE void fp_motion_vector_info_init(struct VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
const int unit_width = get_num_unit_16x16(cpi->frame_info.frame_width);
const int unit_height = get_num_unit_16x16(cpi->frame_info.frame_height);
CHECK_MEM_ERROR(cm, cpi->fp_motion_vector_info,
CHECK_MEM_ERROR(&cm->error, cpi->fp_motion_vector_info,
(MOTION_VECTOR_INFO *)vpx_calloc(unit_width * unit_height,
sizeof(MOTION_VECTOR_INFO)));
}
@ -1454,9 +1457,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
unsigned int cols, int delta_q[8], int delta_lf[8],
int skip[8], int ref_frame[8]);
vpx_codec_err_t vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map,
unsigned int rows, unsigned int cols,
int delta_q[8], int delta_lf[8], int skip[8],
int ref_frame[8]);
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
@ -1471,7 +1475,7 @@ static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows ||
new_fb_ptr->mi_cols < cm->mi_cols) {
vpx_free(new_fb_ptr->mvs);
CHECK_MEM_ERROR(cm, new_fb_ptr->mvs,
CHECK_MEM_ERROR(&cm->error, new_fb_ptr->mvs,
(MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols,
sizeof(*new_fb_ptr->mvs)));
new_fb_ptr->mi_rows = cm->mi_rows;
@ -1479,6 +1483,40 @@ static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) {
}
}
// Returns the table cost of |mv|: the joint_cost entry selected by
// vp9_get_mv_joint(mv) plus one comp_cost entry per component.
//   mv         - vector to price; each component is asserted to lie in
//                [-MV_MAX, MV_MAX).
//   joint_cost - table indexed by the value of vp9_get_mv_joint(mv).
//   comp_cost  - comp_cost[0] is indexed by mv->row and comp_cost[1] by
//                mv->col. The asserted range admits negative indices, so both
//                pointers are presumably positioned inside their tables
//                (confirm at the allocation site).
static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  int total;
  assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
  total = joint_cost[vp9_get_mv_joint(mv)];
  total += comp_cost[0][mv->row];
  total += comp_cost[1][mv->col];
  return total;
}
// Returns the SAD-domain cost of choosing |mv| when |ref| is the reference.
// The component-wise difference (mv - ref) is priced by mv_cost() using the
// x->nmvjointsadcost and x->nmvsadcost tables, multiplied by |sad_per_bit|,
// and the product is passed through ROUND_POWER_OF_TWO with
// VP9_PROB_COST_SHIFT.
static INLINE int mvsad_err_cost(const MACROBLOCK *x, const MV *mv,
                                 const MV *ref, int sad_per_bit) {
  MV delta;
  unsigned int weighted_cost;
  delta.row = mv->row - ref->row;
  delta.col = mv->col - ref->col;
  // The table cost is cast to unsigned before scaling, so the multiplication
  // is carried out in unsigned arithmetic.
  weighted_cost =
      (unsigned)mv_cost(&delta, x->nmvjointsadcost, x->nmvsadcost) *
      sad_per_bit;
  return ROUND_POWER_OF_TWO(weighted_cost, VP9_PROB_COST_SHIFT);
}
// Computes the starting score for a motion search positioned at |mvp_full|.
// The score is the SAD between the plane-0 source block of |x| and the plane-0
// prediction buffer offset by (mvp_full->row, mvp_full->col), plus
// mvsad_err_cost() of |mvp_full| relative to |ref_mv_full| weighted by |sadpb|.
//   sad_fn_ptr - SAD function called as (src, src_stride, ref, ref_stride).
// |mvp_full| is used directly as a buffer offset; no clamping is done here.
static INLINE uint32_t get_start_mv_sad(const MACROBLOCK *x, const MV *mvp_full,
                                        const MV *ref_mv_full,
                                        vpx_sad_fn_t sad_fn_ptr, int sadpb) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const uint8_t *const src = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const int pre_stride = xd->plane[0].pre[0].stride;
  // Prediction block addressed by the candidate vector.
  const uint8_t *const pre =
      xd->plane[0].pre[0].buf + mvp_full->row * pre_stride + mvp_full->col;
  const uint32_t sad = sad_fn_ptr(src, src_stride, pre, pre_stride);
  return sad + mvsad_err_cost(x, mvp_full, ref_mv_full, sadpb);
}
static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
int subsampling_dim, int blk_dim) {
return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;

Просмотреть файл

@ -94,10 +94,10 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
vp9_encode_free_mt_data(cpi);
CHECK_MEM_ERROR(cm, cpi->workers,
CHECK_MEM_ERROR(&cm->error, cpi->workers,
vpx_malloc(num_workers * sizeof(*cpi->workers)));
CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
CHECK_MEM_ERROR(&cm->error, cpi->tile_thr_data,
vpx_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
for (i = 0; i < num_workers; i++) {
@ -111,7 +111,7 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
thread_data->cpi = cpi;
// Allocate thread data.
CHECK_MEM_ERROR(cm, thread_data->td,
CHECK_MEM_ERROR(&cm->error, thread_data->td,
vpx_memalign(32, sizeof(*thread_data->td)));
vp9_zero(*thread_data->td);
@ -121,7 +121,7 @@ static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
vp9_setup_pc_tree(cm, thread_data->td);
// Allocate frame counters in thread data.
CHECK_MEM_ERROR(cm, thread_data->td->counts,
CHECK_MEM_ERROR(&cm->error, thread_data->td->counts,
vpx_calloc(1, sizeof(*thread_data->td->counts)));
// Create threads
@ -292,7 +292,7 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
{
int i;
CHECK_MEM_ERROR(cm, row_mt_sync->mutex,
CHECK_MEM_ERROR(&cm->error, row_mt_sync->mutex,
vpx_malloc(sizeof(*row_mt_sync->mutex) * rows));
if (row_mt_sync->mutex) {
for (i = 0; i < rows; ++i) {
@ -300,7 +300,7 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
}
}
CHECK_MEM_ERROR(cm, row_mt_sync->cond,
CHECK_MEM_ERROR(&cm->error, row_mt_sync->cond,
vpx_malloc(sizeof(*row_mt_sync->cond) * rows));
if (row_mt_sync->cond) {
for (i = 0; i < rows; ++i) {
@ -310,7 +310,7 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
}
#endif // CONFIG_MULTITHREAD
CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
CHECK_MEM_ERROR(&cm->error, row_mt_sync->cur_col,
vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));
// Set up nsync.

Просмотреть файл

@ -435,6 +435,9 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
MV center_mv_full = ref_mv_full;
unsigned int start_mv_sad;
vp9_sad_fn_ptr_t sad_fn_ptr;
int step_param = 3;
int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
@ -455,10 +458,18 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
#endif // CONFIG_VP9_HIGHBITDEPTH
// Calculate SAD of the start mv
clamp_mv(&ref_mv_full, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
start_mv_sad = get_start_mv_sad(x, &ref_mv_full, &center_mv_full,
cpi->fn_ptr[bsize].sdf, x->sadperbit16);
sad_fn_ptr.sdf = cpi->fn_ptr[bsize].sdf;
sad_fn_ptr.sdx4df = cpi->fn_ptr[bsize].sdx4df;
// Center the initial step/diamond search on best mv.
tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param, x->sadperbit16, &num00,
&v_fn_ptr, ref_mv);
tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, start_mv_sad,
&tmp_mv, step_param, x->sadperbit16, &num00,
&sad_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty;
@ -478,9 +489,9 @@ static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
if (num00) {
--num00;
} else {
tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv,
step_param + n, x->sadperbit16, &num00,
&v_fn_ptr, ref_mv);
tmp_err = cpi->diamond_search_sad(
x, &cpi->ss_cfg, &ref_mv_full, start_mv_sad, &tmp_mv, step_param + n,
x->sadperbit16, &num00, &sad_fn_ptr, ref_mv);
if (tmp_err < INT_MAX)
tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1);
if (tmp_err < INT_MAX - new_mv_mode_penalty)
@ -1411,7 +1422,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
if (cpi->row_mt_bit_exact && cpi->twopass.fp_mb_float_stats == NULL)
CHECK_MEM_ERROR(
cm, cpi->twopass.fp_mb_float_stats,
&cm->error, cpi->twopass.fp_mb_float_stats,
vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));
{

Просмотреть файл

@ -98,8 +98,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv,
// If the current best reference mv is not centered on 0,0 then do a 0,0
// based search as well.
if (ref_mv->row != 0 || ref_mv->col != 0) {
unsigned int tmp_err;
MV zero_ref_mv = { 0, 0 }, tmp_mv;
MV zero_ref_mv = { 0, 0 };
tmp_err =
do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, mb_row, mb_col);
@ -289,7 +288,7 @@ static void separate_arf_mbs(VP9_COMP *cpi) {
int *arf_not_zz;
CHECK_MEM_ERROR(
cm, arf_not_zz,
&cm->error, arf_not_zz,
vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1));
// We are not interested in results beyond the alt ref itself.

Просмотреть файл

@ -77,14 +77,6 @@ int vp9_init_search_range(int size) {
return sr;
}
// Returns the table cost of |mv|: joint_cost[vp9_get_mv_joint(mv)] plus the
// comp_cost[0] entry for mv->row and the comp_cost[1] entry for mv->col.
// Both components are asserted to lie in [-MV_MAX, MV_MAX).
static INLINE int mv_cost(const MV *mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  int cost_sum;
  assert(mv->row >= -MV_MAX && mv->row < MV_MAX);
  assert(mv->col >= -MV_MAX && mv->col < MV_MAX);
  cost_sum = joint_cost[vp9_get_mv_joint(mv)];
  cost_sum += comp_cost[0][mv->row];
  cost_sum += comp_cost[1][mv->col];
  return cost_sum;
}
int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost,
int *mvcost[2], int weight) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
@ -103,15 +95,6 @@ static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost,
}
return 0;
}
// Returns the SAD-domain cost of choosing |mv| when |ref| is the reference.
// The difference (mv - ref) is priced by mv_cost() with the x->nmvjointsadcost
// and x->nmvsadcost tables, scaled by |sad_per_bit| in unsigned arithmetic,
// and passed through ROUND_POWER_OF_TWO with VP9_PROB_COST_SHIFT.
static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref,
                          int sad_per_bit) {
  const MV delta = { mv->row - ref->row, mv->col - ref->col };
  const unsigned int weighted_cost =
      (unsigned)mv_cost(&delta, x->nmvjointsadcost, x->nmvsadcost) *
      sad_per_bit;
  return ROUND_POWER_OF_TWO(weighted_cost, VP9_PROB_COST_SHIFT);
}
void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) {
int len;
int ss_count = 0;
@ -2070,9 +2053,9 @@ int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row,
#endif // CONFIG_NON_GREEDY_MV
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr,
MV *ref_mv, uint32_t start_mv_sad, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_sad_fn_ptr_t *sad_fn_ptr,
const MV *center_mv) {
int i, j, step;
@ -2083,7 +2066,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
const int in_what_stride = xd->plane[0].pre[0].stride;
const uint8_t *best_address;
unsigned int bestsad = INT_MAX;
unsigned int bestsad = start_mv_sad;
int best_site = -1;
int last_site = -1;
@ -2101,8 +2084,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
const int tot_steps = cfg->total_steps - search_param;
const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
ref_row = ref_mv->row;
ref_col = ref_mv->col;
*num00 = 0;
@ -2113,10 +2094,6 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
best_address = in_what;
// Check the starting position
bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) +
mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 0;
for (step = 0; step < tot_steps; step++) {
@ -2140,8 +2117,8 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
sad_array);
sad_fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
sad_array);
for (t = 0; t < 4; t++, i++) {
if (sad_array[t] < bestsad) {
@ -2165,7 +2142,7 @@ int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
if (is_mv_in(&x->mv_limits, &this_mv)) {
const uint8_t *const check_here = ss_os[i] + best_address;
unsigned int thissad =
fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
sad_fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
if (thissad < bestsad) {
thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
@ -2507,15 +2484,54 @@ int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
point as the best match, we will do a final 1-away diamond
refining search */
static int full_pixel_diamond(const VP9_COMP *const cpi,
const MACROBLOCK *const x, MV *mvp_full,
int step_param, int sadpb, int further_steps,
int do_refine, int *cost_list,
const MACROBLOCK *const x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int sadpb,
int further_steps, int do_refine,
int use_downsampled_sad, int *cost_list,
const vp9_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv) {
MV temp_mv;
int thissme, n, num00 = 0;
int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param, sadpb, &n, fn_ptr, ref_mv);
int bestsme;
const int src_buf_stride = x->plane[0].src.stride;
const uint8_t *const src_buf = x->plane[0].src.buf;
const MACROBLOCKD *const xd = &x->e_mbd;
const int pred_buf_stride = xd->plane[0].pre[0].stride;
uint8_t *pred_buf;
vp9_sad_fn_ptr_t sad_fn_ptr;
unsigned int start_mv_sad, start_mv_sad_even_rows, start_mv_sad_odd_rows;
const MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
clamp_mv(mvp_full, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
pred_buf =
xd->plane[0].pre[0].buf + mvp_full->row * pred_buf_stride + mvp_full->col;
start_mv_sad_even_rows =
fn_ptr->sdsf(src_buf, src_buf_stride, pred_buf, pred_buf_stride);
start_mv_sad_odd_rows =
fn_ptr->sdsf(src_buf + src_buf_stride, src_buf_stride,
pred_buf + pred_buf_stride, pred_buf_stride);
start_mv_sad = (start_mv_sad_even_rows + start_mv_sad_odd_rows) >> 1;
start_mv_sad += mvsad_err_cost(x, mvp_full, &ref_mv_full, sadpb);
sad_fn_ptr.sdf = fn_ptr->sdf;
sad_fn_ptr.sdx4df = fn_ptr->sdx4df;
if (use_downsampled_sad && num_4x4_blocks_high_lookup[bsize] >= 2) {
// If the absolute difference between the pred-to-src SAD of even rows and
// the pred-to-src SAD of odd rows is small, skip every other row in sad
// computation.
const int odd_to_even_diff_sad =
abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows);
const int mult_thresh = 10;
if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) {
sad_fn_ptr.sdf = fn_ptr->sdsf;
sad_fn_ptr.sdx4df = fn_ptr->sdsx4df;
}
}
bestsme =
cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad, &temp_mv,
step_param, sadpb, &n, &sad_fn_ptr, ref_mv);
if (bestsme < INT_MAX)
bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
*dst_mv = temp_mv;
@ -2530,9 +2546,9 @@ static int full_pixel_diamond(const VP9_COMP *const cpi,
if (num00) {
num00--;
} else {
thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv,
step_param + n, sadpb, &num00, fn_ptr,
ref_mv);
thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, start_mv_sad,
&temp_mv, step_param + n, sadpb, &num00,
&sad_fn_ptr, ref_mv);
if (thissme < INT_MAX)
thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1);
@ -2550,8 +2566,8 @@ static int full_pixel_diamond(const VP9_COMP *const cpi,
if (do_refine) {
const int search_range = 8;
MV best_mv = *dst_mv;
thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr,
ref_mv);
thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range,
&sad_fn_ptr, ref_mv);
if (thissme < INT_MAX)
thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1);
if (thissme < bestsme) {
@ -2560,6 +2576,27 @@ static int full_pixel_diamond(const VP9_COMP *const cpi,
}
}
if (sad_fn_ptr.sdf != fn_ptr->sdf) {
// If we are skipping rows when we perform the motion search, we need to
// check the quality of skipping. If it's bad, then we run search with
// skip row features off.
const uint8_t *best_address = get_buf_from_mv(&xd->plane[0].pre[0], dst_mv);
const int sad =
fn_ptr->sdf(src_buf, src_buf_stride, best_address, pred_buf_stride);
const int skip_sad =
fn_ptr->sdsf(src_buf, src_buf_stride, best_address, pred_buf_stride);
// We will keep the result of skipping rows if it's good enough.
const int kSADThresh =
1 << (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= VPXMAX(sad, 1) * 9) {
// There is a large discrepancy between skipping and not skipping, so we
// need to redo the motion search.
return full_pixel_diamond(cpi, x, bsize, mvp_full, step_param, sadpb,
further_steps, do_refine, 0, cost_list, fn_ptr,
ref_mv, dst_mv);
}
}
// Return cost list.
if (cost_list) {
calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list);
@ -2711,7 +2748,7 @@ int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
const vp9_sad_fn_ptr_t *sad_fn_ptr,
const MV *center_mv) {
const MACROBLOCKD *const xd = &x->e_mbd;
const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
@ -2720,7 +2757,7 @@ int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 };
const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv);
unsigned int best_sad =
fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
sad_fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
@ -2737,7 +2774,8 @@ int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
best_address - 1, best_address + 1,
best_address + in_what->stride };
fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
sad_fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride,
sads);
for (j = 0; j < 4; ++j) {
if (sads[j] < best_sad) {
@ -2757,8 +2795,8 @@ int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
if (is_mv_in(&x->mv_limits, &mv)) {
unsigned int sad =
fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
sad_fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
@ -2875,9 +2913,10 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
break;
case NSTEP:
case MESH:
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
var = full_pixel_diamond(
cpi, x, bsize, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cpi->sf.mv.use_downsampled_sad, cost_list, fn_ptr, ref_mv, tmp_mv);
break;
default: assert(0 && "Unknown search method");
}

Просмотреть файл

@ -41,6 +41,11 @@ typedef struct search_site_config {
int total_steps;
} search_site_config;
// Pair of SAD function pointers passed to the SAD-based full-pixel motion
// searches (vp9_diamond_search_sad_c and vp9_refining_search_sad) in place of
// the full variance function table.
typedef struct vp9_sad_table {
// Single-candidate SAD: called as sdf(src, src_stride, ref, ref_stride) and
// its result is used as an unsigned SAD value.
vpx_sad_fn_t sdf;
// Four-candidate SAD: called with an array of four reference addresses and
// an output array that receives the four SAD values.
vpx_sad_multi_d_fn_t sdx4df;
} vp9_sad_fn_ptr_t;
static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
const MV *mv) {
return &buf->buf[mv->row * buf->stride + mv->col];
@ -63,12 +68,13 @@ int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv,
struct VP9_COMP;
struct SPEED_FEATURES;
struct vp9_sad_table;
int vp9_init_search_range(int size);
int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
int error_per_bit, int search_range,
const struct vp9_variance_vtable *fn_ptr,
const struct vp9_sad_table *sad_fn_ptr,
const struct mv *center_mv);
// Perform integral projection based motion estimation.
@ -94,9 +100,9 @@ extern fractional_mv_step_fp vp9_return_max_sub_pixel_mv;
extern fractional_mv_step_fp vp9_return_min_sub_pixel_mv;
typedef int (*vp9_diamond_search_fn_t)(
const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv,
int search_param, int sad_per_bit, int *num00,
const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv);
const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv,
uint32_t start_mv_sad, MV *best_mv, int search_param, int sad_per_bit,
int *num00, const vp9_sad_fn_ptr_t *sad_fn_ptr, const MV *center_mv);
int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
int search_range,

Просмотреть файл

@ -59,7 +59,7 @@ void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
int i;
CHECK_MEM_ERROR(
cm, this_tile->row_base_thresh_freq_fact,
&cm->error, this_tile->row_base_thresh_freq_fact,
(int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
sizeof(*(this_tile->row_base_thresh_freq_fact))));
for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
@ -85,7 +85,7 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
multi_thread_ctxt->allocated_tile_rows = tile_rows;
multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col;
CHECK_MEM_ERROR(cm, multi_thread_ctxt->job_queue,
CHECK_MEM_ERROR(&cm->error, multi_thread_ctxt->job_queue,
(JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue)));
#if CONFIG_MULTITHREAD

Просмотреть файл

@ -566,23 +566,26 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
// Transform skipping test in UV planes.
for (i = 1; i <= 2; i++) {
struct macroblock_plane *const p = &x->plane[i];
struct macroblockd_plane *const pd = &xd->plane[i];
const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd);
struct macroblock_plane *const p_uv = &x->plane[i];
struct macroblockd_plane *const pd_uv = &xd->plane[i];
const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd_uv);
const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd);
const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd_uv);
const int uv_bw = b_width_log2_lookup[uv_bsize];
const int uv_bh = b_height_log2_lookup[uv_bsize];
const int sf = (uv_bw - b_width_log2_lookup[unit_size]) +
(uv_bh - b_height_log2_lookup[unit_size]);
const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf);
const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf);
const uint32_t uv_dc_thr =
pd_uv->dequant[0] * pd_uv->dequant[0] >> (6 - sf);
const uint32_t uv_ac_thr =
pd_uv->dequant[1] * pd_uv->dequant[1] >> (6 - sf);
int j = i - 1;
vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i);
flag_preduv_computed[i - 1] = 1;
var_uv[j] = cpi->fn_ptr[uv_bsize].vf(
p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]);
var_uv[j] = cpi->fn_ptr[uv_bsize].vf(p_uv->src.buf, p_uv->src.stride,
pd_uv->dst.buf, pd_uv->dst.stride,
&sse_uv[j]);
if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
(sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
@ -1933,15 +1936,15 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
svc->spatial_layer_id > 0 && !gf_temporal_ref) {
if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
struct scale_factors *const ref_sf = &cm->frame_refs[LAST_FRAME - 1].sf;
if (vp9_is_scaled(ref_sf)) {
svc_force_zero_mode[LAST_FRAME - 1] = 1;
inter_layer_ref = LAST_FRAME;
}
}
if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
if (vp9_is_scaled(sf)) {
struct scale_factors *const ref_sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
if (vp9_is_scaled(ref_sf)) {
svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
inter_layer_ref = GOLDEN_FRAME;
}
@ -2772,9 +2775,10 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
if ((cpi->ref_frame_flags & ref_frame_to_flag(ref_frame)) &&
(yv12 != NULL)) {
int_mv *const candidates = mbmi_ext->ref_mvs[ref_frame];
const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf,
sf);
const struct scale_factors *const ref_sf =
&cm->frame_refs[ref_frame - 1].sf;
vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, ref_sf,
ref_sf);
vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col,
mbmi_ext->mode_context);

Просмотреть файл

@ -1150,8 +1150,9 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
if (frame_is_intra_only(cm)) {
if (oxcf->rc_mode == VPX_Q) {
int qindex = cq_level;
double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex = vp9_compute_qdelta(rc, q, q * 0.25, cm->bit_depth);
double qstart = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex =
vp9_compute_qdelta(rc, qstart, qstart * 0.25, cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else if (rc->this_key_frame_forced) {
// Handle the special case for key frames forced when we have reached
@ -1206,12 +1207,14 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
} else if (oxcf->rc_mode == VPX_Q) {
int qindex = cq_level;
double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
double qstart = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
int delta_qindex;
if (cpi->refresh_alt_ref_frame)
delta_qindex = vp9_compute_qdelta(rc, q, q * 0.40, cm->bit_depth);
delta_qindex =
vp9_compute_qdelta(rc, qstart, qstart * 0.40, cm->bit_depth);
else
delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
delta_qindex =
vp9_compute_qdelta(rc, qstart, qstart * 0.50, cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
@ -1219,11 +1222,12 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
} else {
if (oxcf->rc_mode == VPX_Q) {
int qindex = cq_level;
double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
double qstart = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0,
0.70, 1.0, 0.85, 1.0 };
int delta_qindex = vp9_compute_qdelta(
rc, q, q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
rc, qstart,
qstart * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL],
cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
@ -1859,8 +1863,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[KEY_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
if (cpi->use_svc) {
int i = 0;
SVC *svc = &cpi->svc;
int i;
for (i = 0; i < svc->number_temporal_layers; ++i) {
const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
svc->number_temporal_layers);
@ -3269,11 +3272,9 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
MODE_INFO **mi = cm->mi_grid_visible;
int sum_intra_usage = 0;
int mi_row, mi_col;
int tot = 0;
for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++;
tot++;
mi++;
}
mi += 8;

Просмотреть файл

@ -588,15 +588,15 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
if (x->skip_encode && !is_inter_block(xd->mi[0])) {
// TODO(jingning): tune the model to better capture the distortion.
const int64_t p =
const int64_t mean_quant_error =
(pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
(shift + 2 + (bd - 8) * 2);
#else
(shift + 2);
#endif // CONFIG_VP9_HIGHBITDEPTH
*out_dist += (p >> 4);
*out_sse += p;
*out_dist += (mean_quant_error >> 4);
*out_sse += mean_quant_error;
}
} else {
const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
@ -785,13 +785,12 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
const int16_t *const diff =
&p->src_diff[4 * (blk_row * diff_stride + blk_col)];
const int enable_trellis_opt =
const int use_trellis_opt =
do_trellis_opt(pd, diff, diff_stride, blk_row, blk_col, plane_bsize,
tx_size, &encode_b_arg);
// full forward transform and quantization
vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
if (enable_trellis_opt)
vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
if (use_trellis_opt) vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
tx_size, &dist, &sse, recon, sse_calc_done);
} else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) {
@ -1436,7 +1435,6 @@ static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
if (ref_best_rd < 0) is_cost_valid = 0;
if (is_inter_block(mi) && is_cost_valid) {
int plane;
for (plane = 1; plane < MAX_MB_PLANE; ++plane)
vp9_subtract_plane(x, bsize, plane);
}
@ -1900,11 +1898,22 @@ static INLINE int skip_single_mode_based_on_mode_rate(
return 0;
}
#define NUM_ITERS 4
#define MAX_JOINT_MV_SEARCH_ITERS 4
// Returns how many joint motion search iterations to run for the given
// speed-feature level and block size:
//   sf_level <  1 : MAX_JOINT_MV_SEARCH_ITERS for every block size
//   sf_level == 1 : 0 below BLOCK_8X8, 2 up to and including BLOCK_16X16,
//                   MAX_JOINT_MV_SEARCH_ITERS for anything larger
//   sf_level >= 2 : 0
static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) {
  if (sf_level >= 2) return 0;
  if (sf_level < 1) return MAX_JOINT_MV_SEARCH_ITERS;

  // sf_level == 1: the iteration count depends on the block size.
  if (bsize < BLOCK_8X8) return 0;
  if (bsize <= BLOCK_16X16) return 2;
  return MAX_JOINT_MV_SEARCH_ITERS;
}
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int_mv *frame_mv, int mi_row, int mi_col,
int_mv single_newmv[MAX_REF_FRAMES],
int *rate_mv) {
int *rate_mv, int num_iters) {
const VP9_COMMON *const cm = &cpi->common;
const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
@ -1913,7 +1922,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const int refs[2] = { mi->ref_frame[0],
mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
int_mv ref_mv[2];
int_mv iter_mvs[NUM_ITERS][2];
int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2];
int ite, ref;
const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];
struct scale_factors sf;
@ -1934,6 +1943,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif // CONFIG_VP9_HIGHBITDEPTH
// Check that the number of iterations does not exceed the maximum.
assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS);
for (ref = 0; ref < 2; ++ref) {
ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0];
@ -1964,7 +1976,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
// Allow joint search multiple times iteratively for each reference frame
// and break out of the search loop if it couldn't find a better mv.
for (ite = 0; ite < NUM_ITERS; ite++) {
for (ite = 0; ite < num_iters; ite++) {
struct buf_2d ref_yv12[2];
uint32_t bestsme = UINT_MAX;
int sadpb = x->sadperbit16;
@ -2046,7 +2058,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
} else {
break;
}
if (ite < NUM_ITERS - 1) {
if (ite < num_iters - 1) {
iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int;
iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int;
}
@ -2070,7 +2082,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
static int64_t rd_pick_best_sub8x8_mode(
VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
int_mv *second_best_ref_mv, int64_t best_rd_so_far, int *returntotrate,
int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf,
int filter_idx, int mi_row, int mi_col) {
@ -2103,7 +2115,7 @@ static int64_t rd_pick_best_sub8x8_mode(
vp9_zero(*bsi);
bsi->segment_rd = best_rd;
bsi->segment_rd = best_rd_so_far;
bsi->ref_mv[0] = best_ref_mv;
bsi->ref_mv[1] = second_best_ref_mv;
bsi->mvp.as_int = best_ref_mv->as_int;
@ -2129,14 +2141,14 @@ static int64_t rd_pick_best_sub8x8_mode(
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
PREDICTION_MODE mode_selected = ZEROMV;
int64_t best_rd = INT64_MAX;
const int i = idy * 2 + idx;
const int block = idy * 2 + idx;
int ref;
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
frame_mv[ZEROMV][frame].as_int = 0;
vp9_append_sub8x8_mvs_for_idx(
cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
cm, xd, block, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
&frame_mv[NEARMV][frame], mbmi_ext->mode_context);
}
@ -2146,7 +2158,7 @@ static int64_t rd_pick_best_sub8x8_mode(
struct buf_2d orig_pre[2];
mode_idx = INTER_OFFSET(this_mode);
bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
bsi->rdstat[block][mode_idx].brdcost = INT64_MAX;
if (!(inter_mode_mask & (1 << this_mode))) continue;
if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
@ -2154,14 +2166,14 @@ static int64_t rd_pick_best_sub8x8_mode(
continue;
memcpy(orig_pre, pd->pre, sizeof(orig_pre));
memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
sizeof(bsi->rdstat[i][mode_idx].ta));
memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
sizeof(bsi->rdstat[i][mode_idx].tl));
memcpy(bsi->rdstat[block][mode_idx].ta, t_above,
sizeof(bsi->rdstat[block][mode_idx].ta));
memcpy(bsi->rdstat[block][mode_idx].tl, t_left,
sizeof(bsi->rdstat[block][mode_idx].tl));
// motion search for newmv (single predictor case only)
if (!has_second_rf && this_mode == NEWMV &&
seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV) {
MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
int step_param = 0;
uint32_t bestsme = UINT_MAX;
@ -2177,12 +2189,13 @@ static int64_t rd_pick_best_sub8x8_mode(
if (cpi->oxcf.mode != BEST) {
// use previous block's result as next block's MV predictor.
if (i > 0) {
bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
if (block > 0) {
bsi->mvp.as_int = mi->bmi[block - 1].as_mv[0].as_int;
if (block == 2)
bsi->mvp.as_int = mi->bmi[block - 2].as_mv[0].as_int;
}
}
if (i == 0)
if (block == 0)
max_mv = x->max_mv_context[mi->ref_frame[0]];
else
max_mv =
@ -2211,7 +2224,7 @@ static int64_t rd_pick_best_sub8x8_mode(
}
// adjust src pointer for this block
mi_buf_shift(x, i);
mi_buf_shift(x, block);
vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);
@ -2234,7 +2247,7 @@ static int64_t rd_pick_best_sub8x8_mode(
cpi->sf.use_accurate_subpel_search);
// save motion search result for use in compound prediction
seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
seg_mvs[block][mi->ref_frame[0]].as_mv = *new_mv;
}
x->pred_mv[mi->ref_frame[0]] = *new_mv;
@ -2244,40 +2257,44 @@ static int64_t rd_pick_best_sub8x8_mode(
}
if (has_second_rf) {
if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV ||
seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV)
if (seg_mvs[block][mi->ref_frame[1]].as_int == INVALID_MV ||
seg_mvs[block][mi->ref_frame[0]].as_int == INVALID_MV)
continue;
}
if (has_second_rf && this_mode == NEWMV &&
mi->interp_filter == EIGHTTAP) {
// Decide number of joint motion search iterations
const int num_joint_search_iters = get_joint_search_iters(
cpi->sf.comp_inter_joint_search_iter_level, bsize);
// adjust src pointers
mi_buf_shift(x, i);
if (sf->comp_inter_joint_search_thresh <= bsize) {
mi_buf_shift(x, block);
if (num_joint_search_iters) {
int rate_mv;
joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
mi_col, seg_mvs[i], &rate_mv);
seg_mvs[i][mi->ref_frame[0]].as_int =
mi_col, seg_mvs[block], &rate_mv,
num_joint_search_iters);
seg_mvs[block][mi->ref_frame[0]].as_int =
frame_mv[this_mode][mi->ref_frame[0]].as_int;
seg_mvs[i][mi->ref_frame[1]].as_int =
seg_mvs[block][mi->ref_frame[1]].as_int =
frame_mv[this_mode][mi->ref_frame[1]].as_int;
}
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
}
bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i],
bsi->ref_mv, x->nmvjointcost, x->mvcost);
bsi->rdstat[block][mode_idx].brate = set_and_cost_bmi_mvs(
cpi, x, xd, block, this_mode, mode_mv[this_mode], frame_mv,
seg_mvs[block], bsi->ref_mv, x->nmvjointcost, x->mvcost);
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
bsi->rdstat[i][mode_idx].mvs[ref].as_int =
bsi->rdstat[block][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
bsi->rdstat[block + 1][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
bsi->rdstat[block + 2][mode_idx].mvs[ref].as_int =
mode_mv[this_mode][ref].as_int;
}
@ -2295,7 +2312,7 @@ static int64_t rd_pick_best_sub8x8_mode(
for (ref = 0; ref < 1 + has_second_rf; ++ref) {
subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
have_ref &= mode_mv[this_mode][ref].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int;
}
if (filter_idx > 1 && !subpelmv && !have_ref) {
@ -2303,53 +2320,55 @@ static int64_t rd_pick_best_sub8x8_mode(
have_ref = 1;
for (ref = 0; ref < 1 + has_second_rf; ++ref)
have_ref &= mode_mv[this_mode][ref].as_int ==
ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
ref_bsi->rdstat[block][mode_idx].mvs[ref].as_int;
}
if (!subpelmv && have_ref &&
ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
sizeof(SEG_RDSTAT));
ref_bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) {
memcpy(&bsi->rdstat[block][mode_idx],
&ref_bsi->rdstat[block][mode_idx], sizeof(SEG_RDSTAT));
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].eobs =
ref_bsi->rdstat[i + 1][mode_idx].eobs;
bsi->rdstat[block + 1][mode_idx].eobs =
ref_bsi->rdstat[block + 1][mode_idx].eobs;
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].eobs =
ref_bsi->rdstat[i + 2][mode_idx].eobs;
bsi->rdstat[block + 2][mode_idx].eobs =
ref_bsi->rdstat[block + 2][mode_idx].eobs;
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
if (bsi->rdstat[block][mode_idx].brdcost < best_rd) {
mode_selected = this_mode;
best_rd = bsi->rdstat[i][mode_idx].brdcost;
best_rd = bsi->rdstat[block][mode_idx].brdcost;
}
continue;
}
}
bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
cpi, x, bsi->segment_rd - this_segment_rd, i,
&bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
&bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
bsi->rdstat[i][mode_idx].tl, mi_row, mi_col);
if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
bsi->rdstat[i][mode_idx].brdcost +=
RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
bsi->rdstat[block][mode_idx].brdcost = encode_inter_mb_segment(
cpi, x, bsi->segment_rd - this_segment_rd, block,
&bsi->rdstat[block][mode_idx].byrate,
&bsi->rdstat[block][mode_idx].bdist,
&bsi->rdstat[block][mode_idx].bsse, bsi->rdstat[block][mode_idx].ta,
bsi->rdstat[block][mode_idx].tl, mi_row, mi_col);
if (bsi->rdstat[block][mode_idx].brdcost < INT64_MAX) {
bsi->rdstat[block][mode_idx].brdcost += RDCOST(
x->rdmult, x->rddiv, bsi->rdstat[block][mode_idx].brate, 0);
bsi->rdstat[block][mode_idx].brate +=
bsi->rdstat[block][mode_idx].byrate;
bsi->rdstat[block][mode_idx].eobs = p->eobs[block];
if (num_4x4_blocks_wide > 1)
bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
bsi->rdstat[block + 1][mode_idx].eobs = p->eobs[block + 1];
if (num_4x4_blocks_high > 1)
bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
bsi->rdstat[block + 2][mode_idx].eobs = p->eobs[block + 2];
}
if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
if (bsi->rdstat[block][mode_idx].brdcost < best_rd) {
mode_selected = this_mode;
best_rd = bsi->rdstat[i][mode_idx].brdcost;
best_rd = bsi->rdstat[block][mode_idx].brdcost;
}
} /*for each 4x4 mode*/
if (best_rd == INT64_MAX) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
for (iy = block + 1; iy < 4; ++iy)
for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
@ -2357,22 +2376,22 @@ static int64_t rd_pick_best_sub8x8_mode(
}
mode_idx = INTER_OFFSET(mode_selected);
memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
memcpy(t_above, bsi->rdstat[block][mode_idx].ta, sizeof(t_above));
memcpy(t_left, bsi->rdstat[block][mode_idx].tl, sizeof(t_left));
set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
x->mvcost);
set_and_cost_bmi_mvs(cpi, x, xd, block, mode_selected,
mode_mv[mode_selected], frame_mv, seg_mvs[block],
bsi->ref_mv, x->nmvjointcost, x->mvcost);
br += bsi->rdstat[i][mode_idx].brate;
bd += bsi->rdstat[i][mode_idx].bdist;
block_sse += bsi->rdstat[i][mode_idx].bsse;
segmentyrate += bsi->rdstat[i][mode_idx].byrate;
this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
br += bsi->rdstat[block][mode_idx].brate;
bd += bsi->rdstat[block][mode_idx].bdist;
block_sse += bsi->rdstat[block][mode_idx].bsse;
segmentyrate += bsi->rdstat[block][mode_idx].byrate;
this_segment_rd += bsi->rdstat[block][mode_idx].brdcost;
if (this_segment_rd > bsi->segment_rd) {
int iy, midx;
for (iy = i + 1; iy < 4; ++iy)
for (iy = block + 1; iy < 4; ++iy)
for (midx = 0; midx < INTER_MODES; ++midx)
bsi->rdstat[iy][midx].brdcost = INT64_MAX;
bsi->segment_rd = INT64_MAX;
@ -2390,7 +2409,7 @@ static int64_t rd_pick_best_sub8x8_mode(
// update the coding decisions
for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
if (bsi->segment_rd > best_rd) return INT64_MAX;
if (bsi->segment_rd > best_rd_so_far) return INT64_MAX;
/* set it to the best */
for (i = 0; i < 4; i++) {
mode_idx = INTER_OFFSET(bsi->modes[i]);
@ -2635,9 +2654,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
tmp_mv->as_int = INVALID_MV;
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
xd->plane[i].pre[0] = backup_yv12[i];
int j;
for (j = 0; j < MAX_MB_PLANE; ++j)
xd->plane[j].pre[0] = backup_yv12[j];
}
return;
}
@ -2877,16 +2896,20 @@ static int64_t handle_inter_mode(
if (this_mode == NEWMV) {
int rate_mv;
if (is_comp_pred) {
// Decide number of joint motion search iterations
const int num_joint_search_iters = get_joint_search_iters(
cpi->sf.comp_inter_joint_search_iter_level, bsize);
// Initialize mv using single prediction mode result.
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
if (num_joint_search_iters) {
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, joint_motion_search_time);
#endif
joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
single_newmv, &rate_mv);
single_newmv, &rate_mv, num_joint_search_iters);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, joint_motion_search_time);
#endif
@ -2967,8 +2990,14 @@ static int64_t handle_inter_mode(
// rate.
if (skip_single_mode_based_on_mode_rate(mode_mv, single_mode_rate,
this_mode, refs[0], *rate2,
best_mode_index))
best_mode_index)) {
// When the single inter mode is pruned, check that NEARESTMV and NEWMV
// have not both been terminated early. This ensures that the speed
// feature never causes all of the single modes to be skipped.
assert(single_mode_rate[INTER_OFFSET(NEARESTMV)] != INT_MAX ||
single_mode_rate[INTER_OFFSET(NEWMV)] != INT_MAX);
return INT64_MAX;
}
}
if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
mi->mode != NEARESTMV)
@ -4346,7 +4375,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
int skippable = 0;
int i;
int this_skip2 = 0;
int64_t total_sse = INT_MAX;
int early_term = 0;
@ -4507,7 +4535,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
: NULL;
if (scaled_ref_frame[ref]) {
int i;
// Swap out the reference frame for a version that's been scaled to
// match the resolution of the current frame, allowing the existing
// motion search code to be used without additional modifications.
@ -4651,7 +4678,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
&uv_sse, BLOCK_8X8, tmp_best_rdu)) {
for (ref = 0; ref < 2; ++ref) {
if (scaled_ref_frame[ref]) {
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}
@ -4668,7 +4694,6 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
for (ref = 0; ref < 2; ++ref) {
if (scaled_ref_frame[ref]) {
// Restore the prediction frame pointers to their unscaled versions.
int i;
for (i = 0; i < MAX_MB_PLANE; ++i)
xd->plane[i].pre[ref] = backup_yv12[ref][i];
}

Просмотреть файл

@ -231,6 +231,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->allow_skip_recode = 1;
sf->less_rectangular_check = 1;
sf->mv.auto_mv_step_size = 1;
sf->mv.use_downsampled_sad = 1;
sf->prune_ref_frame_for_rect_partitions = 1;
sf->temporal_filter_search_method = NSTEP;
sf->tx_size_search_breakout = 1;
@ -243,6 +244,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->trellis_opt_tx_rd.thresh = boosted ? 4.0 : 3.0;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
sf->comp_inter_joint_search_iter_level = 1;
// Reference masking is not supported in dynamic scaling mode.
sf->reference_masking = oxcf->resize_mode != RESIZE_DYNAMIC;
@ -330,7 +332,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
: FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR;
sf->disable_filter_search_var_thresh = 100;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->comp_inter_joint_search_iter_level = 2;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->recode_tolerance_high = 45;
sf->enhanced_full_pixel_motion_search = 0;
@ -396,7 +398,6 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
}
if (speed >= 5) {
int i;
sf->optimize_coefficients = 0;
sf->mv.search_method = HEX;
sf->disable_filter_search_var_thresh = 500;
@ -530,7 +531,7 @@ static void set_rt_speed_feature_framesize_independent(
}
sf->disable_filter_search_var_thresh = 50;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->comp_inter_joint_search_iter_level = 2;
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->lf_motion_threshold = LOW_MOTION_THRESHOLD;
sf->adjust_partitioning_from_last_frame = 1;
@ -675,7 +676,7 @@ static void set_rt_speed_feature_framesize_independent(
if (cpi->content_state_sb_fd == NULL &&
(!cpi->use_svc ||
svc->spatial_layer_id == svc->number_spatial_layers - 1)) {
CHECK_MEM_ERROR(cm, cpi->content_state_sb_fd,
CHECK_MEM_ERROR(&cm->error, cpi->content_state_sb_fd,
(uint8_t *)vpx_calloc(
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(uint8_t)));
@ -831,13 +832,13 @@ static void set_rt_speed_feature_framesize_independent(
}
if (cpi->count_arf_frame_usage == NULL) {
CHECK_MEM_ERROR(
cm, cpi->count_arf_frame_usage,
&cm->error, cpi->count_arf_frame_usage,
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_arf_frame_usage)));
}
if (cpi->count_lastgolden_frame_usage == NULL)
CHECK_MEM_ERROR(
cm, cpi->count_lastgolden_frame_usage,
&cm->error, cpi->count_lastgolden_frame_usage,
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_lastgolden_frame_usage)));
}
@ -927,7 +928,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) {
sf->coeff_prob_appx_step = 1;
sf->mv.auto_mv_step_size = 0;
sf->mv.fullpel_search_step_param = 6;
sf->comp_inter_joint_search_thresh = BLOCK_4X4;
sf->mv.use_downsampled_sad = 0;
sf->comp_inter_joint_search_iter_level = 0;
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;

Просмотреть файл

@ -210,6 +210,10 @@ typedef struct MV_SPEED_FEATURES {
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
// Whether to downsample the rows in sad calculation during motion search.
// This is only active when there are at least 8 rows.
int use_downsampled_sad;
} MV_SPEED_FEATURES;
typedef struct PARTITION_SEARCH_BREAKOUT_THR {
@ -282,11 +286,20 @@ typedef struct SPEED_FEATURES {
// adds overhead.
int static_segmentation;
// If 1 we iterate finding a best reference for 2 ref frames together - via
// a log search that iterates 4 times (check around mv for last for best
// error of combined predictor then check around mv for alt). If 0 we
// just use the best motion vector found for each frame by itself.
BLOCK_SIZE comp_inter_joint_search_thresh;
// The best compound predictor is found using an iterative log search process
// that searches for best ref0 mv using error of combined predictor and then
// searches for best ref1 mv. This sf determines the number of iterations of
// this process based on block size. The sf becomes more aggressive from level
// 0 to 2. The following table indicates the number of iterations w.r.t bsize:
// -----------------------------------------------
// |sf (level)|bsize < 8X8| [8X8, 16X16] | > 16X16 |
// | 0 | 4 | 4 | 4 |
// | 1 | 0 | 2 | 4 |
// | 2 | 0 | 0 | 0 |
// -----------------------------------------------
// Here, 0 iterations indicate using the best single motion vector selected
// for each ref frame without any iterative refinement.
int comp_inter_joint_search_iter_level;
// This variable is used to cap the maximum number of times we skip testing a
// mode to be evaluated. A high value means we will be faster.

Просмотреть файл

@ -107,7 +107,6 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
RATE_CONTROL *const lrc = &lc->rc;
int i;
lc->current_video_frame_in_layer = 0;
lc->layer_size = 0;
lc->frames_from_key_frame = 0;
@ -164,17 +163,17 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lc->actual_num_seg1_blocks = 0;
lc->actual_num_seg2_blocks = 0;
lc->counter_encode_maxq_scene_change = 0;
CHECK_MEM_ERROR(cm, lc->map,
CHECK_MEM_ERROR(&cm->error, lc->map,
vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
last_coded_q_map_size =
mi_rows * mi_cols * sizeof(*lc->last_coded_q_map);
CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
CHECK_MEM_ERROR(&cm->error, lc->last_coded_q_map,
vpx_malloc(last_coded_q_map_size));
assert(MAXQ <= 255);
memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv);
CHECK_MEM_ERROR(cm, lc->consec_zero_mv,
CHECK_MEM_ERROR(&cm->error, lc->consec_zero_mv,
vpx_malloc(consec_zero_mv_size));
memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
}
@ -799,9 +798,9 @@ int vp9_one_pass_svc_start_layer(VP9_COMP *const cpi) {
for (sl = svc->number_spatial_layers - 1;
sl >= svc->first_spatial_layer_to_encode; sl--) {
int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id;
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
cpi->rc = lc->rc;
cpi->oxcf.target_bandwidth = lc->target_bandwidth;
LAYER_CONTEXT *const sl_lc = &svc->layer_context[layer];
cpi->rc = sl_lc->rc;
cpi->oxcf.target_bandwidth = sl_lc->target_bandwidth;
if (vp9_test_drop(cpi)) {
int sl2;
// Set flag to force drop in encoding for this mode.
@ -1050,17 +1049,17 @@ void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
int sl, tl;
for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
// Check for reset based on avg_frame_bandwidth for spatial layer sl.
int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
const int spatial_layer_idx = LAYER_IDS_TO_IDX(
sl, svc->number_temporal_layers - 1, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[spatial_layer_idx];
RATE_CONTROL *lrc = &lc->rc;
if (lrc->avg_frame_bandwidth > (3 * lrc->last_avg_frame_bandwidth >> 1) ||
lrc->avg_frame_bandwidth < (lrc->last_avg_frame_bandwidth >> 1)) {
// Reset for all temporal layers with spatial layer sl.
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
int temporal_layer_idx =
LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
lrc = &svc->layer_context[temporal_layer_idx].rc;
lrc->rc_1_frame = 0;
lrc->rc_2_frame = 0;
lrc->bits_off_target = lrc->optimal_buffer_level;

Просмотреть файл

@ -450,8 +450,6 @@ void vp9_highbd_apply_temporal_filter_c(
// Apply the filter to luma
for (row = 0; row < (int)block_height; row++) {
for (col = 0; col < (int)block_width; col++) {
const int uv_row = row >> ss_y;
const int uv_col = col >> ss_x;
const int filter_weight = get_filter_weight(
row, col, block_height, block_width, blk_fw, use_32x32);
@ -476,6 +474,8 @@ void vp9_highbd_apply_temporal_filter_c(
// Sum the corresponding uv pixels to the current y modifier
// Note we are rounding down instead of rounding to the nearest pixel.
uv_row = row >> ss_y;
uv_col = col >> ss_x;
y_mod += u_diff_sse[uv_row * uv_diff_stride + uv_col];
y_mod += v_diff_sse[uv_row * uv_diff_stride + uv_col];

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше