Bug 1476408 - Update libaom to rev b25610052a1398032320008d69b51d2da94f5928; r=TD-Linux

Tags: #secure-revision

Bug #: 1476408

Differential Revision: https://phabricator.services.mozilla.com/D2358

--HG--
extra : rebase_source : fa2438ada27a67e400617705014460b6d5ff485c
This commit is contained in:
Dan Minor 2018-07-23 14:28:45 -04:00
Родитель d421ba3540
Коммит 48e87ceaf3
240 изменённых файлов: 16984 добавлений и 6146 удалений

Просмотреть файл

@ -22,4 +22,4 @@ To update to a fork, use
The last update was pulled from https://aomedia.googlesource.com/aom/
The git commit ID used was d14c5bb4f336ef1842046089849dee4a301fbbf0 (Mon Jun 25 07:54:59 2018 -0700).
The git commit ID used was b25610052a1398032320008d69b51d2da94f5928 (Mon Jul 23 18:08:58 2018 +0000).

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -63,22 +63,22 @@ void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_c
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_convolve_2d_copy_sr av1_convolve_2d_copy_sr_c
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
#define av1_convolve_2d_scale av1_convolve_2d_scale_c
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_convolve_2d_sr av1_convolve_2d_sr_c
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
#define av1_convolve_horiz_rs av1_convolve_horiz_rs_c
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_convolve_x_sr av1_convolve_x_sr_c
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_convolve_y_sr av1_convolve_y_sr_c
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
@ -108,13 +108,13 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_copy_sr av1_highbd_convolve_2d_copy_sr_c
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_sr av1_highbd_convolve_2d_sr_c
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
@ -126,10 +126,10 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
#define av1_highbd_convolve_horiz_rs av1_highbd_convolve_horiz_rs_c
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_x_sr av1_highbd_convolve_x_sr_c
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_y_sr av1_highbd_convolve_y_sr_c
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
@ -147,16 +147,16 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_2d av1_highbd_jnt_convolve_2d_c
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_2d_copy av1_highbd_jnt_convolve_2d_copy_c
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_x av1_highbd_jnt_convolve_x_c
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_y av1_highbd_jnt_convolve_y_c
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -225,16 +225,16 @@ void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#define av1_inv_txfm_add av1_inv_txfm_add_c
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_jnt_convolve_2d av1_jnt_convolve_2d_c
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_jnt_convolve_2d_copy av1_jnt_convolve_2d_copy_c
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_jnt_convolve_x av1_jnt_convolve_x_c
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
#define av1_jnt_convolve_y av1_jnt_convolve_y_c
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,

Просмотреть файл

@ -24,6 +24,7 @@
.equ CONFIG_COLLECT_INTER_MODE_RD_STATS, 1
.equ CONFIG_COLLECT_RD_STATS, 0
.equ CONFIG_DEBUG, 0
.equ CONFIG_DENOISE, 0
.equ CONFIG_DIST_8X8, 1
.equ CONFIG_ENTROPY_STATS, 0
.equ CONFIG_FILEOPTIONS, 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -1041,13 +1041,15 @@ void aom_lowbd_blend_a64_d16_mask_neon(uint8_t *dst, uint32_t dst_stride, const
RTCD_EXTERN void (*aom_lowbd_blend_a64_d16_mask)(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
void aom_lpf_horizontal_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define aom_lpf_horizontal_14 aom_lpf_horizontal_14_c
void aom_lpf_horizontal_14_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*aom_lpf_horizontal_14)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void aom_lpf_horizontal_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define aom_lpf_horizontal_14_dual aom_lpf_horizontal_14_dual_c
void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define aom_lpf_horizontal_4 aom_lpf_horizontal_4_c
void aom_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c
@ -1074,13 +1076,15 @@ void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, c
#define aom_lpf_vertical_14_dual aom_lpf_vertical_14_dual_c
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define aom_lpf_vertical_4 aom_lpf_vertical_4_c
void aom_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define aom_lpf_vertical_6 aom_lpf_vertical_6_c
void aom_lpf_vertical_6_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*aom_lpf_vertical_6)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define aom_lpf_vertical_6_dual aom_lpf_vertical_6_dual_c
@ -1468,12 +1472,20 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_neon;
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
if (flags & HAS_NEON) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_neon;
aom_lpf_horizontal_14 = aom_lpf_horizontal_14_c;
if (flags & HAS_NEON) aom_lpf_horizontal_14 = aom_lpf_horizontal_14_neon;
aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c;
if (flags & HAS_NEON) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_neon;
aom_lpf_horizontal_6 = aom_lpf_horizontal_6_c;
if (flags & HAS_NEON) aom_lpf_horizontal_6 = aom_lpf_horizontal_6_neon;
aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c;
if (flags & HAS_NEON) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_neon;
aom_lpf_vertical_14 = aom_lpf_vertical_14_c;
if (flags & HAS_NEON) aom_lpf_vertical_14 = aom_lpf_vertical_14_neon;
aom_lpf_vertical_4 = aom_lpf_vertical_4_c;
if (flags & HAS_NEON) aom_lpf_vertical_4 = aom_lpf_vertical_4_neon;
aom_lpf_vertical_6 = aom_lpf_vertical_6_c;
if (flags & HAS_NEON) aom_lpf_vertical_6 = aom_lpf_vertical_6_neon;
aom_lpf_vertical_8 = aom_lpf_vertical_8_c;
if (flags & HAS_NEON) aom_lpf_vertical_8 = aom_lpf_vertical_8_neon;
aom_v_predictor_16x16 = aom_v_predictor_16x16_c;

Просмотреть файл

@ -52,7 +52,8 @@ extern "C" {
#endif
void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
#define apply_selfguided_restoration apply_selfguided_restoration_c
void apply_selfguided_restoration_neon(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
#define av1_build_compound_diffwtd_mask av1_build_compound_diffwtd_mask_c
@ -64,27 +65,27 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_M
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_c
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
#define av1_convolve_2d_scale av1_convolve_2d_scale_c
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
#define av1_convolve_horiz_rs av1_convolve_horiz_rs_c
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -113,13 +114,13 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_copy_sr av1_highbd_convolve_2d_copy_sr_c
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_2d_sr av1_highbd_convolve_2d_sr_c
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
@ -131,10 +132,10 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
#define av1_highbd_convolve_horiz_rs av1_highbd_convolve_horiz_rs_c
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_x_sr av1_highbd_convolve_x_sr_c
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_convolve_y_sr av1_highbd_convolve_y_sr_c
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
@ -152,16 +153,16 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_2d av1_highbd_jnt_convolve_2d_c
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_2d_copy av1_highbd_jnt_convolve_2d_copy_c
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_x av1_highbd_jnt_convolve_x_c
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
#define av1_highbd_jnt_convolve_y av1_highbd_jnt_convolve_y_c
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -228,28 +229,34 @@ void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride
#define av1_inv_txfm2d_add_8x8 av1_inv_txfm2d_add_8x8_c
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
#define av1_inv_txfm_add av1_inv_txfm_add_c
void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
#define av1_selfguided_restoration av1_selfguided_restoration_c
void av1_selfguided_restoration_neon(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
RTCD_EXTERN void (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd);
void av1_upsample_intra_edge_c(uint8_t *p, int sz);
#define av1_upsample_intra_edge av1_upsample_intra_edge_c
@ -328,6 +335,8 @@ static void setup_rtcd_internal(void)
(void)flags;
apply_selfguided_restoration = apply_selfguided_restoration_c;
if (flags & HAS_NEON) apply_selfguided_restoration = apply_selfguided_restoration_neon;
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
if (flags & HAS_NEON) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_neon;
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
@ -338,6 +347,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) av1_convolve_x_sr = av1_convolve_x_sr_neon;
av1_convolve_y_sr = av1_convolve_y_sr_c;
if (flags & HAS_NEON) av1_convolve_y_sr = av1_convolve_y_sr_neon;
av1_inv_txfm_add = av1_inv_txfm_add_c;
if (flags & HAS_NEON) av1_inv_txfm_add = av1_inv_txfm_add_neon;
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
if (flags & HAS_NEON) av1_jnt_convolve_2d = av1_jnt_convolve_2d_neon;
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
@ -346,6 +357,8 @@ static void setup_rtcd_internal(void)
if (flags & HAS_NEON) av1_jnt_convolve_x = av1_jnt_convolve_x_neon;
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
if (flags & HAS_NEON) av1_jnt_convolve_y = av1_jnt_convolve_y_neon;
av1_selfguided_restoration = av1_selfguided_restoration_c;
if (flags & HAS_NEON) av1_selfguided_restoration = av1_selfguided_restoration_neon;
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
if (flags & HAS_NEON) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_neon;
cdef_filter_block = cdef_filter_block_c;

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
CONFIG_COLLECT_RD_STATS equ 0
CONFIG_DEBUG equ 0
CONFIG_DENOISE equ 0
CONFIG_DIST_8X8 equ 1
CONFIG_ENTROPY_STATS equ 0
CONFIG_FILEOPTIONS equ 1

Просмотреть файл

@ -26,6 +26,7 @@
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
#define CONFIG_COLLECT_RD_STATS 0
#define CONFIG_DEBUG 0
#define CONFIG_DENOISE 0
#define CONFIG_DIST_8X8 1
#define CONFIG_ENTROPY_STATS 0
#define CONFIG_FILEOPTIONS 1

Просмотреть файл

@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,

Просмотреть файл

@ -58,14 +58,15 @@ files = {
'../../third_party/aom/aom_util/debug_util.c',
'../../third_party/aom/av1/av1_dx_iface.c',
'../../third_party/aom/av1/common/alloccommon.c',
'../../third_party/aom/av1/common/arm/av1_inv_txfm_neon.c',
'../../third_party/aom/av1/common/arm/av1_txfm_neon.c',
'../../third_party/aom/av1/common/arm/blend_a64_hmask_neon.c',
'../../third_party/aom/av1/common/arm/blend_a64_vmask_neon.c',
'../../third_party/aom/av1/common/arm/cfl_neon.c',
'../../third_party/aom/av1/common/arm/convolve_neon.c',
'../../third_party/aom/av1/common/arm/intrapred_neon.c',
'../../third_party/aom/av1/common/arm/jnt_convolve_neon.c',
'../../third_party/aom/av1/common/arm/reconinter_neon.c',
'../../third_party/aom/av1/common/arm/selfguided_neon.c',
'../../third_party/aom/av1/common/arm/wiener_convolve_neon.c',
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
@ -82,7 +83,6 @@ files = {
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/filter.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
@ -109,6 +109,8 @@ files = {
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
'../../third_party/aom/av1/encoder/arm/neon/quantize_neon.c',
'../../third_party/aom/stats/aomstats.c',
'../../third_party/aom/stats/rate_hist.c',
],
'GENERIC_EXPORTS': [
'../../third_party/aom/aom/aom.h',
@ -174,7 +176,6 @@ files = {
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/filter.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
@ -200,6 +201,8 @@ files = {
'../../third_party/aom/av1/decoder/detokenize.c',
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
'../../third_party/aom/stats/aomstats.c',
'../../third_party/aom/stats/rate_hist.c',
],
'IA32_EXPORTS': [
'../../third_party/aom/aom/aom.h',
@ -298,7 +301,6 @@ files = {
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/filter.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
@ -361,6 +363,8 @@ files = {
'../../third_party/aom/av1/decoder/detokenize.c',
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
'../../third_party/aom/stats/aomstats.c',
'../../third_party/aom/stats/rate_hist.c',
],
'X64_EXPORTS': [
'../../third_party/aom/aom/aom.h',
@ -458,7 +462,6 @@ files = {
'../../third_party/aom/av1/common/entropy.c',
'../../third_party/aom/av1/common/entropymode.c',
'../../third_party/aom/av1/common/entropymv.c',
'../../third_party/aom/av1/common/filter.c',
'../../third_party/aom/av1/common/frame_buffers.c',
'../../third_party/aom/av1/common/idct.c',
'../../third_party/aom/av1/common/mvref_common.c',
@ -521,5 +524,7 @@ files = {
'../../third_party/aom/av1/decoder/detokenize.c',
'../../third_party/aom/av1/decoder/dthread.c',
'../../third_party/aom/av1/decoder/obu.c',
'../../third_party/aom/stats/aomstats.c',
'../../third_party/aom/stats/rate_hist.c',
],
}

1
third_party/aom/.cmake-format.py поставляемый
Просмотреть файл

@ -1,3 +1,4 @@
# Generated with cmake-format 0.3.6
# How wide to allow formatted cmake files
line_width = 80

630
third_party/aom/CHANGELOG поставляемый
Просмотреть файл

@ -1,631 +1,5 @@
Next Release
- Incompatible changes:
The AV1 encoder's default keyframe interval changed to 128 from 9999.
Support for armv6 was removed.
2018-06-28 v1.0.0
AOMedia Codec Workgroup Approved version 1.0
2016-04-07 v0.1.0 "AOMedia Codec 1"
This release is the first Alliance for Open Media codec.
2015-11-09 v1.5.0 "Javan Whistling Duck"
This release improves upon the VP9 encoder and speeds up the encoding and
decoding processes.
- Upgrading:
This release is ABI incompatible with 1.4.0. It drops deprecated VP8
controls and adds a variety of VP9 controls for testing.
The vpxenc utility now prefers VP9 by default.
- Enhancements:
Faster VP9 encoding and decoding
Smaller library size by combining functions used by VP8 and VP9
- Bug Fixes:
A variety of fuzzing issues
2015-04-03 v1.4.0 "Indian Runner Duck"
This release includes significant improvements to the VP9 codec.
- Upgrading:
This release is ABI incompatible with 1.3.0. It drops the compatibility
layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
controls for VP9.
- Enhancements:
Faster VP9 encoding and decoding
Multithreaded VP9 decoding (tile and frame-based)
Multithreaded VP9 encoding - on by default
YUV 4:2:2 and 4:4:4 support in VP9
10 and 12bit support in VP9
64bit ARM support by replacing ARM assembly with intrinsics
- Bug Fixes:
Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
files.
- Known Issues:
Frame Parallel decoding fails for segmented and non-420 files.
2013-11-15 v1.3.0 "Forest"
This release introduces the VP9 codec in a backward-compatible way.
All existing users of VP8 can continue to use the library without
modification. However, some VP8 options do not map to VP9 in the same manner.
The VP9 encoder in this release is not feature complete. Users interested in
the encoder are advised to use the git master branch and discuss issues on
libvpx mailing lists.
- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this document
for that release.
- Enhancements:
Get rid of bashisms in the main build scripts
Added usage info on command line options
Add lossless compression mode
Dll build of libvpx
Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
Add option to disable documentation
configure: add --enable-external-build support
make: support V=1 as short form of verbose=yes
configure: support mingw-w64
configure: support hardfloat armv7 CHOSTS
configure: add support for android x86
Add estimated completion time to vpxenc
Don't exit on decode errors in vpxenc
vpxenc: support scaling prior to encoding
vpxdec: support scaling output
vpxenc: improve progress indicators with --skip
msvs: Don't link to winmm.lib
Add a new script for producing vcxproj files
Produce Visual Studio 10 and 11 project files
Produce Windows Phone project files
msvs-build: use msbuild for vs >= 2005
configure: default configure log to config.log
Add encoding option --static-thresh
- Speed:
Miscellaneous speed optimizations for VP8 and VP9.
- Quality:
In general, quality is consistent with the Eider release.
- Bug Fixes:
This release represents approximately a year of engineering effort,
and contains multiple bug fixes. Please refer to git history for details.
2012-12-21 v1.2.0
This release acts as a checkpoint for a large amount of internal refactoring
and testing. It also contains a number of small bugfixes, so all users are
encouraged to upgrade.
- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this
document for that release.
- Enhancements:
VP8 optimizations for MIPS dspr2
vpxenc: add -quiet option
- Speed:
Encoder and decoder speed is consistent with the Eider release.
- Quality:
In general, quality is consistent with the Eider release.
Minor tweaks to ARNR filtering
Minor improvements to real time encoding with multiple temporal layers
- Bug Fixes:
Fixes multithreaded encoder race condition in loopfilter
Fixes multi-resolution threaded encoding
Fix potential encoder dead-lock after picture resize
2012-05-09 v1.1.0 "Eider"
This introduces a number of enhancements, mostly focused on real-time
encoding. In addition, it fixes a decoder bug (first introduced in
Duclair) so all users of that release are encouraged to upgrade.
- Upgrading:
This release is ABI and API compatible with Duclair (v1.0.0). Users
of older releases should refer to the Upgrading notes in this
document for that release.
This release introduces a new temporal denoiser, controlled by the
VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
currently take a strength parameter, so the control is effectively
a boolean - zero (off) or non-zero (on). For compatibility with
existing applications, the values accepted are the same as those
for the spatial denoiser (0-6). The temporal denoiser is enabled
by default, and the older spatial denoiser may be restored by
configuring with --disable-temporal-denoising. The temporal denoiser
is more computationally intensive than the spatial one.
This release removes support for a legacy, decode only API that was
supported, but deprecated, at the initial release of libvpx
(v0.9.0). This is not expected to have any impact. If you are
impacted, you can apply a reversion to commit 2bf8fb58 locally.
Please update to the latest libvpx API if you are affected.
- Enhancements:
Adds a motion compensated temporal denoiser to the encoder, which
gives higher quality than the older spatial denoiser. (See above
for notes on upgrading).
In addition, support for new compilers and platforms were added,
including:
improved support for XCode
Android x86 NDK build
OS/2 support
SunCC support
Changing resolution with vpx_codec_enc_config_set() is now
supported. Previously, reinitializing the codec was required to
change the input resolution.
The vpxenc application has initial support for producing multiple
encodes from the same input in one call. Resizing is not yet
supported, but varying other codec parameters is. Use -- to
delineate output streams. Options persist from one stream to the
next.
Also, the vpxenc application will now use a keyframe interval of
5 seconds by default. Use the --kf-max-dist option to override.
- Speed:
Decoder performance improved 2.5% versus Duclair. Encoder speed is
consistent with Duclair for most material. Two pass encoding of
slideshow-like material will see significant improvements.
Large realtime encoding speed gains at a small quality expense are
possible by configuring the on-the-fly bitpacking experiment with
--enable-onthefly-bitpacking. Realtime encoder can be up to 13%
faster (ARM) depending on the number of threads and bitrate
settings. This technique sees constant gain over the 5-16 speed
range. For VC style input the loss seen is up to 0.2dB. See commit
52cf4dca for further details.
- Quality:
On the whole, quality is consistent with the Duclair release. Some
tweaks:
Reduced blockiness in easy sections by applying a penalty to
intra modes.
Improved quality of static sections (like slideshows) with
two pass encoding.
Improved keyframe sizing with multiple temporal layers
- Bug Fixes:
Corrected alt-ref contribution to frame rate for visible updates
to the alt-ref buffer. This affected applications making manual
usage of the frame reference flags, or temporal layers.
Additional constraints were added to disable multi-frame quality
enhancement (MFQE) in sections of the frame where there is motion.
(#392)
Fixed corruption issues when vpx_codec_enc_config_set() was called
with spatial resampling enabled.
Fixed a decoder error introduced in Duclair where the segmentation
map was not being reinitialized on keyframes (#378)
2012-01-27 v1.0.0 "Duclair"
Our fourth named release, focused on performance and features related to
real-time encoding. It also fixes a decoder crash bug introduced in
v0.9.7, so all users of that release are encouraged to upgrade.
- Upgrading:
This release is ABI incompatible with prior releases of libvpx, so the
"major" version number has been bumped to 1. You must recompile your
applications against the latest version of the libvpx headers. The
API remains compatible, and this should not require code changes in most
applications.
- Enhancements:
This release introduces several substantial new features to the encoder,
of particular interest to real time streaming applications.
Temporal scalability allows the encoder to produce a stream that can
be decimated to different frame rates, with independent rate targetting
for each substream.
Multiframe quality enhancement postprocessing can make visual quality
more consistent in the presence of frames that are substantially
different quality than the surrounding frames, as in the temporal
scalability case and in some forced keyframe scenarios.
Multiple-resolution encoding support allows the encoding of the
same content at different resolutions faster than encoding them
separately.
- Speed:
Optimization targets for this release included the decoder and the real-
time modes of the encoder. Decoder speed on x86 has improved 10.5% with
this release. Encoder improvements followed a curve where speeds 1-3
improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
Cayuga release.
- Quality:
Encoder quality in the single stream case is consistent with the Cayuga
release.
- Bug Fixes:
This release fixes an OOB read decoder crash bug present in v0.9.7
related to the clamping of motion vectors in SPLITMV blocks. This
behavior could be triggered by corrupt input or by starting
decoding from a P-frame.
2011-08-15 v0.9.7-p1 "Cayuga" patch 1
This is an incremental bugfix release against Cayuga. All users of that
release are strongly encouraged to upgrade.
- Fix potential OOB reads (cdae03a)
An unbounded out of bounds read was discovered when the
decoder was requested to perform error concealment (new in
Cayuga) given a frame with corrupt partition sizes.
A bounded out of bounds read was discovered affecting all
versions of libvpx. Given an multipartition input frame that
is truncated between the mode/mv partition and the first
residiual paritition (in the block of partition offsets), up
to 3 extra bytes could have been read from the source buffer.
The code will not take any action regardless of the contents
of these undefined bytes, as the truncated buffer is detected
immediately following the read based on the calculated
starting position of the coefficient partition.
- Fix potential error concealment crash when the very first frame
is missing or corrupt (a609be5)
- Fix significant artifacts in error concealment (a4c2211, 99d870a)
- Revert 1-pass CBR rate control changes (e961317)
Further testing showed this change produced undesirable visual
artifacts, rolling back for now.
2011-08-02 v0.9.7 "Cayuga"
Our third named release, focused on a faster, higher quality, encoder.
- Upgrading:
This release is backwards compatible with Aylesbury (v0.9.5) and
Bali (v0.9.6). Users of older releases should refer to the Upgrading
notes in this document for that release.
- Enhancements:
Stereo 3D format support for vpxenc
Runtime detection of available processor cores.
Allow specifying --end-usage by enum name
vpxdec: test for frame corruption
vpxenc: add quantizer histogram display
vpxenc: add rate histogram display
Set VPX_FRAME_IS_DROPPABLE
update configure for ios sdk 4.3
Avoid text relocations in ARM vp8 decoder
Generate a vpx.pc file for pkg-config.
New ways of passing encoded data between encoder and decoder.
- Speed:
This release includes across-the-board speed improvements to the
encoder. On x86, these measure at approximately 11.5% in Best mode,
21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6).
On ARM Cortex A9 with Neon extensions, real-time encoding of video
telephony content is 35% faster than Bali on single core and 48%
faster on multi-core. On the NVidia Tegra2 platform, real time
encoding is 40% faster than Bali.
Decoder speed was not a priority for this release, but improved
approximately 8.4% on x86.
Reduce motion vector search on alt-ref frame.
Encoder loopfilter running in its own thread
Reworked loopfilter to precalculate more parameters
SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
Removed redundant checks
Reduced structure sizes
utilize preload in ARMv6 MC/LPF/Copy routines
ARM optimized quantization, dfct, variance, subtract
Increase chrow row alignment to 16 bytes.
disable trellis optimization for first pass
Write SSSE3 sub-pixel filter function
Improve SSE2 half-pixel filter funtions
Add vp8_sub_pixel_variance16x8_ssse3 function
Reduce unnecessary distortion computation
Use diamond search to replace full search
Preload reference area in sub-pixel motion search (real-time mode)
- Quality:
This release focused primarily on one-pass use cases, including
video conferencing. Low latency data rate control was significantly
improved, improving streamability over bandwidth constrained links.
Added support for error concealment, allowing frames to maintain
visual quality in the presence of substantial packet loss.
Add rc_max_intra_bitrate_pct control
Limit size of initial keyframe in one-pass.
Improve framerate adaptation
Improved 1-pass CBR rate control
Improved KF insertion after fades to still.
Improved key frame detection.
Improved activity masking (lower PSNR impact for same SSIM boost)
Improved interaction between GF and ARFs
Adding error-concealment to the decoder.
Adding support for independent partitions
Adjusted rate-distortion constants
- Bug Fixes:
Removed firstpass motion map
Fix parallel make install
Fix multithreaded encoding for 1 MB wide frame
Fixed iwalsh_neon build problems with RVDS4.1
Fix semaphore emulation, spin-wait intrinsics on Windows
Fix build with xcode4 and simplify GLOBAL.
Mark ARM asm objects as allowing a non-executable stack.
Fix vpxenc encoding incorrect webm file header on big endian
2011-03-07 v0.9.6 "Bali"
Our second named release, focused on a faster, higher quality, encoder.
- Upgrading:
This release is backwards compatible with Aylesbury (v0.9.5). Users
of older releases should refer to the Upgrading notes in this
document for that release.
- Enhancements:
vpxenc --psnr shows a summary when encode completes
--tune=ssim option to enable activity masking
improved postproc visualizations for development
updated support for Apple iOS to SDK 4.2
query decoder to determine which reference frames were updated
implemented error tracking in the decoder
fix pipe support on windows
- Speed:
Primary focus was on good quality mode, speed 0. Average improvement
on x86 about 40%, up to 100% on user-generated content at that speed.
Best quality mode speed improved 35%, and realtime speed 10-20%. This
release also saw significant improvement in realtime encoding speed
on ARM platforms.
Improved encoder threading
Dont pick encoder filter level when loopfilter is disabled.
Avoid double copying of key frames into alt and golden buffer
FDCT optimizations.
x86 sse2 temporal filter
SSSE3 version of fast quantizer
vp8_rd_pick_best_mbsegmentation code restructure
Adjusted breakout RD for SPLITMV
Changed segmentation check order
Improved rd_pick_intra4x4block
Adds armv6 optimized variance calculation
ARMv6 optimized sad16x16
ARMv6 optimized half pixel variance calculations
Full search SAD function optimization in SSE4.1
Improve MV prediction accuracy to achieve performance gain
Improve MV prediction in vp8_pick_inter_mode() for speed>3
- Quality:
Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release
also includes support for "activity masking," which greatly improves
SSIM at the expense of PSNR. For now, this feature is available with
the --tune=ssim option. Further experimentation in this area
is ongoing. This release also introduces a new rate control mode
called "CQ," which changes the allocation of bits within a clip to
the sections where they will have the most visual impact.
Tuning for the more exact quantizer.
Relax rate control for last few frames
CQ Mode
Limit key frame quantizer for forced key frames.
KF/GF Pulsing
Add simple version of activity masking.
make rdmult adaptive for intra in quantizer RDO
cap the best quantizer for 2nd order DC
change the threshold of DC check for encode breakout
- Bug Fixes:
Fix crash on Sparc Solaris.
Fix counter of fixed keyframe distance
ARNR filter pointer update bug fix
Fixed use of motion percentage in KF/GF group calc
Changed condition for using RD in Intra Mode
Fix encoder real-time only configuration.
Fix ARM encoder crash with multiple token partitions
Fixed bug first cluster timecode of webm file is wrong.
Fixed various encoder bugs with odd-sized images
vp8e_get_preview fixed when spatial resampling enabled
quantizer: fix assertion in fast quantizer path
Allocate source buffers to be multiples of 16
Fix for manual Golden frame frequency
Fix drastic undershoot in long form content
2010-10-28 v0.9.5 "Aylesbury"
Our first named release, focused on a faster decoder, and a better encoder.
- Upgrading:
This release incorporates backwards-incompatible changes to the
ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.
vpxdec
* the -q (quiet) option has been removed, and replaced with
-v (verbose). the output is quiet by default. Use -v to see
the version number of the binary.
* The default behavior is now to write output to a single file
instead of individual frames. The -y option has been removed.
Y4M output is the default.
* For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
options must be specified.
$ ivfdec -o OUTPUT INPUT
$ vpxdec --i420 -o OUTPUT INPUT
* If an output file is not specified, the default is to write
Y4M to stdout. This makes piping more natural.
$ ivfdec -y -o - INPUT | ...
$ vpxdec INPUT | ...
* The output file has additional flexibility for formatting the
filename. It supports escape characters for constructing a
filename from the width, height, and sequence number. This
replaces the -p option. To get the equivalent:
$ ivfdec -p frame INPUT
$ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT
vpxenc
* The output file must be specified with -o, rather than as the
last argument.
$ ivfenc <options> INPUT OUTPUT
$ vpxenc <options> -o OUTPUT INPUT
* The output defaults to webm. To get IVF output, use the --ivf
option.
$ ivfenc <options> INPUT OUTPUT.ivf
$ vpxenc <options> -o OUTPUT.ivf --ivf INPUT
- Enhancements:
ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
vpxdec supports .webm input
vpxdec writes .y4m by default
vpxenc writes .webm output by default
vpxenc --psnr now shows the average/overall PSNR at the end
ARM platforms now support runtime cpu detection
vpxdec visualizations added for motion vectors, block modes, references
vpxdec now silent by default
vpxdec --progress shows frame-by-frame timing information
vpxenc supports the distinction between --fps and --timebase
NASM is now a supported assembler
configure: enable PIC for shared libs by default
configure: add --enable-small
configure: support for ppc32-linux-gcc
configure: support for sparc-solaris-gcc
- Bugs:
Improve handling of invalid frames
Fix valgrind errors in the NEON loop filters.
Fix loopfilter delta zero transitions
Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
Build fixes for darwin-icc
- Speed:
20-40% (average 28%) improvement in libvpx decoder speed,
including:
Rewrite vp8_short_walsh4x4_sse2()
Optimizations on the loopfilters.
Miscellaneous improvements for Atom
Add 4-tap version of 2nd-pass ARMv6 MC filter.
Improved multithread utilization
Better instruction choices on x86
reorder data to use wider instructions
Update NEON wide idcts
Make block access to frame buffer sequential
Improved subset block search
Bilinear subpixel optimizations for ssse3.
Decrease memory footprint
Encoder speed improvements (percentage gain not measured):
Skip unnecessary search of identical frames
Add SSE2 subtract functions
Improve bounds checking in vp8_diamond_search_sadx4()
Added vp8_fast_quantize_b_sse2
- Quality:
Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
encoding mode, and up to 60% improvement on very noisy, still
or slow moving source video
Motion compensated temporal filter for Alt-Ref Noise Reduction
Improved use of trellis quantization on 2nd order Y blocks
Tune effect of motion on KF/GF boost in two pass
Allow coefficient optimization for good quality speed 0.
Improved control of active min quantizer for two pass.
Enable ARFs for non-lagged compress
2010-09-02 v0.9.2
- Enhancements:
Disable frame dropping by default
Improved multithreaded performance
Improved Force Key Frame Behaviour
Increased rate control buffer level precision
Fix bug in 1st pass motion compensation
ivfenc: correct fixed kf interval, --disable-kf
- Speed:
Changed above and left context data layout
Rework idct calling structure.
Removed unnecessary MB_MODE_INFO copies
x86: SSSE3 sixtap prediction
Reworked IDCT to include reconstruction (add) step
Swap alt/gold/new/last frame buffer ptrs instead of copying.
Improve SSE2 loopfilter functions
Change bitreader to use a larger window.
Avoid loopfilter reinitialization when possible
- Quality:
Normalize quantizer's zero bin and rounding factors
Add trellis quantization.
Make the quantizer exact.
Updates to ARNR filtering algorithm
Fix breakout thresh computation for golden & AltRef frames
Redo the forward 4x4 dct
Improve the accuracy of forward walsh-hadamard transform
Further adjustment of RD behaviour with Q and Zbin.
- Build System:
Allow linking of libs built with MinGW to MSVC
Fix target auto-detection on mingw32
Allow --cpu= to work for x86.
configure: pass original arguments through to make dist
Fix builds without runtime CPU detection
msvs: fix install of codec sources
msvs: Change devenv.com command line for better msys support
msvs: Add vs9 targets.
Add x86_64-linux-icc target
- Bugs:
Potential crashes on older MinGW builds
Fix two-pass framrate for Y4M input.
Fixed simple loop filter, other crashes on ARM v6
arm: fix missing dependency with --enable-shared
configure: support directories containing .o
Replace pinsrw (SSE) with MMX instructions
apple: include proper mach primatives
Fixed rate control bug with long key frame interval.
Fix DSO link errors on x86-64 when not using a version script
Fixed buffer selection for UV in AltRef filtering
2010-06-17 v0.9.1
- Enhancements:
* ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
* Speed optimizations
- Bugfixes:
* Rate control
* Prevent out-of-bounds accesses on invalid data
- Build system updates:
* Detect toolchain to be used automatically for native builds
* Support building shared libraries
* Better autotools emulation (--prefix, --libdir, DESTDIR)
- Updated LICENSE
* http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html
2010-05-18 v0.9.0
- Initial open source release. Welcome to WebM and VP8!

8
third_party/aom/CMakeLists.txt поставляемый
Просмотреть файл

@ -186,11 +186,9 @@ list(APPEND AOM_ENCODER_APP_UTIL_SOURCES
"${AOM_ROOT}/examples/encoder_util.h"
"${AOM_ROOT}/examples/encoder_util.c")
if (ENABLE_EXAMPLES)
list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
"${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
"${AOM_ROOT}/stats/rate_hist.h")
endif ()
list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
"${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
"${AOM_ROOT}/stats/rate_hist.h")
list(APPEND AOM_PKG_CONFIG_SOURCES "${AOM_CONFIG_DIR}/aom.pc")

14
third_party/aom/aom/aomcx.h поставляемый
Просмотреть файл

@ -854,6 +854,12 @@ enum aome_enc_control_id {
/*!\brief Codec control function to set the path to the film grain parameters
*/
AV1E_SET_FILM_GRAIN_TABLE,
/*!\brief Sets the noise level */
AV1E_SET_DENOISE_NOISE_LEVEL,
/*!\brief Sets the denoisers block size */
AV1E_SET_DENOISE_BLOCK_SIZE,
};
/*!\brief aom 1-D scaling mode
@ -1165,6 +1171,14 @@ AOM_CTRL_USE_TYPE(AV1E_SET_FILM_GRAIN_TABLE, const char *)
AOM_CTRL_USE_TYPE(AV1E_SET_CDF_UPDATE_MODE, int)
#define AOM_CTRL_AV1E_SET_CDF_UPDATE_MODE
#ifdef CONFIG_DENOISE
AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_NOISE_LEVEL, int);
#define AOM_CTRL_AV1E_SET_DENOISE_NOISE_LEVEL
AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_BLOCK_SIZE, unsigned int);
#define AOM_CTRL_AV1E_SET_DENOISE_BLOCK_SIZE
#endif
/*!\endcond */
/*! @} - end defgroup aom_encoder */
#ifdef __cplusplus

18
third_party/aom/aom/aomdx.h поставляемый
Просмотреть файл

@ -119,6 +119,12 @@ enum aom_dec_control_id {
/** control function to get the bit depth of the stream. */
AV1D_GET_BIT_DEPTH,
/** control function to get the image format of the stream. */
AV1D_GET_IMG_FORMAT,
/** control function to get the size of the tile. */
AV1D_GET_TILE_SIZE,
/** control function to set the byte alignment of the planes in the reference
* buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets
* legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly
@ -187,6 +193,12 @@ enum aom_dec_control_id {
*/
AV1D_EXT_TILE_DEBUG,
/** control function to enable the row based multi-threading of decoding. A
* value that is equal to 1 indicates that row based multi-threading is
* enabled.
*/
AV1D_SET_ROW_MT,
/** control function to indicate whether bitstream is in Annex-B format. */
AV1D_SET_IS_ANNEXB,
@ -238,6 +250,10 @@ AOM_CTRL_USE_TYPE(AV1D_GET_DISPLAY_SIZE, int *)
#define AOM_CTRL_AV1D_GET_DISPLAY_SIZE
AOM_CTRL_USE_TYPE(AV1D_GET_BIT_DEPTH, unsigned int *)
#define AOM_CTRL_AV1D_GET_BIT_DEPTH
AOM_CTRL_USE_TYPE(AV1D_GET_IMG_FORMAT, aom_img_fmt_t *)
#define AOM_CTRL_AV1D_GET_IMG_FORMAT
AOM_CTRL_USE_TYPE(AV1D_GET_TILE_SIZE, unsigned int *)
#define AOM_CTRL_AV1D_GET_TILE_SIZE
AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_SIZE, int *)
#define AOM_CTRL_AV1D_GET_FRAME_SIZE
AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
@ -258,6 +274,8 @@ AOM_CTRL_USE_TYPE(AV1D_SET_EXT_REF_PTR, av1_ext_ref_frame_t *)
#define AOM_CTRL_AV1D_SET_EXT_REF_PTR
AOM_CTRL_USE_TYPE(AV1D_EXT_TILE_DEBUG, unsigned int)
#define AOM_CTRL_AV1D_EXT_TILE_DEBUG
AOM_CTRL_USE_TYPE(AV1D_SET_ROW_MT, unsigned int)
#define AOM_CTRL_AV1D_SET_ROW_MT
AOM_CTRL_USE_TYPE(AV1D_SET_IS_ANNEXB, unsigned int)
#define AOM_CTRL_AV1D_SET_IS_ANNEXB
AOM_CTRL_USE_TYPE(AV1D_SET_OPERATING_POINT, int)

Просмотреть файл

@ -417,7 +417,7 @@ struct aom_internal_error_info {
aom_codec_err_t error_code;
int has_detail;
char detail[80];
int setjmp;
int setjmp; // Boolean: whether 'jmp' is valid.
jmp_buf jmp;
};

8
third_party/aom/aom_dsp/aom_dsp.cmake поставляемый
Просмотреть файл

@ -83,6 +83,7 @@ list(APPEND AOM_DSP_COMMON_INTRIN_SSE4_1
list(APPEND AOM_DSP_COMMON_INTRIN_AVX2
"${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/convolve_avx2.h"
"${AOM_ROOT}/aom_dsp/x86/fft_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
@ -190,13 +191,16 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/ssim_opt_x86_64.asm")
list(APPEND AOM_DSP_ENCODER_INTRIN_AVX2
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/subtract_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/highbd_quantize_intrin_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad4d_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad_highbd_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/sad_impl_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/variance_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/variance_impl_avx2.c")
"${AOM_ROOT}/aom_dsp/x86/variance_impl_avx2.c"
"${AOM_ROOT}/aom_dsp/x86/obmc_sad_avx2.c")
list(APPEND AOM_DSP_ENCODER_ASM_SSSE3_X86_64
"${AOM_ROOT}/aom_dsp/x86/quantize_ssse3_x86_64.asm")
@ -205,9 +209,11 @@ if(CONFIG_AV1_ENCODER)
"${AOM_ROOT}/aom_dsp/x86/quantize_avx_x86_64.asm")
list(APPEND AOM_DSP_ENCODER_INTRIN_SSSE3
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.h"
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.h"
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/variance_impl_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
"${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")

2
third_party/aom/aom_dsp/aom_dsp_rtcd.c поставляемый
Просмотреть файл

@ -15,4 +15,4 @@
#include "aom_ports/aom_once.h"
void aom_dsp_rtcd() { once(setup_rtcd_internal); }
void aom_dsp_rtcd() { aom_once(setup_rtcd_internal); }

23
third_party/aom/aom_dsp/aom_dsp_rtcd_defs.pl поставляемый
Просмотреть файл

@ -377,7 +377,7 @@ add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8
specialize qw/aom_lpf_vertical_14_dual sse2/;
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_6 sse2/;
specialize qw/aom_lpf_vertical_6 sse2 neon/;
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_8 sse2 neon/;
@ -386,13 +386,13 @@ add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_
specialize qw/aom_lpf_vertical_8_dual sse2/;
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_vertical_4 sse2/;
specialize qw/aom_lpf_vertical_4 sse2 neon/;
add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_vertical_4_dual sse2/;
add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_14 sse2/;
specialize qw/aom_lpf_horizontal_14 sse2 neon/;
add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_14_dual sse2/;
@ -410,7 +410,7 @@ add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint
specialize qw/aom_lpf_horizontal_8_dual sse2/;
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/aom_lpf_horizontal_4 sse2/;
specialize qw/aom_lpf_horizontal_4 sse2 neon/;
add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/aom_lpf_horizontal_4_dual sse2/;
@ -564,7 +564,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# Block subtraction
#
add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/aom_subtract_block neon msa sse2/;
specialize qw/aom_subtract_block neon msa sse2 avx2/;
add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
specialize qw/aom_highbd_subtract_block sse2/;
@ -732,14 +732,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
specialize "aom_masked_sad${w}x${h}", qw/ssse3/;
specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2/;
}
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2/;
}
@ -750,7 +750,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
}
}
@ -759,7 +759,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
($w, $h) = @$_;
add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
}
}
@ -1102,6 +1102,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
specialize "aom_obmc_variance${w}x${h}", q/sse4_1/;
specialize "aom_obmc_sub_pixel_variance${w}x${h}", q/sse4_1/;
}
@ -1539,9 +1540,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/aom_comp_mask_pred ssse3 avx2/;
add_proto qw/void aom_highbd_comp_mask_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd";
specialize qw/aom_highbd_comp_mask_pred avx2/;
} # CONFIG_AV1_ENCODER

60
third_party/aom/aom_dsp/arm/intrapred_neon.c поставляемый
Просмотреть файл

@ -528,3 +528,63 @@ void aom_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
}
}
}
static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
const uint16_t *above,
const uint16_t *left) {
assert(bw >= 4);
assert(IS_POWER_OF_TWO(bw));
int expected_dc, sum = 0;
const int count = bw * 2;
uint32x4_t sum_q = vdupq_n_u32(0);
uint32x2_t sum_d;
uint16_t *dst_1;
if (bw >= 8) {
for (int i = 0; i < bw; i += 8) {
sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
above += 8;
left += 8;
}
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
expected_dc = (sum + (count >> 1)) / count;
const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
for (int r = 0; r < bw; r++) {
dst_1 = dst;
for (int i = 0; i < bw; i += 8) {
vst1q_u16(dst_1, dc);
dst_1 += 8;
}
dst += stride;
}
} else { // 4x4
sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
expected_dc = (sum + (count >> 1)) / count;
const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
for (int r = 0; r < bw; r++) {
vst1_u16(dst, dc);
dst += stride;
}
}
}
#define intra_pred_highbd_sized_neon(type, width) \
void aom_highbd_##type##_predictor_##width##x##width##_neon( \
uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
const uint16_t *left, int bd) { \
(void)bd; \
highbd_##type##_predictor(dst, stride, width, above, left); \
}
#define intra_pred_square(type) \
intra_pred_highbd_sized_neon(type, 4); \
intra_pred_highbd_sized_neon(type, 8); \
intra_pred_highbd_sized_neon(type, 16); \
intra_pred_highbd_sized_neon(type, 32); \
intra_pred_highbd_sized_neon(type, 64);
intra_pred_square(dc);
#undef intra_pred_square

228
third_party/aom/aom_dsp/arm/loopfilter_neon.c поставляемый
Просмотреть файл

@ -52,6 +52,36 @@ static INLINE uint8x8_t lpf_mask(uint8x8_t p3q3, uint8x8_t p2q2, uint8x8_t p1q1,
return mask_8x8;
}
static INLINE uint8x8_t lpf_mask2(uint8x8_t p1q1, uint8x8_t p0q0,
const uint8_t blimit, const uint8_t limit) {
uint32x2x2_t p0q0_p1q1;
uint16x8_t temp_16x8;
uint16x4_t temp0_16x4, temp1_16x4;
const uint16x4_t blimit_16x4 = vdup_n_u16(blimit);
const uint8x8_t limit_8x8 = vdup_n_u8(limit);
uint8x8_t mask_8x8, temp_8x8;
mask_8x8 = vabd_u8(p1q1, p0q0);
mask_8x8 = vcle_u8(mask_8x8, limit_8x8);
temp_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(mask_8x8)));
mask_8x8 = vand_u8(mask_8x8, temp_8x8);
p0q0_p1q1 = vtrn_u32(vreinterpret_u32_u8(p0q0), vreinterpret_u32_u8(p1q1));
temp_8x8 = vabd_u8(vreinterpret_u8_u32(p0q0_p1q1.val[0]),
vreinterpret_u8_u32(p0q0_p1q1.val[1]));
temp_16x8 = vmovl_u8(temp_8x8);
temp0_16x4 = vshl_n_u16(vget_low_u16(temp_16x8), 1);
temp1_16x4 = vshr_n_u16(vget_high_u16(temp_16x8), 1);
temp0_16x4 = vadd_u16(temp0_16x4, temp1_16x4);
temp0_16x4 = vcle_u16(temp0_16x4, blimit_16x4);
temp_8x8 = vmovn_u16(vcombine_u16(temp0_16x4, temp0_16x4));
mask_8x8 = vand_u8(mask_8x8, temp_8x8);
return mask_8x8;
}
static INLINE uint8x8_t lpf_flat_mask4(uint8x8_t p3q3, uint8x8_t p2q2,
uint8x8_t p1q1, uint8x8_t p0q0) {
const uint8x8_t thresh_8x8 = vdup_n_u8(1); // for bd==8 threshold is always 1
@ -523,6 +553,68 @@ static void lpf_6_neon(uint8x8_t *p2q2, uint8x8_t *p1q1, uint8x8_t *p0q0,
}
}
static void lpf_4_neon(uint8x8_t *p1q1, uint8x8_t *p0q0, const uint8_t blimit,
const uint8_t limit, const uint8_t thresh) {
int32x2x2_t ps0_qs0, ps1_qs1;
int16x8_t filter_s16;
const uint8x8_t thresh_f4 = vdup_n_u8(thresh);
uint8x8_t mask_8x8, temp0_8x8, temp1_8x8;
int8x8_t ps0_s8, ps1_s8, qs0_s8, qs1_s8, temp_s8;
int8x8_t op0, oq0, op1, oq1;
int8x8_t pq_s0, pq_s1;
int8x8_t filter_s8, filter1_s8, filter2_s8;
int8x8_t hev_8x8;
const int8x8_t sign_mask = vdup_n_s8(0x80);
const int8x8_t val_4 = vdup_n_s8(4);
const int8x8_t val_3 = vdup_n_s8(3);
// Calculate filter mask
mask_8x8 = lpf_mask2(*p1q1, *p0q0, blimit, limit);
pq_s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_mask);
pq_s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_mask);
ps0_qs0 = vtrn_s32(vreinterpret_s32_s8(pq_s0), vreinterpret_s32_s8(pq_s0));
ps1_qs1 = vtrn_s32(vreinterpret_s32_s8(pq_s1), vreinterpret_s32_s8(pq_s1));
ps0_s8 = vreinterpret_s8_s32(ps0_qs0.val[0]);
qs0_s8 = vreinterpret_s8_s32(ps0_qs0.val[1]);
ps1_s8 = vreinterpret_s8_s32(ps1_qs1.val[0]);
qs1_s8 = vreinterpret_s8_s32(ps1_qs1.val[1]);
// hev_mask
temp0_8x8 = vcgt_u8(vabd_u8(*p0q0, *p1q1), thresh_f4);
temp1_8x8 = vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(temp0_8x8)));
hev_8x8 = vreinterpret_s8_u8(vorr_u8(temp0_8x8, temp1_8x8));
// add outer taps if we have high edge variance
filter_s8 = vqsub_s8(ps1_s8, qs1_s8);
filter_s8 = vand_s8(filter_s8, hev_8x8);
// inner taps
temp_s8 = vqsub_s8(qs0_s8, ps0_s8);
filter_s16 = vmovl_s8(filter_s8);
filter_s16 = vmlal_s8(filter_s16, temp_s8, val_3);
filter_s8 = vqmovn_s16(filter_s16);
filter_s8 = vand_s8(filter_s8, vreinterpret_s8_u8(mask_8x8));
filter1_s8 = vqadd_s8(filter_s8, val_4);
filter2_s8 = vqadd_s8(filter_s8, val_3);
filter1_s8 = vshr_n_s8(filter1_s8, 3);
filter2_s8 = vshr_n_s8(filter2_s8, 3);
oq0 = veor_s8(vqsub_s8(qs0_s8, filter1_s8), sign_mask);
op0 = veor_s8(vqadd_s8(ps0_s8, filter2_s8), sign_mask);
filter_s8 = vrshr_n_s8(filter1_s8, 1);
filter_s8 = vbic_s8(filter_s8, hev_8x8);
oq1 = veor_s8(vqsub_s8(qs1_s8, filter_s8), sign_mask);
op1 = veor_s8(vqadd_s8(ps1_s8, filter_s8), sign_mask);
*p0q0 = vreinterpret_u8_s8(vext_s8(op0, oq0, 4));
*p1q1 = vreinterpret_u8_s8(vext_s8(op1, oq1, 4));
}
void aom_lpf_vertical_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x16_t row0, row1, row2, row3;
@ -646,6 +738,125 @@ void aom_lpf_vertical_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
store_u8_8x4(src - 4, stride, p3q0, p2q1, p1q2, p0q3);
}
void aom_lpf_vertical_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p2q2_p1q1, pxqy_p0q0;
uint32x2_t pq_rev;
uint8x8_t pxq0, p2q1, p1q2, p0qy;
uint8x8_t p0q0, p1q1, p2q2, pxqy;
// row0: px p2 p1 p0 | q0 q1 q2 qy
// row1: px p2 p1 p0 | q0 q1 q2 qy
// row2: px p2 p1 p0 | q0 q1 q2 qy
// row3: px p2 p1 p0 | q0 q1 q2 qy
load_u8_8x4(src - 4, stride, &pxq0, &p2q1, &p1q2, &p0qy);
transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
pq_rev = vrev64_u32(vreinterpret_u32_u8(p0qy));
pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxq0), pq_rev);
pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q2));
p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q1), pq_rev);
p0q0 = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
p1q1 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
p2q2 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
pxqy = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
lpf_6_neon(&p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
pq_rev = vrev64_u32(vreinterpret_u32_u8(p0q0));
pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxqy), pq_rev);
pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q1));
p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q2), pq_rev);
p0qy = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
p1q2 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
p2q1 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
pxq0 = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
store_u8_8x4(src - 4, stride, pxq0, p2q1, p1q2, p0qy);
}
void aom_lpf_vertical_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0;
uint32x2_t pq_rev;
uint8x8_t UNINITIALIZED_IS_SAFE(p1p0), q0q1, p0q0, p1q1;
// row0: p1 p0 | q0 q1
// row1: p1 p0 | q0 q1
// row2: p1 p0 | q0 q1
// row3: p1 p0 | q0 q1
load_u8_4x1(src - 2, &p1p0, 0);
load_u8_4x1((src - 2) + 1 * stride, &p1p0, 1);
load_u8_4x1((src - 2) + 2 * stride, &q0q1, 0);
load_u8_4x1((src - 2) + 3 * stride, &q0q1, 1);
transpose_u8_4x4(&p1p0, &q0q1);
p1q0_p0q1 = vtrn_u32(vreinterpret_u32_u8(p1p0), vreinterpret_u32_u8(q0q1));
pq_rev = vrev64_u32(p1q0_p0q1.val[1]);
p1q1_p0q0 = vtrn_u32(p1q0_p0q1.val[0], pq_rev);
p1q1 = vreinterpret_u8_u32(p1q1_p0q0.val[0]);
p0q0 = vreinterpret_u8_u32(p1q1_p0q0.val[1]);
lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
p1p0_q1q0 = vtrn_u32(vreinterpret_u32_u8(p1q1), vreinterpret_u32_u8(p0q0));
p1p0 = vreinterpret_u8_u32(p1p0_q1q0.val[0]);
q0q1 = vreinterpret_u8_u32(vrev64_u32(p1p0_q1q0.val[1]));
transpose_u8_4x4(&p1p0, &q0q1);
store_u8_4x1(src - 2, p1p0, 0);
store_u8_4x1((src - 2) + 1 * stride, q0q1, 0);
store_u8_4x1((src - 2) + 2 * stride, p1p0, 1);
store_u8_4x1((src - 2) + 3 * stride, q0q1, 1);
}
void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, UNINITIALIZED_IS_SAFE(p6q6);
load_u8_4x1(src - 7 * stride, &p6q6, 0);
load_u8_4x1(src - 6 * stride, &p5q5, 0);
load_u8_4x1(src - 5 * stride, &p4q4, 0);
load_u8_4x1(src - 4 * stride, &p3q3, 0);
load_u8_4x1(src - 3 * stride, &p2q2, 0);
load_u8_4x1(src - 2 * stride, &p1q1, 0);
load_u8_4x1(src - 1 * stride, &p0q0, 0);
load_u8_4x1(src + 0 * stride, &p0q0, 1);
load_u8_4x1(src + 1 * stride, &p1q1, 1);
load_u8_4x1(src + 2 * stride, &p2q2, 1);
load_u8_4x1(src + 3 * stride, &p3q3, 1);
load_u8_4x1(src + 4 * stride, &p4q4, 1);
load_u8_4x1(src + 5 * stride, &p5q5, 1);
load_u8_4x1(src + 6 * stride, &p6q6, 1);
lpf_14_neon(&p6q6, &p5q5, &p4q4, &p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit,
*thresh);
store_u8_4x1(src - 6 * stride, p5q5, 0);
store_u8_4x1(src - 5 * stride, p4q4, 0);
store_u8_4x1(src - 4 * stride, p3q3, 0);
store_u8_4x1(src - 3 * stride, p2q2, 0);
store_u8_4x1(src - 2 * stride, p1q1, 0);
store_u8_4x1(src - 1 * stride, p0q0, 0);
store_u8_4x1(src + 0 * stride, p0q0, 1);
store_u8_4x1(src + 1 * stride, p1q1, 1);
store_u8_4x1(src + 2 * stride, p2q2, 1);
store_u8_4x1(src + 3 * stride, p3q3, 1);
store_u8_4x1(src + 4 * stride, p4q4, 1);
store_u8_4x1(src + 5 * stride, p5q5, 1);
}
void aom_lpf_horizontal_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, p1q1, p2q2, p3q3;
@ -698,3 +909,20 @@ void aom_lpf_horizontal_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
vst1_lane_u32((uint32_t *)(src + 1 * stride), vreinterpret_u32_u8(p1q1), 1);
vst1_lane_u32((uint32_t *)(src + 2 * stride), vreinterpret_u32_u8(p2q2), 1);
}
void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1);
load_u8_4x1(src - 2 * stride, &p1q1, 0);
load_u8_4x1(src - 1 * stride, &p0q0, 0);
load_u8_4x1(src + 0 * stride, &p0q0, 1);
load_u8_4x1(src + 1 * stride, &p1q1, 1);
lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
store_u8_4x1(src - 2 * stride, p1q1, 0);
store_u8_4x1(src - 1 * stride, p0q0, 0);
store_u8_4x1(src + 0 * stride, p0q0, 1);
store_u8_4x1(src + 1 * stride, p1q1, 1);
}

7
third_party/aom/aom_dsp/bitreader_buffer.c поставляемый
Просмотреть файл

@ -8,11 +8,14 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include "config/aom_config.h"
#include "aom_dsp/bitreader_buffer.h"
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb) {
size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb) {
return (rb->bit_offset + 7) >> 3;
}
@ -31,6 +34,7 @@ int aom_rb_read_bit(struct aom_read_bit_buffer *rb) {
}
int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
assert(bits <= 31);
int value = 0, bit;
for (bit = bits - 1; bit >= 0; bit--) value |= aom_rb_read_bit(rb) << bit;
return value;
@ -38,6 +42,7 @@ int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
uint32_t aom_rb_read_unsigned_literal(struct aom_read_bit_buffer *rb,
int bits) {
assert(bits <= 32);
uint32_t value = 0;
int bit;
for (bit = bits - 1; bit >= 0; bit--)

2
third_party/aom/aom_dsp/bitreader_buffer.h поставляемый
Просмотреть файл

@ -31,7 +31,7 @@ struct aom_read_bit_buffer {
aom_rb_error_handler error_handler;
};
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb);
size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb);
int aom_rb_read_bit(struct aom_read_bit_buffer *rb);

3
third_party/aom/aom_dsp/bitwriter_buffer.c поставляемый
Просмотреть файл

@ -9,6 +9,7 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <limits.h>
#include <stdlib.h>
@ -49,12 +50,14 @@ void aom_wb_overwrite_bit(struct aom_write_bit_buffer *wb, int bit) {
}
void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits) {
assert(bits <= 31);
int bit;
for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
}
void aom_wb_write_unsigned_literal(struct aom_write_bit_buffer *wb,
uint32_t data, int bits) {
assert(bits <= 32);
int bit;
for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
}

40
third_party/aom/aom_dsp/grain_synthesis.c поставляемый
Просмотреть файл

@ -17,6 +17,7 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "aom_dsp/grain_synthesis.h"
#include "aom_mem/aom_mem.h"
@ -237,7 +238,7 @@ static int grain_max;
static uint16_t random_register = 0; // random number generator register
static void init_arrays(aom_film_grain_t *params, int luma_stride,
static void init_arrays(const aom_film_grain_t *params, int luma_stride,
int chroma_stride, int ***pred_pos_luma_p,
int ***pred_pos_chroma_p, int **luma_grain_block,
int **cb_grain_block, int **cr_grain_block,
@ -331,7 +332,7 @@ static void init_arrays(aom_film_grain_t *params, int luma_stride,
(int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
}
static void dealloc_arrays(aom_film_grain_t *params, int ***pred_pos_luma,
static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
int ***pred_pos_chroma, int **luma_grain_block,
int **cb_grain_block, int **cr_grain_block,
int **y_line_buf, int **cb_line_buf,
@ -396,10 +397,14 @@ static void init_random_generator(int luma_line, uint16_t seed) {
}
static void generate_luma_grain_block(
aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
int left_pad, int top_pad, int right_pad, int bottom_pad) {
if (params->num_y_points == 0) return;
if (params->num_y_points == 0) {
memset(luma_grain_block, 0,
sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
return;
}
int bit_depth = params->bit_depth;
int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
@ -431,7 +436,7 @@ static void generate_luma_grain_block(
}
static void generate_chroma_grain_blocks(
aom_film_grain_t *params,
const aom_film_grain_t *params,
// int** pred_pos_luma,
int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
@ -443,7 +448,7 @@ static void generate_chroma_grain_blocks(
int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
if (params->num_y_points > 0) ++num_pos_chroma;
int rounding_offset = (1 << (params->ar_coeff_shift - 1));
int chroma_grain_samples = chroma_block_size_y * chroma_block_size_x;
int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
if (params->num_cb_points || params->chroma_scaling_from_luma) {
init_random_generator(7 << 5, params->random_seed);
@ -455,7 +460,8 @@ static void generate_chroma_grain_blocks(
((1 << gauss_sec_shift) >> 1)) >>
gauss_sec_shift;
} else {
memset(cr_grain_block, 0, sizeof(*cr_grain_block) * chroma_grain_samples);
memset(cb_grain_block, 0,
sizeof(*cb_grain_block) * chroma_grain_block_size);
}
if (params->num_cr_points || params->chroma_scaling_from_luma) {
@ -468,7 +474,8 @@ static void generate_chroma_grain_blocks(
((1 << gauss_sec_shift) >> 1)) >>
gauss_sec_shift;
} else {
memset(cb_grain_block, 0, sizeof(*cb_grain_block) * chroma_grain_samples);
memset(cr_grain_block, 0,
sizeof(*cr_grain_block) * chroma_grain_block_size);
}
for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
@ -522,7 +529,7 @@ static void generate_chroma_grain_blocks(
}
}
static void init_scaling_function(int scaling_points[][2], int num_points,
static void init_scaling_function(const int scaling_points[][2], int num_points,
int scaling_lut[]) {
if (num_points == 0) return;
@ -559,7 +566,7 @@ static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
(bit_depth - 8));
}
static void add_noise_to_block(aom_film_grain_t *params, uint8_t *luma,
static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
uint8_t *cb, uint8_t *cr, int luma_stride,
int chroma_stride, int *luma_grain,
int *cb_grain, int *cr_grain,
@ -675,7 +682,7 @@ static void add_noise_to_block(aom_film_grain_t *params, uint8_t *luma,
}
static void add_noise_to_block_hbd(
aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
int half_luma_height, int half_luma_width, int bit_depth,
@ -903,7 +910,7 @@ static void hor_boundary_overlap(int *top_block, int top_stride,
}
}
void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
void av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
aom_image_t *dst) {
uint8_t *luma, *cb, *cr;
int height, width, luma_stride, chroma_stride;
@ -950,6 +957,11 @@ void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
exit(1);
}
assert(params->bit_depth == src->bit_depth);
dst->fmt = src->fmt;
dst->bit_depth = src->bit_depth;
dst->r_w = src->r_w;
dst->r_h = src->r_h;
dst->d_w = src->d_w;
@ -999,15 +1011,13 @@ void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;
params->bit_depth = dst->bit_depth;
av1_add_film_grain_run(params, luma, cb, cr, height, width, luma_stride,
chroma_stride, use_high_bit_depth, chroma_subsamp_y,
chroma_subsamp_x, mc_identity);
return;
}
void av1_add_film_grain_run(aom_film_grain_t *params, uint8_t *luma,
void av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
uint8_t *cb, uint8_t *cr, int height, int width,
int luma_stride, int chroma_stride,
int use_high_bit_depth, int chroma_subsamp_y,

10
third_party/aom/aom_dsp/grain_synthesis.h поставляемый
Просмотреть файл

@ -72,7 +72,7 @@ typedef struct {
int clip_to_restricted_range;
int bit_depth; // video bit depth
unsigned int bit_depth; // video bit depth
int chroma_scaling_from_luma;
@ -94,7 +94,7 @@ typedef struct {
* \param[in] luma_stride luma plane stride
* \param[in] chroma_stride chroma plane stride
*/
void av1_add_film_grain_run(aom_film_grain_t *grain_params, uint8_t *luma,
void av1_add_film_grain_run(const aom_film_grain_t *grain_params, uint8_t *luma,
uint8_t *cb, uint8_t *cr, int height, int width,
int luma_stride, int chroma_stride,
int use_high_bit_depth, int chroma_subsamp_y,
@ -106,10 +106,10 @@ void av1_add_film_grain_run(aom_film_grain_t *grain_params, uint8_t *luma,
*
* \param[in] grain_params Grain parameters
* \param[in] src Source image
* \param[in] dst Resulting image with grain
* \param[out] dst Resulting image with grain
*/
void av1_add_film_grain(aom_film_grain_t *grain_params, aom_image_t *src,
aom_image_t *dst);
void av1_add_film_grain(const aom_film_grain_t *grain_params,
const aom_image_t *src, aom_image_t *dst);
#ifdef __cplusplus
} // extern "C"

186
third_party/aom/aom_dsp/noise_model.c поставляемый
Просмотреть файл

@ -1458,3 +1458,189 @@ int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
}
return init_success;
}
struct aom_denoise_and_model_t {
int block_size;
int bit_depth;
float noise_level;
// Size of current denoised buffer and flat_block buffer
int width;
int height;
int y_stride;
int uv_stride;
int num_blocks_w;
int num_blocks_h;
// Buffers for image and noise_psd allocated on the fly
float *noise_psd[3];
uint8_t *denoised[3];
uint8_t *flat_blocks;
aom_flat_block_finder_t flat_block_finder;
aom_noise_model_t noise_model;
};
struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
int block_size,
float noise_level) {
struct aom_denoise_and_model_t *ctx =
(struct aom_denoise_and_model_t *)aom_malloc(
sizeof(struct aom_denoise_and_model_t));
if (!ctx) {
fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
return NULL;
}
memset(ctx, 0, sizeof(*ctx));
ctx->block_size = block_size;
ctx->noise_level = noise_level;
ctx->bit_depth = bit_depth;
ctx->noise_psd[0] =
aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
ctx->noise_psd[1] =
aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
ctx->noise_psd[2] =
aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
fprintf(stderr, "Unable to allocate noise PSD buffers\n");
aom_denoise_and_model_free(ctx);
return NULL;
}
return ctx;
}
void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
aom_free(ctx->flat_blocks);
for (int i = 0; i < 3; ++i) {
aom_free(ctx->denoised[i]);
aom_free(ctx->noise_psd[i]);
}
aom_noise_model_free(&ctx->noise_model);
aom_flat_block_finder_free(&ctx->flat_block_finder);
aom_free(ctx);
}
static int denoise_and_model_realloc_if_necessary(
struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
return 1;
const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
const int block_size = ctx->block_size;
ctx->width = sd->y_width;
ctx->height = sd->y_height;
ctx->y_stride = sd->y_stride;
ctx->uv_stride = sd->uv_stride;
for (int i = 0; i < 3; ++i) {
aom_free(ctx->denoised[i]);
ctx->denoised[i] = NULL;
}
aom_free(ctx->flat_blocks);
ctx->flat_blocks = NULL;
ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
fprintf(stderr, "Unable to allocate denoise buffers\n");
return 0;
}
ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
aom_flat_block_finder_free(&ctx->flat_block_finder);
if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
ctx->bit_depth, use_highbd)) {
fprintf(stderr, "Unable to init flat block finder\n");
return 0;
}
const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
ctx->bit_depth, use_highbd };
aom_noise_model_free(&ctx->noise_model);
if (!aom_noise_model_init(&ctx->noise_model, params)) {
fprintf(stderr, "Unable to init noise model\n");
return 0;
}
// Simply use a flat PSD (although we could use the flat blocks to estimate
// PSD) those to estimate an actual noise PSD)
const float y_noise_level =
aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
const float uv_noise_level = aom_noise_psd_get_default_value(
ctx->block_size >> sd->subsampling_x, ctx->noise_level);
for (int i = 0; i < block_size * block_size; ++i) {
ctx->noise_psd[0][i] = y_noise_level;
ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
}
return 1;
}
int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
YV12_BUFFER_CONFIG *sd,
aom_film_grain_t *film_grain) {
const int block_size = ctx->block_size;
const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
uint8_t *raw_data[3] = {
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
};
const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
fprintf(stderr, "Unable to realloc buffers\n");
return 0;
}
aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
sd->y_height, strides[0], ctx->flat_blocks);
if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
strides, chroma_sub_log2, ctx->noise_psd,
block_size, ctx->bit_depth, use_highbd)) {
fprintf(stderr, "Unable to denoise image\n");
return 0;
}
const aom_noise_status_t status = aom_noise_model_update(
&ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
block_size);
int have_noise_estimate = 0;
if (status == AOM_NOISE_STATUS_OK) {
have_noise_estimate = 1;
} else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
aom_noise_model_save_latest(&ctx->noise_model);
have_noise_estimate = 1;
} else {
// Unable to update noise model; proceed if we have a previous estimate.
have_noise_estimate =
(ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
}
film_grain->apply_grain = 0;
if (have_noise_estimate) {
if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
fprintf(stderr, "Unable to get grain parameters.\n");
return 0;
}
if (!film_grain->random_seed) {
film_grain->random_seed = 1071;
}
memcpy(raw_data[0], ctx->denoised[0],
(strides[0] * sd->y_height) << use_highbd);
memcpy(raw_data[1], ctx->denoised[1],
(strides[1] * sd->uv_height) << use_highbd);
memcpy(raw_data[2], ctx->denoised[2],
(strides[2] * sd->uv_height) << use_highbd);
}
return 1;
}

37
third_party/aom/aom_dsp/noise_model.h поставляемый
Просмотреть файл

@ -18,6 +18,7 @@ extern "C" {
#include <stdint.h>
#include "aom_dsp/grain_synthesis.h"
#include "aom_scale/yv12config.h"
/*!\brief Wrapper of data required to represent linear system of eqns and soln.
*/
@ -280,6 +281,42 @@ int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
int w, int h, int stride[3], int chroma_sub_log2[2],
float *noise_psd[3], int block_size, int bit_depth,
int use_highbd);
struct aom_denoise_and_model_t;
/*!\brief Denoise the buffer and model the residual noise.
*
* This is meant to be called sequentially on input frames. The input buffer
* is denoised and the residual noise is modelled. The current noise estimate
* is populated in film_grain. Returns true on success. The grain.apply_grain
* parameter will be true when the input buffer was successfully denoised and
* grain was modelled. Returns false on error.
*
* \param[in] ctx Struct allocated with aom_denoise_and_model_alloc
* that holds some buffers for denoising and the current
* noise estimate.
* \param[in/out] buf The raw input buffer to be denoised.
* \param[out] grain Output film grain parameters
*/
int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
YV12_BUFFER_CONFIG *buf, aom_film_grain_t *grain);
/*!\brief Allocates a context that can be used for denoising and noise modeling.
*
* \param[in] bit_depth Bit depth of buffers this will be run on.
* \param[in] block_size Block size for noise modeling and flat block
* estimation
* \param[in] noise_level The noise_level (2.5 for moderate noise, and 5 for
* higher levels of noise)
*/
struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
int block_size,
float noise_level);
/*!\brief Frees the denoise context allocated with aom_denoise_and_model_alloc
*/
void aom_denoise_and_model_free(struct aom_denoise_and_model_t *denoise_model);
#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

Просмотреть файл

@ -289,6 +289,15 @@ SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
return c_v256_shr_s32(a, c);
}
SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
return c_v256_shl_64(a, c);
}
SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
return c_v256_shr_u64(a, c);
}
SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
return c_v256_shr_s64(a, c);
}
SIMD_INLINE v256 v256_shr_n_byte(v256 a, unsigned int n) {
return c_v256_shr_n_byte(a, n);

39
third_party/aom/aom_dsp/variance.c поставляемый
Просмотреть файл

@ -386,7 +386,7 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
}
}
const InterpFilterParams filter =
const InterpFilterParams *filter =
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
@ -413,12 +413,12 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
const int16_t *const kernel_y =
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
const int intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_convolve8_horiz(ref - ref_stride * ((filter.taps >> 1) - 1), ref_stride,
temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
intermediate_height);
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
aom_convolve8_horiz(ref - ref_stride * ((filter->taps >> 1) - 1),
ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
width, intermediate_height);
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
width, height);
}
@ -974,7 +974,7 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
}
}
const InterpFilterParams filter =
const InterpFilterParams *filter =
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
@ -1004,14 +1004,14 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
const int16_t *const kernel_y =
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
const int intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1),
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
ref_stride, CONVERT_TO_BYTEPTR(temp),
MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
intermediate_height, bd);
aom_highbd_convolve8_vert(
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
16, width, height, bd);
}
@ -1185,29 +1185,18 @@ void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
}
}
void aom_highbd_comp_mask_upsampled_pred_c(
void aom_highbd_comp_mask_upsampled_pred(
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
int bd) {
int i, j;
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
bd);
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
if (!invert_mask)
comp_pred[j] = AOM_BLEND_A64(mask[j], comp_pred[j], pred[j]);
else
comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], comp_pred[j]);
}
comp_pred += width;
pred += width;
mask += mask_stride;
}
aom_highbd_comp_mask_pred(comp_pred, pred8, width, height,
CONVERT_TO_BYTEPTR(comp_pred), width, mask,
mask_stride, invert_mask);
}
#define HIGHBD_MASK_SUBPIX_VAR(W, H) \

7
third_party/aom/aom_dsp/variance.h поставляемый
Просмотреть файл

@ -76,6 +76,13 @@ void aom_comp_mask_upsampled_pred(
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask);
void aom_highbd_comp_mask_upsampled_pred(
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
int bd);
typedef unsigned int (*aom_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride,
const int32_t *wsrc,
const int32_t *msk);

Просмотреть файл

@ -41,25 +41,163 @@
#define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
#endif // __clang__
static void aom_filter_block1d16_h8_avx2(
static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) {
*((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*a));
*((uint32_t *)(output_ptr + stride)) =
_mm_cvtsi128_si32(_mm256_extracti128_si256(*a, 1));
}
static INLINE __m256i xx_loadu2_epi64(const void *hi, const void *lo) {
__m256i a = _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)(lo)));
a = _mm256_inserti128_si256(a, _mm_loadl_epi64((const __m128i *)(hi)), 1);
return a;
}
static INLINE void xx_storeu2_epi64(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) {
_mm_storel_epi64((__m128i *)output_ptr, _mm256_castsi256_si128(*a));
_mm_storel_epi64((__m128i *)(output_ptr + stride),
_mm256_extractf128_si256(*a, 1));
}
static INLINE __m256i xx_loadu2_mi128(const void *hi, const void *lo) {
__m256i a = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(lo)));
a = _mm256_inserti128_si256(a, _mm_loadu_si128((const __m128i *)(hi)), 1);
return a;
}
static INLINE void xx_store2_mi128(const uint8_t *output_ptr,
const ptrdiff_t stride, const __m256i *a) {
_mm_store_si128((__m128i *)output_ptr, _mm256_castsi256_si128(*a));
_mm_store_si128((__m128i *)(output_ptr + stride),
_mm256_extractf128_si256(*a, 1));
}
static void aom_filter_block1d4_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
__m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
__m256i srcReg32b1, srcReg32b2, filtersReg32;
__m256i addFilterReg32, filt1Reg, filt2Reg;
__m256i firstFilters, secondFilters;
__m256i srcRegFilt32b1_1, srcRegFilt32b2;
__m256i srcReg32b1;
unsigned int i;
ptrdiff_t src_stride, dst_stride;
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
src_ptr -= 3;
addFilterReg32 = _mm256_set1_epi16(32);
filtersReg = _mm_loadu_si128((const __m128i *)filter);
filtersReg = _mm_srai_epi16(filtersReg, 1);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
// have the same data in both lanes of a 256 bit register
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
// duplicate only the first 32 bits
firstFilters = _mm256_shuffle_epi32(filtersReg32, 0);
// duplicate only the second 32 bits
secondFilters = _mm256_shuffle_epi32(filtersReg32, 0x55);
filt1Reg = _mm256_load_si256((__m256i const *)filt_d4_global_avx2);
filt2Reg = _mm256_load_si256((__m256i const *)(filt_d4_global_avx2 + 32));
// multiple the size of the source and destination stride by two
src_stride = src_pixels_per_line << 1;
dst_stride = output_pitch << 1;
for (i = output_height; i > 1; i -= 2) {
// load the 2 strides of source
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
// filter the source buffer
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
// multiply 4 adjacent elements with the filter and add the result
srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
// filter the source buffer
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
// multiply 4 adjacent elements with the filter and add the result
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
srcRegFilt32b1_1 =
_mm256_hadds_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
// shift by 6 bit each 16 bit
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve result
srcRegFilt32b1_1 =
_mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
src_ptr += src_stride;
xx_storeu2_epi32(output_ptr, output_pitch, &srcRegFilt32b1_1);
output_ptr += dst_stride;
}
// if the number of strides is odd.
// process only 4 bytes
if (i > 0) {
__m128i srcReg1, srcRegFilt1_1;
__m128i srcRegFilt2;
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
// filter the source buffer
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
// multiply 4 adjacent elements with the filter and add the result
srcRegFilt1_1 =
_mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
// filter the source buffer
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
// multiply 4 adjacent elements with the filter and add the result
srcRegFilt2 =
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(secondFilters));
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
srcRegFilt1_1 = _mm_hadds_epi16(srcRegFilt1_1, _mm_setzero_si128());
// shift by 6 bit each 16 bit
srcRegFilt1_1 =
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve result
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 4 bytes
*((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
}
}
static void aom_filter_block1d8_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
__m256i srcRegFilt32b1_1, srcRegFilt32b2, srcRegFilt32b3;
__m256i srcReg32b1;
unsigned int i;
ptrdiff_t src_stride, dst_stride;
src_ptr -= 3;
addFilterReg32 = _mm256_set1_epi16(32);
filtersReg = _mm_loadu_si128((const __m128i *)filter);
filtersReg = _mm_srai_epi16(filtersReg, 1);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
// have the same data in both lanes of a 256 bit register
const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
// duplicate only the first 16 bits (first and second byte)
// across 256 bit register
@ -74,22 +212,17 @@ static void aom_filter_block1d16_h8_avx2(
// across 256 bit register
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2);
filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2);
filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2);
filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2);
filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
// multiple the size of the source and destination stride by two
src_stride = src_pixels_per_line << 1;
dst_stride = output_pitch << 1;
for (i = output_height; i > 1; i -= 2) {
// load the 2 strides of source
srcReg32b1 =
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr - 3)));
srcReg32b1 = _mm256_inserti128_si256(
srcReg32b1,
_mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line - 3)),
1);
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
// filter the source buffer
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
@ -110,80 +243,31 @@ static void aom_filter_block1d16_h8_avx2(
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
// add and saturate the results together
srcRegFilt32b1_1 = _mm256_adds_epi16(
srcRegFilt32b1_1, _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
__m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
// reading 2 strides of the next 16 bytes
// (part of it was being read by earlier read)
srcReg32b2 =
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr + 5)));
srcReg32b2 = _mm256_inserti128_si256(
srcReg32b2,
_mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line + 5)),
1);
// add and saturate the results together
srcRegFilt32b1_1 = _mm256_adds_epi16(
srcRegFilt32b1_1, _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
// filter the source buffer
srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
// add and saturate the results together
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
// filter the source buffer
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
// add and saturate the results together
srcRegFilt32b2_1 = _mm256_adds_epi16(
srcRegFilt32b2_1, _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
srcRegFilt32b2_1 = _mm256_adds_epi16(
srcRegFilt32b2_1, _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64);
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64);
// shift by 7 bit each 16 bit
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7);
srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7);
// shift by 6 bit each 16 bit
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
// result
srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
// convolve result and the second lane contain the second convolve result
srcRegFilt32b1_1 =
_mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
src_ptr += src_stride;
// save 16 bytes
_mm_store_si128((__m128i *)output_ptr,
_mm256_castsi256_si128(srcRegFilt32b1_1));
// save the next 16 bits
_mm_store_si128((__m128i *)(output_ptr + output_pitch),
_mm256_extractf128_si256(srcRegFilt32b1_1, 1));
xx_storeu2_epi64(output_ptr, output_pitch, &srcRegFilt32b1_1);
output_ptr += dst_stride;
}
// if the number of strides is odd.
// process only 16 bytes
// process only 8 bytes
if (i > 0) {
__m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
__m128i srcReg1, srcRegFilt1_1;
__m128i srcRegFilt2, srcRegFilt3;
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
// filter the source buffer
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
@ -210,15 +294,172 @@ static void aom_filter_block1d16_h8_avx2(
// add and saturate the results together
srcRegFilt1_1 =
_mm_adds_epi16(srcRegFilt1_1, _mm_min_epi16(srcRegFilt3, srcRegFilt2));
_mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
// reading the next 16 bytes
// shift by 6 bit each 16 bit
srcRegFilt1_1 =
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
// result
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
// save 8 bytes
_mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1_1);
}
}
static void aom_filter_block1d16_h8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
__m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
__m256i srcReg32b1, srcReg32b2, filtersReg32;
unsigned int i;
ptrdiff_t src_stride, dst_stride;
src_ptr -= 3;
addFilterReg32 = _mm256_set1_epi16(32);
filtersReg = _mm_loadu_si128((const __m128i *)filter);
filtersReg = _mm_srai_epi16(filtersReg, 1);
// converting the 16 bit (short) to 8 bit (byte) and have the same data
// in both lanes of 128 bit register.
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
// have the same data in both lanes of a 256 bit register
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
// duplicate only the first 16 bits (first and second byte)
// across 256 bit register
firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
// duplicate only the second 16 bits (third and forth byte)
// across 256 bit register
secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
// duplicate only the third 16 bits (fifth and sixth byte)
// across 256 bit register
thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
// duplicate only the forth 16 bits (seventh and eighth byte)
// across 256 bit register
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
// multiple the size of the source and destination stride by two
src_stride = src_pixels_per_line << 1;
dst_stride = output_pitch << 1;
for (i = output_height; i > 1; i -= 2) {
// load the 2 strides of source
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
// filter the source buffer
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
// add and saturate the results together
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
// filter the source buffer
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
__m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
// reading 2 strides of the next 16 bytes
// (part of it was being read by earlier read)
srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5));
srcReg32b2 =
xx_loadu2_mi128(src_ptr + src_pixels_per_line + 8, src_ptr + 8);
// filter the source buffer
srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
// add and saturate the results together
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
// filter the source buffer
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
// add and saturate the results together
srcRegFilt32b2_1 = _mm256_adds_epi16(
srcRegFilt32b2_1, _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2));
// shift by 6 bit each 16 bit
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg32);
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve result
srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
src_ptr += src_stride;
xx_store2_mi128(output_ptr, output_pitch, &srcRegFilt32b1_1);
output_ptr += dst_stride;
}
// if the number of strides is odd.
// process only 16 bytes
if (i > 0) {
__m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
__m128i srcRegFilt2, srcRegFilt3;
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
// filter the source buffer
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt4Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt1_1 =
_mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
srcRegFilt2 =
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(forthFilters));
// add and saturate the results together
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
// filter the source buffer
srcRegFilt3 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt3 =
_mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(secondFilters));
srcRegFilt2 =
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters));
// add and saturate the results together
srcRegFilt1_1 =
_mm_adds_epi16(srcRegFilt1_1, _mm_max_epi16(srcRegFilt3, srcRegFilt2));
_mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
// reading the next 16 bytes
// (part of it was being read by earlier read)
srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 8));
// filter the source buffer
srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt1Reg));
@ -245,19 +486,16 @@ static void aom_filter_block1d16_h8_avx2(
// add and saturate the results together
srcRegFilt2_1 =
_mm_adds_epi16(srcRegFilt2_1, _mm_min_epi16(srcRegFilt3, srcRegFilt2));
srcRegFilt2_1 =
_mm_adds_epi16(srcRegFilt2_1, _mm_max_epi16(srcRegFilt3, srcRegFilt2));
_mm_adds_epi16(srcRegFilt2_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
// shift by 6 bit each 16 bit
srcRegFilt1_1 =
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg64));
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
srcRegFilt2_1 =
_mm_adds_epi16(srcRegFilt2_1, _mm256_castsi256_si128(addFilterReg64));
// shift by 7 bit each 16 bit
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
_mm_adds_epi16(srcRegFilt2_1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
@ -269,11 +507,11 @@ static void aom_filter_block1d16_h8_avx2(
}
}
static void aom_filter_block1d16_v8_avx2(
static void aom_filter_block1d8_v8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg64;
__m256i addFilterReg32;
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
__m256i srcReg32b11, srcReg32b12, filtersReg32;
@ -281,11 +519,11 @@ static void aom_filter_block1d16_v8_avx2(
unsigned int i;
ptrdiff_t src_stride, dst_stride;
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
addFilterReg32 = _mm256_set1_epi16(32);
filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the
// same data in both lanes of 128 bit register.
filtersReg = _mm_srai_epi16(filtersReg, 1);
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
// have the same data in both lanes of a 256 bit register
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
@ -308,49 +546,178 @@ static void aom_filter_block1d16_v8_avx2(
dst_stride = out_pitch << 1;
// load 16 bytes 7 times in stride of src_pitch
srcReg32b1 =
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr)));
srcReg32b2 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)));
srcReg32b3 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)));
srcReg32b4 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)));
srcReg32b5 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)));
srcReg32b6 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)));
srcReg32b1 = xx_loadu2_epi64(src_ptr + src_pitch, src_ptr);
srcReg32b3 =
xx_loadu2_epi64(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
srcReg32b5 =
xx_loadu2_epi64(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
srcReg32b7 = _mm256_castsi128_si256(
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)));
// have each consecutive loads on the same 256 register
srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
// merge every two consecutive registers except the last one
srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
for (i = output_height; i > 1; i -= 2) {
// load the last 2 loads of 16 bytes and have every two
// consecutive loads in the same 256 bit register
srcReg32b8 = _mm256_castsi128_si256(
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)));
srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
_mm256_castsi256_si128(srcReg32b8), 1);
srcReg32b9 = _mm256_castsi128_si256(
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 8)));
srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
_mm256_castsi256_si128(srcReg32b9), 1);
// merge every two consecutive registers
// save
srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
// multiply 2 adjacent elements with the filter and add the result
srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
// multiply 2 adjacent elements with the filter and add the result
srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
// shift by 6 bit each 16 bit
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
// result
srcReg32b1 = _mm256_packus_epi16(srcReg32b10, _mm256_setzero_si256());
src_ptr += src_stride;
xx_storeu2_epi64(output_ptr, out_pitch, &srcReg32b1);
output_ptr += dst_stride;
// save part of the registers for next strides
srcReg32b10 = srcReg32b11;
srcReg32b11 = srcReg32b2;
srcReg32b2 = srcReg32b4;
srcReg32b7 = srcReg32b9;
}
if (i > 0) {
__m128i srcRegFilt1, srcRegFilt4, srcRegFilt6, srcRegFilt8;
// load the last 16 bytes
srcRegFilt8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
// merge the last 2 results together
srcRegFilt4 =
_mm_unpacklo_epi8(_mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
_mm256_castsi256_si128(firstFilters));
srcRegFilt4 =
_mm_maddubs_epi16(srcRegFilt4, _mm256_castsi256_si128(forthFilters));
// add and saturate the results together
srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
_mm256_castsi256_si128(secondFilters));
// multiply 2 adjacent elements with the filter and add the result
srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
_mm256_castsi256_si128(thirdFilters));
// add and saturate the results together
srcRegFilt1 =
_mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
// shift by 6 bit each 16 bit
srcRegFilt1 =
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve result
srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, _mm_setzero_si128());
// save 8 bytes
_mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1);
}
}
static void aom_filter_block1d16_v8_avx2(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
__m128i filtersReg;
__m256i addFilterReg32;
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
__m256i srcReg32b11, srcReg32b12, filtersReg32;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
unsigned int i;
ptrdiff_t src_stride, dst_stride;
addFilterReg32 = _mm256_set1_epi16(32);
filtersReg = _mm_loadu_si128((const __m128i *)filter);
// converting the 16 bit (short) to 8 bit (byte) and have the
// same data in both lanes of 128 bit register.
filtersReg = _mm_srai_epi16(filtersReg, 1);
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
// have the same data in both lanes of a 256 bit register
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
// duplicate only the first 16 bits (first and second byte)
// across 256 bit register
firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
// duplicate only the second 16 bits (third and forth byte)
// across 256 bit register
secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
// duplicate only the third 16 bits (fifth and sixth byte)
// across 256 bit register
thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
// duplicate only the forth 16 bits (seventh and eighth byte)
// across 256 bit register
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
// multiple the size of the source and destination stride by two
src_stride = src_pitch << 1;
dst_stride = out_pitch << 1;
// load 16 bytes 7 times in stride of src_pitch
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pitch, src_ptr);
srcReg32b3 =
xx_loadu2_mi128(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
srcReg32b5 =
xx_loadu2_mi128(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
srcReg32b7 = _mm256_castsi128_si256(
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)));
// have each consecutive loads on the same 256 register
srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
_mm256_castsi256_si128(srcReg32b2), 1);
srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
_mm256_castsi256_si128(srcReg32b3), 1);
srcReg32b3 = _mm256_inserti128_si256(srcReg32b3,
_mm256_castsi256_si128(srcReg32b4), 1);
srcReg32b4 = _mm256_inserti128_si256(srcReg32b4,
_mm256_castsi256_si128(srcReg32b5), 1);
srcReg32b5 = _mm256_inserti128_si256(srcReg32b5,
_mm256_castsi256_si128(srcReg32b6), 1);
srcReg32b6 = _mm256_inserti128_si256(srcReg32b6,
_mm256_castsi256_si128(srcReg32b7), 1);
srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
// merge every two consecutive registers except the last one
srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2);
// save
srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
// save
srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4);
// save
srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
// save
srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6);
for (i = output_height; i > 1; i -= 2) {
@ -383,9 +750,7 @@ static void aom_filter_block1d16_v8_avx2(
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_min_epi16(srcReg32b8, srcReg32b12));
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_max_epi16(srcReg32b8, srcReg32b12));
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
// multiply 2 adjacent elements with the filter and add the result
srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
@ -399,16 +764,13 @@ static void aom_filter_block1d16_v8_avx2(
// add and saturate the results together
srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
_mm256_min_epi16(srcReg32b8, srcReg32b12));
srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
_mm256_max_epi16(srcReg32b8, srcReg32b12));
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
// shift by 7 bit each 16 bit
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7);
srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7);
// shift by 6 bit each 16 bit
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg32);
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
@ -417,12 +779,7 @@ static void aom_filter_block1d16_v8_avx2(
src_ptr += src_stride;
// save 16 bytes
_mm_store_si128((__m128i *)output_ptr, _mm256_castsi256_si128(srcReg32b1));
// save the next 16 bits
_mm_store_si128((__m128i *)(output_ptr + out_pitch),
_mm256_extractf128_si256(srcReg32b1, 1));
xx_store2_mi128(output_ptr, out_pitch, &srcReg32b1);
output_ptr += dst_stride;
@ -475,24 +832,17 @@ static void aom_filter_block1d16_v8_avx2(
// add and saturate the results together
srcRegFilt1 =
_mm_adds_epi16(srcRegFilt1, _mm_min_epi16(srcRegFilt4, srcRegFilt6));
_mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
srcRegFilt3 =
_mm_adds_epi16(srcRegFilt3, _mm_min_epi16(srcRegFilt5, srcRegFilt7));
_mm_adds_epi16(srcRegFilt3, _mm_adds_epi16(srcRegFilt5, srcRegFilt7));
// add and saturate the results together
// shift by 6 bit each 16 bit
srcRegFilt1 =
_mm_adds_epi16(srcRegFilt1, _mm_max_epi16(srcRegFilt4, srcRegFilt6));
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt3 =
_mm_adds_epi16(srcRegFilt3, _mm_max_epi16(srcRegFilt5, srcRegFilt7));
srcRegFilt1 =
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg64));
srcRegFilt3 =
_mm_adds_epi16(srcRegFilt3, _mm256_castsi256_si128(addFilterReg64));
// shift by 7 bit each 16 bit
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7);
_mm_adds_epi16(srcRegFilt3, _mm256_castsi256_si128(addFilterReg32));
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 6);
// shrink to 8 bit each 16 bits, the first lane contain the first
// convolve result and the second lane contain the second convolve
@ -506,21 +856,6 @@ static void aom_filter_block1d16_v8_avx2(
#if HAVE_AVX2 && HAVE_SSSE3
filter8_1dfunction aom_filter_block1d4_v8_ssse3;
#if ARCH_X86_64
filter8_1dfunction aom_filter_block1d8_v8_intrin_ssse3;
filter8_1dfunction aom_filter_block1d8_h8_intrin_ssse3;
filter8_1dfunction aom_filter_block1d4_h8_intrin_ssse3;
#define aom_filter_block1d8_v8_avx2 aom_filter_block1d8_v8_intrin_ssse3
#define aom_filter_block1d8_h8_avx2 aom_filter_block1d8_h8_intrin_ssse3
#define aom_filter_block1d4_h8_avx2 aom_filter_block1d4_h8_intrin_ssse3
#else // ARCH_X86
filter8_1dfunction aom_filter_block1d8_v8_ssse3;
filter8_1dfunction aom_filter_block1d8_h8_ssse3;
filter8_1dfunction aom_filter_block1d4_h8_ssse3;
#define aom_filter_block1d8_v8_avx2 aom_filter_block1d8_v8_ssse3
#define aom_filter_block1d8_h8_avx2 aom_filter_block1d8_h8_ssse3
#define aom_filter_block1d4_h8_avx2 aom_filter_block1d4_h8_ssse3
#endif // ARCH_X86_64
filter8_1dfunction aom_filter_block1d16_v2_ssse3;
filter8_1dfunction aom_filter_block1d16_h2_ssse3;
filter8_1dfunction aom_filter_block1d8_v2_ssse3;

32
third_party/aom/aom_dsp/x86/convolve_avx2.h поставляемый
Просмотреть файл

@ -13,31 +13,27 @@
#define AOM_DSP_X86_CONVOLVE_AVX2_H_
// filters for 16
DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = {
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
DECLARE_ALIGNED(32, static const uint8_t, filt_global_avx2[]) = {
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1,
2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 2, 3, 3, 4, 4, 5,
5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6,
7, 7, 8, 8, 9, 9, 10, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
10, 11, 11, 12, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
12, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 6, 7,
7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
};
DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = {
2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
};
DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = {
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
};
DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
DECLARE_ALIGNED(32, static const uint8_t, filt_d4_global_avx2[]) = {
0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, 3, 1, 2,
3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9,
7, 8, 9, 10, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10,
};
static INLINE void prepare_coeffs_lowbd(
const InterpFilterParams *const filter_params, const int subpel_q4,
__m256i *const coeffs /* [4] */) {
const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
*filter_params, subpel_q4 & SUBPEL_MASK);
filter_params, subpel_q4 & SUBPEL_MASK);
const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
const __m256i filter_coeffs = _mm256_broadcastsi128_si256(coeffs_8);
@ -65,7 +61,7 @@ static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
const int subpel_q4,
__m256i *const coeffs /* [4] */) {
const int16_t *filter = av1_get_interp_filter_subpel_kernel(
*filter_params, subpel_q4 & SUBPEL_MASK);
filter_params, subpel_q4 & SUBPEL_MASK);
const __m128i coeff_8 = _mm_loadu_si128((__m128i *)filter);
const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);

2
third_party/aom/aom_dsp/x86/convolve_sse2.h поставляемый
Просмотреть файл

@ -19,7 +19,7 @@ static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
const int subpel_q4,
__m128i *const coeffs /* [4] */) {
const int16_t *filter = av1_get_interp_filter_subpel_kernel(
*filter_params, subpel_q4 & SUBPEL_MASK);
filter_params, subpel_q4 & SUBPEL_MASK);
const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
// coeffs 0 1 0 1 0 1 0 1

Просмотреть файл

@ -105,8 +105,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int i, j;
@ -254,8 +254,8 @@ void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int i, j;

Просмотреть файл

@ -18,8 +18,8 @@
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4,
const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
@ -166,8 +166,8 @@ void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4,
const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {

Просмотреть файл

@ -676,7 +676,7 @@ void aom_highbd_upsampled_pred_sse2(MACROBLOCKD *xd,
}
}
const InterpFilterParams filter =
const InterpFilterParams *filter =
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
@ -726,14 +726,14 @@ void aom_highbd_upsampled_pred_sse2(MACROBLOCKD *xd,
const int16_t *const kernel_y =
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
const int intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1),
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
ref_stride, CONVERT_TO_BYTEPTR(temp),
MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
intermediate_height, bd);
aom_highbd_convolve8_vert(
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
16, width, height, bd);
}

Просмотреть файл

@ -22,118 +22,12 @@
void aom_var_filter_block2d_bil_first_pass_ssse3(
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter) {
// Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
// in computation using _mm_maddubs_epi16.
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
const __m128i r = _mm_set1_epi16(round);
const uint8_t f0 = filter[0] >> 1;
const uint8_t f1 = filter[1] >> 1;
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
f0, f1, f0, f1, f0, f1);
const __m128i shuffle_mask =
_mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
unsigned int i, j;
(void)pixel_step;
if (output_width >= 8) {
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 8) {
// load source
__m128i source_low = xx_loadl_64(a);
__m128i source_hi = _mm_setzero_si128();
// avoid load undefined memory
if (a + 8 != NULL) source_hi = xx_loadl_64(a + 8);
__m128i source = _mm_unpacklo_epi64(source_low, source_hi);
// shuffle to:
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
// b[i] = a[i] * filter[0] + a[i + 1] * filter[1]
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
// round
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
xx_storeu_128(b, res);
a += 8;
b += 8;
}
a += src_pixels_per_line - output_width;
}
} else {
for (i = 0; i < output_height; ++i) {
// load source, only first 5 values are meaningful:
// { a[0], a[1], a[2], a[3], a[4], xxxx }
__m128i source = xx_loadl_64(a);
// shuffle, up to the first 8 are useful
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
xx_storel_64(b, res);
a += src_pixels_per_line;
b += output_width;
}
}
}
unsigned int output_width, const uint8_t *filter);
void aom_var_filter_block2d_bil_second_pass_ssse3(
const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter) {
const int16_t round = (1 << FILTER_BITS) >> 1;
const __m128i r = _mm_set1_epi32(round);
const __m128i filters =
_mm_setr_epi16(filter[0], filter[1], filter[0], filter[1], filter[0],
filter[1], filter[0], filter[1]);
const __m128i shuffle_mask =
_mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
const __m128i mask =
_mm_setr_epi8(0, 4, 8, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 4) {
// load source as:
// { a[0], a[1], a[2], a[3], a[w], a[w+1], a[w+2], a[w+3] }
__m128i source1 = xx_loadl_64(a);
__m128i source2 = xx_loadl_64(a + pixel_step);
__m128i source = _mm_unpacklo_epi64(source1, source2);
// shuffle source to:
// { a[0], a[w], a[1], a[w+1], a[2], a[w+2], a[3], a[w+3] }
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
// b[i] = a[i] * filter[0] + a[w + i] * filter[1]
__m128i res = _mm_madd_epi16(source_shuffle, filters);
// round
res = _mm_srai_epi32(_mm_add_epi32(res, r), FILTER_BITS);
// shuffle to get each lower 8 bit of every 32 bit
res = _mm_shuffle_epi8(res, mask);
xx_storel_32(b, res);
a += 4;
b += 4;
}
a += src_pixels_per_line - output_width;
}
}
unsigned int output_width, const uint8_t *filter);
static INLINE void compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
const __m128i *w, const __m128i *r,

390
third_party/aom/aom_dsp/x86/masked_sad_intrin_avx2.c поставляемый Normal file
Просмотреть файл

@ -0,0 +1,390 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <stdio.h>
#include <tmmintrin.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/blend.h"
#include "aom/aom_integer.h"
#include "aom_dsp/x86/synonyms.h"
#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
static INLINE unsigned int masked_sad32xh_avx2(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int width, int height) {
int x, y;
__m256i res = _mm256_setzero_si256();
const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
const __m256i round_scale =
_mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
for (y = 0; y < height; y++) {
for (x = 0; x < width; x += 32) {
const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
const __m256i m = _mm256_lddqu_si256((const __m256i *)&m_ptr[x]);
const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
// Calculate 16 predicted pixels.
// Note that the maximum value of any entry of 'pred_l' or 'pred_r'
// is 64 * 255, so we have plenty of space to add rounding constants.
const __m256i data_l = _mm256_unpacklo_epi8(a, b);
const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
__m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
const __m256i data_r = _mm256_unpackhi_epi8(a, b);
const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
__m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
}
src_ptr += src_stride;
a_ptr += a_stride;
b_ptr += b_stride;
m_ptr += m_stride;
}
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
res = _mm256_shuffle_epi32(res, 0xd8);
res = _mm256_permute4x64_epi64(res, 0xd8);
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int32_t sad = _mm256_extract_epi32(res, 0);
return (sad + 31) >> 6;
}
static INLINE __m256i xx_loadu2_m128i(const void *hi, const void *lo) {
__m128i a0 = _mm_lddqu_si128((const __m128i *)(lo));
__m128i a1 = _mm_lddqu_si128((const __m128i *)(hi));
__m256i a = _mm256_castsi128_si256(a0);
return _mm256_inserti128_si256(a, a1, 1);
}
static INLINE unsigned int masked_sad16xh_avx2(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int height) {
int y;
__m256i res = _mm256_setzero_si256();
const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
const __m256i round_scale =
_mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
for (y = 0; y < height; y += 2) {
const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
const __m256i m = xx_loadu2_m128i(m_ptr + m_stride, m_ptr);
const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
// Calculate 16 predicted pixels.
// Note that the maximum value of any entry of 'pred_l' or 'pred_r'
// is 64 * 255, so we have plenty of space to add rounding constants.
const __m256i data_l = _mm256_unpacklo_epi8(a, b);
const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
__m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
const __m256i data_r = _mm256_unpackhi_epi8(a, b);
const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
__m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
src_ptr += src_stride << 1;
a_ptr += a_stride << 1;
b_ptr += b_stride << 1;
m_ptr += m_stride << 1;
}
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
res = _mm256_shuffle_epi32(res, 0xd8);
res = _mm256_permute4x64_epi64(res, 0xd8);
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int32_t sad = _mm256_extract_epi32(res, 0);
return (sad + 31) >> 6;
}
static INLINE unsigned int aom_masked_sad_avx2(
const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
int invert_mask, int m, int n) {
unsigned int sad;
if (!invert_mask) {
switch (m) {
case 4:
sad = aom_masked_sad4xh_ssse3(src, src_stride, ref, ref_stride,
second_pred, m, msk, msk_stride, n);
break;
case 8:
sad = aom_masked_sad8xh_ssse3(src, src_stride, ref, ref_stride,
second_pred, m, msk, msk_stride, n);
break;
case 16:
sad = masked_sad16xh_avx2(src, src_stride, ref, ref_stride, second_pred,
m, msk, msk_stride, n);
break;
default:
sad = masked_sad32xh_avx2(src, src_stride, ref, ref_stride, second_pred,
m, msk, msk_stride, m, n);
break;
}
} else {
switch (m) {
case 4:
sad = aom_masked_sad4xh_ssse3(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, n);
break;
case 8:
sad = aom_masked_sad8xh_ssse3(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, n);
break;
case 16:
sad = masked_sad16xh_avx2(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, n);
break;
default:
sad = masked_sad32xh_avx2(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, m, n);
break;
}
}
return sad;
}
#define MASKSADMXN_AVX2(m, n) \
unsigned int aom_masked_sad##m##x##n##_avx2( \
const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
int invert_mask) { \
return aom_masked_sad_avx2(src, src_stride, ref, ref_stride, second_pred, \
msk, msk_stride, invert_mask, m, n); \
}
MASKSADMXN_AVX2(4, 4)
MASKSADMXN_AVX2(4, 8)
MASKSADMXN_AVX2(8, 4)
MASKSADMXN_AVX2(8, 8)
MASKSADMXN_AVX2(8, 16)
MASKSADMXN_AVX2(16, 8)
MASKSADMXN_AVX2(16, 16)
MASKSADMXN_AVX2(16, 32)
MASKSADMXN_AVX2(32, 16)
MASKSADMXN_AVX2(32, 32)
MASKSADMXN_AVX2(32, 64)
MASKSADMXN_AVX2(64, 32)
MASKSADMXN_AVX2(64, 64)
MASKSADMXN_AVX2(64, 128)
MASKSADMXN_AVX2(128, 64)
MASKSADMXN_AVX2(128, 128)
MASKSADMXN_AVX2(4, 16)
MASKSADMXN_AVX2(16, 4)
MASKSADMXN_AVX2(8, 32)
MASKSADMXN_AVX2(32, 8)
MASKSADMXN_AVX2(16, 64)
MASKSADMXN_AVX2(64, 16)
static INLINE unsigned int highbd_masked_sad8xh_avx2(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
int height) {
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
int y;
__m256i res = _mm256_setzero_si256();
const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
const __m256i round_const =
_mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
const __m256i one = _mm256_set1_epi16(1);
for (y = 0; y < height; y += 2) {
const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
// Zero-extend mask to 16 bits
const __m256i m = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(
_mm_loadl_epi64((const __m128i *)(m_ptr)),
_mm_loadl_epi64((const __m128i *)(m_ptr + m_stride))));
const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
const __m256i data_l = _mm256_unpacklo_epi16(a, b);
const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
__m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
AOM_BLEND_A64_ROUND_BITS);
const __m256i data_r = _mm256_unpackhi_epi16(a, b);
const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
__m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
AOM_BLEND_A64_ROUND_BITS);
// Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
// so it is safe to do signed saturation here.
const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
// There is no 16-bit SAD instruction, so we have to synthesize
// an 8-element SAD. We do this by storing 4 32-bit partial SADs,
// and accumulating them at the end
const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
src_ptr += src_stride << 1;
a_ptr += a_stride << 1;
b_ptr += b_stride << 1;
m_ptr += m_stride << 1;
}
// At this point, we have four 32-bit partial SADs stored in 'res'.
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
return (sad + 31) >> 6;
}
static INLINE unsigned int highbd_masked_sad16xh_avx2(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
int width, int height) {
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
int x, y;
__m256i res = _mm256_setzero_si256();
const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
const __m256i round_const =
_mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
const __m256i one = _mm256_set1_epi16(1);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x += 16) {
const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
// Zero-extend mask to 16 bits
const __m256i m =
_mm256_cvtepu8_epi16(_mm_lddqu_si128((const __m128i *)&m_ptr[x]));
const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
const __m256i data_l = _mm256_unpacklo_epi16(a, b);
const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
__m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
AOM_BLEND_A64_ROUND_BITS);
const __m256i data_r = _mm256_unpackhi_epi16(a, b);
const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
__m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
AOM_BLEND_A64_ROUND_BITS);
// Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
// so it is safe to do signed saturation here.
const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
// There is no 16-bit SAD instruction, so we have to synthesize
// an 8-element SAD. We do this by storing 4 32-bit partial SADs,
// and accumulating them at the end
const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
}
src_ptr += src_stride;
a_ptr += a_stride;
b_ptr += b_stride;
m_ptr += m_stride;
}
// At this point, we have four 32-bit partial SADs stored in 'res'.
res = _mm256_hadd_epi32(res, res);
res = _mm256_hadd_epi32(res, res);
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
return (sad + 31) >> 6;
}
static INLINE unsigned int aom_highbd_masked_sad_avx2(
const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
int invert_mask, int m, int n) {
unsigned int sad;
if (!invert_mask) {
switch (m) {
case 4:
sad =
aom_highbd_masked_sad4xh_ssse3(src, src_stride, ref, ref_stride,
second_pred, m, msk, msk_stride, n);
break;
case 8:
sad = highbd_masked_sad8xh_avx2(src, src_stride, ref, ref_stride,
second_pred, m, msk, msk_stride, n);
break;
default:
sad = highbd_masked_sad16xh_avx2(src, src_stride, ref, ref_stride,
second_pred, m, msk, msk_stride, m, n);
break;
}
} else {
switch (m) {
case 4:
sad =
aom_highbd_masked_sad4xh_ssse3(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, n);
break;
case 8:
sad = highbd_masked_sad8xh_avx2(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, n);
break;
default:
sad = highbd_masked_sad16xh_avx2(src, src_stride, second_pred, m, ref,
ref_stride, msk, msk_stride, m, n);
break;
}
}
return sad;
}
#define HIGHBD_MASKSADMXN_AVX2(m, n) \
unsigned int aom_highbd_masked_sad##m##x##n##_avx2( \
const uint8_t *src8, int src_stride, const uint8_t *ref8, \
int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
int msk_stride, int invert_mask) { \
return aom_highbd_masked_sad_avx2(src8, src_stride, ref8, ref_stride, \
second_pred8, msk, msk_stride, \
invert_mask, m, n); \
}
HIGHBD_MASKSADMXN_AVX2(4, 4);
HIGHBD_MASKSADMXN_AVX2(4, 8);
HIGHBD_MASKSADMXN_AVX2(8, 4);
HIGHBD_MASKSADMXN_AVX2(8, 8);
HIGHBD_MASKSADMXN_AVX2(8, 16);
HIGHBD_MASKSADMXN_AVX2(16, 8);
HIGHBD_MASKSADMXN_AVX2(16, 16);
HIGHBD_MASKSADMXN_AVX2(16, 32);
HIGHBD_MASKSADMXN_AVX2(32, 16);
HIGHBD_MASKSADMXN_AVX2(32, 32);
HIGHBD_MASKSADMXN_AVX2(32, 64);
HIGHBD_MASKSADMXN_AVX2(64, 32);
HIGHBD_MASKSADMXN_AVX2(64, 64);
HIGHBD_MASKSADMXN_AVX2(64, 128);
HIGHBD_MASKSADMXN_AVX2(128, 64);
HIGHBD_MASKSADMXN_AVX2(128, 128);
HIGHBD_MASKSADMXN_AVX2(4, 16);
HIGHBD_MASKSADMXN_AVX2(16, 4);
HIGHBD_MASKSADMXN_AVX2(8, 32);
HIGHBD_MASKSADMXN_AVX2(32, 8);
HIGHBD_MASKSADMXN_AVX2(16, 64);
HIGHBD_MASKSADMXN_AVX2(64, 16);

Просмотреть файл

@ -19,6 +19,8 @@
#include "aom/aom_integer.h"
#include "aom_dsp/x86/synonyms.h"
#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
// For width a multiple of 16
static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
int src_stride,
@ -27,16 +29,6 @@ static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
const uint8_t *m_ptr, int m_stride,
int width, int height);
static INLINE unsigned int masked_sad8xh_ssse3(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int height);
static INLINE unsigned int masked_sad4xh_ssse3(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int height);
#define MASKSADMXN_SSSE3(m, n) \
unsigned int aom_masked_sad##m##x##n##_ssse3( \
const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
@ -56,11 +48,11 @@ static INLINE unsigned int masked_sad4xh_ssse3(
const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
int invert_mask) { \
if (!invert_mask) \
return masked_sad8xh_ssse3(src, src_stride, ref, ref_stride, \
second_pred, 8, msk, msk_stride, n); \
return aom_masked_sad8xh_ssse3(src, src_stride, ref, ref_stride, \
second_pred, 8, msk, msk_stride, n); \
else \
return masked_sad8xh_ssse3(src, src_stride, second_pred, 8, ref, \
ref_stride, msk, msk_stride, n); \
return aom_masked_sad8xh_ssse3(src, src_stride, second_pred, 8, ref, \
ref_stride, msk, msk_stride, n); \
}
#define MASKSAD4XN_SSSE3(n) \
@ -69,11 +61,11 @@ static INLINE unsigned int masked_sad4xh_ssse3(
const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
int invert_mask) { \
if (!invert_mask) \
return masked_sad4xh_ssse3(src, src_stride, ref, ref_stride, \
second_pred, 4, msk, msk_stride, n); \
return aom_masked_sad4xh_ssse3(src, src_stride, ref, ref_stride, \
second_pred, 4, msk, msk_stride, n); \
else \
return masked_sad4xh_ssse3(src, src_stride, second_pred, 4, ref, \
ref_stride, msk, msk_stride, n); \
return aom_masked_sad4xh_ssse3(src, src_stride, second_pred, 4, ref, \
ref_stride, msk, msk_stride, n); \
}
MASKSADMXN_SSSE3(128, 128)
@ -145,10 +137,11 @@ static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
return (sad + 31) >> 6;
}
static INLINE unsigned int masked_sad8xh_ssse3(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int height) {
unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height) {
int y;
__m128i res = _mm_setzero_si128();
const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
@ -189,10 +182,11 @@ static INLINE unsigned int masked_sad8xh_ssse3(
return (sad + 31) >> 6;
}
static INLINE unsigned int masked_sad4xh_ssse3(
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
int height) {
unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height) {
int y;
__m128i res = _mm_setzero_si128();
const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
@ -238,11 +232,6 @@ static INLINE unsigned int highbd_masked_sad_ssse3(
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
int width, int height);
static INLINE unsigned int highbd_masked_sad4xh_ssse3(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
int height);
#define HIGHBD_MASKSADMXN_SSSE3(m, n) \
unsigned int aom_highbd_masked_sad##m##x##n##_ssse3( \
const uint8_t *src8, int src_stride, const uint8_t *ref8, \
@ -262,11 +251,13 @@ static INLINE unsigned int highbd_masked_sad4xh_ssse3(
int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
int msk_stride, int invert_mask) { \
if (!invert_mask) \
return highbd_masked_sad4xh_ssse3(src8, src_stride, ref8, ref_stride, \
second_pred8, 4, msk, msk_stride, n); \
return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, ref8, \
ref_stride, second_pred8, 4, msk, \
msk_stride, n); \
else \
return highbd_masked_sad4xh_ssse3(src8, src_stride, second_pred8, 4, \
ref8, ref_stride, msk, msk_stride, n); \
return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, second_pred8, 4, \
ref8, ref_stride, msk, msk_stride, \
n); \
}
HIGHBD_MASKSADMXN_SSSE3(128, 128)
@ -350,10 +341,11 @@ static INLINE unsigned int highbd_masked_sad_ssse3(
return (sad + 31) >> 6;
}
static INLINE unsigned int highbd_masked_sad4xh_ssse3(
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
int height) {
unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height) {
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);

33
third_party/aom/aom_dsp/x86/masked_sad_intrin_ssse3.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,33 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef _AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H
#define _AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H
unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height);
unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
const uint8_t *a_ptr, int a_stride,
const uint8_t *b_ptr, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height);
unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
const uint8_t *m_ptr, int m_stride,
int height);
#endif

270
third_party/aom/aom_dsp/x86/obmc_sad_avx2.c поставляемый Normal file
Просмотреть файл

@ -0,0 +1,270 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <immintrin.h>
#include "config/aom_config.h"
#include "aom_ports/mem.h"
#include "aom/aom_integer.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/x86/obmc_intrinsic_ssse3.h"
#include "aom_dsp/x86/synonyms.h"
////////////////////////////////////////////////////////////////////////////////
// 8 bit
////////////////////////////////////////////////////////////////////////////////
static INLINE unsigned int obmc_sad_w4_avx2(const uint8_t *pre,
const int pre_stride,
const int32_t *wsrc,
const int32_t *mask,
const int height) {
int n = 0;
__m256i v_sad_d = _mm256_setzero_si256();
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
do {
const __m128i v_p_b_0 = xx_loadl_32(pre);
const __m128i v_p_b_1 = xx_loadl_32(pre + pre_stride);
const __m128i v_p_b = _mm_unpacklo_epi32(v_p_b_0, v_p_b_1);
const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
const __m256i v_p_d = _mm256_cvtepu8_epi32(v_p_b);
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
// Rounded absolute difference
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
n += 8;
pre += pre_stride << 1;
} while (n < 8 * (height >> 1));
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
return xx_hsum_epi32_si32(v_sad_d_0);
}
static INLINE unsigned int obmc_sad_w8n_avx2(
const uint8_t *pre, const int pre_stride, const int32_t *wsrc,
const int32_t *mask, const int width, const int height) {
const int pre_step = pre_stride - width;
int n = 0;
__m256i v_sad_d = _mm256_setzero_si256();
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
assert(width >= 8);
assert(IS_POWER_OF_TWO(width));
do {
const __m128i v_p0_b = xx_loadl_64(pre + n);
const __m256i v_m0_d = _mm256_lddqu_si256((__m256i *)(mask + n));
const __m256i v_w0_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
const __m256i v_p0_d = _mm256_cvtepu8_epi32(v_p0_b);
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
const __m256i v_absdiff0_d = _mm256_abs_epi32(v_diff0_d);
// Rounded absolute difference
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff0_d, v_bias_d);
const __m256i v_rad0_d = _mm256_srli_epi32(v_tmp_d, 12);
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad0_d);
n += 8;
if ((n & (width - 1)) == 0) pre += pre_step;
} while (n < width * height);
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
return xx_hsum_epi32_si32(v_sad_d_0);
}
#define OBMCSADWXH(w, h) \
unsigned int aom_obmc_sad##w##x##h##_avx2( \
const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
const int32_t *msk) { \
if (w == 4) { \
return obmc_sad_w4_avx2(pre, pre_stride, wsrc, msk, h); \
} else { \
return obmc_sad_w8n_avx2(pre, pre_stride, wsrc, msk, w, h); \
} \
}
OBMCSADWXH(128, 128)
OBMCSADWXH(128, 64)
OBMCSADWXH(64, 128)
OBMCSADWXH(64, 64)
OBMCSADWXH(64, 32)
OBMCSADWXH(32, 64)
OBMCSADWXH(32, 32)
OBMCSADWXH(32, 16)
OBMCSADWXH(16, 32)
OBMCSADWXH(16, 16)
OBMCSADWXH(16, 8)
OBMCSADWXH(8, 16)
OBMCSADWXH(8, 8)
OBMCSADWXH(8, 4)
OBMCSADWXH(4, 8)
OBMCSADWXH(4, 4)
OBMCSADWXH(4, 16)
OBMCSADWXH(16, 4)
OBMCSADWXH(8, 32)
OBMCSADWXH(32, 8)
OBMCSADWXH(16, 64)
OBMCSADWXH(64, 16)
////////////////////////////////////////////////////////////////////////////////
// High bit-depth
////////////////////////////////////////////////////////////////////////////////
static INLINE unsigned int hbd_obmc_sad_w4_avx2(const uint8_t *pre8,
const int pre_stride,
const int32_t *wsrc,
const int32_t *mask,
const int height) {
const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
int n = 0;
__m256i v_sad_d = _mm256_setzero_si256();
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
do {
const __m128i v_p_w_0 = xx_loadl_64(pre);
const __m128i v_p_w_1 = xx_loadl_64(pre + pre_stride);
const __m128i v_p_w = _mm_unpacklo_epi64(v_p_w_0, v_p_w_1);
const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
const __m256i v_p_d = _mm256_cvtepu16_epi32(v_p_w);
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
// Rounded absolute difference
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
n += 8;
pre += pre_stride << 1;
} while (n < 8 * (height >> 1));
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
return xx_hsum_epi32_si32(v_sad_d_0);
}
static INLINE unsigned int hbd_obmc_sad_w8n_avx2(
const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
const int32_t *mask, const int width, const int height) {
const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
const int pre_step = pre_stride - width;
int n = 0;
__m256i v_sad_d = _mm256_setzero_si256();
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
assert(width >= 8);
assert(IS_POWER_OF_TWO(width));
do {
const __m128i v_p0_w = _mm_lddqu_si128((__m128i *)(pre + n));
const __m256i v_m0_d = _mm256_lddqu_si256((__m256i *)(mask + n));
const __m256i v_w0_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
const __m256i v_p0_d = _mm256_cvtepu16_epi32(v_p0_w);
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
// boundaries. We use pmaddwd, as it has lower latency on Haswell
// than pmulld but produces the same result with these inputs.
const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
const __m256i v_absdiff0_d = _mm256_abs_epi32(v_diff0_d);
// Rounded absolute difference
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff0_d, v_bias_d);
const __m256i v_rad0_d = _mm256_srli_epi32(v_tmp_d, 12);
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad0_d);
n += 8;
if (n % width == 0) pre += pre_step;
} while (n < width * height);
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
return xx_hsum_epi32_si32(v_sad_d_0);
}
#define HBD_OBMCSADWXH(w, h) \
unsigned int aom_highbd_obmc_sad##w##x##h##_avx2( \
const uint8_t *pre, int pre_stride, const int32_t *wsrc, \
const int32_t *mask) { \
if (w == 4) { \
return hbd_obmc_sad_w4_avx2(pre, pre_stride, wsrc, mask, h); \
} else { \
return hbd_obmc_sad_w8n_avx2(pre, pre_stride, wsrc, mask, w, h); \
} \
}
HBD_OBMCSADWXH(128, 128)
HBD_OBMCSADWXH(128, 64)
HBD_OBMCSADWXH(64, 128)
HBD_OBMCSADWXH(64, 64)
HBD_OBMCSADWXH(64, 32)
HBD_OBMCSADWXH(32, 64)
HBD_OBMCSADWXH(32, 32)
HBD_OBMCSADWXH(32, 16)
HBD_OBMCSADWXH(16, 32)
HBD_OBMCSADWXH(16, 16)
HBD_OBMCSADWXH(16, 8)
HBD_OBMCSADWXH(8, 16)
HBD_OBMCSADWXH(8, 8)
HBD_OBMCSADWXH(8, 4)
HBD_OBMCSADWXH(4, 8)
HBD_OBMCSADWXH(4, 4)
HBD_OBMCSADWXH(4, 16)
HBD_OBMCSADWXH(16, 4)
HBD_OBMCSADWXH(8, 32)
HBD_OBMCSADWXH(32, 8)
HBD_OBMCSADWXH(16, 64)
HBD_OBMCSADWXH(64, 16)

Просмотреть файл

@ -26,6 +26,16 @@
// 8 bit
////////////////////////////////////////////////////////////////////////////////
void aom_var_filter_block2d_bil_first_pass_ssse3(
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter);
void aom_var_filter_block2d_bil_second_pass_ssse3(
const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter);
static INLINE void obmc_variance_w4(const uint8_t *pre, const int pre_stride,
const int32_t *wsrc, const int32_t *mask,
unsigned int *const sse, int *const sum,
@ -152,6 +162,46 @@ OBMCVARWXH(32, 8)
OBMCVARWXH(16, 64)
OBMCVARWXH(64, 16)
#include "config/aom_dsp_rtcd.h"
#define OBMC_SUBPIX_VAR(W, H) \
uint32_t aom_obmc_sub_pixel_variance##W##x##H##_sse4_1( \
const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint8_t temp2[H * W]; \
\
aom_var_filter_block2d_bil_first_pass_ssse3( \
pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
aom_var_filter_block2d_bil_second_pass_ssse3( \
fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
\
return aom_obmc_variance##W##x##H##_sse4_1(temp2, W, wsrc, mask, sse); \
}
OBMC_SUBPIX_VAR(128, 128)
OBMC_SUBPIX_VAR(128, 64)
OBMC_SUBPIX_VAR(64, 128)
OBMC_SUBPIX_VAR(64, 64)
OBMC_SUBPIX_VAR(64, 32)
OBMC_SUBPIX_VAR(32, 64)
OBMC_SUBPIX_VAR(32, 32)
OBMC_SUBPIX_VAR(32, 16)
OBMC_SUBPIX_VAR(16, 32)
OBMC_SUBPIX_VAR(16, 16)
OBMC_SUBPIX_VAR(16, 8)
OBMC_SUBPIX_VAR(8, 16)
OBMC_SUBPIX_VAR(8, 8)
OBMC_SUBPIX_VAR(8, 4)
OBMC_SUBPIX_VAR(4, 8)
OBMC_SUBPIX_VAR(4, 4)
OBMC_SUBPIX_VAR(4, 16)
OBMC_SUBPIX_VAR(16, 4)
OBMC_SUBPIX_VAR(8, 32)
OBMC_SUBPIX_VAR(32, 8)
OBMC_SUBPIX_VAR(16, 64)
OBMC_SUBPIX_VAR(64, 16)
////////////////////////////////////////////////////////////////////////////////
// High bit-depth
////////////////////////////////////////////////////////////////////////////////

108
third_party/aom/aom_dsp/x86/subtract_avx2.c поставляемый Normal file
Просмотреть файл

@ -0,0 +1,108 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <immintrin.h>
#include "config/aom_dsp_rtcd.h"
static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr,
const uint8_t *pred_ptr) {
__m256i s = _mm256_lddqu_si256((__m256i *)(src_ptr));
__m256i p = _mm256_lddqu_si256((__m256i *)(pred_ptr));
__m256i s_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(s));
__m256i s_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(s, 1));
__m256i p_0 = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(p));
__m256i p_1 = _mm256_cvtepu8_epi16(_mm256_extracti128_si256(p, 1));
const __m256i d_0 = _mm256_sub_epi16(s_0, p_0);
const __m256i d_1 = _mm256_sub_epi16(s_1, p_1);
_mm256_store_si256((__m256i *)(diff_ptr), d_0);
_mm256_store_si256((__m256i *)(diff_ptr + 16), d_1);
}
static INLINE void aom_subtract_block_16xn_avx2(
int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
for (int32_t j = 0; j < rows; ++j) {
__m128i s = _mm_lddqu_si128((__m128i *)(src_ptr));
__m128i p = _mm_lddqu_si128((__m128i *)(pred_ptr));
__m256i s_0 = _mm256_cvtepu8_epi16(s);
__m256i p_0 = _mm256_cvtepu8_epi16(p);
const __m256i d_0 = _mm256_sub_epi16(s_0, p_0);
_mm256_store_si256((__m256i *)(diff_ptr), d_0);
src_ptr += src_stride;
pred_ptr += pred_stride;
diff_ptr += diff_stride;
}
}
static INLINE void aom_subtract_block_32xn_avx2(
int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
for (int32_t j = 0; j < rows; ++j) {
subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
src_ptr += src_stride;
pred_ptr += pred_stride;
diff_ptr += diff_stride;
}
}
static INLINE void aom_subtract_block_64xn_avx2(
int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
for (int32_t j = 0; j < rows; ++j) {
subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32);
src_ptr += src_stride;
pred_ptr += pred_stride;
diff_ptr += diff_stride;
}
}
static INLINE void aom_subtract_block_128xn_avx2(
int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
for (int32_t j = 0; j < rows; ++j) {
subtract32_avx2(diff_ptr, src_ptr, pred_ptr);
subtract32_avx2(diff_ptr + 32, src_ptr + 32, pred_ptr + 32);
subtract32_avx2(diff_ptr + 64, src_ptr + 64, pred_ptr + 64);
subtract32_avx2(diff_ptr + 96, src_ptr + 96, pred_ptr + 96);
src_ptr += src_stride;
pred_ptr += pred_stride;
diff_ptr += diff_stride;
}
}
void aom_subtract_block_avx2(int rows, int cols, int16_t *diff_ptr,
ptrdiff_t diff_stride, const uint8_t *src_ptr,
ptrdiff_t src_stride, const uint8_t *pred_ptr,
ptrdiff_t pred_stride) {
switch (cols) {
case 16:
aom_subtract_block_16xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
src_stride, pred_ptr, pred_stride);
break;
case 32:
aom_subtract_block_32xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
src_stride, pred_ptr, pred_stride);
break;
case 64:
aom_subtract_block_64xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
src_stride, pred_ptr, pred_stride);
break;
case 128:
aom_subtract_block_128xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
src_stride, pred_ptr, pred_stride);
break;
default:
aom_subtract_block_sse2(rows, cols, diff_ptr, diff_stride, src_ptr,
src_stride, pred_ptr, pred_stride);
break;
}
}

199
third_party/aom/aom_dsp/x86/txfm_common_avx2.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,199 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AOM_DSP_X86_TXFM_COMMON_AVX2_H_
#define AOM_DSP_X86_TXFM_COMMON_AVX2_H_
#include <emmintrin.h>
#include "aom/aom_integer.h"
#include "aom_dsp/x86/synonyms.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef void (*transform_1d_avx2)(const __m256i *input, __m256i *output,
int8_t cos_bit);
static INLINE __m256i pair_set_w16_epi16(int16_t a, int16_t b) {
return _mm256_set1_epi32(
(int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)));
}
static INLINE void btf_16_w16_avx2(const __m256i w0, const __m256i w1,
__m256i *in0, __m256i *in1, const __m256i _r,
const int32_t cos_bit) {
__m256i t0 = _mm256_unpacklo_epi16(*in0, *in1);
__m256i t1 = _mm256_unpackhi_epi16(*in0, *in1);
__m256i u0 = _mm256_madd_epi16(t0, w0);
__m256i u1 = _mm256_madd_epi16(t1, w0);
__m256i v0 = _mm256_madd_epi16(t0, w1);
__m256i v1 = _mm256_madd_epi16(t1, w1);
__m256i a0 = _mm256_add_epi32(u0, _r);
__m256i a1 = _mm256_add_epi32(u1, _r);
__m256i b0 = _mm256_add_epi32(v0, _r);
__m256i b1 = _mm256_add_epi32(v1, _r);
__m256i c0 = _mm256_srai_epi32(a0, cos_bit);
__m256i c1 = _mm256_srai_epi32(a1, cos_bit);
__m256i d0 = _mm256_srai_epi32(b0, cos_bit);
__m256i d1 = _mm256_srai_epi32(b1, cos_bit);
*in0 = _mm256_packs_epi32(c0, c1);
*in1 = _mm256_packs_epi32(d0, d1);
}
static INLINE void btf_16_adds_subs_avx2(__m256i *in0, __m256i *in1) {
const __m256i _in0 = *in0;
const __m256i _in1 = *in1;
*in0 = _mm256_adds_epi16(_in0, _in1);
*in1 = _mm256_subs_epi16(_in0, _in1);
}
static INLINE void btf_32_add_sub_avx2(__m256i *in0, __m256i *in1) {
const __m256i _in0 = *in0;
const __m256i _in1 = *in1;
*in0 = _mm256_add_epi32(_in0, _in1);
*in1 = _mm256_sub_epi32(_in0, _in1);
}
static INLINE void btf_16_adds_subs_out_avx2(__m256i *out0, __m256i *out1,
__m256i in0, __m256i in1) {
const __m256i _in0 = in0;
const __m256i _in1 = in1;
*out0 = _mm256_adds_epi16(_in0, _in1);
*out1 = _mm256_subs_epi16(_in0, _in1);
}
static INLINE void btf_32_add_sub_out_avx2(__m256i *out0, __m256i *out1,
__m256i in0, __m256i in1) {
const __m256i _in0 = in0;
const __m256i _in1 = in1;
*out0 = _mm256_add_epi32(_in0, _in1);
*out1 = _mm256_sub_epi32(_in0, _in1);
}
static INLINE __m256i load_16bit_to_16bit_avx2(const int16_t *a) {
return _mm256_load_si256((const __m256i *)a);
}
static INLINE void load_buffer_16bit_to_16bit_avx2(const int16_t *in,
int stride, __m256i *out,
int out_size) {
for (int i = 0; i < out_size; ++i) {
out[i] = load_16bit_to_16bit_avx2(in + i * stride);
}
}
static INLINE void load_buffer_16bit_to_16bit_flip_avx2(const int16_t *in,
int stride,
__m256i *out,
int out_size) {
for (int i = 0; i < out_size; ++i) {
out[out_size - i - 1] = load_16bit_to_16bit_avx2(in + i * stride);
}
}
static INLINE __m256i load_32bit_to_16bit_w16_avx2(const int32_t *a) {
const __m256i a_low = _mm256_lddqu_si256((const __m256i *)a);
const __m256i b = _mm256_packs_epi32(a_low, *(const __m256i *)(a + 8));
return _mm256_permute4x64_epi64(b, 0xD8);
}
static INLINE void load_buffer_32bit_to_16bit_w16_avx2(const int32_t *in,
int stride, __m256i *out,
int out_size) {
for (int i = 0; i < out_size; ++i) {
out[i] = load_32bit_to_16bit_w16_avx2(in + i * stride);
}
}
static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
__m256i *const out) {
// Unpack 16 bit elements. Goes from:
// in[0]: 00 01 02 03 08 09 0a 0b 04 05 06 07 0c 0d 0e 0f
// in[1]: 10 11 12 13 18 19 1a 1b 14 15 16 17 1c 1d 1e 1f
// in[2]: 20 21 22 23 28 29 2a 2b 24 25 26 27 2c 2d 2e 2f
// in[3]: 30 31 32 33 38 39 3a 3b 34 35 36 37 3c 3d 3e 3f
// in[4]: 40 41 42 43 48 49 4a 4b 44 45 46 47 4c 4d 4e 4f
// in[5]: 50 51 52 53 58 59 5a 5b 54 55 56 57 5c 5d 5e 5f
// in[6]: 60 61 62 63 68 69 6a 6b 64 65 66 67 6c 6d 6e 6f
// in[7]: 70 71 72 73 78 79 7a 7b 74 75 76 77 7c 7d 7e 7f
// in[8]: 80 81 82 83 88 89 8a 8b 84 85 86 87 8c 8d 8e 8f
// to:
// a0: 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
// a1: 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
// a2: 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
// a3: 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
// ...
__m256i a[16];
for (int i = 0; i < 16; i += 2) {
a[i / 2 + 0] = _mm256_unpacklo_epi16(in[i], in[i + 1]);
a[i / 2 + 8] = _mm256_unpackhi_epi16(in[i], in[i + 1]);
}
__m256i b[16];
for (int i = 0; i < 16; i += 2) {
b[i / 2 + 0] = _mm256_unpacklo_epi32(a[i], a[i + 1]);
b[i / 2 + 8] = _mm256_unpackhi_epi32(a[i], a[i + 1]);
}
__m256i c[16];
for (int i = 0; i < 16; i += 2) {
c[i / 2 + 0] = _mm256_unpacklo_epi64(b[i], b[i + 1]);
c[i / 2 + 8] = _mm256_unpackhi_epi64(b[i], b[i + 1]);
}
out[0 + 0] = _mm256_permute2x128_si256(c[0], c[1], 0x20);
out[1 + 0] = _mm256_permute2x128_si256(c[8], c[9], 0x20);
out[2 + 0] = _mm256_permute2x128_si256(c[4], c[5], 0x20);
out[3 + 0] = _mm256_permute2x128_si256(c[12], c[13], 0x20);
out[0 + 8] = _mm256_permute2x128_si256(c[0], c[1], 0x31);
out[1 + 8] = _mm256_permute2x128_si256(c[8], c[9], 0x31);
out[2 + 8] = _mm256_permute2x128_si256(c[4], c[5], 0x31);
out[3 + 8] = _mm256_permute2x128_si256(c[12], c[13], 0x31);
out[4 + 0] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x20);
out[5 + 0] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x20);
out[6 + 0] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x20);
out[7 + 0] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x20);
out[4 + 8] = _mm256_permute2x128_si256(c[0 + 2], c[1 + 2], 0x31);
out[5 + 8] = _mm256_permute2x128_si256(c[8 + 2], c[9 + 2], 0x31);
out[6 + 8] = _mm256_permute2x128_si256(c[4 + 2], c[5 + 2], 0x31);
out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
}
static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
for (int i = 0; i < size; ++i) {
out[size - i - 1] = in[i];
}
}
static INLINE void round_shift_16bit_w16_avx2(__m256i *in, int size, int bit) {
if (bit < 0) {
bit = -bit;
__m256i round = _mm256_set1_epi16(1 << (bit - 1));
for (int i = 0; i < size; ++i) {
in[i] = _mm256_adds_epi16(in[i], round);
in[i] = _mm256_srai_epi16(in[i], bit);
}
} else if (bit > 0) {
for (int i = 0; i < size; ++i) {
in[i] = _mm256_slli_epi16(in[i], bit);
}
}
}
#ifdef __cplusplus
}
#endif
#endif // AOM_DSP_X86_TXFM_COMMON_AVX2_H_

113
third_party/aom/aom_dsp/x86/variance_avx2.c поставляемый
Просмотреть файл

@ -324,6 +324,12 @@ static INLINE __m256i mm256_loadu2(const uint8_t *p0, const uint8_t *p1) {
return _mm256_insertf128_si256(d, _mm_loadu_si128((const __m128i *)p0), 1);
}
static INLINE __m256i mm256_loadu2_16(const uint16_t *p0, const uint16_t *p1) {
const __m256i d =
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)p1));
return _mm256_insertf128_si256(d, _mm_loadu_si128((const __m128i *)p0), 1);
}
static INLINE void comp_mask_pred_line_avx2(const __m256i s0, const __m256i s1,
const __m256i a,
uint8_t *comp_pred) {
@ -401,3 +407,110 @@ void aom_comp_mask_pred_avx2(uint8_t *comp_pred, const uint8_t *pred, int width,
} while (i < height);
}
}
static INLINE __m256i highbd_comp_mask_pred_line_avx2(const __m256i s0,
const __m256i s1,
const __m256i a) {
const __m256i alpha_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
const __m256i round_const =
_mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
const __m256i a_inv = _mm256_sub_epi16(alpha_max, a);
const __m256i s_lo = _mm256_unpacklo_epi16(s0, s1);
const __m256i a_lo = _mm256_unpacklo_epi16(a, a_inv);
const __m256i pred_lo = _mm256_madd_epi16(s_lo, a_lo);
const __m256i pred_l = _mm256_srai_epi32(
_mm256_add_epi32(pred_lo, round_const), AOM_BLEND_A64_ROUND_BITS);
const __m256i s_hi = _mm256_unpackhi_epi16(s0, s1);
const __m256i a_hi = _mm256_unpackhi_epi16(a, a_inv);
const __m256i pred_hi = _mm256_madd_epi16(s_hi, a_hi);
const __m256i pred_h = _mm256_srai_epi32(
_mm256_add_epi32(pred_hi, round_const), AOM_BLEND_A64_ROUND_BITS);
const __m256i comp = _mm256_packs_epi32(pred_l, pred_h);
return comp;
}
void aom_highbd_comp_mask_pred_avx2(uint16_t *comp_pred, const uint8_t *pred8,
int width, int height, const uint8_t *ref8,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask) {
int i = 0;
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
const uint16_t *src0 = invert_mask ? pred : ref;
const uint16_t *src1 = invert_mask ? ref : pred;
const int stride0 = invert_mask ? width : ref_stride;
const int stride1 = invert_mask ? ref_stride : width;
const __m256i zero = _mm256_setzero_si256();
if (width == 8) {
do {
const __m256i s0 = mm256_loadu2_16(src0 + stride0, src0);
const __m256i s1 = mm256_loadu2_16(src1 + stride1, src1);
const __m128i m_l = _mm_loadl_epi64((const __m128i *)mask);
const __m128i m_h = _mm_loadl_epi64((const __m128i *)(mask + 8));
__m256i m = _mm256_castsi128_si256(m_l);
m = _mm256_insertf128_si256(m, m_h, 1);
const __m256i m_16 = _mm256_unpacklo_epi8(m, zero);
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
_mm_storeu_si128((__m128i *)(comp_pred), _mm256_castsi256_si128(comp));
_mm_storeu_si128((__m128i *)(comp_pred + width),
_mm256_extractf128_si256(comp, 1));
src0 += (stride0 << 1);
src1 += (stride1 << 1);
mask += (mask_stride << 1);
comp_pred += (width << 1);
i += 2;
} while (i < height);
} else if (width == 16) {
do {
const __m256i s0 = _mm256_loadu_si256((const __m256i *)(src0));
const __m256i s1 = _mm256_loadu_si256((const __m256i *)(src1));
const __m256i m_16 =
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
_mm256_storeu_si256((__m256i *)comp_pred, comp);
src0 += stride0;
src1 += stride1;
mask += mask_stride;
comp_pred += width;
i += 1;
} while (i < height);
} else if (width == 32) {
do {
const __m256i s0 = _mm256_loadu_si256((const __m256i *)src0);
const __m256i s2 = _mm256_loadu_si256((const __m256i *)(src0 + 16));
const __m256i s1 = _mm256_loadu_si256((const __m256i *)src1);
const __m256i s3 = _mm256_loadu_si256((const __m256i *)(src1 + 16));
const __m256i m01_16 =
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
const __m256i m23_16 =
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)(mask + 16)));
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m01_16);
const __m256i comp1 = highbd_comp_mask_pred_line_avx2(s2, s3, m23_16);
_mm256_storeu_si256((__m256i *)comp_pred, comp);
_mm256_storeu_si256((__m256i *)(comp_pred + 16), comp1);
src0 += stride0;
src1 += stride1;
mask += mask_stride;
comp_pred += width;
i += 1;
} while (i < height);
}
}

129
third_party/aom/aom_dsp/x86/variance_impl_ssse3.c поставляемый Normal file
Просмотреть файл

@ -0,0 +1,129 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <tmmintrin.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/x86/synonyms.h"
void aom_var_filter_block2d_bil_first_pass_ssse3(
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter) {
// Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
// in computation using _mm_maddubs_epi16.
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
const __m128i r = _mm_set1_epi16(round);
const uint8_t f0 = filter[0] >> 1;
const uint8_t f1 = filter[1] >> 1;
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
f0, f1, f0, f1, f0, f1);
unsigned int i, j;
(void)pixel_step;
if (output_width >= 8) {
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 8) {
// load source
__m128i source_low = xx_loadl_64(a);
__m128i source_hi = xx_loadl_64(a + 1);
// unpack to:
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
__m128i source = _mm_unpacklo_epi8(source_low, source_hi);
// b[i] = a[i] * filter[0] + a[i + 1] * filter[1]
__m128i res = _mm_maddubs_epi16(source, filters);
// round
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
xx_storeu_128(b, res);
a += 8;
b += 8;
}
a += src_pixels_per_line - output_width;
}
} else {
const __m128i shuffle_mask =
_mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
for (i = 0; i < output_height; ++i) {
// load source, only first 5 values are meaningful:
// { a[0], a[1], a[2], a[3], a[4], xxxx }
__m128i source = xx_loadl_64(a);
// shuffle, up to the first 8 are useful
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
xx_storel_64(b, res);
a += src_pixels_per_line;
b += output_width;
}
}
}
void aom_var_filter_block2d_bil_second_pass_ssse3(
const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
unsigned int pixel_step, unsigned int output_height,
unsigned int output_width, const uint8_t *filter) {
const int16_t round = (1 << FILTER_BITS) >> 1;
const __m128i r = _mm_set1_epi32(round);
const __m128i filters =
_mm_setr_epi16(filter[0], filter[1], filter[0], filter[1], filter[0],
filter[1], filter[0], filter[1]);
const __m128i shuffle_mask =
_mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
const __m128i mask =
_mm_setr_epi8(0, 4, 8, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
unsigned int i, j;
for (i = 0; i < output_height; ++i) {
for (j = 0; j < output_width; j += 4) {
// load source as:
// { a[0], a[1], a[2], a[3], a[w], a[w+1], a[w+2], a[w+3] }
__m128i source1 = xx_loadl_64(a);
__m128i source2 = xx_loadl_64(a + pixel_step);
__m128i source = _mm_unpacklo_epi64(source1, source2);
// shuffle source to:
// { a[0], a[w], a[1], a[w+1], a[2], a[w+2], a[3], a[w+3] }
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
// b[i] = a[i] * filter[0] + a[w + i] * filter[1]
__m128i res = _mm_madd_epi16(source_shuffle, filters);
// round
res = _mm_srai_epi32(_mm_add_epi32(res, r), FILTER_BITS);
// shuffle to get each lower 8 bit of every 32 bit
res = _mm_shuffle_epi8(res, mask);
xx_storel_32(b, res);
a += 4;
b += 4;
}
a += src_pixels_per_line - output_width;
}
}

12
third_party/aom/aom_dsp/x86/variance_sse2.c поставляемый
Просмотреть файл

@ -569,7 +569,7 @@ void aom_upsampled_pred_sse2(MACROBLOCKD *xd, const struct AV1Common *const cm,
}
}
const InterpFilterParams filter =
const InterpFilterParams *filter =
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
if (!subpel_x_q3 && !subpel_y_q3) {
@ -633,12 +633,12 @@ void aom_upsampled_pred_sse2(MACROBLOCKD *xd, const struct AV1Common *const cm,
const int16_t *const kernel_y =
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
const int intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_convolve8_horiz(ref - ref_stride * ((filter.taps >> 1) - 1), ref_stride,
temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
intermediate_height);
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
aom_convolve8_horiz(ref - ref_stride * ((filter->taps >> 1) - 1),
ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
width, intermediate_height);
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
width, height);
}

41
third_party/aom/aom_ports/aom_once.h поставляемый
Просмотреть файл

@ -17,7 +17,7 @@
/* Implement a function wrapper to guarantee initialization
* thread-safety for library singletons.
*
* NOTE: These functions use static locks, and can only be
* NOTE: This function uses static locks, and can only be
* used with one common argument per compilation unit. So
*
* file1.c:
@ -25,8 +25,8 @@
* ...
* aom_once(foo);
*
* file2.c:
* aom_once(bar);
* file2.c:
* aom_once(bar);
*
* will ensure foo() and bar() are each called only once, but in
*
@ -46,19 +46,19 @@
* local initializers are not thread-safe in MSVC prior to Visual
* Studio 2015.
*
* As a static, once_state will be zero-initialized as program start.
* As a static, aom_once_state will be zero-initialized as program start.
*/
static LONG once_state;
static void once(void (*func)(void)) {
/* Try to advance once_state from its initial value of 0 to 1.
static LONG aom_once_state;
static void aom_once(void (*func)(void)) {
/* Try to advance aom_once_state from its initial value of 0 to 1.
* Only one thread can succeed in doing so.
*/
if (InterlockedCompareExchange(&once_state, 1, 0) == 0) {
/* We're the winning thread, having set once_state to 1.
if (InterlockedCompareExchange(&aom_once_state, 1, 0) == 0) {
/* We're the winning thread, having set aom_once_state to 1.
* Call our function. */
func();
/* Now advance once_state to 2, unblocking any other threads. */
InterlockedIncrement(&once_state);
/* Now advance aom_once_state to 2, unblocking any other threads. */
InterlockedIncrement(&aom_once_state);
return;
}
@ -66,10 +66,10 @@ static void once(void (*func)(void)) {
* the state variable so we don't return before func()
* has finished executing elsewhere.
*
* Try to advance once_state from 2 to 2, which is only possible
* Try to advance aom_once_state from 2 to 2, which is only possible
* after the winning thead advances it from 1 to 2.
*/
while (InterlockedCompareExchange(&once_state, 2, 2) != 2) {
while (InterlockedCompareExchange(&aom_once_state, 2, 2) != 2) {
/* State isn't yet 2. Try again.
*
* We are used for singleton initialization functions,
@ -83,8 +83,8 @@ static void once(void (*func)(void)) {
Sleep(0);
}
/* We've seen once_state advance to 2, so we know func()
* has been called. And we've left once_state as we found it,
/* We've seen aom_once_state advance to 2, so we know func()
* has been called. And we've left aom_once_state as we found it,
* so other threads will have the same experience.
*
* It's safe to return now.
@ -95,7 +95,7 @@ static void once(void (*func)(void)) {
#elif CONFIG_MULTITHREAD && defined(__OS2__)
#define INCL_DOS
#include <os2.h>
static void once(void (*func)(void)) {
static void aom_once(void (*func)(void)) {
static int done;
/* If the initialization is complete, return early. */
@ -117,18 +117,15 @@ static void once(void (*func)(void)) {
#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
#include <pthread.h>
static void once(void (*func)(void)) {
static void aom_once(void (*func)(void)) {
static pthread_once_t lock = PTHREAD_ONCE_INIT;
pthread_once(&lock, func);
}
#else
/* No-op version that performs no synchronization. *_rtcd() is idempotent,
* so as long as your platform provides atomic loads/stores of pointers
* no synchronization is strictly necessary.
*/
/* Default version that performs no synchronization. */
static void once(void (*func)(void)) {
static void aom_once(void (*func)(void)) {
static int done;
if (!done) {

2
third_party/aom/aom_scale/aom_scale_rtcd.c поставляемый
Просмотреть файл

@ -15,4 +15,4 @@
#include "aom_ports/aom_once.h"
void aom_scale_rtcd() { once(setup_rtcd_internal); }
void aom_scale_rtcd() { aom_once(setup_rtcd_internal); }

Просмотреть файл

@ -51,6 +51,10 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
aom_codec_frame_buffer_t *fb,
aom_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
if (ybf) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) return -1;
#endif
const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
const int aligned_width = (width + 7) & ~7;
const int aligned_height = (height + 7) & ~7;
@ -154,7 +158,7 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
(uv_border_h * uv_stride) + uv_border_w,
aom_byte_align);
ybf->use_external_refernce_buffers = 0;
ybf->use_external_reference_buffers = 0;
if (use_highbitdepth) {
if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);

2
third_party/aom/aom_scale/yv12config.h поставляемый
Просмотреть файл

@ -81,7 +81,7 @@ typedef struct yv12_buffer_config {
// Indicate whether y_buffer, u_buffer, and v_buffer points to the internally
// allocated memory or external buffers.
int use_external_refernce_buffers;
int use_external_reference_buffers;
// This is needed to store y_buffer, u_buffer, and v_buffer when set reference
// uses an external refernece, and restore those buffer pointers after the
// external reference frame is no longer used.

5
third_party/aom/aom_util/aom_thread.h поставляемый
Просмотреть файл

@ -369,7 +369,8 @@ typedef enum {
} AVxWorkerStatus;
// Function to be called by the worker thread. Takes two opaque pointers as
// arguments (data1 and data2), and should return false in case of error.
// arguments (data1 and data2). Should return true on success and return false
// in case of error.
typedef int (*AVxWorkerHook)(void *, void *);
// Platform-dependent implementation details for the worker.
@ -382,7 +383,7 @@ typedef struct {
AVxWorkerHook hook; // hook to call
void *data1; // first argument passed to 'hook'
void *data2; // second argument passed to 'hook'
int had_error; // return value of the last call to 'hook'
int had_error; // true if a call to 'hook' returned false
} AVxWorker;
// The interface for all thread-worker related functions. All these functions

34
third_party/aom/apps/aomdec.c поставляемый
Просмотреть файл

@ -83,6 +83,8 @@ static const arg_def_t outputfile =
ARG_DEF("o", "output", 1, "Output file name pattern (see below)");
static const arg_def_t threadsarg =
ARG_DEF("t", "threads", 1, "Max threads to use");
static const arg_def_t rowmtarg =
ARG_DEF(NULL, "row-mt", 1, "Enable row based multi-threading");
static const arg_def_t verbosearg =
ARG_DEF("v", "verbose", 0, "Show version string");
static const arg_def_t scalearg =
@ -114,12 +116,12 @@ static const arg_def_t outallarg = ARG_DEF(
NULL, "all-layers", 0, "Output all decoded frames of a scalable bitstream");
static const arg_def_t *all_args[] = {
&help, &codecarg, &use_yv12, &use_i420, &flipuvarg,
&rawvideo, &noblitarg, &progressarg, &limitarg, &skiparg,
&postprocarg, &summaryarg, &outputfile, &threadsarg, &verbosearg,
&scalearg, &fb_arg, &md5arg, &framestatsarg, &continuearg,
&outbitdeptharg, &tilem, &tiler, &tilec, &isannexb,
&oppointarg, &outallarg, NULL
&help, &codecarg, &use_yv12, &use_i420, &flipuvarg,
&rawvideo, &noblitarg, &progressarg, &limitarg, &skiparg,
&postprocarg, &summaryarg, &outputfile, &threadsarg, &rowmtarg,
&verbosearg, &scalearg, &fb_arg, &md5arg, &framestatsarg,
&continuearg, &outbitdeptharg, &tilem, &tiler, &tilec,
&isannexb, &oppointarg, &outallarg, NULL
};
#if CONFIG_LIBYUV
@ -512,6 +514,7 @@ static int main_loop(int argc, const char **argv_) {
int do_scale = 0;
int operating_point = 0;
int output_all_layers = 0;
unsigned int row_mt = 0;
aom_image_t *scaled_img = NULL;
aom_image_t *img_shifted = NULL;
int frame_avail, got_data, flush_decoder = 0;
@ -601,6 +604,15 @@ static int main_loop(int argc, const char **argv_) {
summary = 1;
} else if (arg_match(&arg, &threadsarg, argi)) {
cfg.threads = arg_parse_uint(&arg);
#if !CONFIG_MULTITHREAD
if (cfg.threads > 1) {
die("Error: --threads=%d is not supported when CONFIG_MULTITHREAD = "
"0.\n",
cfg.threads);
}
#endif
} else if (arg_match(&arg, &rowmtarg, argi)) {
row_mt = arg_parse_uint(&arg);
} else if (arg_match(&arg, &verbosearg, argi)) {
quiet = 0;
} else if (arg_match(&arg, &scalearg, argi)) {
@ -763,6 +775,11 @@ static int main_loop(int argc, const char **argv_) {
aom_codec_error(&decoder));
goto fail;
}
if (aom_codec_control(&decoder, AV1D_SET_ROW_MT, row_mt)) {
fprintf(stderr, "Failed to set row_mt: %s\n", aom_codec_error(&decoder));
goto fail;
}
#endif
if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
@ -910,9 +927,8 @@ static int main_loop(int argc, const char **argv_) {
// Shift up or down if necessary
if (output_bit_depth != 0) {
const aom_img_fmt_t shifted_fmt =
output_bit_depth == 8
? img->fmt ^ (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH)
: img->fmt | AOM_IMG_FMT_HIGHBITDEPTH;
output_bit_depth == 8 ? img->fmt & ~AOM_IMG_FMT_HIGHBITDEPTH
: img->fmt | AOM_IMG_FMT_HIGHBITDEPTH;
if (shifted_fmt != img->fmt || output_bit_depth != img->bit_depth) {
if (img_shifted &&

15
third_party/aom/apps/aomenc.c поставляемый
Просмотреть файл

@ -475,6 +475,13 @@ static const arg_def_t film_grain_test =
static const arg_def_t film_grain_table =
ARG_DEF(NULL, "film-grain-table", 1,
"Path to file containing film grain parameters");
#if CONFIG_DENOISE
static const arg_def_t denoise_noise_level =
ARG_DEF(NULL, "denoise-noise-level", 1,
"Amount of noise (from 0 = don't denoise, to 50)");
static const arg_def_t denoise_block_size =
ARG_DEF(NULL, "denoise-block-size", 1, "Denoise block size (default = 32)");
#endif
static const arg_def_t enable_ref_frame_mvs =
ARG_DEF(NULL, "enable-ref-frame-mvs", 1,
"Enable temporal mv prediction (default is 1)");
@ -656,6 +663,10 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
&timing_info,
&film_grain_test,
&film_grain_table,
#if CONFIG_DENOISE
&denoise_noise_level,
&denoise_block_size,
#endif
&enable_ref_frame_mvs,
&bitdeptharg,
&inbitdeptharg,
@ -708,6 +719,10 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
AV1E_SET_TIMING_INFO_TYPE,
AV1E_SET_FILM_GRAIN_TEST_VECTOR,
AV1E_SET_FILM_GRAIN_TABLE,
#if CONFIG_DENOISE
AV1E_SET_DENOISE_NOISE_LEVEL,
AV1E_SET_DENOISE_BLOCK_SIZE,
#endif
AV1E_SET_ENABLE_REF_FRAME_MVS,
AV1E_SET_ENABLE_DF,
AV1E_SET_ENABLE_ORDER_HINT,

10
third_party/aom/av1/av1.cmake поставляемый
Просмотреть файл

@ -45,7 +45,6 @@ list(APPEND AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/entropymv.c"
"${AOM_ROOT}/av1/common/entropymv.h"
"${AOM_ROOT}/av1/common/enums.h"
"${AOM_ROOT}/av1/common/filter.c"
"${AOM_ROOT}/av1/common/filter.h"
"${AOM_ROOT}/av1/common/frame_buffers.c"
"${AOM_ROOT}/av1/common/frame_buffers.h"
@ -274,7 +273,10 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_1
list(APPEND AOM_AV1_ENCODER_INTRIN_AVX2
"${AOM_ROOT}/av1/encoder/x86/av1_quantize_avx2.c"
"${AOM_ROOT}/av1/encoder/x86/av1_highbd_quantize_avx2.c"
"${AOM_ROOT}/av1/encoder/x86/error_intrin_avx2.c")
"${AOM_ROOT}/av1/encoder/x86/error_intrin_avx2.c"
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_avx2.h"
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_avx2.c"
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_avx2.c")
list(APPEND AOM_AV1_ENCODER_INTRIN_NEON
"${AOM_ROOT}/av1/encoder/arm/neon/quantize_neon.c")
@ -296,7 +298,9 @@ list(APPEND AOM_AV1_COMMON_INTRIN_NEON
"${AOM_ROOT}/av1/common/arm/blend_a64_vmask_neon.c"
"${AOM_ROOT}/av1/common/arm/reconinter_neon.c"
"${AOM_ROOT}/av1/common/arm/wiener_convolve_neon.c"
"${AOM_ROOT}/av1/common/arm/intrapred_neon.c"
"${AOM_ROOT}/av1/common/arm/selfguided_neon.c"
"${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.c"
"${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.h"
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_2

59
third_party/aom/av1/av1_cx_iface.c поставляемый
Просмотреть файл

@ -94,6 +94,10 @@ struct av1_extracfg {
int enable_warped_motion; // sequence level
int allow_warped_motion; // frame level
int enable_superres;
#if CONFIG_DENOISE
float noise_level;
int noise_block_size;
#endif
};
static struct av1_extracfg default_extra_cfg = {
@ -160,6 +164,10 @@ static struct av1_extracfg default_extra_cfg = {
1, // enable_warped_motion at sequence level
1, // allow_warped_motion at frame level
1, // superres
#if CONFIG_DENOISE
0, // noise_level
32, // noise_block_size
#endif
};
struct aom_codec_alg_priv {
@ -464,7 +472,7 @@ static aom_codec_err_t set_encoder_config(
oxcf->buffer_model.num_units_in_decoding_tick = cfg->g_timebase.num;
oxcf->timing_info.equal_picture_interval = 0;
oxcf->decoder_model_info_present_flag = 1;
oxcf->buffer_removal_delay_present = 1;
oxcf->buffer_removal_time_present = 1;
oxcf->display_model_info_present_flag = 1;
}
if (oxcf->init_framerate > 180) {
@ -612,6 +620,10 @@ static aom_codec_err_t set_encoder_config(
oxcf->film_grain_test_vector = extra_cfg->film_grain_test_vector;
oxcf->film_grain_table_filename = extra_cfg->film_grain_table_filename;
}
#if CONFIG_DENOISE
oxcf->noise_level = extra_cfg->noise_level;
oxcf->noise_block_size = extra_cfg->noise_block_size;
#endif
oxcf->large_scale_tile = cfg->large_scale_tile;
oxcf->single_tile_decoding =
(oxcf->large_scale_tile) ? extra_cfg->single_tile_decoding : 0;
@ -710,7 +722,7 @@ static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
ctx->cfg = *cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
// On profile change, request a key frame
force_key |= ctx->cpi->common.profile != ctx->oxcf.profile;
force_key |= ctx->cpi->common.seq_params.profile != ctx->oxcf.profile;
av1_change_config(ctx->cpi, &ctx->oxcf);
}
@ -1055,6 +1067,23 @@ static aom_codec_err_t ctrl_set_film_grain_table(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
#if CONFIG_DENOISE
static aom_codec_err_t ctrl_set_denoise_noise_level(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.noise_level =
((float)CAST(AV1E_SET_DENOISE_NOISE_LEVEL, args)) / 10.0f;
return update_extra_cfg(ctx, &extra_cfg);
}
static aom_codec_err_t ctrl_set_denoise_block_size(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
extra_cfg.noise_block_size = CAST(AV1E_SET_DENOISE_BLOCK_SIZE, args);
return update_extra_cfg(ctx, &extra_cfg);
}
#endif
static aom_codec_err_t ctrl_set_deltaq_mode(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@ -1119,7 +1148,7 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx,
}
priv->extra_cfg = default_extra_cfg;
once(av1_initialize_enc);
aom_once(av1_initialize_enc);
res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
@ -1200,6 +1229,9 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
volatile aom_enc_frame_flags_t flags = enc_flags;
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
if (setjmp(cpi->common.error.jmp)) {
cpi->common.error.setjmp = 0;
res = update_error_state(ctx, &cpi->common.error);
@ -1259,7 +1291,6 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
if (cx_data_sz < ctx->cx_data_sz / 2) {
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
"Compressed data buffer too small");
return AOM_CODEC_ERROR;
}
}
@ -1275,8 +1306,8 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
!img, timebase)) {
if (cpi->common.seq_params.frame_id_numbers_present_flag) {
if (cpi->common.invalid_delta_frame_id_minus_1) {
ctx->base.err_detail = "Invalid delta_frame_id_minus_1";
return AOM_CODEC_ERROR;
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
"Invalid delta_frame_id_minus_1");
}
}
cpi->seq_params_locked = 1;
@ -1305,7 +1336,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
// OBUs are preceded/succeeded by an unsigned leb128 coded integer.
if (write_uleb_obu_size(obu_header_size, obu_payload_size,
ctx->pending_cx_data) != AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
}
frame_size += obu_header_size + obu_payload_size + length_field_size;
@ -1315,7 +1346,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
size_t curr_frame_size = frame_size;
if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
}
frame_size = curr_frame_size;
@ -1327,7 +1358,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
if (write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
}
frame_size += length_field_size;
}
@ -1358,7 +1389,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
if (write_uleb_obu_size(0, (uint32_t)tu_size, ctx->pending_cx_data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
}
ctx->pending_cx_data_sz += length_field_size;
}
@ -1710,6 +1741,10 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_SINGLE_TILE_DECODING, ctrl_set_single_tile_decoding },
{ AV1E_SET_FILM_GRAIN_TEST_VECTOR, ctrl_set_film_grain_test_vector },
{ AV1E_SET_FILM_GRAIN_TABLE, ctrl_set_film_grain_table },
#if CONFIG_DENOISE
{ AV1E_SET_DENOISE_NOISE_LEVEL, ctrl_set_denoise_noise_level },
{ AV1E_SET_DENOISE_BLOCK_SIZE, ctrl_set_denoise_block_size },
#endif // CONFIG_FILM_GRAIN
{ AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
// Getters
@ -1728,7 +1763,7 @@ static aom_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
{
// NOLINT
0, // g_usage
8, // g_threads
0, // g_threads
0, // g_profile
320, // g_width
@ -1810,7 +1845,7 @@ CODEC_INTERFACE(aom_codec_av1_cx) = {
NULL, // aom_codec_peek_si_fn_t
NULL, // aom_codec_get_si_fn_t
NULL, // aom_codec_decode_fn_t
NULL, // aom_codec_frame_get_fn_t
NULL, // aom_codec_get_frame_fn_t
NULL // aom_codec_set_fb_fn_t
},
{

135
third_party/aom/av1/av1_dx_iface.c поставляемый
Просмотреть файл

@ -50,6 +50,7 @@ struct aom_codec_alg_priv {
int decode_tile_col;
unsigned int tile_mode;
unsigned int ext_tile_debug;
unsigned int row_mt;
EXTERNAL_REFERENCES ext_refs;
unsigned int is_annexb;
int operating_point;
@ -61,7 +62,7 @@ struct aom_codec_alg_priv {
int last_submit_worker_id;
int next_output_worker_id;
int available_threads;
aom_image_t *image_with_grain;
aom_image_t *image_with_grain[MAX_NUM_SPATIAL_LAYERS];
int need_resync; // wait for key/intra-only frame
// BufferPool that holds all reference frames. Shared by all the FrameWorkers.
BufferPool *buffer_pool;
@ -101,7 +102,7 @@ static aom_codec_err_t decoder_init(aom_codec_ctx_t *ctx,
// default values
priv->cfg.cfg.ext_partition = 1;
}
priv->image_with_grain = NULL;
av1_zero(priv->image_with_grain);
}
return AOM_CODEC_OK;
@ -139,7 +140,9 @@ static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
aom_free(ctx->frame_workers);
aom_free(ctx->buffer_pool);
if (ctx->image_with_grain) aom_img_free(ctx->image_with_grain);
for (int i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) {
if (ctx->image_with_grain[i]) aom_img_free(ctx->image_with_grain[i]);
}
aom_free(ctx);
return AOM_CODEC_OK;
}
@ -339,16 +342,16 @@ static int frame_worker_hook(void *arg1, void *arg2) {
const uint8_t *data = frame_worker_data->data;
(void)arg2;
frame_worker_data->result = av1_receive_compressed_data(
frame_worker_data->pbi, frame_worker_data->data_size, &data);
int result = av1_receive_compressed_data(frame_worker_data->pbi,
frame_worker_data->data_size, &data);
frame_worker_data->data_end = data;
if (frame_worker_data->result != 0) {
if (result != 0) {
// Check decode result in serial decode.
frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
frame_worker_data->pbi->need_resync = 1;
}
return !frame_worker_data->result;
return !result;
}
static aom_codec_err_t init_decoder(aom_codec_alg_priv_t *ctx) {
@ -429,6 +432,7 @@ static aom_codec_err_t init_decoder(aom_codec_alg_priv_t *ctx) {
frame_worker_data->pbi->operating_point = ctx->operating_point;
frame_worker_data->pbi->output_all_layers = ctx->output_all_layers;
frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
frame_worker_data->pbi->row_mt = ctx->row_mt;
worker->hook = (AVxWorkerHook)frame_worker_hook;
if (!winterface->reset(worker)) {
@ -489,6 +493,7 @@ static aom_codec_err_t decode_one(aom_codec_alg_priv_t *ctx,
frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
frame_worker_data->pbi->row_mt = ctx->row_mt;
frame_worker_data->pbi->ext_refs = ctx->ext_refs;
frame_worker_data->pbi->common.is_annexb = ctx->is_annexb;
@ -592,21 +597,31 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
return res;
}
aom_image_t *add_grain_if_needed(aom_image_t *img, aom_image_t *grain_img_buf,
aom_film_grain_t *grain_params) {
// If grain_params->apply_grain is false, returns img. Otherwise, adds film
// grain to img, saves the result in *grain_img_ptr (allocating *grain_img_ptr
// if necessary), and returns *grain_img_ptr.
static aom_image_t *add_grain_if_needed(aom_image_t *img,
aom_image_t **grain_img_ptr,
aom_film_grain_t *grain_params) {
if (!grain_params->apply_grain) return img;
if (grain_img_buf &&
(img->d_w != grain_img_buf->d_w || img->d_h != grain_img_buf->d_h ||
img->fmt != grain_img_buf->fmt || !(img->d_h % 2) || !(img->d_w % 2))) {
aom_img_free(grain_img_buf);
grain_img_buf = NULL;
aom_image_t *grain_img_buf = *grain_img_ptr;
const int w_even = ALIGN_POWER_OF_TWO(img->d_w, 1);
const int h_even = ALIGN_POWER_OF_TWO(img->d_h, 1);
if (grain_img_buf) {
const int alloc_w = ALIGN_POWER_OF_TWO(grain_img_buf->d_w, 1);
const int alloc_h = ALIGN_POWER_OF_TWO(grain_img_buf->d_h, 1);
if (w_even != alloc_w || h_even != alloc_h ||
img->fmt != grain_img_buf->fmt) {
aom_img_free(grain_img_buf);
grain_img_buf = NULL;
}
}
if (!grain_img_buf) {
int w_even = img->d_w % 2 ? img->d_w + 1 : img->d_w;
int h_even = img->d_h % 2 ? img->d_h + 1 : img->d_h;
grain_img_buf = aom_img_alloc(NULL, img->fmt, w_even, h_even, 16);
grain_img_buf->bit_depth = img->bit_depth;
*grain_img_ptr = grain_img_buf;
}
av1_add_film_grain(grain_params, img, grain_img_buf);
@ -649,8 +664,6 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
aom_film_grain_t *grain_params;
if (av1_get_raw_frame(frame_worker_data->pbi, *index, &sd,
&grain_params) == 0) {
*index += 1; // Advance the iterator to point to the next image
AV1Decoder *const pbi = frame_worker_data->pbi;
AV1_COMMON *const cm = &pbi->common;
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
@ -659,6 +672,7 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
yuvconfig2image(&ctx->img, sd, frame_worker_data->user_priv);
if (!pbi->ext_tile_debug && cm->large_scale_tile) {
*index += 1; // Advance the iterator to point to the next image
img = &ctx->img;
img->img_data = pbi->tile_list_output;
img->sz = pbi->tile_list_size;
@ -688,11 +702,14 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
const int tile_col = AOMMIN(pbi->dec_tile_col, cm->tile_cols - 1);
const int mi_col = tile_col * cm->tile_width;
const int ssx = ctx->img.x_chroma_shift;
const int is_hbd =
(ctx->img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0;
int plane;
ctx->img.planes[0] += mi_col * MI_SIZE;
ctx->img.planes[0] += mi_col * MI_SIZE * (1 + is_hbd);
if (num_planes > 1) {
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
ctx->img.planes[plane] += mi_col * (MI_SIZE >> ssx);
ctx->img.planes[plane] +=
mi_col * (MI_SIZE >> ssx) * (1 + is_hbd);
}
}
ctx->img.d_w =
@ -703,7 +720,10 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
img = &ctx->img;
img->temporal_id = cm->temporal_layer_id;
img->spatial_id = cm->spatial_layer_id;
return add_grain_if_needed(img, ctx->image_with_grain, grain_params);
aom_image_t *res = add_grain_if_needed(
img, &ctx->image_with_grain[*index], grain_params);
*index += 1; // Advance the iterator to point to the next image
return res;
}
} else {
// Decoding failed. Release the worker thread.
@ -999,7 +1019,7 @@ static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
*bit_depth = cm->bit_depth;
*bit_depth = cm->seq_params.bit_depth;
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
@ -1009,6 +1029,64 @@ static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
return AOM_CODEC_INVALID_PARAM;
}
static aom_img_fmt_t get_img_format(int subsampling_x, int subsampling_y,
int use_highbitdepth) {
aom_img_fmt_t fmt = 0;
if (subsampling_x == 0 && subsampling_y == 0)
fmt = AOM_IMG_FMT_I444;
else if (subsampling_x == 1 && subsampling_y == 0)
fmt = AOM_IMG_FMT_I422;
else if (subsampling_x == 1 && subsampling_y == 1)
fmt = AOM_IMG_FMT_I420;
if (use_highbitdepth) fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
return fmt;
}
static aom_codec_err_t ctrl_get_img_format(aom_codec_alg_priv_t *ctx,
va_list args) {
aom_img_fmt_t *const img_fmt = va_arg(args, aom_img_fmt_t *);
AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
if (img_fmt) {
if (worker) {
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
*img_fmt = get_img_format(cm->seq_params.subsampling_x,
cm->seq_params.subsampling_y,
cm->seq_params.use_highbitdepth);
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
}
}
return AOM_CODEC_INVALID_PARAM;
}
static aom_codec_err_t ctrl_get_tile_size(aom_codec_alg_priv_t *ctx,
va_list args) {
unsigned int *const tile_size = va_arg(args, unsigned int *);
AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
if (tile_size) {
if (worker) {
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
*tile_size =
((cm->tile_width * MI_SIZE) << 16) + cm->tile_height * MI_SIZE;
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
}
}
return AOM_CODEC_INVALID_PARAM;
}
static aom_codec_err_t ctrl_set_invert_tile_order(aom_codec_alg_priv_t *ctx,
va_list args) {
ctx->invert_tile_order = va_arg(args, int);
@ -1124,6 +1202,12 @@ static aom_codec_err_t ctrl_ext_tile_debug(aom_codec_alg_priv_t *ctx,
return AOM_CODEC_OK;
}
static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
va_list args) {
ctx->row_mt = va_arg(args, unsigned int);
return AOM_CODEC_OK;
}
static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ AV1_COPY_REFERENCE, ctrl_copy_reference },
@ -1145,6 +1229,7 @@ static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ AV1D_SET_OUTPUT_ALL_LAYERS, ctrl_set_output_all_layers },
{ AV1_SET_INSPECTION_CALLBACK, ctrl_set_inspection_callback },
{ AV1D_EXT_TILE_DEBUG, ctrl_ext_tile_debug },
{ AV1D_SET_ROW_MT, ctrl_set_row_mt },
{ AV1D_SET_EXT_REF_PTR, ctrl_set_ext_ref_ptr },
// Getters
@ -1152,6 +1237,8 @@ static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ AOMD_GET_LAST_QUANTIZER, ctrl_get_last_quantizer },
{ AOMD_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates },
{ AV1D_GET_BIT_DEPTH, ctrl_get_bit_depth },
{ AV1D_GET_IMG_FORMAT, ctrl_get_img_format },
{ AV1D_GET_TILE_SIZE, ctrl_get_tile_size },
{ AV1D_GET_DISPLAY_SIZE, ctrl_get_render_size },
{ AV1D_GET_FRAME_SIZE, ctrl_get_frame_size },
{ AV1_GET_ACCOUNTING, ctrl_get_accounting },
@ -1180,7 +1267,7 @@ CODEC_INTERFACE(aom_codec_av1_dx) = {
decoder_peek_si, // aom_codec_peek_si_fn_t
decoder_get_si, // aom_codec_get_si_fn_t
decoder_decode, // aom_codec_decode_fn_t
decoder_get_frame, // aom_codec_frame_get_fn_t
decoder_get_frame, // aom_codec_get_frame_fn_t
decoder_set_fb_fn, // aom_codec_set_fb_fn_t
},
{

4
third_party/aom/av1/common/alloccommon.c поставляемый
Просмотреть файл

@ -137,11 +137,11 @@ void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
// Now we need to allocate enough space to store the line buffers for the
// stripes
const int frame_w = cm->superres_upscaled_width;
const int use_highbd = cm->use_highbitdepth ? 1 : 0;
const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0;
for (int p = 0; p < num_planes; ++p) {
const int is_uv = p > 0;
const int ss_x = is_uv && cm->subsampling_x;
const int ss_x = is_uv && cm->seq_params.subsampling_x;
const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT

844
third_party/aom/av1/common/arm/av1_inv_txfm_neon.c поставляемый Normal file
Просмотреть файл

@ -0,0 +1,844 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "config/av1_rtcd.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
#include "av1/common/enums.h"
#include "av1/common/idct.h"
#include "av1/common/arm/av1_inv_txfm_neon.h"
static INLINE TxSetType find_TxSetType(TX_SIZE tx_size) {
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
TxSetType tx_set_type;
if (tx_size_sqr_up > TX_32X32) {
tx_set_type = EXT_TX_SET_DCTONLY;
} else if (tx_size_sqr_up == TX_32X32) {
tx_set_type = EXT_TX_SET_DCT_IDTX;
} else {
tx_set_type = EXT_TX_SET_ALL16;
}
return tx_set_type;
}
// 1D itx types
typedef enum ATTRIBUTE_PACKED {
IDCT_1D,
IADST_1D,
IFLIPADST_1D = IADST_1D,
IIDENTITY_1D,
ITX_TYPES_1D,
} ITX_TYPE_1D;
static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
IDCT_1D, IADST_1D, IDCT_1D, IADST_1D,
IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D,
IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D,
IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
};
static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
IDCT_1D, IDCT_1D, IADST_1D, IADST_1D,
IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D,
};
// 1D functions
static const transform_1d_neon lowbd_txfm_all_1d_arr[TX_SIZES][ITX_TYPES_1D] = {
{ av1_idct4_new, av1_iadst4_new, av1_iidentity4_c },
{ av1_idct8_new, av1_iadst8_new, av1_iidentity8_c },
{ av1_idct16_new, av1_iadst16_new, av1_iidentity16_c },
{ av1_idct32_new, NULL, NULL },
{ av1_idct64_new, NULL, NULL },
};
// Functions for blocks with eob at DC and within
// topleft 8x8, 16x16, 32x32 corner
static const transform_1d_neon
lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
{
{ av1_idct4_new, av1_idct4_new, NULL, NULL },
{ av1_iadst4_new, av1_iadst4_new, NULL, NULL },
{ av1_iidentity4_c, av1_iidentity4_c, NULL, NULL },
},
{ { av1_idct8_new, av1_idct8_new, NULL, NULL },
{ av1_iadst8_new, av1_iadst8_new, NULL, NULL },
{ av1_iidentity8_c, av1_iidentity8_c, NULL, NULL } },
{
{ av1_idct16_new, av1_idct16_new, av1_idct16_new, NULL },
{ av1_iadst16_new, av1_iadst16_new, av1_iadst16_new, NULL },
{ av1_iidentity16_c, av1_iidentity16_c, av1_iidentity16_c, NULL },
},
{ { av1_idct32_new, av1_idct32_new, av1_idct32_new, av1_idct32_new },
{ NULL, NULL, NULL, NULL },
{ av1_iidentity32_c, av1_iidentity32_c, av1_iidentity32_c,
av1_iidentity32_c } },
{ { av1_idct64_new, av1_idct64_new, av1_idct64_new, av1_idct64_new },
{ NULL, NULL, NULL, NULL },
{ NULL, NULL, NULL, NULL } }
};
static INLINE void lowbd_inv_txfm2d_add_idtx_neon(const int32_t *input,
uint8_t *output, int stride,
TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
int32_t *temp_in = txfm_buf;
int eobx, eoby;
get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
assert(col_txfm != NULL);
assert(row_txfm != NULL);
// row tx
int row_start = (buf_size_nonzero_h_div8 * 8);
for (int i = 0; i < row_start; i++) {
if (abs(rect_type) == 1) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
} else {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
}
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
// Doing memset for the rows which are not processed in row transform.
memset(buf_ptr, 0,
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
// col tx
for (int c = 0; c < txfm_size_col; c++) {
for (r = 0; r < txfm_size_row; ++r) temp_in[r] = buf[r * txfm_size_col + c];
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
}
}
static INLINE void lowbd_inv_txfm2d_add_v_identity_neon(
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
int32_t *temp_in = txfm_buf;
int eobx, eoby;
get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
assert(col_txfm != NULL);
assert(row_txfm != NULL);
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
// row tx
int row_start = (buf_size_nonzero_h_div8 * 8);
for (int i = 0; i < row_start; i++) {
if (abs(rect_type) == 1) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
} else {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
}
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
// Doing memset for the rows which are not processed in row transform.
memset(buf_ptr, 0,
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
// col tx
for (int c = 0; c < txfm_size_col; c++) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
static INLINE void lowbd_inv_txfm2d_add_h_identity_neon(
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
int32_t *temp_in = txfm_buf;
int eobx, eoby;
get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
assert(col_txfm != NULL);
assert(row_txfm != NULL);
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
// row tx
int row_start = (buf_size_nonzero_h_div8 * 8);
for (int i = 0; i < row_start; i++) {
if (abs(rect_type) == 1) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
} else {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
}
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
// Doing memset for the rows which are not processed in row transform.
memset(buf_ptr, 0,
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
// col tx
for (int c = 0; c < txfm_size_col; c++) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
static INLINE void lowbd_inv_txfm2d_add_4x4_neon(const int32_t *input,
uint8_t *output, int stride,
TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
(void)eob;
DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 8 + 8]);
int32_t *temp_in = txfm_buf;
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
for (int i = 0; i < txfm_size_row; i++) {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
for (int c = 0; c < txfm_size_col; ++c) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
void lowbd_inv_txfm2d_add_4x8_neon(const int32_t *input, uint8_t *output,
int stride, TX_TYPE tx_type, TX_SIZE tx_size,
int eob) {
(void)eob;
DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);
int32_t *temp_in = txfm_buf;
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
for (int i = 0; i < txfm_size_row; i++) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
for (int c = 0; c < txfm_size_col; ++c) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
void lowbd_inv_txfm2d_add_8x4_neon(const int32_t *input, uint8_t *output,
int stride, TX_TYPE tx_type, TX_SIZE tx_size,
int eob) {
(void)eob;
DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);
int32_t *temp_in = txfm_buf;
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
for (int i = 0; i < txfm_size_row; i++) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
for (int c = 0; c < txfm_size_col; ++c) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
void lowbd_inv_txfm2d_add_4x16_neon(const int32_t *input, uint8_t *output,
int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
(void)eob;
DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
int32_t *temp_in = txfm_buf;
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
for (int i = 0; i < txfm_size_row; i++) {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
for (int c = 0; c < txfm_size_col; ++c) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
void lowbd_inv_txfm2d_add_16x4_neon(const int32_t *input, uint8_t *output,
int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
(void)eob;
DECLARE_ALIGNED(32, int, txfm_buf[16 * 4 + 16 + 16]);
int32_t *temp_in = txfm_buf;
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
int r, bd = 8;
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
int ud_flip, lr_flip;
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
for (int i = 0; i < txfm_size_row; i++) {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
for (int c = 0; c < txfm_size_col; ++c) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
static INLINE void lowbd_inv_txfm2d_add_no_identity_neon(
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
int32_t *temp_in = txfm_buf;
int eobx, eoby, ud_flip, lr_flip, row_start;
get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
const int8_t *shift = inv_txfm_shift_ls[tx_size];
const int txw_idx = get_txw_idx(tx_size);
const int txh_idx = get_txh_idx(tx_size);
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
const int txfm_size_col = tx_size_wide[tx_size];
const int txfm_size_row = tx_size_high[tx_size];
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
int32_t *temp_out = temp_in + buf_offset;
int32_t *buf = temp_out + buf_offset;
int32_t *buf_ptr = buf;
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
const int bd = 8;
int r;
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
const transform_1d_neon row_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
const transform_1d_neon col_txfm =
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
assert(col_txfm != NULL);
assert(row_txfm != NULL);
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
row_start = (buf_size_nonzero_h_div8 << 3);
for (int i = 0; i < row_start; i++) {
if (abs(rect_type) == 1) {
for (int j = 0; j < txfm_size_col; j++)
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
} else {
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
}
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
input += txfm_size_col;
buf_ptr += txfm_size_col;
}
// Doing memset for the rows which are not processed in row transform.
memset(buf_ptr, 0,
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
for (int c = 0; c < txfm_size_col; c++) {
if (lr_flip == 0) {
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + c];
} else {
// flip left right
for (r = 0; r < txfm_size_row; ++r)
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
if (ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] =
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
}
} else {
// flip upside down
for (r = 0; r < txfm_size_row; ++r) {
output[r * stride + c] = highbd_clip_pixel_add(
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
}
}
}
}
static INLINE void lowbd_inv_txfm2d_add_universe_neon(
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
TX_SIZE tx_size, int eob) {
switch (tx_type) {
case IDTX:
lowbd_inv_txfm2d_add_idtx_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case H_DCT:
case H_ADST:
case H_FLIPADST:
lowbd_inv_txfm2d_add_v_identity_neon(input, output, stride, tx_type,
tx_size, eob);
break;
case V_DCT:
case V_ADST:
case V_FLIPADST:
lowbd_inv_txfm2d_add_h_identity_neon(input, output, stride, tx_type,
tx_size, eob);
break;
default:
lowbd_inv_txfm2d_add_no_identity_neon(input, output, stride, tx_type,
tx_size, eob);
break;
}
}
void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input, uint8_t *output,
int stride, TX_TYPE tx_type, TX_SIZE tx_size,
int eob) {
int row;
switch (tx_size) {
case TX_4X4:
lowbd_inv_txfm2d_add_4x4_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case TX_4X8:
lowbd_inv_txfm2d_add_4x8_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case TX_8X4:
lowbd_inv_txfm2d_add_8x4_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case TX_4X16:
lowbd_inv_txfm2d_add_4x16_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case TX_16X4:
lowbd_inv_txfm2d_add_16x4_neon(input, output, stride, tx_type, tx_size,
eob);
break;
case TX_16X64: {
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
tx_size, eob);
} break;
case TX_64X16: {
int32_t mod_input[64 * 16];
for (row = 0; row < 16; ++row) {
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
}
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
tx_size, eob);
} break;
case TX_32X64: {
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
tx_size, eob);
} break;
case TX_64X32: {
int32_t mod_input[64 * 32];
for (row = 0; row < 32; ++row) {
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
}
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
tx_size, eob);
} break;
case TX_64X64: {
int32_t mod_input[64 * 64];
for (row = 0; row < 32; ++row) {
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
}
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
tx_size, eob);
} break;
default:
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
tx_size, eob);
break;
}
}
void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
const TxfmParam *txfm_param) {
const TX_TYPE tx_type = txfm_param->tx_type;
if (!txfm_param->lossless) {
av1_lowbd_inv_txfm2d_add_neon(dqcoeff, dst, stride, tx_type,
txfm_param->tx_size, txfm_param->eob);
} else {
av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
}
}

152
third_party/aom/av1/common/arm/av1_inv_txfm_neon.h поставляемый Normal file
Просмотреть файл

@ -0,0 +1,152 @@
/*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
#define AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
#include "config/aom_config.h"
#include "config/av1_rtcd.h"
#include "aom/aom_integer.h"
#include "av1/common/enums.h"
#include "av1/common/av1_inv_txfm1d.h"
#include "av1/common/av1_inv_txfm1d_cfg.h"
#include "av1/common/av1_txfm.h"
typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output,
const int8_t cos_bit,
const int8_t *stage_ptr);
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
};
DECLARE_ALIGNED(16, static const int16_t,
av1_eob_to_eobxy_16x16_default[16]) = {
0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
};
DECLARE_ALIGNED(16, static const int16_t,
av1_eob_to_eobxy_32x32_default[32]) = {
0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
};
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
};
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
};
DECLARE_ALIGNED(16, static const int16_t,
av1_eob_to_eobxy_16x32_default[32]) = {
0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
};
DECLARE_ALIGNED(16, static const int16_t,
av1_eob_to_eobxy_32x16_default[16]) = {
0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
};
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
};
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
};
DECLARE_ALIGNED(16, static const int16_t *,
av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
NULL,
av1_eob_to_eobxy_8x8_default,
av1_eob_to_eobxy_16x16_default,
av1_eob_to_eobxy_32x32_default,
av1_eob_to_eobxy_32x32_default,
NULL,
NULL,
av1_eob_to_eobxy_8x16_default,
av1_eob_to_eobxy_16x8_default,
av1_eob_to_eobxy_16x32_default,
av1_eob_to_eobxy_32x16_default,
av1_eob_to_eobxy_32x32_default,
av1_eob_to_eobxy_32x32_default,
NULL,
NULL,
av1_eob_to_eobxy_8x32_default,
av1_eob_to_eobxy_32x8_default,
av1_eob_to_eobxy_16x32_default,
av1_eob_to_eobxy_32x16_default,
};
static const int lowbd_txfm_all_1d_zeros_idx[32] = {
0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
};
// Transform block width in log2 for eob (size of 64 map to 32)
static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
};
static int eob_fill[32] = {
0, 7, 7, 7, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15,
31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
};
static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
TX_SIZE tx_size, int eob) {
if (eob == 1) {
*eobx = 0;
*eoby = 0;
return;
}
const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
const int eob_row = (eob - 1) >> tx_w_log2;
const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
*eobx = eobxy & 0xFF;
*eoby = eobxy >> 8;
}
static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
TX_SIZE tx_size, int eob) {
eob -= 1;
const int txfm_size_row = tx_size_high[tx_size];
const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
*eobx = eob / (eoby_max + 1);
*eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
}
static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
TX_SIZE tx_size, int eob) {
eob -= 1;
const int txfm_size_col = tx_size_wide[tx_size];
const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
*eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
const int temp_eoby = eob / (eobx_max + 1);
assert(temp_eoby < 32);
*eoby = eob_fill[temp_eoby];
}
#endif // AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_

Просмотреть файл

@ -164,8 +164,8 @@ static INLINE uint8x8_t convolve8_vert_8x4_s32(
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
@ -182,7 +182,7 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0);
const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
@ -485,8 +485,8 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
const int vert_offset = filter_params_y->taps / 2 - 1;
@ -502,7 +502,7 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
if (w <= 4) {
uint8x8_t d01, d23;
@ -680,8 +680,8 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
int im_dst_stride;
@ -711,7 +711,7 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
int16_t x_filter_tmp[8];
int16x8_t filter_x_coef = vld1q_s16(x_filter);
@ -896,7 +896,7 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
(1 << (offset_bits - conv_params->round_1 - 1));
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
@ -1086,8 +1086,8 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
}
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
(void)filter_params_x;

Просмотреть файл

@ -1,79 +0,0 @@
/*
*
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <arm_neon.h>
#include <assert.h>
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "av1/common/arm/mem_neon.h"
#include "config/aom_dsp_rtcd.h"
static INLINE void highbd_dc_predictor_neon(uint16_t *dst, ptrdiff_t stride,
int bw, const uint16_t *above,
const uint16_t *left) {
assert(bw >= 4);
assert(IS_POWER_OF_TWO(bw));
int expected_dc, sum = 0;
const int count = bw * 2;
uint32x4_t sum_q = vdupq_n_u32(0);
uint32x2_t sum_d;
uint16_t *dst_1;
if (bw >= 8) {
for (int i = 0; i < bw; i += 8) {
sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
above += 8;
left += 8;
}
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
expected_dc = (sum + (count >> 1)) / count;
const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
for (int r = 0; r < bw; r++) {
dst_1 = dst;
for (int i = 0; i < bw; i += 8) {
vst1q_u16(dst_1, dc);
dst_1 += 8;
}
dst += stride;
}
} else { // 4x4
sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
expected_dc = (sum + (count >> 1)) / count;
const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
for (int r = 0; r < bw; r++) {
vst1_u16(dst, dc);
dst += stride;
}
}
}
#define intra_pred_highbd_sized(type, width) \
void aom_highbd_##type##_predictor_##width##x##width##_neon( \
uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
const uint16_t *left, int bd) { \
(void)bd; \
highbd_##type##_predictor_neon(dst, stride, width, above, left); \
}
#define intra_pred_square(type) \
intra_pred_highbd_sized(type, 4); \
intra_pred_highbd_sized(type, 8); \
intra_pred_highbd_sized(type, 16); \
intra_pred_highbd_sized(type, 32); \
intra_pred_highbd_sized(type, 64);
intra_pred_square(dc);
#undef intra_pred_square

Просмотреть файл

@ -515,8 +515,8 @@ static INLINE void jnt_convolve_2d_vert_neon(
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
assert(!(w % 4));
@ -532,9 +532,9 @@ void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
const int round_0 = conv_params->round_0 - 1;
const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
int16_t x_filter_tmp[8];
int16x8_t filter_x_coef = vld1q_s16(x_filter);
@ -553,8 +553,8 @@ void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
uint8_t *dst8, int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
uint8x8_t res0_8, res1_8, res2_8, res3_8, tmp_shift0, tmp_shift1, tmp_shift2,
@ -679,8 +679,8 @@ void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
assert(!(w % 4));
@ -705,7 +705,7 @@ void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
const uint8_t *src_ptr = src - horiz_offset;
@ -1013,8 +1013,8 @@ void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
assert(!(w % 4));
@ -1040,7 +1040,7 @@ void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const uint8_t *src_ptr = src - (vert_offset * src_stride);

84
third_party/aom/av1/common/arm/mem_neon.h поставляемый
Просмотреть файл

@ -22,6 +22,14 @@ static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0,
s += p;
}
/* These intrinsics require immediate values, so we must use #defines
to enforce that. */
#define load_u8_4x1(s, s0, lane) \
do { \
*(s0) = vreinterpret_u8_u32( \
vld1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(*(s0)), lane)); \
} while (0)
static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p,
uint8x8_t *const s0, uint8x8_t *const s1,
uint8x8_t *const s2, uint8x8_t *const s3,
@ -128,6 +136,13 @@ static INLINE void load_s16_4x4(const int16_t *s, ptrdiff_t p,
*s3 = vld1_s16(s);
}
/* These intrinsics require immediate values, so we must use #defines
to enforce that. */
#define store_u8_4x1(s, s0, lane) \
do { \
vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8(s0), lane); \
} while (0)
static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
const uint8x8_t s1, const uint8x8_t s2,
const uint8x8_t s3, const uint8x8_t s4,
@ -242,6 +257,30 @@ static INLINE void store_s16_8x8(int16_t *s, ptrdiff_t dst_stride,
vst1q_s16(s, s7);
}
static INLINE void store_s16_4x4(int16_t *s, ptrdiff_t dst_stride,
const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3) {
vst1_s16(s, s0);
s += dst_stride;
vst1_s16(s, s1);
s += dst_stride;
vst1_s16(s, s2);
s += dst_stride;
vst1_s16(s, s3);
}
static INLINE void store_s16_8x4(int16_t *s, ptrdiff_t dst_stride,
const int16x8_t s0, const int16x8_t s1,
const int16x8_t s2, const int16x8_t s3) {
vst1q_s16(s, s0);
s += dst_stride;
vst1q_s16(s, s1);
s += dst_stride;
vst1q_s16(s, s2);
s += dst_stride;
vst1q_s16(s, s3);
}
static INLINE void load_s16_8x8(const int16_t *s, ptrdiff_t p,
int16x8_t *const s0, int16x8_t *const s1,
int16x8_t *const s2, int16x8_t *const s3,
@ -398,4 +437,49 @@ static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride,
*tu1 = vsetq_lane_u64(a, *tu1, 1);
}
static INLINE void load_s32_4x4(int32_t *s, int32_t p, int32x4_t *s1,
int32x4_t *s2, int32x4_t *s3, int32x4_t *s4) {
*s1 = vld1q_s32(s);
s += p;
*s2 = vld1q_s32(s);
s += p;
*s3 = vld1q_s32(s);
s += p;
*s4 = vld1q_s32(s);
}
static INLINE void store_s32_4x4(int32_t *s, int32_t p, int32x4_t s1,
int32x4_t s2, int32x4_t s3, int32x4_t s4) {
vst1q_s32(s, s1);
s += p;
vst1q_s32(s, s2);
s += p;
vst1q_s32(s, s3);
s += p;
vst1q_s32(s, s4);
}
static INLINE void load_u32_4x4(uint32_t *s, int32_t p, uint32x4_t *s1,
uint32x4_t *s2, uint32x4_t *s3,
uint32x4_t *s4) {
*s1 = vld1q_u32(s);
s += p;
*s2 = vld1q_u32(s);
s += p;
*s3 = vld1q_u32(s);
s += p;
*s4 = vld1q_u32(s);
}
static INLINE void store_u32_4x4(uint32_t *s, int32_t p, uint32x4_t s1,
uint32x4_t s2, uint32x4_t s3, uint32x4_t s4) {
vst1q_u32(s, s1);
s += p;
vst1q_u32(s, s2);
s += p;
vst1q_u32(s, s3);
s += p;
vst1q_u32(s, s4);
}
#endif // AV1_COMMON_ARM_MEM_NEON_H_

1506
third_party/aom/av1/common/arm/selfguided_neon.c поставляемый Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -419,4 +419,42 @@ static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
*a3 = vreinterpret_s16_s32(c1.val[1]);
}
static INLINE int32x4x2_t aom_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
int32x4x2_t b0;
b0.val[0] = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1));
b0.val[1] = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1));
return b0;
}
static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1,
int32x4_t *a2, int32x4_t *a3) {
// Swap 32 bit elements. Goes from:
// a0: 00 01 02 03
// a1: 10 11 12 13
// a2: 20 21 22 23
// a3: 30 31 32 33
// to:
// b0.val[0]: 00 10 02 12
// b0.val[1]: 01 11 03 13
// b1.val[0]: 20 30 22 32
// b1.val[1]: 21 31 23 33
const int32x4x2_t b0 = vtrnq_s32(*a0, *a1);
const int32x4x2_t b1 = vtrnq_s32(*a2, *a3);
// Swap 64 bit elements resulting in:
// c0.val[0]: 00 10 20 30
// c0.val[1]: 02 12 22 32
// c1.val[0]: 01 11 21 31
// c1.val[1]: 03 13 23 33
const int32x4x2_t c0 = aom_vtrnq_s64_to_s32(b0.val[0], b1.val[0]);
const int32x4x2_t c1 = aom_vtrnq_s64_to_s32(b0.val[1], b1.val[1]);
*a0 = c0.val[0];
*a1 = c1.val[0];
*a2 = c0.val[1];
*a3 = c1.val[1];
}
#endif // AV1_COMMON_ARM_TRANSPOSE_NEON_H_

51
third_party/aom/av1/common/av1_loopfilter.c поставляемый
Просмотреть файл

@ -1308,7 +1308,7 @@ static int compare_ref_dst(AV1_COMMON *const cm, uint8_t *ref_buf,
end <<= MI_SIZE_LOG2;
uint8_t *ref0 = ref_buf;
uint8_t *dst0 = dst_buf;
if (cm->use_highbitdepth) {
if (cm->seq_params.use_highbitdepth) {
const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref_buf);
const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst_buf);
for (int j = 0; j < 4; ++j) {
@ -1404,11 +1404,11 @@ void av1_filter_block_plane_ver(AV1_COMMON *const cm,
uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
if (cm->use_highbitdepth)
if (cm->seq_params.use_highbitdepth)
highbd_filter_selectively_vert_row2(
ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
&cm->lf_info, lfl, lfl2, (int)cm->bit_depth);
&cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
else
filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
mask_16x16_0, mask_8x8_0, mask_4x4_0,
@ -1474,10 +1474,11 @@ void av1_filter_block_plane_hor(AV1_COMMON *const cm,
mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
if (cm->use_highbitdepth)
highbd_filter_selectively_horiz(
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->bit_depth);
if (cm->seq_params.use_highbitdepth)
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl,
(int)cm->seq_params.bit_depth);
else
filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl);
@ -1652,6 +1653,8 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
const int dst_stride = plane_ptr->dst.stride;
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
const int use_highbitdepth = cm->seq_params.use_highbitdepth;
const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
for (int y = 0; y < y_range; y += row_step) {
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
for (int x = 0; x < x_range;) {
@ -1677,40 +1680,40 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
bit_depth);
else
aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
case 6: // apply 6-tap filter for chroma plane only
assert(plane != 0);
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
bit_depth);
else
aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
// apply 8-tap filtering
case 8:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
bit_depth);
else
aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
// apply 14-tap filtering
case 14:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim, params.hev_thr,
cm->bit_depth);
bit_depth);
else
aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
@ -1737,6 +1740,8 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
const int dst_stride = plane_ptr->dst.stride;
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
const int use_highbitdepth = cm->seq_params.use_highbitdepth;
const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
for (int x = 0; x < x_range; x += col_step) {
uint8_t *p = dst_ptr + x * MI_SIZE;
for (int y = 0; y < y_range;) {
@ -1762,10 +1767,10 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
params.hev_thr, bit_depth);
else
aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
@ -1773,30 +1778,30 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
// apply 6-tap filtering
case 6:
assert(plane != 0);
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
params.hev_thr, bit_depth);
else
aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
// apply 8-tap filtering
case 8:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
params.hev_thr, bit_depth);
else
aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
params.hev_thr);
break;
// apply 14-tap filtering
case 14:
if (cm->use_highbitdepth)
if (use_highbitdepth)
aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
params.mblim, params.lim,
params.hev_thr, cm->bit_depth);
params.hev_thr, bit_depth);
else
aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
params.hev_thr);

6
third_party/aom/av1/common/av1_rtcd.c поставляемый
Просмотреть файл

@ -16,7 +16,7 @@
#include "aom_ports/aom_once.h"
void av1_rtcd() {
// TODO(JBB): Remove this once, by insuring that both the encoder and
// decoder setup functions are protected by once();
once(setup_rtcd_internal);
// TODO(JBB): Remove this aom_once, by insuring that both the encoder and
// decoder setup functions are protected by aom_once();
aom_once(setup_rtcd_internal);
}

50
third_party/aom/av1/common/av1_rtcd_defs.pl поставляемый
Просмотреть файл

@ -106,7 +106,7 @@ specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64";
#inv txfm
add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
specialize qw/av1_inv_txfm_add ssse3 avx2/;
specialize qw/av1_inv_txfm_add ssse3 avx2 neon/;
add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
@ -181,7 +181,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
#fwd txfm
add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param";
specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1/;
specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1 avx2/;
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
@ -241,11 +241,11 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
specialize qw/av1_txb_init_levels sse4_1/;
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
specialize qw/av1_wedge_sse_from_residuals sse2/;
specialize qw/av1_wedge_sse_from_residuals sse2 avx2/;
add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
specialize qw/av1_wedge_sign_from_residuals sse2/;
specialize qw/av1_wedge_sign_from_residuals sse2 avx2/;
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
specialize qw/av1_wedge_compute_delta_squares sse2/;
specialize qw/av1_wedge_compute_delta_squares sse2 avx2/;
# hash
add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, int length";
@ -288,34 +288,34 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
# LOOP_RESTORATION functions
add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
specialize qw/apply_selfguided_restoration sse4_1 avx2/;
specialize qw/apply_selfguided_restoration sse4_1 avx2 neon/;
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
int sgr_params_idx, int bit_depth, int highbd";
specialize qw/av1_selfguided_restoration sse4_1 avx2/;
specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
# CONVOLVE_ROUND/COMPOUND_ROUND functions
add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
specialize qw/av1_convolve_2d_sr sse2 avx2 neon/;
specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/;

47
third_party/aom/av1/common/av1_txfm.h поставляемый
Просмотреть файл

@ -171,53 +171,6 @@ static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip);
}
static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
switch (tx_size) {
case TX_4X4: return TX_4X4;
case TX_8X8: return TX_8X8;
case TX_16X16: return TX_16X16;
case TX_32X32: return TX_32X32;
case TX_64X64: return TX_64X64;
case TX_32X64: return TX_64X32;
case TX_64X32: return TX_32X64;
case TX_4X8: return TX_8X4;
case TX_8X4: return TX_4X8;
case TX_8X16: return TX_16X8;
case TX_16X8: return TX_8X16;
case TX_16X32: return TX_32X16;
case TX_32X16: return TX_16X32;
case TX_4X16: return TX_16X4;
case TX_16X4: return TX_4X16;
case TX_8X32: return TX_32X8;
case TX_32X8: return TX_8X32;
case TX_16X64: return TX_64X16;
case TX_64X16: return TX_16X64;
default: assert(0); return TX_INVALID;
}
}
static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
switch (tx_type) {
case DCT_DCT: return DCT_DCT;
case ADST_DCT: return DCT_ADST;
case DCT_ADST: return ADST_DCT;
case ADST_ADST: return ADST_ADST;
case FLIPADST_DCT: return DCT_FLIPADST;
case DCT_FLIPADST: return FLIPADST_DCT;
case FLIPADST_FLIPADST: return FLIPADST_FLIPADST;
case ADST_FLIPADST: return FLIPADST_ADST;
case FLIPADST_ADST: return ADST_FLIPADST;
case IDTX: return IDTX;
case V_DCT: return H_DCT;
case H_DCT: return V_DCT;
case V_ADST: return H_ADST;
case H_ADST: return V_ADST;
case V_FLIPADST: return H_FLIPADST;
case H_FLIPADST: return V_FLIPADST;
default: assert(0); return TX_TYPES;
}
}
// Utility function that returns the log of the ratio of the col and row
// sizes.
static INLINE int get_rect_tx_log_ratio(int col, int row) {

47
third_party/aom/av1/common/blockd.h поставляемый
Просмотреть файл

@ -605,6 +605,12 @@ static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
}
static INLINE uint8_t *get_buf_by_bd(const MACROBLOCKD *xd, uint8_t *buf16) {
return (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
? CONVERT_TO_BYTEPTR(buf16)
: buf16;
}
static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_4X4: return 0;
@ -674,6 +680,15 @@ static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
};
static const uint16_t av1_ext_tx_used_flag[EXT_TX_SET_TYPES] = {
0x0001, // 0000 0000 0000 0001
0x0201, // 0000 0010 0000 0001
0x020F, // 0000 0010 0000 1111
0x0E0F, // 0000 1110 0000 1111
0x0FFF, // 0000 1111 1111 1111
0xFFFF, // 1111 1111 1111 1111
};
static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
int use_reduced_set) {
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
@ -1145,38 +1160,6 @@ static INLINE PLANE_TYPE get_plane_type(int plane) {
return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
}
static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
const uint8_t *src, int src_stride, int w,
int h) {
int r, c;
for (r = 0; r < h; ++r)
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}
static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
const uint16_t *src, int src_stride, int w,
int h) {
int r, c;
for (r = 0; r < h; ++r)
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}
static INLINE void transpose_int16(int16_t *dst, int dst_stride,
const int16_t *src, int src_stride, int w,
int h) {
int r, c;
for (r = 0; r < h; ++r)
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}
static INLINE void transpose_int32(int32_t *dst, int dst_stride,
const int32_t *src, int src_stride, int w,
int h) {
int r, c;
for (r = 0; r < h; ++r)
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
}
static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
return 1024;

6
third_party/aom/av1/common/cdef.c поставляемый
Просмотреть файл

@ -110,7 +110,7 @@ void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
static void copy_sb8_16(AOM_UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
const uint8_t *src, int src_voffset, int src_hoffset,
int sstride, int vsize, int hsize) {
if (cm->use_highbitdepth) {
if (cm->seq_params.use_highbitdepth) {
const uint16_t *base =
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
@ -153,7 +153,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int mi_high_l2[3];
int xdec[3];
int ydec[3];
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
@ -363,7 +363,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
if (cm->use_highbitdepth) {
if (cm->seq_params.use_highbitdepth) {
cdef_filter_fb(
NULL,
&CONVERT_TO_SHORTPTR(

15
third_party/aom/av1/common/cfl.c поставляемый
Просмотреть файл

@ -15,21 +15,14 @@
#include "config/av1_rtcd.h"
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params) {
assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
if (!(cm->subsampling_x == 0 && cm->subsampling_y == 0) &&
!(cm->subsampling_x == 1 && cm->subsampling_y == 1) &&
!(cm->subsampling_x == 1 && cm->subsampling_y == 0)) {
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Only 4:4:4, 4:2:2 and 4:2:0 are currently supported by "
"CfL, %d %d subsampling is not supported.\n",
cm->subsampling_x, cm->subsampling_y);
}
memset(&cfl->recon_buf_q3, 0, sizeof(cfl->recon_buf_q3));
memset(&cfl->ac_buf_q3, 0, sizeof(cfl->ac_buf_q3));
cfl->subsampling_x = cm->subsampling_x;
cfl->subsampling_y = cm->subsampling_y;
cfl->subsampling_x = seq_params->subsampling_x;
cfl->subsampling_y = seq_params->subsampling_y;
cfl->are_parameters_computed = 0;
cfl->store_y = 0;
// The DC_PRED cache is disabled by default and is only enabled in

154
third_party/aom/av1/common/convolve.c поставляемый
Просмотреть файл

@ -75,8 +75,8 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
@ -91,7 +91,7 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < im_h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t sum = (1 << (bd + FILTER_BITS - 1));
@ -107,7 +107,7 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
// vertical filter
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
@ -126,8 +126,8 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
const int fo_vert = filter_params_y->taps / 2 - 1;
@ -141,7 +141,7 @@ void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -156,8 +156,8 @@ void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
const int fo_horiz = filter_params_x->taps / 2 - 1;
@ -172,7 +172,7 @@ void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -187,8 +187,8 @@ void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
(void)filter_params_x;
@ -204,8 +204,8 @@ void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -222,7 +222,7 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < im_h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t sum = (1 << (bd + FILTER_BITS - 1));
@ -238,7 +238,7 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
// vertical filter
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
@ -270,8 +270,8 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -289,7 +289,7 @@ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -320,8 +320,8 @@ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -339,7 +339,7 @@ void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -370,8 +370,8 @@ void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
uint8_t *dst8, int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -412,8 +412,8 @@ void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
int dst8_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) {
@ -439,7 +439,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
int32_t sum = (1 << (bd + FILTER_BITS - 1));
for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_x[k - fo_horiz];
@ -461,7 +461,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
int32_t sum = 1 << offset_bits;
for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
@ -498,8 +498,8 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
static void convolve_2d_scale_wrapper(
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_qn,
int h, const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y, const int subpel_x_qn,
const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) {
if (conv_params->is_compound) {
@ -520,25 +520,27 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
(void)y_step_q4;
(void)dst;
(void)dst_stride;
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, &filter_params_x,
&filter_params_y, w, h);
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
const InterpFilterParams *filter_params_x =
av1_get_interp_filter_params_with_block_size(filter_x, w);
const InterpFilterParams *filter_params_y =
av1_get_interp_filter_params_with_block_size(filter_y, h);
if (scaled)
convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
filter_params_x, filter_params_y, subpel_x_q4,
x_step_q4, subpel_y_q4, y_step_q4, conv_params);
else
sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][conv_params->is_compound](
src, src_stride, dst, dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
src, src_stride, dst, dst_stride, w, h, filter_params_x,
filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
}
void av1_highbd_convolve_2d_copy_sr_c(
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_q4,
int h, const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
(void)filter_params_x;
(void)filter_params_y;
@ -554,8 +556,8 @@ void av1_highbd_convolve_2d_copy_sr_c(
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
const int fo_horiz = filter_params_x->taps / 2 - 1;
@ -569,7 +571,7 @@ void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -585,8 +587,8 @@ void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
const int fo_vert = filter_params_y->taps / 2 - 1;
@ -599,7 +601,7 @@ void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -614,8 +616,8 @@ void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
@ -630,7 +632,7 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
// horizontal filter
const uint16_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < im_h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t sum = (1 << (bd + FILTER_BITS - 1));
@ -646,7 +648,7 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
// vertical filter
int16_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
@ -666,8 +668,9 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
uint16_t *dst16, int dst16_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
int h,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
int x, y, k;
@ -685,7 +688,7 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
// horizontal filter
const uint16_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (y = 0; y < im_h; ++y) {
for (x = 0; x < w; ++x) {
int32_t sum = (1 << (bd + FILTER_BITS - 1));
@ -703,7 +706,7 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
int16_t *src_vert = im_block + fo_vert * im_stride;
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) {
int32_t sum = 1 << offset_bits;
@ -734,8 +737,9 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
uint16_t *dst16, int dst16_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
int h,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -753,7 +757,7 @@ void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
assert(bits >= 0);
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -784,8 +788,9 @@ void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
uint16_t *dst16, int dst16_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
int h,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params, int bd) {
CONV_BUF_TYPE *dst = conv_params->dst;
@ -803,7 +808,7 @@ void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
assert(bits >= 0);
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
int32_t res = 0;
@ -834,8 +839,8 @@ void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
void av1_highbd_jnt_convolve_2d_copy_c(
const uint16_t *src, int src_stride, uint16_t *dst16, int dst16_stride,
int w, int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_q4,
int w, int h, const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
@ -875,8 +880,8 @@ void av1_highbd_jnt_convolve_2d_copy_c(
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params, int bd) {
@ -900,7 +905,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
int32_t sum = (1 << (bd + FILTER_BITS - 1));
for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_x[k - fo_horiz];
@ -922,7 +927,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
int32_t sum = 1 << offset_bits;
for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
@ -971,9 +976,12 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
(void)dst_stride;
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, &filter_params_x,
&filter_params_y, w, h);
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
const InterpFilterParams *filter_params_x =
av1_get_interp_filter_params_with_block_size(filter_x, w);
const InterpFilterParams *filter_params_y =
av1_get_interp_filter_params_with_block_size(filter_y, h);
if (scaled) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
@ -981,16 +989,16 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
assert(conv_params->dst != NULL);
}
av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
conv_params, bd);
filter_params_x, filter_params_y, subpel_x_q4,
x_step_q4, subpel_y_q4, y_step_q4, conv_params,
bd);
} else {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
sf->highbd_convolve[subpel_x_q4 != 0][subpel_y_q4 !=
0][conv_params->is_compound](
src, src_stride, dst, dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
src, src_stride, dst, dst_stride, w, h, filter_params_x,
filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
}
}

18
third_party/aom/av1/common/convolve.h поставляемый
Просмотреть файл

@ -40,27 +40,17 @@ typedef struct ConvolveParams {
typedef void (*aom_convolve_fn_t)(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params);
typedef void (*aom_highbd_convolve_fn_t)(
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_q4,
int h, const InterpFilterParams *filter_params_x,
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
const int subpel_y_q4, ConvolveParams *conv_params, int bd);
static INLINE void av1_get_convolve_filter_params(InterpFilters interp_filters,
InterpFilterParams *params_x,
InterpFilterParams *params_y,
int w, int h) {
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
*params_x = av1_get_interp_filter_params_with_block_size(filter_x, w);
*params_y = av1_get_interp_filter_params_with_block_size(filter_y, h);
}
struct AV1Common;
struct scale_factors;

2
third_party/aom/av1/common/enums.h поставляемый
Просмотреть файл

@ -557,6 +557,7 @@ typedef uint8_t TXFM_CONTEXT;
#define BWDREF_FRAME 5
#define ALTREF2_FRAME 6
#define ALTREF_FRAME 7
#define EXTREF_FRAME REF_FRAMES
#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
@ -607,6 +608,7 @@ typedef enum ATTRIBUTE_PACKED {
// In large_scale_tile coding, external references are used.
#define MAX_EXTERNAL_REFERENCES 128
#define MAX_TILES 512
#ifdef __cplusplus
} // extern "C"

120
third_party/aom/av1/common/filter.c поставляемый
Просмотреть файл

@ -1,120 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include "av1/common/filter.h"
DECLARE_ALIGNED(256, static const InterpKernel,
bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
{ 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
{ 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
{ 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
{ 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
{ 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
{ 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
{ 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
{ 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
{ -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
{ -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
{ 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
};
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
{ (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_REGULAR },
{ (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_SMOOTH },
{ (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
MULTITAP_SHARP },
{ (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
BILINEAR }
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_4[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
{ 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
{ 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
{ 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
{ 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
{ 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
{ 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
{ 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
{ 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
};
static const InterpFilterParams av1_interp_4tap[2] = {
{ (const int16_t *)sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_REGULAR },
{ (const int16_t *)sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_SMOOTH },
};
InterpFilterParams av1_get_interp_filter_params_with_block_size(
const InterpFilter interp_filter, const int w) {
if (w <= 4 &&
(interp_filter == MULTITAP_SHARP || interp_filter == EIGHTTAP_REGULAR))
return av1_interp_4tap[0];
else if (w <= 4 && interp_filter == EIGHTTAP_SMOOTH)
return av1_interp_4tap[1];
return av1_interp_filter_params_list[interp_filter];
}
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter) {
return (const int16_t *)av1_interp_filter_params_list[interp_filter]
.filter_ptr;
}

116
third_party/aom/av1/common/filter.h поставляемый
Просмотреть файл

@ -64,8 +64,8 @@ static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) {
return filter == SWITCHABLE ? EIGHTTAP_REGULAR : filter;
}
#define LOG_SWITCHABLE_FILTERS \
2 /* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
/* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
#define LOG_SWITCHABLE_FILTERS 2
#define MAX_SUBPEL_TAPS 12
#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
@ -79,14 +79,116 @@ typedef struct InterpFilterParams {
InterpFilter interp_filter;
} InterpFilterParams;
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter);
DECLARE_ALIGNED(256, static const InterpKernel,
av1_bilinear_filters[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
{ 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
{ 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
{ 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
{ 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
{ 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
{ 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
{ 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
};
InterpFilterParams av1_get_interp_filter_params_with_block_size(
const InterpFilter interp_filter, const int w);
DECLARE_ALIGNED(256, static const InterpKernel,
av1_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
{ 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
{ 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
av1_sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
{ -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
{ -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
av1_sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
{ 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
};
static const InterpFilterParams
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
{ (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_REGULAR },
{ (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS,
SUBPEL_SHIFTS, EIGHTTAP_SMOOTH },
{ (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
MULTITAP_SHARP },
{ (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
BILINEAR }
};
DECLARE_ALIGNED(256, static const InterpKernel,
av1_sub_pel_filters_4[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
{ 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
{ 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
{ 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
{ 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
{ 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
{ 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
{ 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
};
DECLARE_ALIGNED(256, static const InterpKernel,
av1_sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
{ 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
};
// For w<=4, MULTITAP_SHARP is the same as EIGHTTAP_REGULAR
static const InterpFilterParams av1_interp_4tap[SWITCHABLE_FILTERS + 1] = {
{ (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_REGULAR },
{ (const int16_t *)av1_sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_SMOOTH },
{ (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
EIGHTTAP_REGULAR },
{ (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
BILINEAR },
};
static INLINE const InterpFilterParams *
av1_get_interp_filter_params_with_block_size(const InterpFilter interp_filter,
const int w) {
if (w <= 4) return &av1_interp_4tap[interp_filter];
return &av1_interp_filter_params_list[interp_filter];
}
static INLINE const int16_t *av1_get_interp_filter_kernel(
const InterpFilter interp_filter) {
return av1_interp_filter_params_list[interp_filter].filter_ptr;
}
static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
const InterpFilterParams filter_params, const int subpel) {
return filter_params.filter_ptr + filter_params.taps * subpel;
const InterpFilterParams *const filter_params, const int subpel) {
return filter_params->filter_ptr + filter_params->taps * subpel;
}
#ifdef __cplusplus

3
third_party/aom/av1/common/mv.h поставляемый
Просмотреть файл

@ -294,9 +294,6 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
mv->row = clamp(mv->row, min_row, max_row);
}
static INLINE int mv_has_subpel(const MV *mv) {
return (mv->row & SUBPEL_MASK) || (mv->col & SUBPEL_MASK);
}
#ifdef __cplusplus
} // extern "C"
#endif

Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше