зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1476408 - Update libaom to rev b25610052a1398032320008d69b51d2da94f5928; r=TD-Linux
Tags: #secure-revision Bug #: 1476408 Differential Revision: https://phabricator.services.mozilla.com/D2358 --HG-- extra : rebase_source : fa2438ada27a67e400617705014460b6d5ff485c
This commit is contained in:
Родитель
d421ba3540
Коммит
48e87ceaf3
|
@ -22,4 +22,4 @@ To update to a fork, use
|
|||
|
||||
The last update was pulled from https://aomedia.googlesource.com/aom/
|
||||
|
||||
The git commit ID used was d14c5bb4f336ef1842046089849dee4a301fbbf0 (Mon Jun 25 07:54:59 2018 -0700).
|
||||
The git commit ID used was b25610052a1398032320008d69b51d2da94f5928 (Mon Jul 23 18:08:58 2018 +0000).
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -63,22 +63,22 @@ void av1_build_compound_diffwtd_mask_d16_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask
|
|||
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_c
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_convolve_2d_copy_sr av1_convolve_2d_copy_sr_c
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
#define av1_convolve_2d_scale av1_convolve_2d_scale_c
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_convolve_2d_sr av1_convolve_2d_sr_c
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
#define av1_convolve_horiz_rs av1_convolve_horiz_rs_c
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_convolve_x_sr av1_convolve_x_sr_c
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_convolve_y_sr av1_convolve_y_sr_c
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
|
@ -108,13 +108,13 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
|
|||
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_copy_sr av1_highbd_convolve_2d_copy_sr_c
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_sr av1_highbd_convolve_2d_sr_c
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
|
@ -126,10 +126,10 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
|
|||
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
#define av1_highbd_convolve_horiz_rs av1_highbd_convolve_horiz_rs_c
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_x_sr av1_highbd_convolve_x_sr_c
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_y_sr av1_highbd_convolve_y_sr_c
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
|
@ -147,16 +147,16 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_2d av1_highbd_jnt_convolve_2d_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_2d_copy av1_highbd_jnt_convolve_2d_copy_c
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_x av1_highbd_jnt_convolve_x_c
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_y av1_highbd_jnt_convolve_y_c
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -225,16 +225,16 @@ void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride
|
|||
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#define av1_inv_txfm_add av1_inv_txfm_add_c
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_jnt_convolve_2d av1_jnt_convolve_2d_c
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_jnt_convolve_2d_copy av1_jnt_convolve_2d_copy_c
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_jnt_convolve_x av1_jnt_convolve_x_c
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
#define av1_jnt_convolve_y av1_jnt_convolve_y_c
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
.equ CONFIG_COLLECT_INTER_MODE_RD_STATS, 1
|
||||
.equ CONFIG_COLLECT_RD_STATS, 0
|
||||
.equ CONFIG_DEBUG, 0
|
||||
.equ CONFIG_DENOISE, 0
|
||||
.equ CONFIG_DIST_8X8, 1
|
||||
.equ CONFIG_ENTROPY_STATS, 0
|
||||
.equ CONFIG_FILEOPTIONS, 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -1041,13 +1041,15 @@ void aom_lowbd_blend_a64_d16_mask_neon(uint8_t *dst, uint32_t dst_stride, const
|
|||
RTCD_EXTERN void (*aom_lowbd_blend_a64_d16_mask)(uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params);
|
||||
|
||||
void aom_lpf_horizontal_14_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
#define aom_lpf_horizontal_14 aom_lpf_horizontal_14_c
|
||||
void aom_lpf_horizontal_14_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
RTCD_EXTERN void (*aom_lpf_horizontal_14)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
|
||||
void aom_lpf_horizontal_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
|
||||
#define aom_lpf_horizontal_14_dual aom_lpf_horizontal_14_dual_c
|
||||
|
||||
void aom_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
#define aom_lpf_horizontal_4 aom_lpf_horizontal_4_c
|
||||
void aom_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
RTCD_EXTERN void (*aom_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
|
||||
void aom_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
|
||||
#define aom_lpf_horizontal_4_dual aom_lpf_horizontal_4_dual_c
|
||||
|
@ -1074,13 +1076,15 @@ void aom_lpf_vertical_14_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, c
|
|||
#define aom_lpf_vertical_14_dual aom_lpf_vertical_14_dual_c
|
||||
|
||||
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
#define aom_lpf_vertical_4 aom_lpf_vertical_4_c
|
||||
void aom_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
RTCD_EXTERN void (*aom_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
|
||||
void aom_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
|
||||
#define aom_lpf_vertical_4_dual aom_lpf_vertical_4_dual_c
|
||||
|
||||
void aom_lpf_vertical_6_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
#define aom_lpf_vertical_6 aom_lpf_vertical_6_c
|
||||
void aom_lpf_vertical_6_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
RTCD_EXTERN void (*aom_lpf_vertical_6)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
|
||||
|
||||
void aom_lpf_vertical_6_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
|
||||
#define aom_lpf_vertical_6_dual aom_lpf_vertical_6_dual_c
|
||||
|
@ -1468,12 +1472,20 @@ static void setup_rtcd_internal(void)
|
|||
if (flags & HAS_NEON) aom_highbd_dc_predictor_8x8 = aom_highbd_dc_predictor_8x8_neon;
|
||||
aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_c;
|
||||
if (flags & HAS_NEON) aom_lowbd_blend_a64_d16_mask = aom_lowbd_blend_a64_d16_mask_neon;
|
||||
aom_lpf_horizontal_14 = aom_lpf_horizontal_14_c;
|
||||
if (flags & HAS_NEON) aom_lpf_horizontal_14 = aom_lpf_horizontal_14_neon;
|
||||
aom_lpf_horizontal_4 = aom_lpf_horizontal_4_c;
|
||||
if (flags & HAS_NEON) aom_lpf_horizontal_4 = aom_lpf_horizontal_4_neon;
|
||||
aom_lpf_horizontal_6 = aom_lpf_horizontal_6_c;
|
||||
if (flags & HAS_NEON) aom_lpf_horizontal_6 = aom_lpf_horizontal_6_neon;
|
||||
aom_lpf_horizontal_8 = aom_lpf_horizontal_8_c;
|
||||
if (flags & HAS_NEON) aom_lpf_horizontal_8 = aom_lpf_horizontal_8_neon;
|
||||
aom_lpf_vertical_14 = aom_lpf_vertical_14_c;
|
||||
if (flags & HAS_NEON) aom_lpf_vertical_14 = aom_lpf_vertical_14_neon;
|
||||
aom_lpf_vertical_4 = aom_lpf_vertical_4_c;
|
||||
if (flags & HAS_NEON) aom_lpf_vertical_4 = aom_lpf_vertical_4_neon;
|
||||
aom_lpf_vertical_6 = aom_lpf_vertical_6_c;
|
||||
if (flags & HAS_NEON) aom_lpf_vertical_6 = aom_lpf_vertical_6_neon;
|
||||
aom_lpf_vertical_8 = aom_lpf_vertical_8_c;
|
||||
if (flags & HAS_NEON) aom_lpf_vertical_8 = aom_lpf_vertical_8_neon;
|
||||
aom_v_predictor_16x16 = aom_v_predictor_16x16_c;
|
||||
|
|
|
@ -52,7 +52,8 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
void apply_selfguided_restoration_c(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
#define apply_selfguided_restoration apply_selfguided_restoration_c
|
||||
void apply_selfguided_restoration_neon(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
RTCD_EXTERN void (*apply_selfguided_restoration)(const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd);
|
||||
|
||||
void av1_build_compound_diffwtd_mask_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w);
|
||||
#define av1_build_compound_diffwtd_mask av1_build_compound_diffwtd_mask_c
|
||||
|
@ -64,27 +65,27 @@ RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_d16)(uint8_t *mask, DIFFWTD_M
|
|||
void av1_build_compound_diffwtd_mask_highbd_c(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
#define av1_build_compound_diffwtd_mask_highbd av1_build_compound_diffwtd_mask_highbd_c
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
#define av1_convolve_2d_scale av1_convolve_2d_scale_c
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
#define av1_convolve_horiz_rs av1_convolve_horiz_rs_c
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -113,13 +114,13 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
|
|||
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_copy_sr av1_highbd_convolve_2d_copy_sr_c
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_scale av1_highbd_convolve_2d_scale_c
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_2d_sr av1_highbd_convolve_2d_sr_c
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
|
@ -131,10 +132,10 @@ void av1_highbd_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_
|
|||
void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
#define av1_highbd_convolve_horiz_rs av1_highbd_convolve_horiz_rs_c
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_x_sr av1_highbd_convolve_x_sr_c
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_convolve_y_sr av1_highbd_convolve_y_sr_c
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
|
@ -152,16 +153,16 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_2d av1_highbd_jnt_convolve_2d_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_2d_copy av1_highbd_jnt_convolve_2d_copy_c
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_x av1_highbd_jnt_convolve_x_c
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
#define av1_highbd_jnt_convolve_y av1_highbd_jnt_convolve_y_c
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -228,28 +229,34 @@ void av1_inv_txfm2d_add_8x8_c(const int32_t *input, uint16_t *output, int stride
|
|||
#define av1_inv_txfm2d_add_8x8 av1_inv_txfm2d_add_8x8_c
|
||||
|
||||
void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
#define av1_inv_txfm_add av1_inv_txfm_add_c
|
||||
void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
#define av1_selfguided_restoration av1_selfguided_restoration_c
|
||||
void av1_selfguided_restoration_neon(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
RTCD_EXTERN void (*av1_selfguided_restoration)(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd);
|
||||
|
||||
void av1_upsample_intra_edge_c(uint8_t *p, int sz);
|
||||
#define av1_upsample_intra_edge av1_upsample_intra_edge_c
|
||||
|
@ -328,6 +335,8 @@ static void setup_rtcd_internal(void)
|
|||
|
||||
(void)flags;
|
||||
|
||||
apply_selfguided_restoration = apply_selfguided_restoration_c;
|
||||
if (flags & HAS_NEON) apply_selfguided_restoration = apply_selfguided_restoration_neon;
|
||||
av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_c;
|
||||
if (flags & HAS_NEON) av1_build_compound_diffwtd_mask_d16 = av1_build_compound_diffwtd_mask_d16_neon;
|
||||
av1_convolve_2d_copy_sr = av1_convolve_2d_copy_sr_c;
|
||||
|
@ -338,6 +347,8 @@ static void setup_rtcd_internal(void)
|
|||
if (flags & HAS_NEON) av1_convolve_x_sr = av1_convolve_x_sr_neon;
|
||||
av1_convolve_y_sr = av1_convolve_y_sr_c;
|
||||
if (flags & HAS_NEON) av1_convolve_y_sr = av1_convolve_y_sr_neon;
|
||||
av1_inv_txfm_add = av1_inv_txfm_add_c;
|
||||
if (flags & HAS_NEON) av1_inv_txfm_add = av1_inv_txfm_add_neon;
|
||||
av1_jnt_convolve_2d = av1_jnt_convolve_2d_c;
|
||||
if (flags & HAS_NEON) av1_jnt_convolve_2d = av1_jnt_convolve_2d_neon;
|
||||
av1_jnt_convolve_2d_copy = av1_jnt_convolve_2d_copy_c;
|
||||
|
@ -346,6 +357,8 @@ static void setup_rtcd_internal(void)
|
|||
if (flags & HAS_NEON) av1_jnt_convolve_x = av1_jnt_convolve_x_neon;
|
||||
av1_jnt_convolve_y = av1_jnt_convolve_y_c;
|
||||
if (flags & HAS_NEON) av1_jnt_convolve_y = av1_jnt_convolve_y_neon;
|
||||
av1_selfguided_restoration = av1_selfguided_restoration_c;
|
||||
if (flags & HAS_NEON) av1_selfguided_restoration = av1_selfguided_restoration_neon;
|
||||
av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_c;
|
||||
if (flags & HAS_NEON) av1_wiener_convolve_add_src = av1_wiener_convolve_add_src_neon;
|
||||
cdef_filter_block = cdef_filter_block_c;
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
|
|||
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
|
|||
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
|
|||
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
|
|||
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -127,19 +127,19 @@ void av1_highbd_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint
|
|||
void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_c
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -151,15 +151,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -176,25 +176,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -272,25 +272,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
|
|||
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -24,6 +24,7 @@ CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
|
|||
CONFIG_COLLECT_INTER_MODE_RD_STATS equ 1
|
||||
CONFIG_COLLECT_RD_STATS equ 0
|
||||
CONFIG_DEBUG equ 0
|
||||
CONFIG_DENOISE equ 0
|
||||
CONFIG_DIST_8X8 equ 1
|
||||
CONFIG_ENTROPY_STATS equ 0
|
||||
CONFIG_FILEOPTIONS equ 1
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define CONFIG_COLLECT_INTER_MODE_RD_STATS 1
|
||||
#define CONFIG_COLLECT_RD_STATS 0
|
||||
#define CONFIG_DEBUG 0
|
||||
#define CONFIG_DENOISE 0
|
||||
#define CONFIG_DIST_8X8 1
|
||||
#define CONFIG_ENTROPY_STATS 0
|
||||
#define CONFIG_FILEOPTIONS 1
|
||||
|
|
|
@ -69,33 +69,33 @@ void av1_build_compound_diffwtd_mask_highbd_ssse3(uint8_t *mask, DIFFWTD_MASK_TY
|
|||
void av1_build_compound_diffwtd_mask_highbd_avx2(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
RTCD_EXTERN void (*av1_build_compound_diffwtd_mask_highbd)(uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0, int src0_stride, const uint8_t *src1, int src1_stride, int h, int w, int bd);
|
||||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_copy_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_copy_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_scale_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_scale)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_2d_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_2d_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
void av1_convolve_horiz_rs_sse4_1(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
RTCD_EXTERN void (*av1_convolve_horiz_rs)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn);
|
||||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_x_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_x_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_convolve_y_sr_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_convolve_y_sr)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_dr_prediction_z1_c(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left, int upsample_above, int dx, int dy);
|
||||
#define av1_dr_prediction_z1 av1_dr_prediction_z1_c
|
||||
|
@ -130,19 +130,19 @@ void av1_highbd_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8
|
|||
void av1_highbd_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve8_vert av1_highbd_convolve8_vert_sse2
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_sse2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_copy_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_copy_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_scale_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_scale)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_2d_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_2d_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps);
|
||||
#define av1_highbd_convolve_avg av1_highbd_convolve_avg_c
|
||||
|
@ -154,15 +154,15 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, uint16_
|
|||
void av1_highbd_convolve_horiz_rs_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_horiz_rs)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const int16_t *x_filters, int x0_qn, int x_step_qn, int bd);
|
||||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_x_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_convolve_y_sr)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_dr_prediction_z1_c(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int upsample_above, int dx, int dy, int bd);
|
||||
#define av1_highbd_dr_prediction_z1 av1_highbd_dr_prediction_z1_c
|
||||
|
@ -179,25 +179,25 @@ void av1_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int des
|
|||
void av1_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int bd);
|
||||
#define av1_highbd_iwht4x4_1_add av1_highbd_iwht4x4_1_add_c
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_2d_copy_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_2d_copy)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_x_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_x)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_sse4_1(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
void av1_highbd_jnt_convolve_y_avx2(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
RTCD_EXTERN void (*av1_highbd_jnt_convolve_y)(const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta);
|
||||
|
@ -275,25 +275,25 @@ void av1_inv_txfm_add_ssse3(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
|||
void av1_inv_txfm_add_avx2(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
RTCD_EXTERN void (*av1_inv_txfm_add)(const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param);
|
||||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_2d_copy_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_2d_copy)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_x_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_x)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_sse2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
void av1_jnt_convolve_y_avx2(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
RTCD_EXTERN void (*av1_jnt_convolve_y)(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params);
|
||||
|
||||
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
|
|
|
@ -58,14 +58,15 @@ files = {
|
|||
'../../third_party/aom/aom_util/debug_util.c',
|
||||
'../../third_party/aom/av1/av1_dx_iface.c',
|
||||
'../../third_party/aom/av1/common/alloccommon.c',
|
||||
'../../third_party/aom/av1/common/arm/av1_inv_txfm_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/av1_txfm_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/blend_a64_hmask_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/blend_a64_vmask_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/cfl_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/convolve_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/intrapred_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/jnt_convolve_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/reconinter_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/selfguided_neon.c',
|
||||
'../../third_party/aom/av1/common/arm/wiener_convolve_neon.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm1d.c',
|
||||
'../../third_party/aom/av1/common/av1_inv_txfm2d.c',
|
||||
|
@ -82,7 +83,6 @@ files = {
|
|||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/filter.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
|
@ -109,6 +109,8 @@ files = {
|
|||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
'../../third_party/aom/av1/encoder/arm/neon/quantize_neon.c',
|
||||
'../../third_party/aom/stats/aomstats.c',
|
||||
'../../third_party/aom/stats/rate_hist.c',
|
||||
],
|
||||
'GENERIC_EXPORTS': [
|
||||
'../../third_party/aom/aom/aom.h',
|
||||
|
@ -174,7 +176,6 @@ files = {
|
|||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/filter.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
|
@ -200,6 +201,8 @@ files = {
|
|||
'../../third_party/aom/av1/decoder/detokenize.c',
|
||||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
'../../third_party/aom/stats/aomstats.c',
|
||||
'../../third_party/aom/stats/rate_hist.c',
|
||||
],
|
||||
'IA32_EXPORTS': [
|
||||
'../../third_party/aom/aom/aom.h',
|
||||
|
@ -298,7 +301,6 @@ files = {
|
|||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/filter.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
|
@ -361,6 +363,8 @@ files = {
|
|||
'../../third_party/aom/av1/decoder/detokenize.c',
|
||||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
'../../third_party/aom/stats/aomstats.c',
|
||||
'../../third_party/aom/stats/rate_hist.c',
|
||||
],
|
||||
'X64_EXPORTS': [
|
||||
'../../third_party/aom/aom/aom.h',
|
||||
|
@ -458,7 +462,6 @@ files = {
|
|||
'../../third_party/aom/av1/common/entropy.c',
|
||||
'../../third_party/aom/av1/common/entropymode.c',
|
||||
'../../third_party/aom/av1/common/entropymv.c',
|
||||
'../../third_party/aom/av1/common/filter.c',
|
||||
'../../third_party/aom/av1/common/frame_buffers.c',
|
||||
'../../third_party/aom/av1/common/idct.c',
|
||||
'../../third_party/aom/av1/common/mvref_common.c',
|
||||
|
@ -521,5 +524,7 @@ files = {
|
|||
'../../third_party/aom/av1/decoder/detokenize.c',
|
||||
'../../third_party/aom/av1/decoder/dthread.c',
|
||||
'../../third_party/aom/av1/decoder/obu.c',
|
||||
'../../third_party/aom/stats/aomstats.c',
|
||||
'../../third_party/aom/stats/rate_hist.c',
|
||||
],
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
# Generated with cmake-format 0.3.6
|
||||
# How wide to allow formatted cmake files
|
||||
line_width = 80
|
||||
|
||||
|
|
|
@ -1,631 +1,5 @@
|
|||
Next Release
|
||||
- Incompatible changes:
|
||||
The AV1 encoder's default keyframe interval changed to 128 from 9999.
|
||||
Support for armv6 was removed.
|
||||
2018-06-28 v1.0.0
|
||||
AOMedia Codec Workgroup Approved version 1.0
|
||||
|
||||
2016-04-07 v0.1.0 "AOMedia Codec 1"
|
||||
This release is the first Alliance for Open Media codec.
|
||||
2015-11-09 v1.5.0 "Javan Whistling Duck"
|
||||
This release improves upon the VP9 encoder and speeds up the encoding and
|
||||
decoding processes.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI incompatible with 1.4.0. It drops deprecated VP8
|
||||
controls and adds a variety of VP9 controls for testing.
|
||||
|
||||
The vpxenc utility now prefers VP9 by default.
|
||||
|
||||
- Enhancements:
|
||||
Faster VP9 encoding and decoding
|
||||
Smaller library size by combining functions used by VP8 and VP9
|
||||
|
||||
- Bug Fixes:
|
||||
A variety of fuzzing issues
|
||||
|
||||
2015-04-03 v1.4.0 "Indian Runner Duck"
|
||||
This release includes significant improvements to the VP9 codec.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI incompatible with 1.3.0. It drops the compatibility
|
||||
layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
|
||||
controls for VP9.
|
||||
|
||||
- Enhancements:
|
||||
Faster VP9 encoding and decoding
|
||||
Multithreaded VP9 decoding (tile and frame-based)
|
||||
Multithreaded VP9 encoding - on by default
|
||||
YUV 4:2:2 and 4:4:4 support in VP9
|
||||
10 and 12bit support in VP9
|
||||
64bit ARM support by replacing ARM assembly with intrinsics
|
||||
|
||||
- Bug Fixes:
|
||||
Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
|
||||
files.
|
||||
|
||||
- Known Issues:
|
||||
Frame Parallel decoding fails for segmented and non-420 files.
|
||||
|
||||
2013-11-15 v1.3.0 "Forest"
|
||||
This release introduces the VP9 codec in a backward-compatible way.
|
||||
All existing users of VP8 can continue to use the library without
|
||||
modification. However, some VP8 options do not map to VP9 in the same manner.
|
||||
|
||||
The VP9 encoder in this release is not feature complete. Users interested in
|
||||
the encoder are advised to use the git master branch and discuss issues on
|
||||
libvpx mailing lists.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI and API compatible with Duclair (v1.0.0). Users
|
||||
of older releases should refer to the Upgrading notes in this document
|
||||
for that release.
|
||||
|
||||
- Enhancements:
|
||||
Get rid of bashisms in the main build scripts
|
||||
Added usage info on command line options
|
||||
Add lossless compression mode
|
||||
Dll build of libvpx
|
||||
Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
|
||||
Add option to disable documentation
|
||||
configure: add --enable-external-build support
|
||||
make: support V=1 as short form of verbose=yes
|
||||
configure: support mingw-w64
|
||||
configure: support hardfloat armv7 CHOSTS
|
||||
configure: add support for android x86
|
||||
Add estimated completion time to vpxenc
|
||||
Don't exit on decode errors in vpxenc
|
||||
vpxenc: support scaling prior to encoding
|
||||
vpxdec: support scaling output
|
||||
vpxenc: improve progress indicators with --skip
|
||||
msvs: Don't link to winmm.lib
|
||||
Add a new script for producing vcxproj files
|
||||
Produce Visual Studio 10 and 11 project files
|
||||
Produce Windows Phone project files
|
||||
msvs-build: use msbuild for vs >= 2005
|
||||
configure: default configure log to config.log
|
||||
Add encoding option --static-thresh
|
||||
|
||||
- Speed:
|
||||
Miscellaneous speed optimizations for VP8 and VP9.
|
||||
|
||||
- Quality:
|
||||
In general, quality is consistent with the Eider release.
|
||||
|
||||
- Bug Fixes:
|
||||
This release represents approximately a year of engineering effort,
|
||||
and contains multiple bug fixes. Please refer to git history for details.
|
||||
|
||||
|
||||
2012-12-21 v1.2.0
|
||||
This release acts as a checkpoint for a large amount of internal refactoring
|
||||
and testing. It also contains a number of small bugfixes, so all users are
|
||||
encouraged to upgrade.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI and API compatible with Duclair (v1.0.0). Users
|
||||
of older releases should refer to the Upgrading notes in this
|
||||
document for that release.
|
||||
|
||||
- Enhancements:
|
||||
VP8 optimizations for MIPS dspr2
|
||||
vpxenc: add -quiet option
|
||||
|
||||
- Speed:
|
||||
Encoder and decoder speed is consistent with the Eider release.
|
||||
|
||||
- Quality:
|
||||
In general, quality is consistent with the Eider release.
|
||||
|
||||
Minor tweaks to ARNR filtering
|
||||
Minor improvements to real time encoding with multiple temporal layers
|
||||
|
||||
- Bug Fixes:
|
||||
Fixes multithreaded encoder race condition in loopfilter
|
||||
Fixes multi-resolution threaded encoding
|
||||
Fix potential encoder dead-lock after picture resize
|
||||
|
||||
|
||||
2012-05-09 v1.1.0 "Eider"
|
||||
This introduces a number of enhancements, mostly focused on real-time
|
||||
encoding. In addition, it fixes a decoder bug (first introduced in
|
||||
Duclair) so all users of that release are encouraged to upgrade.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI and API compatible with Duclair (v1.0.0). Users
|
||||
of older releases should refer to the Upgrading notes in this
|
||||
document for that release.
|
||||
|
||||
This release introduces a new temporal denoiser, controlled by the
|
||||
VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
|
||||
currently take a strength parameter, so the control is effectively
|
||||
a boolean - zero (off) or non-zero (on). For compatibility with
|
||||
existing applications, the values accepted are the same as those
|
||||
for the spatial denoiser (0-6). The temporal denoiser is enabled
|
||||
by default, and the older spatial denoiser may be restored by
|
||||
configuring with --disable-temporal-denoising. The temporal denoiser
|
||||
is more computationally intensive than the spatial one.
|
||||
|
||||
This release removes support for a legacy, decode only API that was
|
||||
supported, but deprecated, at the initial release of libvpx
|
||||
(v0.9.0). This is not expected to have any impact. If you are
|
||||
impacted, you can apply a reversion to commit 2bf8fb58 locally.
|
||||
Please update to the latest libvpx API if you are affected.
|
||||
|
||||
- Enhancements:
|
||||
Adds a motion compensated temporal denoiser to the encoder, which
|
||||
gives higher quality than the older spatial denoiser. (See above
|
||||
for notes on upgrading).
|
||||
|
||||
In addition, support for new compilers and platforms was added,
|
||||
including:
|
||||
improved support for XCode
|
||||
Android x86 NDK build
|
||||
OS/2 support
|
||||
SunCC support
|
||||
|
||||
Changing resolution with vpx_codec_enc_config_set() is now
|
||||
supported. Previously, reinitializing the codec was required to
|
||||
change the input resolution.
|
||||
|
||||
The vpxenc application has initial support for producing multiple
|
||||
encodes from the same input in one call. Resizing is not yet
|
||||
supported, but varying other codec parameters is. Use -- to
|
||||
delineate output streams. Options persist from one stream to the
|
||||
next.
|
||||
|
||||
Also, the vpxenc application will now use a keyframe interval of
|
||||
5 seconds by default. Use the --kf-max-dist option to override.
|
||||
|
||||
- Speed:
|
||||
Decoder performance improved 2.5% versus Duclair. Encoder speed is
|
||||
consistent with Duclair for most material. Two pass encoding of
|
||||
slideshow-like material will see significant improvements.
|
||||
|
||||
Large realtime encoding speed gains at a small quality expense are
|
||||
possible by configuring the on-the-fly bitpacking experiment with
|
||||
--enable-onthefly-bitpacking. Realtime encoder can be up to 13%
|
||||
faster (ARM) depending on the number of threads and bitrate
|
||||
settings. This technique sees constant gain over the 5-16 speed
|
||||
range. For VC style input the loss seen is up to 0.2dB. See commit
|
||||
52cf4dca for further details.
|
||||
|
||||
- Quality:
|
||||
On the whole, quality is consistent with the Duclair release. Some
|
||||
tweaks:
|
||||
|
||||
Reduced blockiness in easy sections by applying a penalty to
|
||||
intra modes.
|
||||
|
||||
Improved quality of static sections (like slideshows) with
|
||||
two pass encoding.
|
||||
|
||||
Improved keyframe sizing with multiple temporal layers
|
||||
|
||||
- Bug Fixes:
|
||||
Corrected alt-ref contribution to frame rate for visible updates
|
||||
to the alt-ref buffer. This affected applications making manual
|
||||
usage of the frame reference flags, or temporal layers.
|
||||
|
||||
Additional constraints were added to disable multi-frame quality
|
||||
enhancement (MFQE) in sections of the frame where there is motion.
|
||||
(#392)
|
||||
|
||||
Fixed corruption issues when vpx_codec_enc_config_set() was called
|
||||
with spatial resampling enabled.
|
||||
|
||||
Fixed a decoder error introduced in Duclair where the segmentation
|
||||
map was not being reinitialized on keyframes (#378)
|
||||
|
||||
|
||||
2012-01-27 v1.0.0 "Duclair"
|
||||
Our fourth named release, focused on performance and features related to
|
||||
real-time encoding. It also fixes a decoder crash bug introduced in
|
||||
v0.9.7, so all users of that release are encouraged to upgrade.
|
||||
|
||||
- Upgrading:
|
||||
This release is ABI incompatible with prior releases of libvpx, so the
|
||||
"major" version number has been bumped to 1. You must recompile your
|
||||
applications against the latest version of the libvpx headers. The
|
||||
API remains compatible, and this should not require code changes in most
|
||||
applications.
|
||||
|
||||
- Enhancements:
|
||||
This release introduces several substantial new features to the encoder,
|
||||
of particular interest to real time streaming applications.
|
||||
|
||||
Temporal scalability allows the encoder to produce a stream that can
|
||||
be decimated to different frame rates, with independent rate targeting
|
||||
for each substream.
|
||||
|
||||
Multiframe quality enhancement postprocessing can make visual quality
|
||||
more consistent in the presence of frames that are substantially
|
||||
different quality than the surrounding frames, as in the temporal
|
||||
scalability case and in some forced keyframe scenarios.
|
||||
|
||||
Multiple-resolution encoding support allows the encoding of the
|
||||
same content at different resolutions faster than encoding them
|
||||
separately.
|
||||
|
||||
- Speed:
|
||||
Optimization targets for this release included the decoder and the real-
|
||||
time modes of the encoder. Decoder speed on x86 has improved 10.5% with
|
||||
this release. Encoder improvements followed a curve where speeds 1-3
|
||||
improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
|
||||
1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
|
||||
Cayuga release.
|
||||
|
||||
- Quality:
|
||||
Encoder quality in the single stream case is consistent with the Cayuga
|
||||
release.
|
||||
|
||||
- Bug Fixes:
|
||||
This release fixes an OOB read decoder crash bug present in v0.9.7
|
||||
related to the clamping of motion vectors in SPLITMV blocks. This
|
||||
behavior could be triggered by corrupt input or by starting
|
||||
decoding from a P-frame.
|
||||
|
||||
|
||||
2011-08-15 v0.9.7-p1 "Cayuga" patch 1
|
||||
This is an incremental bugfix release against Cayuga. All users of that
|
||||
release are strongly encouraged to upgrade.
|
||||
|
||||
- Fix potential OOB reads (cdae03a)
|
||||
|
||||
An unbounded out of bounds read was discovered when the
|
||||
decoder was requested to perform error concealment (new in
|
||||
Cayuga) given a frame with corrupt partition sizes.
|
||||
|
||||
A bounded out of bounds read was discovered affecting all
|
||||
versions of libvpx. Given a multipartition input frame that
|
||||
is truncated between the mode/mv partition and the first
|
||||
residual partition (in the block of partition offsets), up
|
||||
to 3 extra bytes could have been read from the source buffer.
|
||||
The code will not take any action regardless of the contents
|
||||
of these undefined bytes, as the truncated buffer is detected
|
||||
immediately following the read based on the calculated
|
||||
starting position of the coefficient partition.
|
||||
|
||||
- Fix potential error concealment crash when the very first frame
|
||||
is missing or corrupt (a609be5)
|
||||
|
||||
- Fix significant artifacts in error concealment (a4c2211, 99d870a)
|
||||
|
||||
- Revert 1-pass CBR rate control changes (e961317)
|
||||
Further testing showed this change produced undesirable visual
|
||||
artifacts, rolling back for now.
|
||||
|
||||
|
||||
2011-08-02 v0.9.7 "Cayuga"
|
||||
Our third named release, focused on a faster, higher quality, encoder.
|
||||
|
||||
- Upgrading:
|
||||
This release is backwards compatible with Aylesbury (v0.9.5) and
|
||||
Bali (v0.9.6). Users of older releases should refer to the Upgrading
|
||||
notes in this document for that release.
|
||||
|
||||
- Enhancements:
|
||||
Stereo 3D format support for vpxenc
|
||||
Runtime detection of available processor cores.
|
||||
Allow specifying --end-usage by enum name
|
||||
vpxdec: test for frame corruption
|
||||
vpxenc: add quantizer histogram display
|
||||
vpxenc: add rate histogram display
|
||||
Set VPX_FRAME_IS_DROPPABLE
|
||||
update configure for ios sdk 4.3
|
||||
Avoid text relocations in ARM vp8 decoder
|
||||
Generate a vpx.pc file for pkg-config.
|
||||
New ways of passing encoded data between encoder and decoder.
|
||||
|
||||
- Speed:
|
||||
This release includes across-the-board speed improvements to the
|
||||
encoder. On x86, these measure at approximately 11.5% in Best mode,
|
||||
21.5% in Good mode (speed 0), and 22.5% in Realtime mode (speed 6).
|
||||
On ARM Cortex A9 with Neon extensions, real-time encoding of video
|
||||
telephony content is 35% faster than Bali on single core and 48%
|
||||
faster on multi-core. On the NVidia Tegra2 platform, real time
|
||||
encoding is 40% faster than Bali.
|
||||
|
||||
Decoder speed was not a priority for this release, but improved
|
||||
approximately 8.4% on x86.
|
||||
|
||||
Reduce motion vector search on alt-ref frame.
|
||||
Encoder loopfilter running in its own thread
|
||||
Reworked loopfilter to precalculate more parameters
|
||||
SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
|
||||
Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
|
||||
Removed redundant checks
|
||||
Reduced structure sizes
|
||||
utilize preload in ARMv6 MC/LPF/Copy routines
|
||||
ARM optimized quantization, dfct, variance, subtract
|
||||
Increase chroma row alignment to 16 bytes.
|
||||
disable trellis optimization for first pass
|
||||
Write SSSE3 sub-pixel filter function
|
||||
Improve SSE2 half-pixel filter functions
|
||||
Add vp8_sub_pixel_variance16x8_ssse3 function
|
||||
Reduce unnecessary distortion computation
|
||||
Use diamond search to replace full search
|
||||
Preload reference area in sub-pixel motion search (real-time mode)
|
||||
|
||||
- Quality:
|
||||
This release focused primarily on one-pass use cases, including
|
||||
video conferencing. Low latency data rate control was significantly
|
||||
improved, improving streamability over bandwidth constrained links.
|
||||
Added support for error concealment, allowing frames to maintain
|
||||
visual quality in the presence of substantial packet loss.
|
||||
|
||||
Add rc_max_intra_bitrate_pct control
|
||||
Limit size of initial keyframe in one-pass.
|
||||
Improve framerate adaptation
|
||||
Improved 1-pass CBR rate control
|
||||
Improved KF insertion after fades to still.
|
||||
Improved key frame detection.
|
||||
Improved activity masking (lower PSNR impact for same SSIM boost)
|
||||
Improved interaction between GF and ARFs
|
||||
Adding error-concealment to the decoder.
|
||||
Adding support for independent partitions
|
||||
Adjusted rate-distortion constants
|
||||
|
||||
|
||||
- Bug Fixes:
|
||||
Removed firstpass motion map
|
||||
Fix parallel make install
|
||||
Fix multithreaded encoding for 1 MB wide frame
|
||||
Fixed iwalsh_neon build problems with RVDS4.1
|
||||
Fix semaphore emulation, spin-wait intrinsics on Windows
|
||||
Fix build with xcode4 and simplify GLOBAL.
|
||||
Mark ARM asm objects as allowing a non-executable stack.
|
||||
Fix vpxenc encoding incorrect webm file header on big endian
|
||||
|
||||
|
||||
2011-03-07 v0.9.6 "Bali"
|
||||
Our second named release, focused on a faster, higher quality, encoder.
|
||||
|
||||
- Upgrading:
|
||||
This release is backwards compatible with Aylesbury (v0.9.5). Users
|
||||
of older releases should refer to the Upgrading notes in this
|
||||
document for that release.
|
||||
|
||||
- Enhancements:
|
||||
vpxenc --psnr shows a summary when encode completes
|
||||
--tune=ssim option to enable activity masking
|
||||
improved postproc visualizations for development
|
||||
updated support for Apple iOS to SDK 4.2
|
||||
query decoder to determine which reference frames were updated
|
||||
implemented error tracking in the decoder
|
||||
fix pipe support on windows
|
||||
|
||||
- Speed:
|
||||
Primary focus was on good quality mode, speed 0. Average improvement
|
||||
on x86 about 40%, up to 100% on user-generated content at that speed.
|
||||
Best quality mode speed improved 35%, and realtime speed 10-20%. This
|
||||
release also saw significant improvement in realtime encoding speed
|
||||
on ARM platforms.
|
||||
|
||||
Improved encoder threading
|
||||
Don't pick encoder filter level when loopfilter is disabled.
|
||||
Avoid double copying of key frames into alt and golden buffer
|
||||
FDCT optimizations.
|
||||
x86 sse2 temporal filter
|
||||
SSSE3 version of fast quantizer
|
||||
vp8_rd_pick_best_mbsegmentation code restructure
|
||||
Adjusted breakout RD for SPLITMV
|
||||
Changed segmentation check order
|
||||
Improved rd_pick_intra4x4block
|
||||
Adds armv6 optimized variance calculation
|
||||
ARMv6 optimized sad16x16
|
||||
ARMv6 optimized half pixel variance calculations
|
||||
Full search SAD function optimization in SSE4.1
|
||||
Improve MV prediction accuracy to achieve performance gain
|
||||
Improve MV prediction in vp8_pick_inter_mode() for speed>3
|
||||
|
||||
- Quality:
|
||||
Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release
|
||||
also includes support for "activity masking," which greatly improves
|
||||
SSIM at the expense of PSNR. For now, this feature is available with
|
||||
the --tune=ssim option. Further experimentation in this area
|
||||
is ongoing. This release also introduces a new rate control mode
|
||||
called "CQ," which changes the allocation of bits within a clip to
|
||||
the sections where they will have the most visual impact.
|
||||
|
||||
Tuning for the more exact quantizer.
|
||||
Relax rate control for last few frames
|
||||
CQ Mode
|
||||
Limit key frame quantizer for forced key frames.
|
||||
KF/GF Pulsing
|
||||
Add simple version of activity masking.
|
||||
make rdmult adaptive for intra in quantizer RDO
|
||||
cap the best quantizer for 2nd order DC
|
||||
change the threshold of DC check for encode breakout
|
||||
|
||||
- Bug Fixes:
|
||||
Fix crash on Sparc Solaris.
|
||||
Fix counter of fixed keyframe distance
|
||||
ARNR filter pointer update bug fix
|
||||
Fixed use of motion percentage in KF/GF group calc
|
||||
Changed condition for using RD in Intra Mode
|
||||
Fix encoder real-time only configuration.
|
||||
Fix ARM encoder crash with multiple token partitions
|
||||
Fixed bug where the first cluster timecode of a webm file is wrong.
|
||||
Fixed various encoder bugs with odd-sized images
|
||||
vp8e_get_preview fixed when spatial resampling enabled
|
||||
quantizer: fix assertion in fast quantizer path
|
||||
Allocate source buffers to be multiples of 16
|
||||
Fix for manual Golden frame frequency
|
||||
Fix drastic undershoot in long form content
|
||||
|
||||
|
||||
2010-10-28 v0.9.5 "Aylesbury"
|
||||
Our first named release, focused on a faster decoder, and a better encoder.
|
||||
|
||||
- Upgrading:
|
||||
This release incorporates backwards-incompatible changes to the
|
||||
ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.
|
||||
|
||||
vpxdec
|
||||
* the -q (quiet) option has been removed, and replaced with
|
||||
-v (verbose). The output is quiet by default. Use -v to see
|
||||
the version number of the binary.
|
||||
|
||||
* The default behavior is now to write output to a single file
|
||||
instead of individual frames. The -y option has been removed.
|
||||
Y4M output is the default.
|
||||
|
||||
* For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
|
||||
options must be specified.
|
||||
|
||||
$ ivfdec -o OUTPUT INPUT
|
||||
$ vpxdec --i420 -o OUTPUT INPUT
|
||||
|
||||
* If an output file is not specified, the default is to write
|
||||
Y4M to stdout. This makes piping more natural.
|
||||
|
||||
$ ivfdec -y -o - INPUT | ...
|
||||
$ vpxdec INPUT | ...
|
||||
|
||||
* The output file has additional flexibility for formatting the
|
||||
filename. It supports escape characters for constructing a
|
||||
filename from the width, height, and sequence number. This
|
||||
replaces the -p option. To get the equivalent:
|
||||
|
||||
$ ivfdec -p frame INPUT
|
||||
$ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT
|
||||
|
||||
vpxenc
|
||||
* The output file must be specified with -o, rather than as the
|
||||
last argument.
|
||||
|
||||
$ ivfenc <options> INPUT OUTPUT
|
||||
$ vpxenc <options> -o OUTPUT INPUT
|
||||
|
||||
* The output defaults to webm. To get IVF output, use the --ivf
|
||||
option.
|
||||
|
||||
$ ivfenc <options> INPUT OUTPUT.ivf
|
||||
$ vpxenc <options> -o OUTPUT.ivf --ivf INPUT
|
||||
|
||||
|
||||
- Enhancements:
|
||||
ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
|
||||
vpxdec supports .webm input
|
||||
vpxdec writes .y4m by default
|
||||
vpxenc writes .webm output by default
|
||||
vpxenc --psnr now shows the average/overall PSNR at the end
|
||||
ARM platforms now support runtime cpu detection
|
||||
vpxdec visualizations added for motion vectors, block modes, references
|
||||
vpxdec now silent by default
|
||||
vpxdec --progress shows frame-by-frame timing information
|
||||
vpxenc supports the distinction between --fps and --timebase
|
||||
NASM is now a supported assembler
|
||||
configure: enable PIC for shared libs by default
|
||||
configure: add --enable-small
|
||||
configure: support for ppc32-linux-gcc
|
||||
configure: support for sparc-solaris-gcc
|
||||
|
||||
- Bugs:
|
||||
Improve handling of invalid frames
|
||||
Fix valgrind errors in the NEON loop filters.
|
||||
Fix loopfilter delta zero transitions
|
||||
Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
|
||||
Build fixes for darwin-icc
|
||||
|
||||
- Speed:
|
||||
20-40% (average 28%) improvement in libvpx decoder speed,
|
||||
including:
|
||||
Rewrite vp8_short_walsh4x4_sse2()
|
||||
Optimizations on the loopfilters.
|
||||
Miscellaneous improvements for Atom
|
||||
Add 4-tap version of 2nd-pass ARMv6 MC filter.
|
||||
Improved multithread utilization
|
||||
Better instruction choices on x86
|
||||
reorder data to use wider instructions
|
||||
Update NEON wide idcts
|
||||
Make block access to frame buffer sequential
|
||||
Improved subset block search
|
||||
Bilinear subpixel optimizations for ssse3.
|
||||
Decrease memory footprint
|
||||
|
||||
Encoder speed improvements (percentage gain not measured):
|
||||
Skip unnecessary search of identical frames
|
||||
Add SSE2 subtract functions
|
||||
Improve bounds checking in vp8_diamond_search_sadx4()
|
||||
Added vp8_fast_quantize_b_sse2
|
||||
|
||||
- Quality:
|
||||
Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
|
||||
encoding mode, and up to 60% improvement on very noisy, still
|
||||
or slow moving source video
|
||||
|
||||
Motion compensated temporal filter for Alt-Ref Noise Reduction
|
||||
Improved use of trellis quantization on 2nd order Y blocks
|
||||
Tune effect of motion on KF/GF boost in two pass
|
||||
Allow coefficient optimization for good quality speed 0.
|
||||
Improved control of active min quantizer for two pass.
|
||||
Enable ARFs for non-lagged compress
|
||||
|
||||
2010-09-02 v0.9.2
|
||||
- Enhancements:
|
||||
Disable frame dropping by default
|
||||
Improved multithreaded performance
|
||||
Improved Force Key Frame Behaviour
|
||||
Increased rate control buffer level precision
|
||||
Fix bug in 1st pass motion compensation
|
||||
ivfenc: correct fixed kf interval, --disable-kf
|
||||
- Speed:
|
||||
Changed above and left context data layout
|
||||
Rework idct calling structure.
|
||||
Removed unnecessary MB_MODE_INFO copies
|
||||
x86: SSSE3 sixtap prediction
|
||||
Reworked IDCT to include reconstruction (add) step
|
||||
Swap alt/gold/new/last frame buffer ptrs instead of copying.
|
||||
Improve SSE2 loopfilter functions
|
||||
Change bitreader to use a larger window.
|
||||
Avoid loopfilter reinitialization when possible
|
||||
- Quality:
|
||||
Normalize quantizer's zero bin and rounding factors
|
||||
Add trellis quantization.
|
||||
Make the quantizer exact.
|
||||
Updates to ARNR filtering algorithm
|
||||
Fix breakout thresh computation for golden & AltRef frames
|
||||
Redo the forward 4x4 dct
|
||||
Improve the accuracy of forward walsh-hadamard transform
|
||||
Further adjustment of RD behaviour with Q and Zbin.
|
||||
- Build System:
|
||||
Allow linking of libs built with MinGW to MSVC
|
||||
Fix target auto-detection on mingw32
|
||||
Allow --cpu= to work for x86.
|
||||
configure: pass original arguments through to make dist
|
||||
Fix builds without runtime CPU detection
|
||||
msvs: fix install of codec sources
|
||||
msvs: Change devenv.com command line for better msys support
|
||||
msvs: Add vs9 targets.
|
||||
Add x86_64-linux-icc target
|
||||
- Bugs:
|
||||
Potential crashes on older MinGW builds
|
||||
Fix two-pass framerate for Y4M input.
|
||||
Fixed simple loop filter, other crashes on ARM v6
|
||||
arm: fix missing dependency with --enable-shared
|
||||
configure: support directories containing .o
|
||||
Replace pinsrw (SSE) with MMX instructions
|
||||
apple: include proper mach primitives
|
||||
Fixed rate control bug with long key frame interval.
|
||||
Fix DSO link errors on x86-64 when not using a version script
|
||||
Fixed buffer selection for UV in AltRef filtering
|
||||
|
||||
|
||||
2010-06-17 v0.9.1
|
||||
- Enhancements:
|
||||
* ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
|
||||
* Speed optimizations
|
||||
- Bugfixes:
|
||||
* Rate control
|
||||
* Prevent out-of-bounds accesses on invalid data
|
||||
- Build system updates:
|
||||
* Detect toolchain to be used automatically for native builds
|
||||
* Support building shared libraries
|
||||
* Better autotools emulation (--prefix, --libdir, DESTDIR)
|
||||
- Updated LICENSE
|
||||
* http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html
|
||||
|
||||
|
||||
2010-05-18 v0.9.0
|
||||
- Initial open source release. Welcome to WebM and VP8!
|
||||
|
||||
|
|
|
@ -186,11 +186,9 @@ list(APPEND AOM_ENCODER_APP_UTIL_SOURCES
|
|||
"${AOM_ROOT}/examples/encoder_util.h"
|
||||
"${AOM_ROOT}/examples/encoder_util.c")
|
||||
|
||||
if (ENABLE_EXAMPLES)
|
||||
list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
|
||||
"${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
|
||||
"${AOM_ROOT}/stats/rate_hist.h")
|
||||
endif ()
|
||||
list(APPEND AOM_ENCODER_STATS_SOURCES "${AOM_ROOT}/stats/aomstats.c"
|
||||
"${AOM_ROOT}/stats/aomstats.h" "${AOM_ROOT}/stats/rate_hist.c"
|
||||
"${AOM_ROOT}/stats/rate_hist.h")
|
||||
|
||||
list(APPEND AOM_PKG_CONFIG_SOURCES "${AOM_CONFIG_DIR}/aom.pc")
|
||||
|
||||
|
|
|
@ -854,6 +854,12 @@ enum aome_enc_control_id {
|
|||
/*!\brief Codec control function to set the path to the film grain parameters
|
||||
*/
|
||||
AV1E_SET_FILM_GRAIN_TABLE,
|
||||
|
||||
/*!\brief Sets the noise level */
|
||||
AV1E_SET_DENOISE_NOISE_LEVEL,
|
||||
|
||||
/*!\brief Sets the denoiser's block size */
|
||||
AV1E_SET_DENOISE_BLOCK_SIZE,
|
||||
};
|
||||
|
||||
/*!\brief aom 1-D scaling mode
|
||||
|
@ -1165,6 +1171,14 @@ AOM_CTRL_USE_TYPE(AV1E_SET_FILM_GRAIN_TABLE, const char *)
|
|||
AOM_CTRL_USE_TYPE(AV1E_SET_CDF_UPDATE_MODE, int)
|
||||
#define AOM_CTRL_AV1E_SET_CDF_UPDATE_MODE
|
||||
|
||||
/* Typed control wrappers for the two encoder denoiser controls
 * (AV1E_SET_DENOISE_NOISE_LEVEL takes an int, AV1E_SET_DENOISE_BLOCK_SIZE an
 * unsigned int).
 *
 * The invocations deliberately carry no trailing ';', matching every other
 * AOM_CTRL_USE_TYPE use in this header; a trailing ';' here would be an empty
 * declaration at file scope.
 *
 * NOTE(review): '#ifdef' only tests whether CONFIG_DENOISE is defined, not
 * whether it is non-zero. The generated config for this tree appears to
 * define it as 0 -- confirm whether value-based gating was intended.
 */
#ifdef CONFIG_DENOISE
AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_NOISE_LEVEL, int)
#define AOM_CTRL_AV1E_SET_DENOISE_NOISE_LEVEL

AOM_CTRL_USE_TYPE(AV1E_SET_DENOISE_BLOCK_SIZE, unsigned int)
#define AOM_CTRL_AV1E_SET_DENOISE_BLOCK_SIZE
#endif
|
||||
|
||||
/*!\endcond */
|
||||
/*! @} - end defgroup aom_encoder */
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -119,6 +119,12 @@ enum aom_dec_control_id {
|
|||
/** control function to get the bit depth of the stream. */
|
||||
AV1D_GET_BIT_DEPTH,
|
||||
|
||||
/** control function to get the image format of the stream. */
|
||||
AV1D_GET_IMG_FORMAT,
|
||||
|
||||
/** control function to get the size of the tile. */
|
||||
AV1D_GET_TILE_SIZE,
|
||||
|
||||
/** control function to set the byte alignment of the planes in the reference
|
||||
* buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets
|
||||
* legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly
|
||||
|
@ -187,6 +193,12 @@ enum aom_dec_control_id {
|
|||
*/
|
||||
AV1D_EXT_TILE_DEBUG,
|
||||
|
||||
/** control function to enable the row based multi-threading of decoding. A
|
||||
* value that is equal to 1 indicates that row based multi-threading is
|
||||
* enabled.
|
||||
*/
|
||||
AV1D_SET_ROW_MT,
|
||||
|
||||
/** control function to indicate whether bitstream is in Annex-B format. */
|
||||
AV1D_SET_IS_ANNEXB,
|
||||
|
||||
|
@ -238,6 +250,10 @@ AOM_CTRL_USE_TYPE(AV1D_GET_DISPLAY_SIZE, int *)
|
|||
#define AOM_CTRL_AV1D_GET_DISPLAY_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_BIT_DEPTH, unsigned int *)
|
||||
#define AOM_CTRL_AV1D_GET_BIT_DEPTH
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_IMG_FORMAT, aom_img_fmt_t *)
|
||||
#define AOM_CTRL_AV1D_GET_IMG_FORMAT
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_TILE_SIZE, unsigned int *)
|
||||
#define AOM_CTRL_AV1D_GET_TILE_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1D_GET_FRAME_SIZE, int *)
|
||||
#define AOM_CTRL_AV1D_GET_FRAME_SIZE
|
||||
AOM_CTRL_USE_TYPE(AV1_INVERT_TILE_DECODE_ORDER, int)
|
||||
|
@ -258,6 +274,8 @@ AOM_CTRL_USE_TYPE(AV1D_SET_EXT_REF_PTR, av1_ext_ref_frame_t *)
|
|||
#define AOM_CTRL_AV1D_SET_EXT_REF_PTR
|
||||
AOM_CTRL_USE_TYPE(AV1D_EXT_TILE_DEBUG, unsigned int)
|
||||
#define AOM_CTRL_AV1D_EXT_TILE_DEBUG
|
||||
AOM_CTRL_USE_TYPE(AV1D_SET_ROW_MT, unsigned int)
|
||||
#define AOM_CTRL_AV1D_SET_ROW_MT
|
||||
AOM_CTRL_USE_TYPE(AV1D_SET_IS_ANNEXB, unsigned int)
|
||||
#define AOM_CTRL_AV1D_SET_IS_ANNEXB
|
||||
AOM_CTRL_USE_TYPE(AV1D_SET_OPERATING_POINT, int)
|
||||
|
|
|
@ -417,7 +417,7 @@ struct aom_internal_error_info {
|
|||
aom_codec_err_t error_code;
|
||||
int has_detail;
|
||||
char detail[80];
|
||||
int setjmp;
|
||||
int setjmp; // Boolean: whether 'jmp' is valid.
|
||||
jmp_buf jmp;
|
||||
};
|
||||
|
||||
|
|
|
@ -83,6 +83,7 @@ list(APPEND AOM_DSP_COMMON_INTRIN_SSE4_1
|
|||
list(APPEND AOM_DSP_COMMON_INTRIN_AVX2
|
||||
"${AOM_ROOT}/aom_dsp/x86/aom_subpixel_8t_intrin_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/common_avx2.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/txfm_common_avx2.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/convolve_avx2.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/fft_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/highbd_convolve_avx2.c"
|
||||
|
@ -190,13 +191,16 @@ if(CONFIG_AV1_ENCODER)
|
|||
"${AOM_ROOT}/aom_dsp/x86/ssim_opt_x86_64.asm")
|
||||
|
||||
list(APPEND AOM_DSP_ENCODER_INTRIN_AVX2
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/subtract_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/highbd_quantize_intrin_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/sad4d_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/sad_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/sad_highbd_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/sad_impl_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/variance_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/variance_impl_avx2.c")
|
||||
"${AOM_ROOT}/aom_dsp/x86/variance_impl_avx2.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/obmc_sad_avx2.c")
|
||||
|
||||
list(APPEND AOM_DSP_ENCODER_ASM_SSSE3_X86_64
|
||||
"${AOM_ROOT}/aom_dsp/x86/quantize_ssse3_x86_64.asm")
|
||||
|
@ -205,9 +209,11 @@ if(CONFIG_AV1_ENCODER)
|
|||
"${AOM_ROOT}/aom_dsp/x86/quantize_avx_x86_64.asm")
|
||||
|
||||
list(APPEND AOM_DSP_ENCODER_INTRIN_SSSE3
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_sad_intrin_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.h"
|
||||
"${AOM_ROOT}/aom_dsp/x86/masked_variance_intrin_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/variance_impl_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/jnt_variance_ssse3.c"
|
||||
"${AOM_ROOT}/aom_dsp/x86/jnt_sad_ssse3.c")
|
||||
|
||||
|
|
|
@ -15,4 +15,4 @@
|
|||
|
||||
#include "aom_ports/aom_once.h"
|
||||
|
||||
void aom_dsp_rtcd() { once(setup_rtcd_internal); }
|
||||
void aom_dsp_rtcd() { aom_once(setup_rtcd_internal); }
|
||||
|
|
|
@ -377,7 +377,7 @@ add_proto qw/void aom_lpf_vertical_14_dual/, "uint8_t *s, int pitch, const uint8
|
|||
specialize qw/aom_lpf_vertical_14_dual sse2/;
|
||||
|
||||
add_proto qw/void aom_lpf_vertical_6/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/aom_lpf_vertical_6 sse2/;
|
||||
specialize qw/aom_lpf_vertical_6 sse2 neon/;
|
||||
|
||||
add_proto qw/void aom_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/aom_lpf_vertical_8 sse2 neon/;
|
||||
|
@ -386,13 +386,13 @@ add_proto qw/void aom_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_
|
|||
specialize qw/aom_lpf_vertical_8_dual sse2/;
|
||||
|
||||
add_proto qw/void aom_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/aom_lpf_vertical_4 sse2/;
|
||||
specialize qw/aom_lpf_vertical_4 sse2 neon/;
|
||||
|
||||
add_proto qw/void aom_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/aom_lpf_vertical_4_dual sse2/;
|
||||
|
||||
add_proto qw/void aom_lpf_horizontal_14/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/aom_lpf_horizontal_14 sse2/;
|
||||
specialize qw/aom_lpf_horizontal_14 sse2 neon/;
|
||||
|
||||
add_proto qw/void aom_lpf_horizontal_14_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/aom_lpf_horizontal_14_dual sse2/;
|
||||
|
@ -410,7 +410,7 @@ add_proto qw/void aom_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint
|
|||
specialize qw/aom_lpf_horizontal_8_dual sse2/;
|
||||
|
||||
add_proto qw/void aom_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
|
||||
specialize qw/aom_lpf_horizontal_4 sse2/;
|
||||
specialize qw/aom_lpf_horizontal_4 sse2 neon/;
|
||||
|
||||
add_proto qw/void aom_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
|
||||
specialize qw/aom_lpf_horizontal_4_dual sse2/;
|
||||
|
@ -564,7 +564,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
# Block subtraction
|
||||
#
|
||||
add_proto qw/void aom_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
|
||||
specialize qw/aom_subtract_block neon msa sse2/;
|
||||
specialize qw/aom_subtract_block neon msa sse2 avx2/;
|
||||
|
||||
add_proto qw/void aom_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd";
|
||||
specialize qw/aom_highbd_subtract_block sse2/;
|
||||
|
@ -732,14 +732,14 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
foreach (@block_sizes) {
|
||||
($w, $h) = @$_;
|
||||
add_proto qw/unsigned int/, "aom_masked_sad${w}x${h}", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, const uint8_t *msk, int msk_stride, int invert_mask";
|
||||
specialize "aom_masked_sad${w}x${h}", qw/ssse3/;
|
||||
specialize "aom_masked_sad${w}x${h}", qw/ssse3 avx2/;
|
||||
}
|
||||
|
||||
|
||||
foreach (@block_sizes) {
|
||||
($w, $h) = @$_;
|
||||
add_proto qw/unsigned int/, "aom_highbd_masked_sad${w}x${h}", "const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, int msk_stride, int invert_mask";
|
||||
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3/;
|
||||
specialize "aom_highbd_masked_sad${w}x${h}", qw/ssse3 avx2/;
|
||||
}
|
||||
|
||||
|
||||
|
@ -750,7 +750,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
($w, $h) = @$_;
|
||||
add_proto qw/unsigned int/, "aom_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
|
||||
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
|
||||
specialize "aom_obmc_sad${w}x${h}", qw/sse4_1/;
|
||||
specialize "aom_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -759,7 +759,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
($w, $h) = @$_;
|
||||
add_proto qw/unsigned int/, "aom_highbd_obmc_sad${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask";
|
||||
if (! (($w == 128 && $h == 32) || ($w == 32 && $h == 128))) {
|
||||
specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1/;
|
||||
specialize "aom_highbd_obmc_sad${w}x${h}", qw/sse4_1 avx2/;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1102,6 +1102,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
add_proto qw/unsigned int/, "aom_obmc_variance${w}x${h}", "const uint8_t *pre, int pre_stride, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
|
||||
add_proto qw/unsigned int/, "aom_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc, const int32_t *mask, unsigned int *sse";
|
||||
specialize "aom_obmc_variance${w}x${h}", q/sse4_1/;
|
||||
specialize "aom_obmc_sub_pixel_variance${w}x${h}", q/sse4_1/;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1539,9 +1540,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
specialize qw/aom_comp_mask_pred ssse3 avx2/;
|
||||
|
||||
add_proto qw/void aom_highbd_comp_mask_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
|
||||
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col, const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
|
||||
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd";
|
||||
|
||||
specialize qw/aom_highbd_comp_mask_pred avx2/;
|
||||
|
||||
} # CONFIG_AV1_ENCODER
|
||||
|
||||
|
|
|
@ -528,3 +528,63 @@ void aom_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
|
||||
const uint16_t *above,
|
||||
const uint16_t *left) {
|
||||
assert(bw >= 4);
|
||||
assert(IS_POWER_OF_TWO(bw));
|
||||
int expected_dc, sum = 0;
|
||||
const int count = bw * 2;
|
||||
uint32x4_t sum_q = vdupq_n_u32(0);
|
||||
uint32x2_t sum_d;
|
||||
uint16_t *dst_1;
|
||||
if (bw >= 8) {
|
||||
for (int i = 0; i < bw; i += 8) {
|
||||
sum_q = vpadalq_u16(sum_q, vld1q_u16(above));
|
||||
sum_q = vpadalq_u16(sum_q, vld1q_u16(left));
|
||||
above += 8;
|
||||
left += 8;
|
||||
}
|
||||
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
|
||||
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
|
||||
expected_dc = (sum + (count >> 1)) / count;
|
||||
const uint16x8_t dc = vdupq_n_u16((uint16_t)expected_dc);
|
||||
for (int r = 0; r < bw; r++) {
|
||||
dst_1 = dst;
|
||||
for (int i = 0; i < bw; i += 8) {
|
||||
vst1q_u16(dst_1, dc);
|
||||
dst_1 += 8;
|
||||
}
|
||||
dst += stride;
|
||||
}
|
||||
} else { // 4x4
|
||||
sum_q = vaddl_u16(vld1_u16(above), vld1_u16(left));
|
||||
sum_d = vadd_u32(vget_low_u32(sum_q), vget_high_u32(sum_q));
|
||||
sum = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(sum_d)), 0);
|
||||
expected_dc = (sum + (count >> 1)) / count;
|
||||
const uint16x4_t dc = vdup_n_u16((uint16_t)expected_dc);
|
||||
for (int r = 0; r < bw; r++) {
|
||||
vst1_u16(dst, dc);
|
||||
dst += stride;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Defines aom_highbd_<kind>_predictor_<size>x<size>_neon(), a thin wrapper
// that ignores 'bd' and forwards to highbd_<kind>_predictor() with the block
// size passed as the width argument.
#define intra_pred_highbd_sized_neon(kind, size)               \
  void aom_highbd_##kind##_predictor_##size##x##size##_neon(   \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,  \
      const uint16_t *left, int bd) {                          \
    (void)bd;                                                  \
    highbd_##kind##_predictor(dst, stride, size, above, left); \
  }

// Instantiates the wrapper above for the square sizes 4, 8, 16, 32 and 64.
#define intra_pred_square(kind)           \
  intra_pred_highbd_sized_neon(kind, 4);  \
  intra_pred_highbd_sized_neon(kind, 8);  \
  intra_pred_highbd_sized_neon(kind, 16); \
  intra_pred_highbd_sized_neon(kind, 32); \
  intra_pred_highbd_sized_neon(kind, 64);

// Emit the DC predictors, then retire the helper macro.
intra_pred_square(dc);
#undef intra_pred_square
|
||||
|
|
|
@ -52,6 +52,36 @@ static INLINE uint8x8_t lpf_mask(uint8x8_t p3q3, uint8x8_t p2q2, uint8x8_t p1q1,
|
|||
return mask_8x8;
|
||||
}
|
||||
|
||||
// Builds the per-lane filter mask used by the 4-tap loop filter.
//
// NOTE(review): going by the argument names, each input presumably packs four
// 'p' samples in its low 32 bits and four 'q' samples in its high 32 bits --
// confirm against the callers.
//
// A lane of the result is all-ones only when all of the following hold for
// the corresponding position, and zero otherwise:
//   * |p1q1 - p0q0| <= limit in that lane AND in the same lane of the other
//     32-bit half;
//   * 2 * |lo(p0q0) - hi(p0q0)| + (|lo(p1q1) - hi(p1q1)| >> 1) <= blimit,
//     where lo()/hi() are the low/high 32-bit halves.
static INLINE uint8x8_t lpf_mask2(uint8x8_t p1q1, uint8x8_t p0q0,
                                  const uint8_t blimit, const uint8_t limit) {
  // Step 1: neighbouring-sample difference test against 'limit', required to
  // pass in both halves (hence the AND with the half-swapped copy).
  const uint8x8_t within_limit = vcle_u8(vabd_u8(p1q1, p0q0), vdup_n_u8(limit));
  const uint8x8_t within_limit_swapped =
      vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(within_limit)));
  const uint8x8_t limit_mask = vand_u8(within_limit, within_limit_swapped);

  // Step 2: regroup so that val[0] = { lo(p0q0), lo(p1q1) } and
  // val[1] = { hi(p0q0), hi(p1q1) }, then take the cross-half differences.
  const uint32x2x2_t regrouped =
      vtrn_u32(vreinterpret_u32_u8(p0q0), vreinterpret_u32_u8(p1q1));
  const uint16x8_t cross_diff =
      vmovl_u8(vabd_u8(vreinterpret_u8_u32(regrouped.val[0]),
                       vreinterpret_u8_u32(regrouped.val[1])));

  // Low four lanes hold the p0q0 differences (doubled), high four lanes hold
  // the p1q1 differences (halved); their sum is tested against 'blimit'.
  const uint16x4_t doubled = vshl_n_u16(vget_low_u16(cross_diff), 1);
  const uint16x4_t halved = vshr_n_u16(vget_high_u16(cross_diff), 1);
  const uint16x4_t within_blimit =
      vcle_u16(vadd_u16(doubled, halved), vdup_n_u16(blimit));

  // Narrow back to bytes, replicating the four results into both halves.
  const uint8x8_t blimit_mask =
      vmovn_u16(vcombine_u16(within_blimit, within_blimit));

  return vand_u8(limit_mask, blimit_mask);
}
|
||||
|
||||
static INLINE uint8x8_t lpf_flat_mask4(uint8x8_t p3q3, uint8x8_t p2q2,
|
||||
uint8x8_t p1q1, uint8x8_t p0q0) {
|
||||
const uint8x8_t thresh_8x8 = vdup_n_u8(1); // for bd==8 threshold is always 1
|
||||
|
@ -523,6 +553,68 @@ static void lpf_6_neon(uint8x8_t *p2q2, uint8x8_t *p1q1, uint8x8_t *p0q0,
|
|||
}
|
||||
}
|
||||
|
||||
// Applies the 4-tap loop filter in place to the samples held in *p1q1 and
// *p0q0.
//
// NOTE(review): going by the names, each vector presumably carries four 'p'
// samples in its low 32 bits and four 'q' samples in its high 32 bits --
// confirm against the callers.
//
//   p1q1, p0q0 - in/out sample vectors; both are overwritten with the
//                filtered values.
//   blimit, limit - thresholds forwarded to lpf_mask2() to decide which lanes
//                   are filtered at all.
//   thresh     - threshold for the "high edge variance" (hev) test below.
static void lpf_4_neon(uint8x8_t *p1q1, uint8x8_t *p0q0, const uint8_t blimit,
                       const uint8_t limit, const uint8_t thresh) {
  const int8x8_t sign_bit = vdup_n_s8(0x80);
  const int8x8_t three = vdup_n_s8(3);
  const int8x8_t four = vdup_n_s8(4);

  // Lanes where filtering is permitted.
  const uint8x8_t filter_mask = lpf_mask2(*p1q1, *p0q0, blimit, limit);

  // Flip the top bit so the unsigned samples can be treated as signed.
  const int8x8_t s0 = veor_s8(vreinterpret_s8_u8(*p0q0), sign_bit);
  const int8x8_t s1 = veor_s8(vreinterpret_s8_u8(*p1q1), sign_bit);

  // Broadcast each 32-bit half across a full vector:
  // val[0] = { low, low }, val[1] = { high, high }.
  const int32x2x2_t s0_halves =
      vtrn_s32(vreinterpret_s32_s8(s0), vreinterpret_s32_s8(s0));
  const int32x2x2_t s1_halves =
      vtrn_s32(vreinterpret_s32_s8(s1), vreinterpret_s32_s8(s1));
  const int8x8_t ps0 = vreinterpret_s8_s32(s0_halves.val[0]);
  const int8x8_t qs0 = vreinterpret_s8_s32(s0_halves.val[1]);
  const int8x8_t ps1 = vreinterpret_s8_s32(s1_halves.val[0]);
  const int8x8_t qs1 = vreinterpret_s8_s32(s1_halves.val[1]);

  // hev: |p0q0 - p1q1| > thresh in either half of the vector.
  const uint8x8_t hev_half = vcgt_u8(vabd_u8(*p0q0, *p1q1), vdup_n_u8(thresh));
  const uint8x8_t hev_swapped =
      vreinterpret_u8_u32(vrev64_u32(vreinterpret_u32_u8(hev_half)));
  const int8x8_t hev = vreinterpret_s8_u8(vorr_u8(hev_half, hev_swapped));

  // Outer-tap term (ps1 - qs1) contributes only where hev is set.
  int8x8_t filt = vand_s8(vqsub_s8(ps1, qs1), hev);

  // Add 3 * (qs0 - ps0) in 16-bit precision, saturate back to 8 bits, and
  // zero the lanes that the filter mask rejects.
  const int16x8_t filt_wide =
      vmlal_s8(vmovl_s8(filt), vqsub_s8(qs0, ps0), three);
  filt = vand_s8(vqmovn_s16(filt_wide), vreinterpret_s8_u8(filter_mask));

  // Two roundings of the filter value: (+4) >> 3 is applied to q0, (+3) >> 3
  // to p0.
  const int8x8_t filt_q = vshr_n_s8(vqadd_s8(filt, four), 3);
  const int8x8_t filt_p = vshr_n_s8(vqadd_s8(filt, three), 3);

  const int8x8_t new_q0 = veor_s8(vqsub_s8(qs0, filt_q), sign_bit);
  const int8x8_t new_p0 = veor_s8(vqadd_s8(ps0, filt_p), sign_bit);

  // Outer samples get half of the q0 adjustment (rounded), and only where
  // hev is NOT set.
  const int8x8_t filt_outer = vbic_s8(vrshr_n_s8(filt_q, 1), hev);

  const int8x8_t new_q1 = veor_s8(vqsub_s8(qs1, filt_outer), sign_bit);
  const int8x8_t new_p1 = veor_s8(vqadd_s8(ps1, filt_outer), sign_bit);

  // Re-pack: four bytes of the p result followed by four bytes of the q
  // result.
  *p0q0 = vreinterpret_u8_s8(vext_s8(new_p0, new_q0, 4));
  *p1q1 = vreinterpret_u8_s8(vext_s8(new_p1, new_q1, 4));
}
|
||||
|
||||
void aom_lpf_vertical_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint8x16_t row0, row1, row2, row3;
|
||||
|
@ -646,6 +738,125 @@ void aom_lpf_vertical_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
|||
store_u8_8x4(src - 4, stride, p3q0, p2q1, p1q2, p0q3);
|
||||
}
|
||||
|
||||
void aom_lpf_vertical_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint32x2x2_t p2q2_p1q1, pxqy_p0q0;
|
||||
uint32x2_t pq_rev;
|
||||
uint8x8_t pxq0, p2q1, p1q2, p0qy;
|
||||
uint8x8_t p0q0, p1q1, p2q2, pxqy;
|
||||
|
||||
// row0: px p2 p1 p0 | q0 q1 q2 qy
|
||||
// row1: px p2 p1 p0 | q0 q1 q2 qy
|
||||
// row2: px p2 p1 p0 | q0 q1 q2 qy
|
||||
// row3: px p2 p1 p0 | q0 q1 q2 qy
|
||||
load_u8_8x4(src - 4, stride, &pxq0, &p2q1, &p1q2, &p0qy);
|
||||
|
||||
transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
|
||||
|
||||
pq_rev = vrev64_u32(vreinterpret_u32_u8(p0qy));
|
||||
pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxq0), pq_rev);
|
||||
|
||||
pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q2));
|
||||
p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q1), pq_rev);
|
||||
|
||||
p0q0 = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
|
||||
p1q1 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
|
||||
p2q2 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
|
||||
pxqy = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
|
||||
|
||||
lpf_6_neon(&p2q2, &p1q1, &p0q0, *blimit, *limit, *thresh);
|
||||
|
||||
pq_rev = vrev64_u32(vreinterpret_u32_u8(p0q0));
|
||||
pxqy_p0q0 = vtrn_u32(vreinterpret_u32_u8(pxqy), pq_rev);
|
||||
|
||||
pq_rev = vrev64_u32(vreinterpret_u32_u8(p1q1));
|
||||
p2q2_p1q1 = vtrn_u32(vreinterpret_u32_u8(p2q2), pq_rev);
|
||||
|
||||
p0qy = vreinterpret_u8_u32(vrev64_u32(pxqy_p0q0.val[1]));
|
||||
p1q2 = vreinterpret_u8_u32(vrev64_u32(p2q2_p1q1.val[1]));
|
||||
p2q1 = vreinterpret_u8_u32(p2q2_p1q1.val[0]);
|
||||
pxq0 = vreinterpret_u8_u32(pxqy_p0q0.val[0]);
|
||||
transpose_u8_8x4(&pxq0, &p2q1, &p1q2, &p0qy);
|
||||
|
||||
store_u8_8x4(src - 4, stride, pxq0, p2q1, p1q2, p0qy);
|
||||
}
|
||||
|
||||
void aom_lpf_vertical_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint32x2x2_t p1q0_p0q1, p1q1_p0q0, p1p0_q1q0;
|
||||
uint32x2_t pq_rev;
|
||||
uint8x8_t UNINITIALIZED_IS_SAFE(p1p0), q0q1, p0q0, p1q1;
|
||||
|
||||
// row0: p1 p0 | q0 q1
|
||||
// row1: p1 p0 | q0 q1
|
||||
// row2: p1 p0 | q0 q1
|
||||
// row3: p1 p0 | q0 q1
|
||||
load_u8_4x1(src - 2, &p1p0, 0);
|
||||
load_u8_4x1((src - 2) + 1 * stride, &p1p0, 1);
|
||||
load_u8_4x1((src - 2) + 2 * stride, &q0q1, 0);
|
||||
load_u8_4x1((src - 2) + 3 * stride, &q0q1, 1);
|
||||
|
||||
transpose_u8_4x4(&p1p0, &q0q1);
|
||||
|
||||
p1q0_p0q1 = vtrn_u32(vreinterpret_u32_u8(p1p0), vreinterpret_u32_u8(q0q1));
|
||||
|
||||
pq_rev = vrev64_u32(p1q0_p0q1.val[1]);
|
||||
p1q1_p0q0 = vtrn_u32(p1q0_p0q1.val[0], pq_rev);
|
||||
|
||||
p1q1 = vreinterpret_u8_u32(p1q1_p0q0.val[0]);
|
||||
p0q0 = vreinterpret_u8_u32(p1q1_p0q0.val[1]);
|
||||
|
||||
lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
|
||||
|
||||
p1p0_q1q0 = vtrn_u32(vreinterpret_u32_u8(p1q1), vreinterpret_u32_u8(p0q0));
|
||||
|
||||
p1p0 = vreinterpret_u8_u32(p1p0_q1q0.val[0]);
|
||||
q0q1 = vreinterpret_u8_u32(vrev64_u32(p1p0_q1q0.val[1]));
|
||||
|
||||
transpose_u8_4x4(&p1p0, &q0q1);
|
||||
|
||||
store_u8_4x1(src - 2, p1p0, 0);
|
||||
store_u8_4x1((src - 2) + 1 * stride, q0q1, 0);
|
||||
store_u8_4x1((src - 2) + 2 * stride, p1p0, 1);
|
||||
store_u8_4x1((src - 2) + 3 * stride, q0q1, 1);
|
||||
}
|
||||
|
||||
void aom_lpf_horizontal_14_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint8x8_t p0q0, p1q1, p2q2, p3q3, p4q4, p5q5, UNINITIALIZED_IS_SAFE(p6q6);
|
||||
|
||||
load_u8_4x1(src - 7 * stride, &p6q6, 0);
|
||||
load_u8_4x1(src - 6 * stride, &p5q5, 0);
|
||||
load_u8_4x1(src - 5 * stride, &p4q4, 0);
|
||||
load_u8_4x1(src - 4 * stride, &p3q3, 0);
|
||||
load_u8_4x1(src - 3 * stride, &p2q2, 0);
|
||||
load_u8_4x1(src - 2 * stride, &p1q1, 0);
|
||||
load_u8_4x1(src - 1 * stride, &p0q0, 0);
|
||||
load_u8_4x1(src + 0 * stride, &p0q0, 1);
|
||||
load_u8_4x1(src + 1 * stride, &p1q1, 1);
|
||||
load_u8_4x1(src + 2 * stride, &p2q2, 1);
|
||||
load_u8_4x1(src + 3 * stride, &p3q3, 1);
|
||||
load_u8_4x1(src + 4 * stride, &p4q4, 1);
|
||||
load_u8_4x1(src + 5 * stride, &p5q5, 1);
|
||||
load_u8_4x1(src + 6 * stride, &p6q6, 1);
|
||||
|
||||
lpf_14_neon(&p6q6, &p5q5, &p4q4, &p3q3, &p2q2, &p1q1, &p0q0, *blimit, *limit,
|
||||
*thresh);
|
||||
|
||||
store_u8_4x1(src - 6 * stride, p5q5, 0);
|
||||
store_u8_4x1(src - 5 * stride, p4q4, 0);
|
||||
store_u8_4x1(src - 4 * stride, p3q3, 0);
|
||||
store_u8_4x1(src - 3 * stride, p2q2, 0);
|
||||
store_u8_4x1(src - 2 * stride, p1q1, 0);
|
||||
store_u8_4x1(src - 1 * stride, p0q0, 0);
|
||||
store_u8_4x1(src + 0 * stride, p0q0, 1);
|
||||
store_u8_4x1(src + 1 * stride, p1q1, 1);
|
||||
store_u8_4x1(src + 2 * stride, p2q2, 1);
|
||||
store_u8_4x1(src + 3 * stride, p3q3, 1);
|
||||
store_u8_4x1(src + 4 * stride, p4q4, 1);
|
||||
store_u8_4x1(src + 5 * stride, p5q5, 1);
|
||||
}
|
||||
|
||||
void aom_lpf_horizontal_8_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint8x8_t p0q0, p1q1, p2q2, p3q3;
|
||||
|
@ -698,3 +909,20 @@ void aom_lpf_horizontal_6_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
|||
vst1_lane_u32((uint32_t *)(src + 1 * stride), vreinterpret_u32_u8(p1q1), 1);
|
||||
vst1_lane_u32((uint32_t *)(src + 2 * stride), vreinterpret_u32_u8(p2q2), 1);
|
||||
}
|
||||
|
||||
void aom_lpf_horizontal_4_neon(uint8_t *src, int stride, const uint8_t *blimit,
|
||||
const uint8_t *limit, const uint8_t *thresh) {
|
||||
uint8x8_t p0q0, UNINITIALIZED_IS_SAFE(p1q1);
|
||||
|
||||
load_u8_4x1(src - 2 * stride, &p1q1, 0);
|
||||
load_u8_4x1(src - 1 * stride, &p0q0, 0);
|
||||
load_u8_4x1(src + 0 * stride, &p0q0, 1);
|
||||
load_u8_4x1(src + 1 * stride, &p1q1, 1);
|
||||
|
||||
lpf_4_neon(&p1q1, &p0q0, *blimit, *limit, *thresh);
|
||||
|
||||
store_u8_4x1(src - 2 * stride, p1q1, 0);
|
||||
store_u8_4x1(src - 1 * stride, p0q0, 0);
|
||||
store_u8_4x1(src + 0 * stride, p0q0, 1);
|
||||
store_u8_4x1(src + 1 * stride, p1q1, 1);
|
||||
}
|
||||
|
|
|
@ -8,11 +8,14 @@
|
|||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
|
||||
#include "aom_dsp/bitreader_buffer.h"
|
||||
|
||||
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb) {
|
||||
size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb) {
|
||||
return (rb->bit_offset + 7) >> 3;
|
||||
}
|
||||
|
||||
|
@ -31,6 +34,7 @@ int aom_rb_read_bit(struct aom_read_bit_buffer *rb) {
|
|||
}
|
||||
|
||||
int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
|
||||
assert(bits <= 31);
|
||||
int value = 0, bit;
|
||||
for (bit = bits - 1; bit >= 0; bit--) value |= aom_rb_read_bit(rb) << bit;
|
||||
return value;
|
||||
|
@ -38,6 +42,7 @@ int aom_rb_read_literal(struct aom_read_bit_buffer *rb, int bits) {
|
|||
|
||||
uint32_t aom_rb_read_unsigned_literal(struct aom_read_bit_buffer *rb,
|
||||
int bits) {
|
||||
assert(bits <= 32);
|
||||
uint32_t value = 0;
|
||||
int bit;
|
||||
for (bit = bits - 1; bit >= 0; bit--)
|
||||
|
|
|
@ -31,7 +31,7 @@ struct aom_read_bit_buffer {
|
|||
aom_rb_error_handler error_handler;
|
||||
};
|
||||
|
||||
size_t aom_rb_bytes_read(struct aom_read_bit_buffer *rb);
|
||||
size_t aom_rb_bytes_read(const struct aom_read_bit_buffer *rb);
|
||||
|
||||
int aom_rb_read_bit(struct aom_read_bit_buffer *rb);
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
@ -49,12 +50,14 @@ void aom_wb_overwrite_bit(struct aom_write_bit_buffer *wb, int bit) {
|
|||
}
|
||||
|
||||
void aom_wb_write_literal(struct aom_write_bit_buffer *wb, int data, int bits) {
|
||||
assert(bits <= 31);
|
||||
int bit;
|
||||
for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
|
||||
}
|
||||
|
||||
void aom_wb_write_unsigned_literal(struct aom_write_bit_buffer *wb,
|
||||
uint32_t data, int bits) {
|
||||
assert(bits <= 32);
|
||||
int bit;
|
||||
for (bit = bits - 1; bit >= 0; bit--) aom_wb_write_bit(wb, (data >> bit) & 1);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "aom_dsp/grain_synthesis.h"
|
||||
#include "aom_mem/aom_mem.h"
|
||||
|
||||
|
@ -237,7 +238,7 @@ static int grain_max;
|
|||
|
||||
static uint16_t random_register = 0; // random number generator register
|
||||
|
||||
static void init_arrays(aom_film_grain_t *params, int luma_stride,
|
||||
static void init_arrays(const aom_film_grain_t *params, int luma_stride,
|
||||
int chroma_stride, int ***pred_pos_luma_p,
|
||||
int ***pred_pos_chroma_p, int **luma_grain_block,
|
||||
int **cb_grain_block, int **cr_grain_block,
|
||||
|
@ -331,7 +332,7 @@ static void init_arrays(aom_film_grain_t *params, int luma_stride,
|
|||
(int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
|
||||
}
|
||||
|
||||
static void dealloc_arrays(aom_film_grain_t *params, int ***pred_pos_luma,
|
||||
static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
|
||||
int ***pred_pos_chroma, int **luma_grain_block,
|
||||
int **cb_grain_block, int **cr_grain_block,
|
||||
int **y_line_buf, int **cb_line_buf,
|
||||
|
@ -396,10 +397,14 @@ static void init_random_generator(int luma_line, uint16_t seed) {
|
|||
}
|
||||
|
||||
static void generate_luma_grain_block(
|
||||
aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
|
||||
const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
|
||||
int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
|
||||
int left_pad, int top_pad, int right_pad, int bottom_pad) {
|
||||
if (params->num_y_points == 0) return;
|
||||
if (params->num_y_points == 0) {
|
||||
memset(luma_grain_block, 0,
|
||||
sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
|
||||
return;
|
||||
}
|
||||
|
||||
int bit_depth = params->bit_depth;
|
||||
int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
|
||||
|
@ -431,7 +436,7 @@ static void generate_luma_grain_block(
|
|||
}
|
||||
|
||||
static void generate_chroma_grain_blocks(
|
||||
aom_film_grain_t *params,
|
||||
const aom_film_grain_t *params,
|
||||
// int** pred_pos_luma,
|
||||
int **pred_pos_chroma, int *luma_grain_block, int *cb_grain_block,
|
||||
int *cr_grain_block, int luma_grain_stride, int chroma_block_size_y,
|
||||
|
@ -443,7 +448,7 @@ static void generate_chroma_grain_blocks(
|
|||
int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
|
||||
if (params->num_y_points > 0) ++num_pos_chroma;
|
||||
int rounding_offset = (1 << (params->ar_coeff_shift - 1));
|
||||
int chroma_grain_samples = chroma_block_size_y * chroma_block_size_x;
|
||||
int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
|
||||
|
||||
if (params->num_cb_points || params->chroma_scaling_from_luma) {
|
||||
init_random_generator(7 << 5, params->random_seed);
|
||||
|
@ -455,7 +460,8 @@ static void generate_chroma_grain_blocks(
|
|||
((1 << gauss_sec_shift) >> 1)) >>
|
||||
gauss_sec_shift;
|
||||
} else {
|
||||
memset(cr_grain_block, 0, sizeof(*cr_grain_block) * chroma_grain_samples);
|
||||
memset(cb_grain_block, 0,
|
||||
sizeof(*cb_grain_block) * chroma_grain_block_size);
|
||||
}
|
||||
|
||||
if (params->num_cr_points || params->chroma_scaling_from_luma) {
|
||||
|
@ -468,7 +474,8 @@ static void generate_chroma_grain_blocks(
|
|||
((1 << gauss_sec_shift) >> 1)) >>
|
||||
gauss_sec_shift;
|
||||
} else {
|
||||
memset(cb_grain_block, 0, sizeof(*cb_grain_block) * chroma_grain_samples);
|
||||
memset(cr_grain_block, 0,
|
||||
sizeof(*cr_grain_block) * chroma_grain_block_size);
|
||||
}
|
||||
|
||||
for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
|
||||
|
@ -522,7 +529,7 @@ static void generate_chroma_grain_blocks(
|
|||
}
|
||||
}
|
||||
|
||||
static void init_scaling_function(int scaling_points[][2], int num_points,
|
||||
static void init_scaling_function(const int scaling_points[][2], int num_points,
|
||||
int scaling_lut[]) {
|
||||
if (num_points == 0) return;
|
||||
|
||||
|
@ -559,7 +566,7 @@ static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
|
|||
(bit_depth - 8));
|
||||
}
|
||||
|
||||
static void add_noise_to_block(aom_film_grain_t *params, uint8_t *luma,
|
||||
static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
|
||||
uint8_t *cb, uint8_t *cr, int luma_stride,
|
||||
int chroma_stride, int *luma_grain,
|
||||
int *cb_grain, int *cr_grain,
|
||||
|
@ -675,7 +682,7 @@ static void add_noise_to_block(aom_film_grain_t *params, uint8_t *luma,
|
|||
}
|
||||
|
||||
static void add_noise_to_block_hbd(
|
||||
aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
|
||||
const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
|
||||
int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
|
||||
int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
|
||||
int half_luma_height, int half_luma_width, int bit_depth,
|
||||
|
@ -903,7 +910,7 @@ static void hor_boundary_overlap(int *top_block, int top_stride,
|
|||
}
|
||||
}
|
||||
|
||||
void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
|
||||
void av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
|
||||
aom_image_t *dst) {
|
||||
uint8_t *luma, *cb, *cr;
|
||||
int height, width, luma_stride, chroma_stride;
|
||||
|
@ -950,6 +957,11 @@ void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
|
|||
exit(1);
|
||||
}
|
||||
|
||||
assert(params->bit_depth == src->bit_depth);
|
||||
|
||||
dst->fmt = src->fmt;
|
||||
dst->bit_depth = src->bit_depth;
|
||||
|
||||
dst->r_w = src->r_w;
|
||||
dst->r_h = src->r_h;
|
||||
dst->d_w = src->d_w;
|
||||
|
@ -999,15 +1011,13 @@ void av1_add_film_grain(aom_film_grain_t *params, aom_image_t *src,
|
|||
luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
|
||||
chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;
|
||||
|
||||
params->bit_depth = dst->bit_depth;
|
||||
|
||||
av1_add_film_grain_run(params, luma, cb, cr, height, width, luma_stride,
|
||||
chroma_stride, use_high_bit_depth, chroma_subsamp_y,
|
||||
chroma_subsamp_x, mc_identity);
|
||||
return;
|
||||
}
|
||||
|
||||
void av1_add_film_grain_run(aom_film_grain_t *params, uint8_t *luma,
|
||||
void av1_add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
|
||||
uint8_t *cb, uint8_t *cr, int height, int width,
|
||||
int luma_stride, int chroma_stride,
|
||||
int use_high_bit_depth, int chroma_subsamp_y,
|
||||
|
|
|
@ -72,7 +72,7 @@ typedef struct {
|
|||
|
||||
int clip_to_restricted_range;
|
||||
|
||||
int bit_depth; // video bit depth
|
||||
unsigned int bit_depth; // video bit depth
|
||||
|
||||
int chroma_scaling_from_luma;
|
||||
|
||||
|
@ -94,7 +94,7 @@ typedef struct {
|
|||
* \param[in] luma_stride luma plane stride
|
||||
* \param[in] chroma_stride chroma plane stride
|
||||
*/
|
||||
void av1_add_film_grain_run(aom_film_grain_t *grain_params, uint8_t *luma,
|
||||
void av1_add_film_grain_run(const aom_film_grain_t *grain_params, uint8_t *luma,
|
||||
uint8_t *cb, uint8_t *cr, int height, int width,
|
||||
int luma_stride, int chroma_stride,
|
||||
int use_high_bit_depth, int chroma_subsamp_y,
|
||||
|
@ -106,10 +106,10 @@ void av1_add_film_grain_run(aom_film_grain_t *grain_params, uint8_t *luma,
|
|||
*
|
||||
* \param[in] grain_params Grain parameters
|
||||
* \param[in] src Source image
|
||||
* \param[in] dst Resulting image with grain
|
||||
* \param[out] dst Resulting image with grain
|
||||
*/
|
||||
void av1_add_film_grain(aom_film_grain_t *grain_params, aom_image_t *src,
|
||||
aom_image_t *dst);
|
||||
void av1_add_film_grain(const aom_film_grain_t *grain_params,
|
||||
const aom_image_t *src, aom_image_t *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -1458,3 +1458,189 @@ int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
|
|||
}
|
||||
return init_success;
|
||||
}
|
||||
|
||||
struct aom_denoise_and_model_t {
|
||||
int block_size;
|
||||
int bit_depth;
|
||||
float noise_level;
|
||||
|
||||
// Size of current denoised buffer and flat_block buffer
|
||||
int width;
|
||||
int height;
|
||||
int y_stride;
|
||||
int uv_stride;
|
||||
int num_blocks_w;
|
||||
int num_blocks_h;
|
||||
|
||||
// Buffers for image and noise_psd allocated on the fly
|
||||
float *noise_psd[3];
|
||||
uint8_t *denoised[3];
|
||||
uint8_t *flat_blocks;
|
||||
|
||||
aom_flat_block_finder_t flat_block_finder;
|
||||
aom_noise_model_t noise_model;
|
||||
};
|
||||
|
||||
struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
|
||||
int block_size,
|
||||
float noise_level) {
|
||||
struct aom_denoise_and_model_t *ctx =
|
||||
(struct aom_denoise_and_model_t *)aom_malloc(
|
||||
sizeof(struct aom_denoise_and_model_t));
|
||||
if (!ctx) {
|
||||
fprintf(stderr, "Unable to allocate denoise_and_model struct\n");
|
||||
return NULL;
|
||||
}
|
||||
memset(ctx, 0, sizeof(*ctx));
|
||||
|
||||
ctx->block_size = block_size;
|
||||
ctx->noise_level = noise_level;
|
||||
ctx->bit_depth = bit_depth;
|
||||
|
||||
ctx->noise_psd[0] =
|
||||
aom_malloc(sizeof(*ctx->noise_psd[0]) * block_size * block_size);
|
||||
ctx->noise_psd[1] =
|
||||
aom_malloc(sizeof(*ctx->noise_psd[1]) * block_size * block_size);
|
||||
ctx->noise_psd[2] =
|
||||
aom_malloc(sizeof(*ctx->noise_psd[2]) * block_size * block_size);
|
||||
if (!ctx->noise_psd[0] || !ctx->noise_psd[1] || !ctx->noise_psd[2]) {
|
||||
fprintf(stderr, "Unable to allocate noise PSD buffers\n");
|
||||
aom_denoise_and_model_free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
void aom_denoise_and_model_free(struct aom_denoise_and_model_t *ctx) {
|
||||
aom_free(ctx->flat_blocks);
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
aom_free(ctx->denoised[i]);
|
||||
aom_free(ctx->noise_psd[i]);
|
||||
}
|
||||
aom_noise_model_free(&ctx->noise_model);
|
||||
aom_flat_block_finder_free(&ctx->flat_block_finder);
|
||||
aom_free(ctx);
|
||||
}
|
||||
|
||||
// Makes sure the per-frame buffers and helper state in ctx match the geometry
// of sd, rebuilding them when the luma size or either stride has changed.
//
// Returns 1 when ctx is ready for use with sd, and 0 (after printing a message
// to stderr) when an allocation or initialisation fails. The cached geometry
// is only recorded once everything has succeeded, so a failed attempt is
// retried on the next call instead of being mistaken for a ready context.
static int denoise_and_model_realloc_if_necessary(
    struct aom_denoise_and_model_t *ctx, YV12_BUFFER_CONFIG *sd) {
  if (ctx->width == sd->y_width && ctx->height == sd->y_height &&
      ctx->y_stride == sd->y_stride && ctx->uv_stride == sd->uv_stride)
    return 1;
  const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
  const int block_size = ctx->block_size;

  // Invalidate the cached geometry while the buffers are being rebuilt. If
  // any step below fails, the early-return test above must not match on a
  // later call.
  ctx->width = 0;
  ctx->height = 0;
  ctx->y_stride = 0;
  ctx->uv_stride = 0;

  for (int i = 0; i < 3; ++i) {
    aom_free(ctx->denoised[i]);
    ctx->denoised[i] = NULL;
  }
  aom_free(ctx->flat_blocks);
  ctx->flat_blocks = NULL;

  // High bit depth buffers use two bytes per sample, hence the shift.
  ctx->denoised[0] = aom_malloc((sd->y_stride * sd->y_height) << use_highbd);
  ctx->denoised[1] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  ctx->denoised[2] = aom_malloc((sd->uv_stride * sd->uv_height) << use_highbd);
  if (!ctx->denoised[0] || !ctx->denoised[1] || !ctx->denoised[2]) {
    fprintf(stderr, "Unable to allocate denoise buffers\n");
    return 0;
  }

  // One flat-block flag per block_size x block_size tile, rounding up.
  ctx->num_blocks_w = (sd->y_width + ctx->block_size - 1) / ctx->block_size;
  ctx->num_blocks_h = (sd->y_height + ctx->block_size - 1) / ctx->block_size;
  ctx->flat_blocks = aom_malloc(ctx->num_blocks_w * ctx->num_blocks_h);
  if (!ctx->flat_blocks) {
    fprintf(stderr, "Unable to allocate flat_blocks buffer\n");
    return 0;
  }

  aom_flat_block_finder_free(&ctx->flat_block_finder);
  if (!aom_flat_block_finder_init(&ctx->flat_block_finder, ctx->block_size,
                                  ctx->bit_depth, use_highbd)) {
    fprintf(stderr, "Unable to init flat block finder\n");
    return 0;
  }

  const aom_noise_model_params_t params = { AOM_NOISE_SHAPE_SQUARE, 3,
                                            ctx->bit_depth, use_highbd };
  aom_noise_model_free(&ctx->noise_model);
  if (!aom_noise_model_init(&ctx->noise_model, params)) {
    fprintf(stderr, "Unable to init noise model\n");
    return 0;
  }

  // Simply use a flat PSD (although we could use the flat blocks to estimate
  // an actual noise PSD).
  const float y_noise_level =
      aom_noise_psd_get_default_value(ctx->block_size, ctx->noise_level);
  const float uv_noise_level = aom_noise_psd_get_default_value(
      ctx->block_size >> sd->subsampling_x, ctx->noise_level);
  for (int i = 0; i < block_size * block_size; ++i) {
    ctx->noise_psd[0][i] = y_noise_level;
    ctx->noise_psd[1][i] = ctx->noise_psd[2][i] = uv_noise_level;
  }

  // Everything is in place: remember the geometry these buffers were built
  // for.
  ctx->width = sd->y_width;
  ctx->height = sd->y_height;
  ctx->y_stride = sd->y_stride;
  ctx->uv_stride = sd->uv_stride;
  return 1;
}
|
||||
|
||||
int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
|
||||
YV12_BUFFER_CONFIG *sd,
|
||||
aom_film_grain_t *film_grain) {
|
||||
const int block_size = ctx->block_size;
|
||||
const int use_highbd = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0;
|
||||
uint8_t *raw_data[3] = {
|
||||
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->y_buffer) : sd->y_buffer,
|
||||
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->u_buffer) : sd->u_buffer,
|
||||
use_highbd ? (uint8_t *)CONVERT_TO_SHORTPTR(sd->v_buffer) : sd->v_buffer,
|
||||
};
|
||||
const uint8_t *const data[3] = { raw_data[0], raw_data[1], raw_data[2] };
|
||||
int strides[3] = { sd->y_stride, sd->uv_stride, sd->uv_stride };
|
||||
int chroma_sub_log2[2] = { sd->subsampling_x, sd->subsampling_y };
|
||||
|
||||
if (!denoise_and_model_realloc_if_necessary(ctx, sd)) {
|
||||
fprintf(stderr, "Unable to realloc buffers\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
aom_flat_block_finder_run(&ctx->flat_block_finder, data[0], sd->y_width,
|
||||
sd->y_height, strides[0], ctx->flat_blocks);
|
||||
|
||||
if (!aom_wiener_denoise_2d(data, ctx->denoised, sd->y_width, sd->y_height,
|
||||
strides, chroma_sub_log2, ctx->noise_psd,
|
||||
block_size, ctx->bit_depth, use_highbd)) {
|
||||
fprintf(stderr, "Unable to denoise image\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
const aom_noise_status_t status = aom_noise_model_update(
|
||||
&ctx->noise_model, data, (const uint8_t *const *)ctx->denoised,
|
||||
sd->y_width, sd->y_height, strides, chroma_sub_log2, ctx->flat_blocks,
|
||||
block_size);
|
||||
int have_noise_estimate = 0;
|
||||
if (status == AOM_NOISE_STATUS_OK) {
|
||||
have_noise_estimate = 1;
|
||||
} else if (status == AOM_NOISE_STATUS_DIFFERENT_NOISE_TYPE) {
|
||||
aom_noise_model_save_latest(&ctx->noise_model);
|
||||
have_noise_estimate = 1;
|
||||
} else {
|
||||
// Unable to update noise model; proceed if we have a previous estimate.
|
||||
have_noise_estimate =
|
||||
(ctx->noise_model.combined_state[0].strength_solver.num_equations > 0);
|
||||
}
|
||||
|
||||
film_grain->apply_grain = 0;
|
||||
if (have_noise_estimate) {
|
||||
if (!aom_noise_model_get_grain_parameters(&ctx->noise_model, film_grain)) {
|
||||
fprintf(stderr, "Unable to get grain parameters.\n");
|
||||
return 0;
|
||||
}
|
||||
if (!film_grain->random_seed) {
|
||||
film_grain->random_seed = 1071;
|
||||
}
|
||||
memcpy(raw_data[0], ctx->denoised[0],
|
||||
(strides[0] * sd->y_height) << use_highbd);
|
||||
memcpy(raw_data[1], ctx->denoised[1],
|
||||
(strides[1] * sd->uv_height) << use_highbd);
|
||||
memcpy(raw_data[2], ctx->denoised[2],
|
||||
(strides[2] * sd->uv_height) << use_highbd);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ extern "C" {
|
|||
|
||||
#include <stdint.h>
|
||||
#include "aom_dsp/grain_synthesis.h"
|
||||
#include "aom_scale/yv12config.h"
|
||||
|
||||
/*!\brief Wrapper of data required to represent linear system of eqns and soln.
|
||||
*/
|
||||
|
@ -280,6 +281,42 @@ int aom_wiener_denoise_2d(const uint8_t *const data[3], uint8_t *denoised[3],
|
|||
int w, int h, int stride[3], int chroma_sub_log2[2],
|
||||
float *noise_psd[3], int block_size, int bit_depth,
|
||||
int use_highbd);
|
||||
|
||||
struct aom_denoise_and_model_t;
|
||||
|
||||
/*!\brief Denoise the buffer and model the residual noise.
|
||||
*
|
||||
* This is meant to be called sequentially on input frames. The input buffer
|
||||
* is denoised and the residual noise is modelled. The current noise estimate
|
||||
* is populated in film_grain. Returns true on success. The grain.apply_grain
|
||||
* parameter will be true when the input buffer was successfully denoised and
|
||||
* grain was modelled. Returns false on error.
|
||||
*
|
||||
* \param[in] ctx Struct allocated with aom_denoise_and_model_alloc
|
||||
* that holds some buffers for denoising and the current
|
||||
* noise estimate.
|
||||
* \param[in/out] buf The raw input buffer to be denoised.
|
||||
* \param[out] grain Output film grain parameters
|
||||
*/
|
||||
int aom_denoise_and_model_run(struct aom_denoise_and_model_t *ctx,
|
||||
YV12_BUFFER_CONFIG *buf, aom_film_grain_t *grain);
|
||||
|
||||
/*!\brief Allocates a context that can be used for denoising and noise modeling.
|
||||
*
|
||||
* \param[in] bit_depth Bit depth of buffers this will be run on.
|
||||
* \param[in] block_size Block size for noise modeling and flat block
|
||||
* estimation
|
||||
* \param[in] noise_level The noise_level (2.5 for moderate noise, and 5 for
|
||||
* higher levels of noise)
|
||||
*/
|
||||
struct aom_denoise_and_model_t *aom_denoise_and_model_alloc(int bit_depth,
|
||||
int block_size,
|
||||
float noise_level);
|
||||
|
||||
/*!\brief Frees the denoise context allocated with aom_denoise_and_model_alloc
|
||||
*/
|
||||
void aom_denoise_and_model_free(struct aom_denoise_and_model_t *denoise_model);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif // __cplusplus
|
||||
|
|
|
@ -289,6 +289,15 @@ SIMD_INLINE v256 v256_shr_u32(v256 a, unsigned int c) {
|
|||
SIMD_INLINE v256 v256_shr_s32(v256 a, unsigned int c) {
|
||||
return c_v256_shr_s32(a, c);
|
||||
}
|
||||
SIMD_INLINE v256 v256_shl_64(v256 a, unsigned int c) {
|
||||
return c_v256_shl_64(a, c);
|
||||
}
|
||||
SIMD_INLINE v256 v256_shr_u64(v256 a, unsigned int c) {
|
||||
return c_v256_shr_u64(a, c);
|
||||
}
|
||||
SIMD_INLINE v256 v256_shr_s64(v256 a, unsigned int c) {
|
||||
return c_v256_shr_s64(a, c);
|
||||
}
|
||||
|
||||
SIMD_INLINE v256 v256_shr_n_byte(v256 a, unsigned int n) {
|
||||
return c_v256_shr_n_byte(a, n);
|
||||
|
|
|
@ -386,7 +386,7 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
|
|||
}
|
||||
}
|
||||
|
||||
const InterpFilterParams filter =
|
||||
const InterpFilterParams *filter =
|
||||
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
|
||||
|
||||
if (!subpel_x_q3 && !subpel_y_q3) {
|
||||
|
@ -413,12 +413,12 @@ void aom_upsampled_pred_c(MACROBLOCKD *xd, const AV1_COMMON *const cm,
|
|||
const int16_t *const kernel_y =
|
||||
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
|
||||
const int intermediate_height =
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
|
||||
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
|
||||
aom_convolve8_horiz(ref - ref_stride * ((filter.taps >> 1) - 1), ref_stride,
|
||||
temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
|
||||
intermediate_height);
|
||||
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
|
||||
aom_convolve8_horiz(ref - ref_stride * ((filter->taps >> 1) - 1),
|
||||
ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
|
||||
width, intermediate_height);
|
||||
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
|
||||
MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
|
||||
width, height);
|
||||
}
|
||||
|
@ -974,7 +974,7 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
|
|||
}
|
||||
}
|
||||
|
||||
const InterpFilterParams filter =
|
||||
const InterpFilterParams *filter =
|
||||
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
|
||||
|
||||
if (!subpel_x_q3 && !subpel_y_q3) {
|
||||
|
@ -1004,14 +1004,14 @@ void aom_highbd_upsampled_pred_c(MACROBLOCKD *xd,
|
|||
const int16_t *const kernel_y =
|
||||
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
|
||||
const int intermediate_height =
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
|
||||
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
|
||||
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1),
|
||||
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
|
||||
ref_stride, CONVERT_TO_BYTEPTR(temp),
|
||||
MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
|
||||
intermediate_height, bd);
|
||||
aom_highbd_convolve8_vert(
|
||||
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
|
||||
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
|
||||
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
|
||||
16, width, height, bd);
|
||||
}
|
||||
|
@ -1185,29 +1185,18 @@ void aom_highbd_comp_mask_pred_c(uint16_t *comp_pred, const uint8_t *pred8,
|
|||
}
|
||||
}
|
||||
|
||||
void aom_highbd_comp_mask_upsampled_pred_c(
|
||||
void aom_highbd_comp_mask_upsampled_pred(
|
||||
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
|
||||
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
|
||||
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
|
||||
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
|
||||
int bd) {
|
||||
int i, j;
|
||||
|
||||
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
|
||||
aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, comp_pred, width,
|
||||
height, subpel_x_q3, subpel_y_q3, ref8, ref_stride,
|
||||
bd);
|
||||
for (i = 0; i < height; ++i) {
|
||||
for (j = 0; j < width; ++j) {
|
||||
if (!invert_mask)
|
||||
comp_pred[j] = AOM_BLEND_A64(mask[j], comp_pred[j], pred[j]);
|
||||
else
|
||||
comp_pred[j] = AOM_BLEND_A64(mask[j], pred[j], comp_pred[j]);
|
||||
}
|
||||
comp_pred += width;
|
||||
pred += width;
|
||||
mask += mask_stride;
|
||||
}
|
||||
aom_highbd_comp_mask_pred(comp_pred, pred8, width, height,
|
||||
CONVERT_TO_BYTEPTR(comp_pred), width, mask,
|
||||
mask_stride, invert_mask);
|
||||
}
|
||||
|
||||
#define HIGHBD_MASK_SUBPIX_VAR(W, H) \
|
||||
|
|
|
@ -76,6 +76,13 @@ void aom_comp_mask_upsampled_pred(
|
|||
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref,
|
||||
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask);
|
||||
|
||||
void aom_highbd_comp_mask_upsampled_pred(
|
||||
MACROBLOCKD *xd, const struct AV1Common *const cm, int mi_row, int mi_col,
|
||||
const MV *const mv, uint16_t *comp_pred, const uint8_t *pred8, int width,
|
||||
int height, int subpel_x_q3, int subpel_y_q3, const uint8_t *ref8,
|
||||
int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask,
|
||||
int bd);
|
||||
|
||||
typedef unsigned int (*aom_obmc_sad_fn_t)(const uint8_t *pred, int pred_stride,
|
||||
const int32_t *wsrc,
|
||||
const int32_t *msk);
|
||||
|
|
|
@ -41,25 +41,163 @@
|
|||
#define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x)
|
||||
#endif // __clang__
|
||||
|
||||
static void aom_filter_block1d16_h8_avx2(
|
||||
// Stores the lowest 32 bits of each 128-bit lane of *a to two output rows:
// the low lane goes to output_ptr, the high lane to output_ptr + stride.
//
// NOTE(review): output_ptr is declared const but is written through after a
// cast to uint32_t *. The stores are plain uint32_t writes, so alignment and
// aliasing of the destination are presumably the caller's responsibility --
// confirm against the call sites.
static INLINE void xx_storeu2_epi32(const uint8_t *output_ptr,
                                    const ptrdiff_t stride, const __m256i *a) {
  uint32_t *const row0 = (uint32_t *)(output_ptr);
  uint32_t *const row1 = (uint32_t *)(output_ptr + stride);

  // Low lane -> first row.
  const __m128i lane_lo = _mm256_castsi256_si128(*a);
  *row0 = _mm_cvtsi128_si32(lane_lo);

  // High lane -> second row.
  const __m128i lane_hi = _mm256_extracti128_si256(*a, 1);
  *row1 = _mm_cvtsi128_si32(lane_hi);
}
|
||||
|
||||
// Builds a 256-bit value from two 64-bit loads: the 8 bytes at lo land in the
// low 128-bit lane and the 8 bytes at hi in the high 128-bit lane.
// _mm_loadl_epi64 zero-fills the upper 64 bits of each lane.
static INLINE __m256i xx_loadu2_epi64(const void *hi, const void *lo) {
  const __m128i lane_lo = _mm_loadl_epi64((const __m128i *)(lo));
  const __m128i lane_hi = _mm_loadl_epi64((const __m128i *)(hi));
  return _mm256_inserti128_si256(_mm256_castsi128_si256(lane_lo), lane_hi, 1);
}
|
||||
|
||||
// Stores the lowest 64 bits of each 128-bit lane of *a to two output rows:
// the low lane goes to output_ptr, the high lane to output_ptr + stride.
//
// NOTE(review): output_ptr is declared const but is written through after a
// cast to __m128i *.
static INLINE void xx_storeu2_epi64(const uint8_t *output_ptr,
                                    const ptrdiff_t stride, const __m256i *a) {
  __m128i *const row0 = (__m128i *)output_ptr;
  __m128i *const row1 = (__m128i *)(output_ptr + stride);

  // Low lane -> first row.
  const __m128i lane_lo = _mm256_castsi256_si128(*a);
  _mm_storel_epi64(row0, lane_lo);

  // High lane -> second row.
  const __m128i lane_hi = _mm256_extractf128_si256(*a, 1);
  _mm_storel_epi64(row1, lane_hi);
}
|
||||
|
||||
// Builds a 256-bit value from two unaligned 128-bit loads: the 16 bytes at lo
// form the low lane and the 16 bytes at hi form the high lane.
static INLINE __m256i xx_loadu2_mi128(const void *hi, const void *lo) {
  const __m128i lane_lo = _mm_loadu_si128((const __m128i *)(lo));
  const __m128i lane_hi = _mm_loadu_si128((const __m128i *)(hi));
  return _mm256_inserti128_si256(_mm256_castsi128_si256(lane_lo), lane_hi, 1);
}
|
||||
|
||||
// Stores the two 128-bit lanes of *a to two output rows: the low lane goes to
// output_ptr, the high lane to output_ptr + stride.
//
// Uses _mm_store_si128, the aligned store, so both destination addresses are
// expected to be 16-byte aligned.
//
// NOTE(review): output_ptr is declared const but is written through after a
// cast to __m128i *.
static INLINE void xx_store2_mi128(const uint8_t *output_ptr,
                                   const ptrdiff_t stride, const __m256i *a) {
  __m128i *const row0 = (__m128i *)output_ptr;
  __m128i *const row1 = (__m128i *)(output_ptr + stride);

  // Low lane -> first row.
  const __m128i lane_lo = _mm256_castsi256_si128(*a);
  _mm_store_si128(row0, lane_lo);

  // High lane -> second row.
  const __m128i lane_hi = _mm256_extractf128_si256(*a, 1);
  _mm_store_si128(row1, lane_hi);
}
|
||||
|
||||
static void aom_filter_block1d4_h8_avx2(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
|
||||
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
__m128i filtersReg;
|
||||
__m256i addFilterReg64, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
|
||||
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
|
||||
__m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
|
||||
__m256i srcReg32b1, srcReg32b2, filtersReg32;
|
||||
__m256i addFilterReg32, filt1Reg, filt2Reg;
|
||||
__m256i firstFilters, secondFilters;
|
||||
__m256i srcRegFilt32b1_1, srcRegFilt32b2;
|
||||
__m256i srcReg32b1;
|
||||
unsigned int i;
|
||||
ptrdiff_t src_stride, dst_stride;
|
||||
|
||||
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
|
||||
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
|
||||
src_ptr -= 3;
|
||||
addFilterReg32 = _mm256_set1_epi16(32);
|
||||
filtersReg = _mm_loadu_si128((const __m128i *)filter);
|
||||
filtersReg = _mm_srai_epi16(filtersReg, 1);
|
||||
// converting the 16 bit (short) to 8 bit (byte) and have the same data
|
||||
// in both lanes of 128 bit register.
|
||||
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
|
||||
// have the same data in both lanes of a 256 bit register
|
||||
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
|
||||
// duplicate only the first 32 bits
|
||||
firstFilters = _mm256_shuffle_epi32(filtersReg32, 0);
|
||||
// duplicate only the second 32 bits
|
||||
secondFilters = _mm256_shuffle_epi32(filtersReg32, 0x55);
|
||||
|
||||
filt1Reg = _mm256_load_si256((__m256i const *)filt_d4_global_avx2);
|
||||
filt2Reg = _mm256_load_si256((__m256i const *)(filt_d4_global_avx2 + 32));
|
||||
|
||||
// multiply the size of the source and destination stride by two
|
||||
src_stride = src_pixels_per_line << 1;
|
||||
dst_stride = output_pitch << 1;
|
||||
for (i = output_height; i > 1; i -= 2) {
|
||||
// load the 2 strides of source
|
||||
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
|
||||
|
||||
// multiply 4 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
|
||||
|
||||
// multiply 4 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, secondFilters);
|
||||
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
|
||||
|
||||
srcRegFilt32b1_1 =
|
||||
_mm256_hadds_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
|
||||
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
|
||||
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve result
|
||||
srcRegFilt32b1_1 =
|
||||
_mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
|
||||
|
||||
src_ptr += src_stride;
|
||||
|
||||
xx_storeu2_epi32(output_ptr, output_pitch, &srcRegFilt32b1_1);
|
||||
output_ptr += dst_stride;
|
||||
}
|
||||
|
||||
// if the number of strides is odd.
|
||||
// process only 4 bytes
|
||||
if (i > 0) {
|
||||
__m128i srcReg1, srcRegFilt1_1;
|
||||
__m128i srcRegFilt2;
|
||||
|
||||
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
|
||||
|
||||
// multiply 4 adjacent elements with the filter and add the result
|
||||
srcRegFilt1_1 =
|
||||
_mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
|
||||
|
||||
// multiply 4 adjacent elements with the filter and add the result
|
||||
srcRegFilt2 =
|
||||
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(secondFilters));
|
||||
|
||||
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
|
||||
srcRegFilt1_1 = _mm_hadds_epi16(srcRegFilt1_1, _mm_setzero_si128());
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt1_1 =
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve result
|
||||
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
|
||||
|
||||
// save 4 bytes
|
||||
*((uint32_t *)(output_ptr)) = _mm_cvtsi128_si32(srcRegFilt1_1);
|
||||
}
|
||||
}
|
||||
|
||||
static void aom_filter_block1d8_h8_avx2(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
|
||||
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
__m128i filtersReg;
|
||||
__m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
|
||||
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
|
||||
__m256i srcRegFilt32b1_1, srcRegFilt32b2, srcRegFilt32b3;
|
||||
__m256i srcReg32b1;
|
||||
unsigned int i;
|
||||
ptrdiff_t src_stride, dst_stride;
|
||||
src_ptr -= 3;
|
||||
addFilterReg32 = _mm256_set1_epi16(32);
|
||||
filtersReg = _mm_loadu_si128((const __m128i *)filter);
|
||||
filtersReg = _mm_srai_epi16(filtersReg, 1);
|
||||
// converting the 16 bit (short) to 8 bit (byte) and have the same data
|
||||
// in both lanes of 128 bit register.
|
||||
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
|
||||
// have the same data in both lanes of a 256 bit register
|
||||
const __m256i filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
|
||||
// duplicate only the first 16 bits (first and second byte)
|
||||
// across 256 bit register
|
||||
|
@ -74,22 +212,17 @@ static void aom_filter_block1d16_h8_avx2(
|
|||
// across 256 bit register
|
||||
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
|
||||
|
||||
filt1Reg = _mm256_load_si256((__m256i const *)filt1_global_avx2);
|
||||
filt2Reg = _mm256_load_si256((__m256i const *)filt2_global_avx2);
|
||||
filt3Reg = _mm256_load_si256((__m256i const *)filt3_global_avx2);
|
||||
filt4Reg = _mm256_load_si256((__m256i const *)filt4_global_avx2);
|
||||
filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
|
||||
filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
|
||||
filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
|
||||
filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
|
||||
|
||||
// multiply the size of the source and destination stride by two
|
||||
src_stride = src_pixels_per_line << 1;
|
||||
dst_stride = output_pitch << 1;
|
||||
for (i = output_height; i > 1; i -= 2) {
|
||||
// load the 2 strides of source
|
||||
srcReg32b1 =
|
||||
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr - 3)));
|
||||
srcReg32b1 = _mm256_inserti128_si256(
|
||||
srcReg32b1,
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line - 3)),
|
||||
1);
|
||||
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
|
||||
|
@ -110,80 +243,31 @@ static void aom_filter_block1d16_h8_avx2(
|
|||
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(
|
||||
srcRegFilt32b1_1, _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
|
||||
__m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
|
||||
|
||||
// reading 2 strides of the next 16 bytes
|
||||
// (part of it was being read by earlier read)
|
||||
srcReg32b2 =
|
||||
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr + 5)));
|
||||
srcReg32b2 = _mm256_inserti128_si256(
|
||||
srcReg32b2,
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line + 5)),
|
||||
1);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(
|
||||
srcRegFilt32b1_1, _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(
|
||||
srcRegFilt32b2_1, _mm256_min_epi16(srcRegFilt32b3, srcRegFilt32b2));
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(
|
||||
srcRegFilt32b2_1, _mm256_max_epi16(srcRegFilt32b3, srcRegFilt32b2));
|
||||
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg64);
|
||||
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg64);
|
||||
|
||||
// shift by 7 bit each 16 bit
|
||||
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 7);
|
||||
srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 7);
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
|
||||
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
// result
|
||||
srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
|
||||
// convolve result and the second lane contain the second convolve result
|
||||
srcRegFilt32b1_1 =
|
||||
_mm256_packus_epi16(srcRegFilt32b1_1, _mm256_setzero_si256());
|
||||
|
||||
src_ptr += src_stride;
|
||||
|
||||
// save 16 bytes
|
||||
_mm_store_si128((__m128i *)output_ptr,
|
||||
_mm256_castsi256_si128(srcRegFilt32b1_1));
|
||||
|
||||
// save the next 16 bits
|
||||
_mm_store_si128((__m128i *)(output_ptr + output_pitch),
|
||||
_mm256_extractf128_si256(srcRegFilt32b1_1, 1));
|
||||
xx_storeu2_epi64(output_ptr, output_pitch, &srcRegFilt32b1_1);
|
||||
output_ptr += dst_stride;
|
||||
}
|
||||
|
||||
// if the number of strides is odd.
|
||||
// process only 16 bytes
|
||||
// process only 8 bytes
|
||||
if (i > 0) {
|
||||
__m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
|
||||
__m128i srcReg1, srcRegFilt1_1;
|
||||
__m128i srcRegFilt2, srcRegFilt3;
|
||||
|
||||
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
|
||||
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
|
||||
|
@ -210,15 +294,172 @@ static void aom_filter_block1d16_h8_avx2(
|
|||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1_1 =
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm_min_epi16(srcRegFilt3, srcRegFilt2));
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
|
||||
|
||||
// reading the next 16 bytes
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt1_1 =
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
// result
|
||||
srcRegFilt1_1 = _mm_packus_epi16(srcRegFilt1_1, _mm_setzero_si128());
|
||||
|
||||
// save 8 bytes
|
||||
_mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1_1);
|
||||
}
|
||||
}
|
||||
|
||||
static void aom_filter_block1d16_h8_avx2(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr,
|
||||
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
__m128i filtersReg;
|
||||
__m256i addFilterReg32, filt1Reg, filt2Reg, filt3Reg, filt4Reg;
|
||||
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
|
||||
__m256i srcRegFilt32b1_1, srcRegFilt32b2_1, srcRegFilt32b2, srcRegFilt32b3;
|
||||
__m256i srcReg32b1, srcReg32b2, filtersReg32;
|
||||
unsigned int i;
|
||||
ptrdiff_t src_stride, dst_stride;
|
||||
src_ptr -= 3;
|
||||
addFilterReg32 = _mm256_set1_epi16(32);
|
||||
filtersReg = _mm_loadu_si128((const __m128i *)filter);
|
||||
filtersReg = _mm_srai_epi16(filtersReg, 1);
|
||||
// converting the 16 bit (short) to 8 bit (byte) and have the same data
|
||||
// in both lanes of 128 bit register.
|
||||
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
|
||||
// have the same data in both lanes of a 256 bit register
|
||||
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
|
||||
// duplicate only the first 16 bits (first and second byte)
|
||||
// across 256 bit register
|
||||
firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
|
||||
// duplicate only the second 16 bits (third and fourth byte)
|
||||
// across 256 bit register
|
||||
secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
|
||||
// duplicate only the third 16 bits (fifth and sixth byte)
|
||||
// across 256 bit register
|
||||
thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
|
||||
// duplicate only the fourth 16 bits (seventh and eighth byte)
|
||||
// across 256 bit register
|
||||
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
|
||||
|
||||
filt1Reg = _mm256_load_si256((__m256i const *)filt_global_avx2);
|
||||
filt2Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32));
|
||||
filt3Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 2));
|
||||
filt4Reg = _mm256_load_si256((__m256i const *)(filt_global_avx2 + 32 * 3));
|
||||
|
||||
// multiply the size of the source and destination stride by two
|
||||
src_stride = src_pixels_per_line << 1;
|
||||
dst_stride = output_pitch << 1;
|
||||
for (i = output_height; i > 1; i -= 2) {
|
||||
// load the 2 strides of source
|
||||
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pixels_per_line, src_ptr);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b1_1 = _mm256_shuffle_epi8(srcReg32b1, filt1Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt4Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b1_1 = _mm256_maddubs_epi16(srcRegFilt32b1_1, firstFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, srcRegFilt32b2);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b1, filt2Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b1, filt3Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
|
||||
|
||||
__m256i sum23 = _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2);
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, sum23);
|
||||
|
||||
// reading 2 strides of the next 16 bytes
|
||||
// (part of it was being read by earlier read)
|
||||
srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 5));
|
||||
srcReg32b2 =
|
||||
xx_loadu2_mi128(src_ptr + src_pixels_per_line + 8, src_ptr + 8);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b2_1 = _mm256_shuffle_epi8(srcReg32b2, filt1Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt4Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b2_1 = _mm256_maddubs_epi16(srcRegFilt32b2_1, firstFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, forthFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, srcRegFilt32b2);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt32b3 = _mm256_shuffle_epi8(srcReg32b2, filt2Reg);
|
||||
srcRegFilt32b2 = _mm256_shuffle_epi8(srcReg32b2, filt3Reg);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt32b3 = _mm256_maddubs_epi16(srcRegFilt32b3, secondFilters);
|
||||
srcRegFilt32b2 = _mm256_maddubs_epi16(srcRegFilt32b2, thirdFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(
|
||||
srcRegFilt32b2_1, _mm256_adds_epi16(srcRegFilt32b3, srcRegFilt32b2));
|
||||
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt32b1_1 = _mm256_adds_epi16(srcRegFilt32b1_1, addFilterReg32);
|
||||
srcRegFilt32b2_1 = _mm256_adds_epi16(srcRegFilt32b2_1, addFilterReg32);
|
||||
srcRegFilt32b1_1 = _mm256_srai_epi16(srcRegFilt32b1_1, 6);
|
||||
srcRegFilt32b2_1 = _mm256_srai_epi16(srcRegFilt32b2_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve result
|
||||
srcRegFilt32b1_1 = _mm256_packus_epi16(srcRegFilt32b1_1, srcRegFilt32b2_1);
|
||||
|
||||
src_ptr += src_stride;
|
||||
|
||||
xx_store2_mi128(output_ptr, output_pitch, &srcRegFilt32b1_1);
|
||||
output_ptr += dst_stride;
|
||||
}
|
||||
|
||||
// if the number of strides is odd.
|
||||
// process only 16 bytes
|
||||
if (i > 0) {
|
||||
__m128i srcReg1, srcReg2, srcRegFilt1_1, srcRegFilt2_1;
|
||||
__m128i srcRegFilt2, srcRegFilt3;
|
||||
|
||||
srcReg1 = _mm_loadu_si128((const __m128i *)(src_ptr));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt1_1 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt1Reg));
|
||||
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt4Reg));
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt1_1 =
|
||||
_mm_maddubs_epi16(srcRegFilt1_1, _mm256_castsi256_si128(firstFilters));
|
||||
srcRegFilt2 =
|
||||
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(forthFilters));
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1_1 = _mm_adds_epi16(srcRegFilt1_1, srcRegFilt2);
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt3 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt2Reg));
|
||||
srcRegFilt2 = _mm_shuffle_epi8(srcReg1, _mm256_castsi256_si128(filt3Reg));
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt3 =
|
||||
_mm_maddubs_epi16(srcRegFilt3, _mm256_castsi256_si128(secondFilters));
|
||||
srcRegFilt2 =
|
||||
_mm_maddubs_epi16(srcRegFilt2, _mm256_castsi256_si128(thirdFilters));
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1_1 =
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm_max_epi16(srcRegFilt3, srcRegFilt2));
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
|
||||
|
||||
// reading the next 16 bytes
|
||||
// (part of it was being read by earlier read)
|
||||
srcReg2 = _mm_loadu_si128((const __m128i *)(src_ptr + 8));
|
||||
|
||||
// filter the source buffer
|
||||
srcRegFilt2_1 = _mm_shuffle_epi8(srcReg2, _mm256_castsi256_si128(filt1Reg));
|
||||
|
@ -245,19 +486,16 @@ static void aom_filter_block1d16_h8_avx2(
|
|||
|
||||
// add and saturate the results together
|
||||
srcRegFilt2_1 =
|
||||
_mm_adds_epi16(srcRegFilt2_1, _mm_min_epi16(srcRegFilt3, srcRegFilt2));
|
||||
srcRegFilt2_1 =
|
||||
_mm_adds_epi16(srcRegFilt2_1, _mm_max_epi16(srcRegFilt3, srcRegFilt2));
|
||||
_mm_adds_epi16(srcRegFilt2_1, _mm_adds_epi16(srcRegFilt3, srcRegFilt2));
|
||||
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt1_1 =
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg64));
|
||||
_mm_adds_epi16(srcRegFilt1_1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 6);
|
||||
|
||||
srcRegFilt2_1 =
|
||||
_mm_adds_epi16(srcRegFilt2_1, _mm256_castsi256_si128(addFilterReg64));
|
||||
|
||||
// shift by 7 bit each 16 bit
|
||||
srcRegFilt1_1 = _mm_srai_epi16(srcRegFilt1_1, 7);
|
||||
srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 7);
|
||||
_mm_adds_epi16(srcRegFilt2_1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt2_1 = _mm_srai_epi16(srcRegFilt2_1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
|
@ -269,11 +507,11 @@ static void aom_filter_block1d16_h8_avx2(
|
|||
}
|
||||
}
|
||||
|
||||
static void aom_filter_block1d16_v8_avx2(
|
||||
static void aom_filter_block1d8_v8_avx2(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
|
||||
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
__m128i filtersReg;
|
||||
__m256i addFilterReg64;
|
||||
__m256i addFilterReg32;
|
||||
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
|
||||
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
|
||||
__m256i srcReg32b11, srcReg32b12, filtersReg32;
|
||||
|
@ -281,11 +519,11 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
unsigned int i;
|
||||
ptrdiff_t src_stride, dst_stride;
|
||||
|
||||
// create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
|
||||
addFilterReg64 = _mm256_set1_epi32((int)0x0400040u);
|
||||
addFilterReg32 = _mm256_set1_epi16(32);
|
||||
filtersReg = _mm_loadu_si128((const __m128i *)filter);
|
||||
// converting the 16 bit (short) to 8 bit (byte) and have the
|
||||
// same data in both lanes of 128 bit register.
|
||||
filtersReg = _mm_srai_epi16(filtersReg, 1);
|
||||
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
|
||||
// have the same data in both lanes of a 256 bit register
|
||||
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
|
@ -308,49 +546,178 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
dst_stride = out_pitch << 1;
|
||||
|
||||
// load 16 bytes 7 times in stride of src_pitch
|
||||
srcReg32b1 =
|
||||
_mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr)));
|
||||
srcReg32b2 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch)));
|
||||
srcReg32b3 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 2)));
|
||||
srcReg32b4 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 3)));
|
||||
srcReg32b5 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 4)));
|
||||
srcReg32b6 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 5)));
|
||||
srcReg32b1 = xx_loadu2_epi64(src_ptr + src_pitch, src_ptr);
|
||||
srcReg32b3 =
|
||||
xx_loadu2_epi64(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
|
||||
srcReg32b5 =
|
||||
xx_loadu2_epi64(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
|
||||
srcReg32b7 = _mm256_castsi128_si256(
|
||||
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6)));
|
||||
|
||||
// have each consecutive loads on the same 256 register
|
||||
srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
|
||||
srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
|
||||
srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
|
||||
// merge every two consecutive registers except the last one
|
||||
srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
|
||||
srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
|
||||
srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
|
||||
|
||||
for (i = output_height; i > 1; i -= 2) {
|
||||
// load the last 2 loads of 8 bytes and have every two
|
||||
// consecutive loads in the same 256 bit register
|
||||
srcReg32b8 = _mm256_castsi128_si256(
|
||||
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7)));
|
||||
srcReg32b7 = _mm256_inserti128_si256(srcReg32b7,
|
||||
_mm256_castsi256_si128(srcReg32b8), 1);
|
||||
srcReg32b9 = _mm256_castsi128_si256(
|
||||
_mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 8)));
|
||||
srcReg32b8 = _mm256_inserti128_si256(srcReg32b8,
|
||||
_mm256_castsi256_si128(srcReg32b9), 1);
|
||||
|
||||
// merge every two consecutive registers
|
||||
// save
|
||||
srcReg32b4 = _mm256_unpacklo_epi8(srcReg32b7, srcReg32b8);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
|
||||
srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
|
||||
srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
|
||||
|
||||
// add and saturate the results together
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
|
||||
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
|
||||
|
||||
// shift by 6 bit each 16 bit
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
|
||||
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
// result
|
||||
srcReg32b1 = _mm256_packus_epi16(srcReg32b10, _mm256_setzero_si256());
|
||||
|
||||
src_ptr += src_stride;
|
||||
|
||||
xx_storeu2_epi64(output_ptr, out_pitch, &srcReg32b1);
|
||||
|
||||
output_ptr += dst_stride;
|
||||
|
||||
// save part of the registers for next strides
|
||||
srcReg32b10 = srcReg32b11;
|
||||
srcReg32b11 = srcReg32b2;
|
||||
srcReg32b2 = srcReg32b4;
|
||||
srcReg32b7 = srcReg32b9;
|
||||
}
|
||||
if (i > 0) {
|
||||
__m128i srcRegFilt1, srcRegFilt4, srcRegFilt6, srcRegFilt8;
|
||||
// load the last 8 bytes
|
||||
srcRegFilt8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
|
||||
|
||||
// merge the last 2 results together
|
||||
srcRegFilt4 =
|
||||
_mm_unpacklo_epi8(_mm256_castsi256_si128(srcReg32b7), srcRegFilt8);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt1 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b10),
|
||||
_mm256_castsi256_si128(firstFilters));
|
||||
srcRegFilt4 =
|
||||
_mm_maddubs_epi16(srcRegFilt4, _mm256_castsi256_si128(forthFilters));
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt4 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b11),
|
||||
_mm256_castsi256_si128(secondFilters));
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcRegFilt6 = _mm_maddubs_epi16(_mm256_castsi256_si128(srcReg32b2),
|
||||
_mm256_castsi256_si128(thirdFilters));
|
||||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1 =
|
||||
_mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
|
||||
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt1 =
|
||||
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve result
|
||||
srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, _mm_setzero_si128());
|
||||
|
||||
// save 8 bytes
|
||||
_mm_storel_epi64((__m128i *)output_ptr, srcRegFilt1);
|
||||
}
|
||||
}
|
||||
|
||||
static void aom_filter_block1d16_v8_avx2(
|
||||
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
|
||||
ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
|
||||
__m128i filtersReg;
|
||||
__m256i addFilterReg32;
|
||||
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
|
||||
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
|
||||
__m256i srcReg32b11, srcReg32b12, filtersReg32;
|
||||
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
|
||||
unsigned int i;
|
||||
ptrdiff_t src_stride, dst_stride;
|
||||
|
||||
addFilterReg32 = _mm256_set1_epi16(32);
|
||||
filtersReg = _mm_loadu_si128((const __m128i *)filter);
|
||||
// converting the 16 bit (short) to 8 bit (byte) and have the
|
||||
// same data in both lanes of 128 bit register.
|
||||
filtersReg = _mm_srai_epi16(filtersReg, 1);
|
||||
filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
|
||||
// have the same data in both lanes of a 256 bit register
|
||||
filtersReg32 = MM256_BROADCASTSI128_SI256(filtersReg);
|
||||
|
||||
// duplicate only the first 16 bits (first and second byte)
|
||||
// across 256 bit register
|
||||
firstFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x100u));
|
||||
// duplicate only the second 16 bits (third and forth byte)
|
||||
// across 256 bit register
|
||||
secondFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x302u));
|
||||
// duplicate only the third 16 bits (fifth and sixth byte)
|
||||
// across 256 bit register
|
||||
thirdFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x504u));
|
||||
// duplicate only the forth 16 bits (seventh and eighth byte)
|
||||
// across 256 bit register
|
||||
forthFilters = _mm256_shuffle_epi8(filtersReg32, _mm256_set1_epi16(0x706u));
|
||||
|
||||
// multiple the size of the source and destination stride by two
|
||||
src_stride = src_pitch << 1;
|
||||
dst_stride = out_pitch << 1;
|
||||
|
||||
// load 16 bytes 7 times in stride of src_pitch
|
||||
srcReg32b1 = xx_loadu2_mi128(src_ptr + src_pitch, src_ptr);
|
||||
srcReg32b3 =
|
||||
xx_loadu2_mi128(src_ptr + src_pitch * 3, src_ptr + src_pitch * 2);
|
||||
srcReg32b5 =
|
||||
xx_loadu2_mi128(src_ptr + src_pitch * 5, src_ptr + src_pitch * 4);
|
||||
srcReg32b7 = _mm256_castsi128_si256(
|
||||
_mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 6)));
|
||||
|
||||
// have each consecutive loads on the same 256 register
|
||||
srcReg32b1 = _mm256_inserti128_si256(srcReg32b1,
|
||||
_mm256_castsi256_si128(srcReg32b2), 1);
|
||||
srcReg32b2 = _mm256_inserti128_si256(srcReg32b2,
|
||||
_mm256_castsi256_si128(srcReg32b3), 1);
|
||||
srcReg32b3 = _mm256_inserti128_si256(srcReg32b3,
|
||||
_mm256_castsi256_si128(srcReg32b4), 1);
|
||||
srcReg32b4 = _mm256_inserti128_si256(srcReg32b4,
|
||||
_mm256_castsi256_si128(srcReg32b5), 1);
|
||||
srcReg32b5 = _mm256_inserti128_si256(srcReg32b5,
|
||||
_mm256_castsi256_si128(srcReg32b6), 1);
|
||||
srcReg32b6 = _mm256_inserti128_si256(srcReg32b6,
|
||||
_mm256_castsi256_si128(srcReg32b7), 1);
|
||||
|
||||
srcReg32b2 = _mm256_permute2x128_si256(srcReg32b1, srcReg32b3, 0x21);
|
||||
srcReg32b4 = _mm256_permute2x128_si256(srcReg32b3, srcReg32b5, 0x21);
|
||||
srcReg32b6 = _mm256_permute2x128_si256(srcReg32b5, srcReg32b7, 0x21);
|
||||
// merge every two consecutive registers except the last one
|
||||
srcReg32b10 = _mm256_unpacklo_epi8(srcReg32b1, srcReg32b2);
|
||||
srcReg32b1 = _mm256_unpackhi_epi8(srcReg32b1, srcReg32b2);
|
||||
|
||||
// save
|
||||
srcReg32b11 = _mm256_unpacklo_epi8(srcReg32b3, srcReg32b4);
|
||||
|
||||
// save
|
||||
srcReg32b3 = _mm256_unpackhi_epi8(srcReg32b3, srcReg32b4);
|
||||
|
||||
// save
|
||||
srcReg32b2 = _mm256_unpacklo_epi8(srcReg32b5, srcReg32b6);
|
||||
|
||||
// save
|
||||
srcReg32b5 = _mm256_unpackhi_epi8(srcReg32b5, srcReg32b6);
|
||||
|
||||
for (i = output_height; i > 1; i -= 2) {
|
||||
|
@ -383,9 +750,7 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
|
||||
// add and saturate the results together
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
|
||||
_mm256_min_epi16(srcReg32b8, srcReg32b12));
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
|
||||
_mm256_max_epi16(srcReg32b8, srcReg32b12));
|
||||
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
|
||||
|
||||
// multiply 2 adjacent elements with the filter and add the result
|
||||
srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
|
||||
|
@ -399,16 +764,13 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
|
||||
// add and saturate the results together
|
||||
srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
|
||||
_mm256_min_epi16(srcReg32b8, srcReg32b12));
|
||||
srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
|
||||
_mm256_max_epi16(srcReg32b8, srcReg32b12));
|
||||
_mm256_adds_epi16(srcReg32b8, srcReg32b12));
|
||||
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
|
||||
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
|
||||
|
||||
// shift by 7 bit each 16 bit
|
||||
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 7);
|
||||
srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 7);
|
||||
// shift by 6 bit each 16 bit
|
||||
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg32);
|
||||
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg32);
|
||||
srcReg32b10 = _mm256_srai_epi16(srcReg32b10, 6);
|
||||
srcReg32b1 = _mm256_srai_epi16(srcReg32b1, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
|
@ -417,12 +779,7 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
|
||||
src_ptr += src_stride;
|
||||
|
||||
// save 16 bytes
|
||||
_mm_store_si128((__m128i *)output_ptr, _mm256_castsi256_si128(srcReg32b1));
|
||||
|
||||
// save the next 16 bits
|
||||
_mm_store_si128((__m128i *)(output_ptr + out_pitch),
|
||||
_mm256_extractf128_si256(srcReg32b1, 1));
|
||||
xx_store2_mi128(output_ptr, out_pitch, &srcReg32b1);
|
||||
|
||||
output_ptr += dst_stride;
|
||||
|
||||
|
@ -475,24 +832,17 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
|
||||
// add and saturate the results together
|
||||
srcRegFilt1 =
|
||||
_mm_adds_epi16(srcRegFilt1, _mm_min_epi16(srcRegFilt4, srcRegFilt6));
|
||||
_mm_adds_epi16(srcRegFilt1, _mm_adds_epi16(srcRegFilt4, srcRegFilt6));
|
||||
srcRegFilt3 =
|
||||
_mm_adds_epi16(srcRegFilt3, _mm_min_epi16(srcRegFilt5, srcRegFilt7));
|
||||
_mm_adds_epi16(srcRegFilt3, _mm_adds_epi16(srcRegFilt5, srcRegFilt7));
|
||||
|
||||
// add and saturate the results together
|
||||
// shift by 6 bit each 16 bit
|
||||
srcRegFilt1 =
|
||||
_mm_adds_epi16(srcRegFilt1, _mm_max_epi16(srcRegFilt4, srcRegFilt6));
|
||||
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt3 =
|
||||
_mm_adds_epi16(srcRegFilt3, _mm_max_epi16(srcRegFilt5, srcRegFilt7));
|
||||
|
||||
srcRegFilt1 =
|
||||
_mm_adds_epi16(srcRegFilt1, _mm256_castsi256_si128(addFilterReg64));
|
||||
srcRegFilt3 =
|
||||
_mm_adds_epi16(srcRegFilt3, _mm256_castsi256_si128(addFilterReg64));
|
||||
|
||||
// shift by 7 bit each 16 bit
|
||||
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
|
||||
srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 7);
|
||||
_mm_adds_epi16(srcRegFilt3, _mm256_castsi256_si128(addFilterReg32));
|
||||
srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 6);
|
||||
srcRegFilt3 = _mm_srai_epi16(srcRegFilt3, 6);
|
||||
|
||||
// shrink to 8 bit each 16 bits, the first lane contain the first
|
||||
// convolve result and the second lane contain the second convolve
|
||||
|
@ -506,21 +856,6 @@ static void aom_filter_block1d16_v8_avx2(
|
|||
|
||||
#if HAVE_AVX2 && HAVE_SSSE3
|
||||
filter8_1dfunction aom_filter_block1d4_v8_ssse3;
|
||||
#if ARCH_X86_64
|
||||
filter8_1dfunction aom_filter_block1d8_v8_intrin_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d8_h8_intrin_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d4_h8_intrin_ssse3;
|
||||
#define aom_filter_block1d8_v8_avx2 aom_filter_block1d8_v8_intrin_ssse3
|
||||
#define aom_filter_block1d8_h8_avx2 aom_filter_block1d8_h8_intrin_ssse3
|
||||
#define aom_filter_block1d4_h8_avx2 aom_filter_block1d4_h8_intrin_ssse3
|
||||
#else // ARCH_X86
|
||||
filter8_1dfunction aom_filter_block1d8_v8_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d8_h8_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d4_h8_ssse3;
|
||||
#define aom_filter_block1d8_v8_avx2 aom_filter_block1d8_v8_ssse3
|
||||
#define aom_filter_block1d8_h8_avx2 aom_filter_block1d8_h8_ssse3
|
||||
#define aom_filter_block1d4_h8_avx2 aom_filter_block1d4_h8_ssse3
|
||||
#endif // ARCH_X86_64
|
||||
filter8_1dfunction aom_filter_block1d16_v2_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d16_h2_ssse3;
|
||||
filter8_1dfunction aom_filter_block1d8_v2_ssse3;
|
||||
|
|
|
@ -13,31 +13,27 @@
|
|||
#define AOM_DSP_X86_CONVOLVE_AVX2_H_
|
||||
|
||||
// filters for 16
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = {
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt_global_avx2[]) = {
|
||||
0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1,
|
||||
2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 2, 3, 3, 4, 4, 5,
|
||||
5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6,
|
||||
7, 7, 8, 8, 9, 9, 10, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
|
||||
10, 11, 11, 12, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
|
||||
12, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 6, 7,
|
||||
7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = {
|
||||
2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
|
||||
2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = {
|
||||
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,
|
||||
4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12
|
||||
};
|
||||
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = {
|
||||
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14,
|
||||
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14
|
||||
DECLARE_ALIGNED(32, static const uint8_t, filt_d4_global_avx2[]) = {
|
||||
0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, 3, 1, 2,
|
||||
3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9,
|
||||
7, 8, 9, 10, 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10,
|
||||
};
|
||||
|
||||
static INLINE void prepare_coeffs_lowbd(
|
||||
const InterpFilterParams *const filter_params, const int subpel_q4,
|
||||
__m256i *const coeffs /* [4] */) {
|
||||
const int16_t *const filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
const __m128i coeffs_8 = _mm_loadu_si128((__m128i *)filter);
|
||||
const __m256i filter_coeffs = _mm256_broadcastsi128_si256(coeffs_8);
|
||||
|
||||
|
@ -65,7 +61,7 @@ static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
|
|||
const int subpel_q4,
|
||||
__m256i *const coeffs /* [4] */) {
|
||||
const int16_t *filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
|
||||
const __m128i coeff_8 = _mm_loadu_si128((__m128i *)filter);
|
||||
const __m256i coeff = _mm256_broadcastsi128_si256(coeff_8);
|
||||
|
|
|
@ -19,7 +19,7 @@ static INLINE void prepare_coeffs(const InterpFilterParams *const filter_params,
|
|||
const int subpel_q4,
|
||||
__m128i *const coeffs /* [4] */) {
|
||||
const int16_t *filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
filter_params, subpel_q4 & SUBPEL_MASK);
|
||||
const __m128i coeff = _mm_loadu_si128((__m128i *)filter);
|
||||
|
||||
// coeffs 0 1 0 1 0 1 0 1
|
||||
|
|
|
@ -105,8 +105,8 @@ void aom_highbd_convolve_copy_avx2(const uint8_t *src8, ptrdiff_t src_stride,
|
|||
|
||||
void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
int i, j;
|
||||
|
@ -254,8 +254,8 @@ void av1_highbd_convolve_y_sr_avx2(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_convolve_x_sr_avx2(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
int i, j;
|
||||
|
|
|
@ -18,8 +18,8 @@
|
|||
|
||||
void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4,
|
||||
const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
|
@ -166,8 +166,8 @@ void av1_highbd_convolve_y_sr_ssse3(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_convolve_x_sr_ssse3(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4,
|
||||
const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
|
|
|
@ -676,7 +676,7 @@ void aom_highbd_upsampled_pred_sse2(MACROBLOCKD *xd,
|
|||
}
|
||||
}
|
||||
|
||||
const InterpFilterParams filter =
|
||||
const InterpFilterParams *filter =
|
||||
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
|
||||
|
||||
if (!subpel_x_q3 && !subpel_y_q3) {
|
||||
|
@ -726,14 +726,14 @@ void aom_highbd_upsampled_pred_sse2(MACROBLOCKD *xd,
|
|||
const int16_t *const kernel_y =
|
||||
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
|
||||
const int intermediate_height =
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
|
||||
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
|
||||
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1),
|
||||
aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter->taps >> 1) - 1),
|
||||
ref_stride, CONVERT_TO_BYTEPTR(temp),
|
||||
MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
|
||||
intermediate_height, bd);
|
||||
aom_highbd_convolve8_vert(
|
||||
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
|
||||
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1)),
|
||||
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
|
||||
16, width, height, bd);
|
||||
}
|
||||
|
|
|
@ -22,118 +22,12 @@
|
|||
void aom_var_filter_block2d_bil_first_pass_ssse3(
|
||||
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step, unsigned int output_height,
|
||||
unsigned int output_width, const uint8_t *filter) {
|
||||
// Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
|
||||
// in computation using _mm_maddubs_epi16.
|
||||
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
|
||||
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
|
||||
const __m128i r = _mm_set1_epi16(round);
|
||||
const uint8_t f0 = filter[0] >> 1;
|
||||
const uint8_t f1 = filter[1] >> 1;
|
||||
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
|
||||
f0, f1, f0, f1, f0, f1);
|
||||
const __m128i shuffle_mask =
|
||||
_mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
|
||||
unsigned int i, j;
|
||||
(void)pixel_step;
|
||||
|
||||
if (output_width >= 8) {
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
for (j = 0; j < output_width; j += 8) {
|
||||
// load source
|
||||
__m128i source_low = xx_loadl_64(a);
|
||||
__m128i source_hi = _mm_setzero_si128();
|
||||
|
||||
// avoid load undefined memory
|
||||
if (a + 8 != NULL) source_hi = xx_loadl_64(a + 8);
|
||||
__m128i source = _mm_unpacklo_epi64(source_low, source_hi);
|
||||
|
||||
// shuffle to:
|
||||
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
|
||||
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
|
||||
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
|
||||
|
||||
// b[i] = a[i] * filter[0] + a[i + 1] * filter[1]
|
||||
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
|
||||
|
||||
// round
|
||||
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
|
||||
|
||||
xx_storeu_128(b, res);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
a += src_pixels_per_line - output_width;
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
// load source, only first 5 values are meaningful:
|
||||
// { a[0], a[1], a[2], a[3], a[4], xxxx }
|
||||
__m128i source = xx_loadl_64(a);
|
||||
|
||||
// shuffle, up to the first 8 are useful
|
||||
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
|
||||
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
|
||||
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
|
||||
|
||||
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
|
||||
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
|
||||
|
||||
xx_storel_64(b, res);
|
||||
|
||||
a += src_pixels_per_line;
|
||||
b += output_width;
|
||||
}
|
||||
}
|
||||
}
|
||||
unsigned int output_width, const uint8_t *filter);
|
||||
|
||||
void aom_var_filter_block2d_bil_second_pass_ssse3(
|
||||
const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step, unsigned int output_height,
|
||||
unsigned int output_width, const uint8_t *filter) {
|
||||
const int16_t round = (1 << FILTER_BITS) >> 1;
|
||||
const __m128i r = _mm_set1_epi32(round);
|
||||
const __m128i filters =
|
||||
_mm_setr_epi16(filter[0], filter[1], filter[0], filter[1], filter[0],
|
||||
filter[1], filter[0], filter[1]);
|
||||
const __m128i shuffle_mask =
|
||||
_mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||
const __m128i mask =
|
||||
_mm_setr_epi8(0, 4, 8, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
for (j = 0; j < output_width; j += 4) {
|
||||
// load source as:
|
||||
// { a[0], a[1], a[2], a[3], a[w], a[w+1], a[w+2], a[w+3] }
|
||||
__m128i source1 = xx_loadl_64(a);
|
||||
__m128i source2 = xx_loadl_64(a + pixel_step);
|
||||
__m128i source = _mm_unpacklo_epi64(source1, source2);
|
||||
|
||||
// shuffle source to:
|
||||
// { a[0], a[w], a[1], a[w+1], a[2], a[w+2], a[3], a[w+3] }
|
||||
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
|
||||
|
||||
// b[i] = a[i] * filter[0] + a[w + i] * filter[1]
|
||||
__m128i res = _mm_madd_epi16(source_shuffle, filters);
|
||||
|
||||
// round
|
||||
res = _mm_srai_epi32(_mm_add_epi32(res, r), FILTER_BITS);
|
||||
|
||||
// shuffle to get each lower 8 bit of every 32 bit
|
||||
res = _mm_shuffle_epi8(res, mask);
|
||||
|
||||
xx_storel_32(b, res);
|
||||
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
a += src_pixels_per_line - output_width;
|
||||
}
|
||||
}
|
||||
unsigned int output_width, const uint8_t *filter);
|
||||
|
||||
static INLINE void compute_jnt_comp_avg(__m128i *p0, __m128i *p1,
|
||||
const __m128i *w, const __m128i *r,
|
||||
|
|
|
@ -0,0 +1,390 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
#include "aom_dsp/blend.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/x86/synonyms.h"
|
||||
#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
|
||||
|
||||
static INLINE unsigned int masked_sad32xh_avx2(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int width, int height) {
|
||||
int x, y;
|
||||
__m256i res = _mm256_setzero_si256();
|
||||
const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
const __m256i round_scale =
|
||||
_mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
|
||||
for (y = 0; y < height; y++) {
|
||||
for (x = 0; x < width; x += 32) {
|
||||
const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
|
||||
const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
|
||||
const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
|
||||
const __m256i m = _mm256_lddqu_si256((const __m256i *)&m_ptr[x]);
|
||||
const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
|
||||
|
||||
// Calculate 16 predicted pixels.
|
||||
// Note that the maximum value of any entry of 'pred_l' or 'pred_r'
|
||||
// is 64 * 255, so we have plenty of space to add rounding constants.
|
||||
const __m256i data_l = _mm256_unpacklo_epi8(a, b);
|
||||
const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
|
||||
__m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
|
||||
pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
|
||||
|
||||
const __m256i data_r = _mm256_unpackhi_epi8(a, b);
|
||||
const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
|
||||
__m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
|
||||
pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
|
||||
|
||||
const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
|
||||
res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
|
||||
}
|
||||
|
||||
src_ptr += src_stride;
|
||||
a_ptr += a_stride;
|
||||
b_ptr += b_stride;
|
||||
m_ptr += m_stride;
|
||||
}
|
||||
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
|
||||
res = _mm256_shuffle_epi32(res, 0xd8);
|
||||
res = _mm256_permute4x64_epi64(res, 0xd8);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
int32_t sad = _mm256_extract_epi32(res, 0);
|
||||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
// Builds a 256-bit vector from two unaligned 128-bit loads: `lo` fills the
// lower half and `hi` the upper half.
static INLINE __m256i xx_loadu2_m128i(const void *hi, const void *lo) {
  const __m128i lower = _mm_lddqu_si128((const __m128i *)lo);
  const __m128i upper = _mm_lddqu_si128((const __m128i *)hi);
  return _mm256_inserti128_si256(_mm256_castsi128_si256(lower), upper, 1);
}
|
||||
|
||||
static INLINE unsigned int masked_sad16xh_avx2(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
int y;
|
||||
__m256i res = _mm256_setzero_si256();
|
||||
const __m256i mask_max = _mm256_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
const __m256i round_scale =
|
||||
_mm256_set1_epi16(1 << (15 - AOM_BLEND_A64_ROUND_BITS));
|
||||
for (y = 0; y < height; y += 2) {
|
||||
const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
|
||||
const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
|
||||
const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
|
||||
const __m256i m = xx_loadu2_m128i(m_ptr + m_stride, m_ptr);
|
||||
const __m256i m_inv = _mm256_sub_epi8(mask_max, m);
|
||||
|
||||
// Calculate 16 predicted pixels.
|
||||
// Note that the maximum value of any entry of 'pred_l' or 'pred_r'
|
||||
// is 64 * 255, so we have plenty of space to add rounding constants.
|
||||
const __m256i data_l = _mm256_unpacklo_epi8(a, b);
|
||||
const __m256i mask_l = _mm256_unpacklo_epi8(m, m_inv);
|
||||
__m256i pred_l = _mm256_maddubs_epi16(data_l, mask_l);
|
||||
pred_l = _mm256_mulhrs_epi16(pred_l, round_scale);
|
||||
|
||||
const __m256i data_r = _mm256_unpackhi_epi8(a, b);
|
||||
const __m256i mask_r = _mm256_unpackhi_epi8(m, m_inv);
|
||||
__m256i pred_r = _mm256_maddubs_epi16(data_r, mask_r);
|
||||
pred_r = _mm256_mulhrs_epi16(pred_r, round_scale);
|
||||
|
||||
const __m256i pred = _mm256_packus_epi16(pred_l, pred_r);
|
||||
res = _mm256_add_epi32(res, _mm256_sad_epu8(pred, src));
|
||||
|
||||
src_ptr += src_stride << 1;
|
||||
a_ptr += a_stride << 1;
|
||||
b_ptr += b_stride << 1;
|
||||
m_ptr += m_stride << 1;
|
||||
}
|
||||
// At this point, we have two 32-bit partial SADs in lanes 0 and 2 of 'res'.
|
||||
res = _mm256_shuffle_epi32(res, 0xd8);
|
||||
res = _mm256_permute4x64_epi64(res, 0xd8);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
int32_t sad = _mm256_extract_epi32(res, 0);
|
||||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
/*
 * Width dispatcher for the 8-bit masked SAD of an m x n block.
 *
 * second_pred is addressed with a stride of m. With invert_mask == 0 the mask
 * weights apply to ref and the complementary weights to second_pred; a
 * non-zero invert_mask swaps the two operands. Widths 4 and 8 go to the SSSE3
 * kernels, width 16 to the paired-row AVX2 kernel, and every other width to
 * the 32-pixel AVX2 kernel.
 */
static INLINE unsigned int aom_masked_sad_avx2(
    const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
    const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
    int invert_mask, int m, int n) {
  // Pick which prediction receives the mask weight (first) and which receives
  // the complementary weight (second).
  const uint8_t *const first = invert_mask ? second_pred : ref;
  const int first_stride = invert_mask ? m : ref_stride;
  const uint8_t *const second = invert_mask ? ref : second_pred;
  const int second_stride = invert_mask ? ref_stride : m;

  if (m == 4) {
    return aom_masked_sad4xh_ssse3(src, src_stride, first, first_stride, second,
                                   second_stride, msk, msk_stride, n);
  }
  if (m == 8) {
    return aom_masked_sad8xh_ssse3(src, src_stride, first, first_stride, second,
                                   second_stride, msk, msk_stride, n);
  }
  if (m == 16) {
    return masked_sad16xh_avx2(src, src_stride, first, first_stride, second,
                               second_stride, msk, msk_stride, n);
  }
  return masked_sad32xh_avx2(src, src_stride, first, first_stride, second,
                             second_stride, msk, msk_stride, m, n);
}
|
||||
|
||||
// Defines the entry point aom_masked_sad<w>x<h>_avx2(), which forwards to
// aom_masked_sad_avx2() with the block width and height as constants.
#define MASKSADMXN_AVX2(w, h)                                                 \
  unsigned int aom_masked_sad##w##x##h##_avx2(                                \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred, const uint8_t *msk, int msk_stride,         \
      int invert_mask) {                                                      \
    return aom_masked_sad_avx2(src, src_stride, ref, ref_stride, second_pred, \
                               msk, msk_stride, invert_mask, w, h);           \
  }

// Width 4
MASKSADMXN_AVX2(4, 4)
MASKSADMXN_AVX2(4, 8)
MASKSADMXN_AVX2(4, 16)
// Width 8
MASKSADMXN_AVX2(8, 4)
MASKSADMXN_AVX2(8, 8)
MASKSADMXN_AVX2(8, 16)
MASKSADMXN_AVX2(8, 32)
// Width 16
MASKSADMXN_AVX2(16, 4)
MASKSADMXN_AVX2(16, 8)
MASKSADMXN_AVX2(16, 16)
MASKSADMXN_AVX2(16, 32)
MASKSADMXN_AVX2(16, 64)
// Width 32
MASKSADMXN_AVX2(32, 8)
MASKSADMXN_AVX2(32, 16)
MASKSADMXN_AVX2(32, 32)
MASKSADMXN_AVX2(32, 64)
// Width 64
MASKSADMXN_AVX2(64, 16)
MASKSADMXN_AVX2(64, 32)
MASKSADMXN_AVX2(64, 64)
MASKSADMXN_AVX2(64, 128)
// Width 128
MASKSADMXN_AVX2(128, 64)
MASKSADMXN_AVX2(128, 128)
|
||||
|
||||
static INLINE unsigned int highbd_masked_sad8xh_avx2(
|
||||
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
|
||||
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
|
||||
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
|
||||
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
|
||||
int y;
|
||||
__m256i res = _mm256_setzero_si256();
|
||||
const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
const __m256i round_const =
|
||||
_mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
|
||||
const __m256i one = _mm256_set1_epi16(1);
|
||||
|
||||
for (y = 0; y < height; y += 2) {
|
||||
const __m256i src = xx_loadu2_m128i(src_ptr + src_stride, src_ptr);
|
||||
const __m256i a = xx_loadu2_m128i(a_ptr + a_stride, a_ptr);
|
||||
const __m256i b = xx_loadu2_m128i(b_ptr + b_stride, b_ptr);
|
||||
// Zero-extend mask to 16 bits
|
||||
const __m256i m = _mm256_cvtepu8_epi16(_mm_unpacklo_epi64(
|
||||
_mm_loadl_epi64((const __m128i *)(m_ptr)),
|
||||
_mm_loadl_epi64((const __m128i *)(m_ptr + m_stride))));
|
||||
const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
|
||||
|
||||
const __m256i data_l = _mm256_unpacklo_epi16(a, b);
|
||||
const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
|
||||
__m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
|
||||
pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
|
||||
AOM_BLEND_A64_ROUND_BITS);
|
||||
|
||||
const __m256i data_r = _mm256_unpackhi_epi16(a, b);
|
||||
const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
|
||||
__m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
|
||||
pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
|
||||
AOM_BLEND_A64_ROUND_BITS);
|
||||
|
||||
// Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
|
||||
// so it is safe to do signed saturation here.
|
||||
const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
|
||||
// There is no 16-bit SAD instruction, so we have to synthesize
|
||||
// an 8-element SAD. We do this by storing 4 32-bit partial SADs,
|
||||
// and accumulating them at the end
|
||||
const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
|
||||
res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
|
||||
|
||||
src_ptr += src_stride << 1;
|
||||
a_ptr += a_stride << 1;
|
||||
b_ptr += b_stride << 1;
|
||||
m_ptr += m_stride << 1;
|
||||
}
|
||||
// At this point, we have four 32-bit partial SADs stored in 'res'.
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
|
||||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
static INLINE unsigned int highbd_masked_sad16xh_avx2(
|
||||
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
|
||||
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int width, int height) {
|
||||
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
|
||||
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
|
||||
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
|
||||
int x, y;
|
||||
__m256i res = _mm256_setzero_si256();
|
||||
const __m256i mask_max = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
const __m256i round_const =
|
||||
_mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
|
||||
const __m256i one = _mm256_set1_epi16(1);
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
for (x = 0; x < width; x += 16) {
|
||||
const __m256i src = _mm256_lddqu_si256((const __m256i *)&src_ptr[x]);
|
||||
const __m256i a = _mm256_lddqu_si256((const __m256i *)&a_ptr[x]);
|
||||
const __m256i b = _mm256_lddqu_si256((const __m256i *)&b_ptr[x]);
|
||||
// Zero-extend mask to 16 bits
|
||||
const __m256i m =
|
||||
_mm256_cvtepu8_epi16(_mm_lddqu_si128((const __m128i *)&m_ptr[x]));
|
||||
const __m256i m_inv = _mm256_sub_epi16(mask_max, m);
|
||||
|
||||
const __m256i data_l = _mm256_unpacklo_epi16(a, b);
|
||||
const __m256i mask_l = _mm256_unpacklo_epi16(m, m_inv);
|
||||
__m256i pred_l = _mm256_madd_epi16(data_l, mask_l);
|
||||
pred_l = _mm256_srai_epi32(_mm256_add_epi32(pred_l, round_const),
|
||||
AOM_BLEND_A64_ROUND_BITS);
|
||||
|
||||
const __m256i data_r = _mm256_unpackhi_epi16(a, b);
|
||||
const __m256i mask_r = _mm256_unpackhi_epi16(m, m_inv);
|
||||
__m256i pred_r = _mm256_madd_epi16(data_r, mask_r);
|
||||
pred_r = _mm256_srai_epi32(_mm256_add_epi32(pred_r, round_const),
|
||||
AOM_BLEND_A64_ROUND_BITS);
|
||||
|
||||
// Note: the maximum value in pred_l/r is (2^bd)-1 < 2^15,
|
||||
// so it is safe to do signed saturation here.
|
||||
const __m256i pred = _mm256_packs_epi32(pred_l, pred_r);
|
||||
// There is no 16-bit SAD instruction, so we have to synthesize
|
||||
// an 8-element SAD. We do this by storing 4 32-bit partial SADs,
|
||||
// and accumulating them at the end
|
||||
const __m256i diff = _mm256_abs_epi16(_mm256_sub_epi16(pred, src));
|
||||
res = _mm256_add_epi32(res, _mm256_madd_epi16(diff, one));
|
||||
}
|
||||
|
||||
src_ptr += src_stride;
|
||||
a_ptr += a_stride;
|
||||
b_ptr += b_stride;
|
||||
m_ptr += m_stride;
|
||||
}
|
||||
// At this point, we have four 32-bit partial SADs stored in 'res'.
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
res = _mm256_hadd_epi32(res, res);
|
||||
int sad = _mm256_extract_epi32(res, 0) + _mm256_extract_epi32(res, 4);
|
||||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
// Width dispatcher for the high bit-depth masked SAD.
//
// m is the block width and n the block height. second_pred is addressed with
// a stride of m. With invert_mask == 0 the kernels receive (ref, second_pred)
// as their (a, b) operands; otherwise the two operands (and their strides)
// are swapped.
//
// Width 4 is forwarded to the SSSE3 helper, width 8 to the 8xh AVX2 kernel,
// and every other width to the 16-pixels-per-step AVX2 kernel.
static INLINE unsigned int aom_highbd_masked_sad_avx2(
    const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride,
    const uint8_t *second_pred, const uint8_t *msk, int msk_stride,
    int invert_mask, int m, int n) {
  // Choose the operand order once instead of duplicating the width switch.
  const uint8_t *const a = invert_mask ? second_pred : ref;
  const int a_stride = invert_mask ? m : ref_stride;
  const uint8_t *const b = invert_mask ? ref : second_pred;
  const int b_stride = invert_mask ? ref_stride : m;

  switch (m) {
    case 4:
      return aom_highbd_masked_sad4xh_ssse3(src, src_stride, a, a_stride, b,
                                            b_stride, msk, msk_stride, n);
    case 8:
      return highbd_masked_sad8xh_avx2(src, src_stride, a, a_stride, b,
                                       b_stride, msk, msk_stride, n);
    default:
      return highbd_masked_sad16xh_avx2(src, src_stride, a, a_stride, b,
                                        b_stride, msk, msk_stride, m, n);
  }
}
|
||||
|
||||
// Defines the public aom_highbd_masked_sad<m>x<n>_avx2() entry point for a
// block of width m and height n by forwarding to
// aom_highbd_masked_sad_avx2().
//
// The macro expands to a complete function definition, so invocations must
// NOT be followed by ';': a stray semicolon at file scope is an empty
// declaration, which ISO C does not allow and which -Wpedantic reports.
#define HIGHBD_MASKSADMXN_AVX2(m, n)                                      \
  unsigned int aom_highbd_masked_sad##m##x##n##_avx2(                     \
      const uint8_t *src8, int src_stride, const uint8_t *ref8,           \
      int ref_stride, const uint8_t *second_pred8, const uint8_t *msk,    \
      int msk_stride, int invert_mask) {                                  \
    return aom_highbd_masked_sad_avx2(src8, src_stride, ref8, ref_stride, \
                                      second_pred8, msk, msk_stride,      \
                                      invert_mask, m, n);                 \
  }

HIGHBD_MASKSADMXN_AVX2(4, 4)
HIGHBD_MASKSADMXN_AVX2(4, 8)
HIGHBD_MASKSADMXN_AVX2(8, 4)
HIGHBD_MASKSADMXN_AVX2(8, 8)
HIGHBD_MASKSADMXN_AVX2(8, 16)
HIGHBD_MASKSADMXN_AVX2(16, 8)
HIGHBD_MASKSADMXN_AVX2(16, 16)
HIGHBD_MASKSADMXN_AVX2(16, 32)
HIGHBD_MASKSADMXN_AVX2(32, 16)
HIGHBD_MASKSADMXN_AVX2(32, 32)
HIGHBD_MASKSADMXN_AVX2(32, 64)
HIGHBD_MASKSADMXN_AVX2(64, 32)
HIGHBD_MASKSADMXN_AVX2(64, 64)
HIGHBD_MASKSADMXN_AVX2(64, 128)
HIGHBD_MASKSADMXN_AVX2(128, 64)
HIGHBD_MASKSADMXN_AVX2(128, 128)
HIGHBD_MASKSADMXN_AVX2(4, 16)
HIGHBD_MASKSADMXN_AVX2(16, 4)
HIGHBD_MASKSADMXN_AVX2(8, 32)
HIGHBD_MASKSADMXN_AVX2(32, 8)
HIGHBD_MASKSADMXN_AVX2(16, 64)
HIGHBD_MASKSADMXN_AVX2(64, 16)
|
|
@ -19,6 +19,8 @@
|
|||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/x86/synonyms.h"
|
||||
|
||||
#include "aom_dsp/x86//masked_sad_intrin_ssse3.h"
|
||||
|
||||
// For width a multiple of 16
|
||||
static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
|
||||
int src_stride,
|
||||
|
@ -27,16 +29,6 @@ static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
|
|||
const uint8_t *m_ptr, int m_stride,
|
||||
int width, int height);
|
||||
|
||||
static INLINE unsigned int masked_sad8xh_ssse3(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height);
|
||||
|
||||
static INLINE unsigned int masked_sad4xh_ssse3(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height);
|
||||
|
||||
#define MASKSADMXN_SSSE3(m, n) \
|
||||
unsigned int aom_masked_sad##m##x##n##_ssse3( \
|
||||
const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
|
||||
|
@ -56,11 +48,11 @@ static INLINE unsigned int masked_sad4xh_ssse3(
|
|||
const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
|
||||
int invert_mask) { \
|
||||
if (!invert_mask) \
|
||||
return masked_sad8xh_ssse3(src, src_stride, ref, ref_stride, \
|
||||
second_pred, 8, msk, msk_stride, n); \
|
||||
return aom_masked_sad8xh_ssse3(src, src_stride, ref, ref_stride, \
|
||||
second_pred, 8, msk, msk_stride, n); \
|
||||
else \
|
||||
return masked_sad8xh_ssse3(src, src_stride, second_pred, 8, ref, \
|
||||
ref_stride, msk, msk_stride, n); \
|
||||
return aom_masked_sad8xh_ssse3(src, src_stride, second_pred, 8, ref, \
|
||||
ref_stride, msk, msk_stride, n); \
|
||||
}
|
||||
|
||||
#define MASKSAD4XN_SSSE3(n) \
|
||||
|
@ -69,11 +61,11 @@ static INLINE unsigned int masked_sad4xh_ssse3(
|
|||
const uint8_t *second_pred, const uint8_t *msk, int msk_stride, \
|
||||
int invert_mask) { \
|
||||
if (!invert_mask) \
|
||||
return masked_sad4xh_ssse3(src, src_stride, ref, ref_stride, \
|
||||
second_pred, 4, msk, msk_stride, n); \
|
||||
return aom_masked_sad4xh_ssse3(src, src_stride, ref, ref_stride, \
|
||||
second_pred, 4, msk, msk_stride, n); \
|
||||
else \
|
||||
return masked_sad4xh_ssse3(src, src_stride, second_pred, 4, ref, \
|
||||
ref_stride, msk, msk_stride, n); \
|
||||
return aom_masked_sad4xh_ssse3(src, src_stride, second_pred, 4, ref, \
|
||||
ref_stride, msk, msk_stride, n); \
|
||||
}
|
||||
|
||||
MASKSADMXN_SSSE3(128, 128)
|
||||
|
@ -145,10 +137,11 @@ static INLINE unsigned int masked_sad_ssse3(const uint8_t *src_ptr,
|
|||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
static INLINE unsigned int masked_sad8xh_ssse3(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride,
|
||||
const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
int y;
|
||||
__m128i res = _mm_setzero_si128();
|
||||
const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
|
@ -189,10 +182,11 @@ static INLINE unsigned int masked_sad8xh_ssse3(
|
|||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
static INLINE unsigned int masked_sad4xh_ssse3(
|
||||
const uint8_t *src_ptr, int src_stride, const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
|
||||
const uint8_t *a_ptr, int a_stride,
|
||||
const uint8_t *b_ptr, int b_stride,
|
||||
const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
int y;
|
||||
__m128i res = _mm_setzero_si128();
|
||||
const __m128i mask_max = _mm_set1_epi8((1 << AOM_BLEND_A64_ROUND_BITS));
|
||||
|
@ -238,11 +232,6 @@ static INLINE unsigned int highbd_masked_sad_ssse3(
|
|||
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int width, int height);
|
||||
|
||||
static INLINE unsigned int highbd_masked_sad4xh_ssse3(
|
||||
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
|
||||
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height);
|
||||
|
||||
#define HIGHBD_MASKSADMXN_SSSE3(m, n) \
|
||||
unsigned int aom_highbd_masked_sad##m##x##n##_ssse3( \
|
||||
const uint8_t *src8, int src_stride, const uint8_t *ref8, \
|
||||
|
@ -262,11 +251,13 @@ static INLINE unsigned int highbd_masked_sad4xh_ssse3(
|
|||
int ref_stride, const uint8_t *second_pred8, const uint8_t *msk, \
|
||||
int msk_stride, int invert_mask) { \
|
||||
if (!invert_mask) \
|
||||
return highbd_masked_sad4xh_ssse3(src8, src_stride, ref8, ref_stride, \
|
||||
second_pred8, 4, msk, msk_stride, n); \
|
||||
return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, ref8, \
|
||||
ref_stride, second_pred8, 4, msk, \
|
||||
msk_stride, n); \
|
||||
else \
|
||||
return highbd_masked_sad4xh_ssse3(src8, src_stride, second_pred8, 4, \
|
||||
ref8, ref_stride, msk, msk_stride, n); \
|
||||
return aom_highbd_masked_sad4xh_ssse3(src8, src_stride, second_pred8, 4, \
|
||||
ref8, ref_stride, msk, msk_stride, \
|
||||
n); \
|
||||
}
|
||||
|
||||
HIGHBD_MASKSADMXN_SSSE3(128, 128)
|
||||
|
@ -350,10 +341,11 @@ static INLINE unsigned int highbd_masked_sad_ssse3(
|
|||
return (sad + 31) >> 6;
|
||||
}
|
||||
|
||||
static INLINE unsigned int highbd_masked_sad4xh_ssse3(
|
||||
const uint8_t *src8, int src_stride, const uint8_t *a8, int a_stride,
|
||||
const uint8_t *b8, int b_stride, const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
|
||||
const uint8_t *a8, int a_stride,
|
||||
const uint8_t *b8, int b_stride,
|
||||
const uint8_t *m_ptr, int m_stride,
|
||||
int height) {
|
||||
const uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8);
|
||||
const uint16_t *a_ptr = CONVERT_TO_SHORTPTR(a8);
|
||||
const uint16_t *b_ptr = CONVERT_TO_SHORTPTR(b8);
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
// Include guard. The name deliberately avoids a leading underscore followed
// by an uppercase letter, since such identifiers are reserved for the
// implementation.
#ifndef AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_
#define AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_

// uint8_t is used by the prototypes below; include it here so the header is
// self-contained.
#include <stdint.h>

// Narrow-block masked SAD helpers with external linkage, declared here so
// that other translation units can call them.
//
// All three take a source block, two predictor blocks (a, b) with their own
// strides, a per-pixel mask and the block height, and return the masked SAD.
// NOTE(review): the 8xh/4xh names suggest fixed widths of 8 and 4 pixels;
// confirm against the definitions.

unsigned int aom_masked_sad8xh_ssse3(const uint8_t *src_ptr, int src_stride,
                                     const uint8_t *a_ptr, int a_stride,
                                     const uint8_t *b_ptr, int b_stride,
                                     const uint8_t *m_ptr, int m_stride,
                                     int height);

unsigned int aom_masked_sad4xh_ssse3(const uint8_t *src_ptr, int src_stride,
                                     const uint8_t *a_ptr, int a_stride,
                                     const uint8_t *b_ptr, int b_stride,
                                     const uint8_t *m_ptr, int m_stride,
                                     int height);

// High bit-depth variant: src8, a8 and b8 are passed as uint8_t pointers.
unsigned int aom_highbd_masked_sad4xh_ssse3(const uint8_t *src8, int src_stride,
                                            const uint8_t *a8, int a_stride,
                                            const uint8_t *b8, int b_stride,
                                            const uint8_t *m_ptr, int m_stride,
                                            int height);

#endif  // AOM_DSP_X86_MASKED_SAD_INTRIN_SSSE3_H_
|
|
@ -0,0 +1,270 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
|
||||
#include "aom_ports/mem.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
#include "aom_dsp/x86/obmc_intrinsic_ssse3.h"
|
||||
#include "aom_dsp/x86/synonyms.h"
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// 8 bit
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static INLINE unsigned int obmc_sad_w4_avx2(const uint8_t *pre,
|
||||
const int pre_stride,
|
||||
const int32_t *wsrc,
|
||||
const int32_t *mask,
|
||||
const int height) {
|
||||
int n = 0;
|
||||
__m256i v_sad_d = _mm256_setzero_si256();
|
||||
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
|
||||
|
||||
do {
|
||||
const __m128i v_p_b_0 = xx_loadl_32(pre);
|
||||
const __m128i v_p_b_1 = xx_loadl_32(pre + pre_stride);
|
||||
const __m128i v_p_b = _mm_unpacklo_epi32(v_p_b_0, v_p_b_1);
|
||||
const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
|
||||
const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
|
||||
|
||||
const __m256i v_p_d = _mm256_cvtepu8_epi32(v_p_b);
|
||||
|
||||
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
|
||||
// boundaries. We use pmaddwd, as it has lower latency on Haswell
|
||||
// than pmulld but produces the same result with these inputs.
|
||||
const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
|
||||
|
||||
const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
|
||||
const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
|
||||
|
||||
// Rounded absolute difference
|
||||
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
|
||||
const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
|
||||
|
||||
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
|
||||
|
||||
n += 8;
|
||||
pre += pre_stride << 1;
|
||||
} while (n < 8 * (height >> 1));
|
||||
|
||||
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
|
||||
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
|
||||
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
|
||||
return xx_hsum_epi32_si32(v_sad_d_0);
|
||||
}
|
||||
|
||||
static INLINE unsigned int obmc_sad_w8n_avx2(
|
||||
const uint8_t *pre, const int pre_stride, const int32_t *wsrc,
|
||||
const int32_t *mask, const int width, const int height) {
|
||||
const int pre_step = pre_stride - width;
|
||||
int n = 0;
|
||||
__m256i v_sad_d = _mm256_setzero_si256();
|
||||
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
|
||||
assert(width >= 8);
|
||||
assert(IS_POWER_OF_TWO(width));
|
||||
|
||||
do {
|
||||
const __m128i v_p0_b = xx_loadl_64(pre + n);
|
||||
const __m256i v_m0_d = _mm256_lddqu_si256((__m256i *)(mask + n));
|
||||
const __m256i v_w0_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
|
||||
|
||||
const __m256i v_p0_d = _mm256_cvtepu8_epi32(v_p0_b);
|
||||
|
||||
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
|
||||
// boundaries. We use pmaddwd, as it has lower latency on Haswell
|
||||
// than pmulld but produces the same result with these inputs.
|
||||
const __m256i v_pm0_d = _mm256_madd_epi16(v_p0_d, v_m0_d);
|
||||
|
||||
const __m256i v_diff0_d = _mm256_sub_epi32(v_w0_d, v_pm0_d);
|
||||
const __m256i v_absdiff0_d = _mm256_abs_epi32(v_diff0_d);
|
||||
|
||||
// Rounded absolute difference
|
||||
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff0_d, v_bias_d);
|
||||
const __m256i v_rad0_d = _mm256_srli_epi32(v_tmp_d, 12);
|
||||
|
||||
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad0_d);
|
||||
|
||||
n += 8;
|
||||
|
||||
if ((n & (width - 1)) == 0) pre += pre_step;
|
||||
} while (n < width * height);
|
||||
|
||||
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
|
||||
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
|
||||
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
|
||||
return xx_hsum_epi32_si32(v_sad_d_0);
|
||||
}
|
||||
|
||||
// Emits aom_obmc_sad<w>x<h>_avx2(). `w` is a literal, so the width test is
// resolved at compile time: width 4 goes to the two-rows-per-step kernel and
// every other width to the 8-pixels-per-step kernel.
#define OBMCSADWXH(w, h)                                          \
  unsigned int aom_obmc_sad##w##x##h##_avx2(                      \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,    \
      const int32_t *msk) {                                       \
    return (w == 4)                                               \
               ? obmc_sad_w4_avx2(pre, pre_stride, wsrc, msk, h)  \
               : obmc_sad_w8n_avx2(pre, pre_stride, wsrc, msk, w, \
                                   h);                            \
  }

OBMCSADWXH(128, 128)
OBMCSADWXH(128, 64)
OBMCSADWXH(64, 128)
OBMCSADWXH(64, 64)
OBMCSADWXH(64, 32)
OBMCSADWXH(32, 64)
OBMCSADWXH(32, 32)
OBMCSADWXH(32, 16)
OBMCSADWXH(16, 32)
OBMCSADWXH(16, 16)
OBMCSADWXH(16, 8)
OBMCSADWXH(8, 16)
OBMCSADWXH(8, 8)
OBMCSADWXH(8, 4)
OBMCSADWXH(4, 8)
OBMCSADWXH(4, 4)
OBMCSADWXH(4, 16)
OBMCSADWXH(16, 4)
OBMCSADWXH(8, 32)
OBMCSADWXH(32, 8)
OBMCSADWXH(16, 64)
OBMCSADWXH(64, 16)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// High bit-depth
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static INLINE unsigned int hbd_obmc_sad_w4_avx2(const uint8_t *pre8,
|
||||
const int pre_stride,
|
||||
const int32_t *wsrc,
|
||||
const int32_t *mask,
|
||||
const int height) {
|
||||
const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
|
||||
int n = 0;
|
||||
__m256i v_sad_d = _mm256_setzero_si256();
|
||||
const __m256i v_bias_d = _mm256_set1_epi32((1 << 12) >> 1);
|
||||
do {
|
||||
const __m128i v_p_w_0 = xx_loadl_64(pre);
|
||||
const __m128i v_p_w_1 = xx_loadl_64(pre + pre_stride);
|
||||
const __m128i v_p_w = _mm_unpacklo_epi64(v_p_w_0, v_p_w_1);
|
||||
const __m256i v_m_d = _mm256_lddqu_si256((__m256i *)(mask + n));
|
||||
const __m256i v_w_d = _mm256_lddqu_si256((__m256i *)(wsrc + n));
|
||||
|
||||
const __m256i v_p_d = _mm256_cvtepu16_epi32(v_p_w);
|
||||
|
||||
// Values in both pre and mask fit in 15 bits, and are packed at 32 bit
|
||||
// boundaries. We use pmaddwd, as it has lower latency on Haswell
|
||||
// than pmulld but produces the same result with these inputs.
|
||||
const __m256i v_pm_d = _mm256_madd_epi16(v_p_d, v_m_d);
|
||||
|
||||
const __m256i v_diff_d = _mm256_sub_epi32(v_w_d, v_pm_d);
|
||||
const __m256i v_absdiff_d = _mm256_abs_epi32(v_diff_d);
|
||||
|
||||
// Rounded absolute difference
|
||||
|
||||
const __m256i v_tmp_d = _mm256_add_epi32(v_absdiff_d, v_bias_d);
|
||||
const __m256i v_rad_d = _mm256_srli_epi32(v_tmp_d, 12);
|
||||
|
||||
v_sad_d = _mm256_add_epi32(v_sad_d, v_rad_d);
|
||||
|
||||
n += 8;
|
||||
|
||||
pre += pre_stride << 1;
|
||||
} while (n < 8 * (height >> 1));
|
||||
|
||||
__m128i v_sad_d_0 = _mm256_castsi256_si128(v_sad_d);
|
||||
__m128i v_sad_d_1 = _mm256_extracti128_si256(v_sad_d, 1);
|
||||
v_sad_d_0 = _mm_add_epi32(v_sad_d_0, v_sad_d_1);
|
||||
return xx_hsum_epi32_si32(v_sad_d_0);
|
||||
}
|
||||
|
||||
// High bit-depth OBMC SAD for prediction blocks whose width is a power of
// two >= 8.
//
// pre8 is converted with CONVERT_TO_SHORTPTR() and read as 16-bit samples.
// `wsrc` and `mask` are walked linearly, 8 int32 values per iteration; `pre`
// shares the running offset and is bumped by (pre_stride - width) each time
// a whole row has been consumed. Each pixel contributes
//   (|wsrc[i] - pre[i] * mask[i]| + (1 << 11)) >> 12
// to the result.
static INLINE unsigned int hbd_obmc_sad_w8n_avx2(
    const uint8_t *pre8, const int pre_stride, const int32_t *wsrc,
    const int32_t *mask, const int width, const int height) {
  const uint16_t *pre = CONVERT_TO_SHORTPTR(pre8);
  const int row_skip = pre_stride - width;
  const int total = width * height;
  const __m256i v_round_d = _mm256_set1_epi32((1 << 12) >> 1);
  __m256i v_acc_d = _mm256_setzero_si256();
  int idx = 0;

  assert(width >= 8);
  assert(IS_POWER_OF_TWO(width));

  do {
    const __m128i v_pix_w = _mm_lddqu_si128((const __m128i *)(pre + idx));
    const __m256i v_pix_d = _mm256_cvtepu16_epi32(v_pix_w);
    const __m256i v_msk_d = _mm256_lddqu_si256((const __m256i *)(mask + idx));
    const __m256i v_wsrc_d = _mm256_lddqu_si256((const __m256i *)(wsrc + idx));

    // Values in both pre and mask fit in 15 bits and sit on 32-bit
    // boundaries, so pmaddwd yields the same product as pmulld while having
    // lower latency on Haswell.
    const __m256i v_prod_d = _mm256_madd_epi16(v_pix_d, v_msk_d);

    // Rounded absolute difference.
    const __m256i v_abs_d =
        _mm256_abs_epi32(_mm256_sub_epi32(v_wsrc_d, v_prod_d));
    const __m256i v_rnd_d =
        _mm256_srli_epi32(_mm256_add_epi32(v_abs_d, v_round_d), 12);

    v_acc_d = _mm256_add_epi32(v_acc_d, v_rnd_d);

    idx += 8;

    // End of a row: skip the padding between width and pre_stride.
    if (idx % width == 0) pre += row_skip;
  } while (idx < total);

  // Fold the upper 128-bit half onto the lower one, then sum the four lanes.
  const __m128i v_lo_d = _mm256_castsi256_si128(v_acc_d);
  const __m128i v_hi_d = _mm256_extracti128_si256(v_acc_d, 1);
  return xx_hsum_epi32_si32(_mm_add_epi32(v_lo_d, v_hi_d));
}
|
||||
|
||||
// Emits aom_highbd_obmc_sad<w>x<h>_avx2(). `w` is a literal, so the width
// test is resolved at compile time: width 4 goes to the two-rows-per-step
// kernel and every other width to the 8-samples-per-step kernel.
#define HBD_OBMCSADWXH(w, h)                                            \
  unsigned int aom_highbd_obmc_sad##w##x##h##_avx2(                     \
      const uint8_t *pre, int pre_stride, const int32_t *wsrc,          \
      const int32_t *mask) {                                            \
    return (w == 4)                                                     \
               ? hbd_obmc_sad_w4_avx2(pre, pre_stride, wsrc, mask, h)   \
               : hbd_obmc_sad_w8n_avx2(pre, pre_stride, wsrc, mask, w,  \
                                       h);                              \
  }

HBD_OBMCSADWXH(128, 128)
HBD_OBMCSADWXH(128, 64)
HBD_OBMCSADWXH(64, 128)
HBD_OBMCSADWXH(64, 64)
HBD_OBMCSADWXH(64, 32)
HBD_OBMCSADWXH(32, 64)
HBD_OBMCSADWXH(32, 32)
HBD_OBMCSADWXH(32, 16)
HBD_OBMCSADWXH(16, 32)
HBD_OBMCSADWXH(16, 16)
HBD_OBMCSADWXH(16, 8)
HBD_OBMCSADWXH(8, 16)
HBD_OBMCSADWXH(8, 8)
HBD_OBMCSADWXH(8, 4)
HBD_OBMCSADWXH(4, 8)
HBD_OBMCSADWXH(4, 4)
HBD_OBMCSADWXH(4, 16)
HBD_OBMCSADWXH(16, 4)
HBD_OBMCSADWXH(8, 32)
HBD_OBMCSADWXH(32, 8)
HBD_OBMCSADWXH(16, 64)
HBD_OBMCSADWXH(64, 16)
|
|
@ -26,6 +26,16 @@
|
|||
// 8 bit
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Local declarations of the two bilinear filter passes; the definitions are
// not in this file. The first pass reads 8-bit samples from `a` and writes
// 16-bit intermediates to `b`; the second pass reads 16-bit intermediates
// from `a` and writes 8-bit samples to `b`.
// NOTE(review): these prototypes must be kept in sync with the definitions
// by hand; a shared header would let the compiler check that.
void aom_var_filter_block2d_bil_first_pass_ssse3(
    const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
    unsigned int pixel_step, unsigned int output_height,
    unsigned int output_width, const uint8_t *filter);

void aom_var_filter_block2d_bil_second_pass_ssse3(
    const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
    unsigned int pixel_step, unsigned int output_height,
    unsigned int output_width, const uint8_t *filter);
|
||||
|
||||
static INLINE void obmc_variance_w4(const uint8_t *pre, const int pre_stride,
|
||||
const int32_t *wsrc, const int32_t *mask,
|
||||
unsigned int *const sse, int *const sum,
|
||||
|
@ -152,6 +162,46 @@ OBMCVARWXH(32, 8)
|
|||
OBMCVARWXH(16, 64)
|
||||
OBMCVARWXH(64, 16)
|
||||
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
#define OBMC_SUBPIX_VAR(W, H) \
|
||||
uint32_t aom_obmc_sub_pixel_variance##W##x##H##_sse4_1( \
|
||||
const uint8_t *pre, int pre_stride, int xoffset, int yoffset, \
|
||||
const int32_t *wsrc, const int32_t *mask, unsigned int *sse) { \
|
||||
uint16_t fdata3[(H + 1) * W]; \
|
||||
uint8_t temp2[H * W]; \
|
||||
\
|
||||
aom_var_filter_block2d_bil_first_pass_ssse3( \
|
||||
pre, fdata3, pre_stride, 1, H + 1, W, bilinear_filters_2t[xoffset]); \
|
||||
aom_var_filter_block2d_bil_second_pass_ssse3( \
|
||||
fdata3, temp2, W, W, H, W, bilinear_filters_2t[yoffset]); \
|
||||
\
|
||||
return aom_obmc_variance##W##x##H##_sse4_1(temp2, W, wsrc, mask, sse); \
|
||||
}
|
||||
|
||||
OBMC_SUBPIX_VAR(128, 128)
|
||||
OBMC_SUBPIX_VAR(128, 64)
|
||||
OBMC_SUBPIX_VAR(64, 128)
|
||||
OBMC_SUBPIX_VAR(64, 64)
|
||||
OBMC_SUBPIX_VAR(64, 32)
|
||||
OBMC_SUBPIX_VAR(32, 64)
|
||||
OBMC_SUBPIX_VAR(32, 32)
|
||||
OBMC_SUBPIX_VAR(32, 16)
|
||||
OBMC_SUBPIX_VAR(16, 32)
|
||||
OBMC_SUBPIX_VAR(16, 16)
|
||||
OBMC_SUBPIX_VAR(16, 8)
|
||||
OBMC_SUBPIX_VAR(8, 16)
|
||||
OBMC_SUBPIX_VAR(8, 8)
|
||||
OBMC_SUBPIX_VAR(8, 4)
|
||||
OBMC_SUBPIX_VAR(4, 8)
|
||||
OBMC_SUBPIX_VAR(4, 4)
|
||||
OBMC_SUBPIX_VAR(4, 16)
|
||||
OBMC_SUBPIX_VAR(16, 4)
|
||||
OBMC_SUBPIX_VAR(8, 32)
|
||||
OBMC_SUBPIX_VAR(32, 8)
|
||||
OBMC_SUBPIX_VAR(16, 64)
|
||||
OBMC_SUBPIX_VAR(64, 16)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// High bit-depth
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
// Computes diff[i] = src[i] - pred[i] for 32 consecutive 8-bit pixels and
// writes the 32 results as int16.
//
// The inputs are read with unaligned loads; the output is written with
// aligned 256-bit stores, so diff_ptr must be 32-byte aligned.
static INLINE void subtract32_avx2(int16_t *diff_ptr, const uint8_t *src_ptr,
                                   const uint8_t *pred_ptr) {
  const __m256i v_src = _mm256_lddqu_si256((__m256i *)(src_ptr));
  const __m256i v_pred = _mm256_lddqu_si256((__m256i *)(pred_ptr));

  // Pixels 0..15: widen the low 128-bit halves to 16 bits and subtract.
  const __m256i v_src_lo = _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_src));
  const __m256i v_pred_lo =
      _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_pred));
  const __m256i v_diff_lo = _mm256_sub_epi16(v_src_lo, v_pred_lo);

  // Pixels 16..31: same for the high 128-bit halves.
  const __m256i v_src_hi =
      _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_src, 1));
  const __m256i v_pred_hi =
      _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_pred, 1));
  const __m256i v_diff_hi = _mm256_sub_epi16(v_src_hi, v_pred_hi);

  _mm256_store_si256((__m256i *)(diff_ptr), v_diff_lo);
  _mm256_store_si256((__m256i *)(diff_ptr + 16), v_diff_hi);
}
|
||||
|
||||
// Residual for a 16-wide block: for each of `rows` rows, writes
// diff[i] = src[i] - pred[i] (i = 0..15) as int16 and then advances the
// three pointers by their respective strides.
//
// Each row is written with one aligned 256-bit store, so every row start in
// the diff buffer must be 32-byte aligned.
static INLINE void aom_subtract_block_16xn_avx2(
    int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
    ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  for (int32_t row = 0; row < rows; ++row) {
    const __m256i v_src =
        _mm256_cvtepu8_epi16(_mm_lddqu_si128((__m128i *)(src_ptr)));
    const __m256i v_pred =
        _mm256_cvtepu8_epi16(_mm_lddqu_si128((__m128i *)(pred_ptr)));
    _mm256_store_si256((__m256i *)(diff_ptr),
                       _mm256_sub_epi16(v_src, v_pred));

    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
  }
}
|
||||
|
||||
// Residual for a 32-wide block: one subtract32_avx2() call per row, followed
// by a stride step on each of the three pointers.
static INLINE void aom_subtract_block_32xn_avx2(
    int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
    ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  int32_t row = 0;
  while (row < rows) {
    subtract32_avx2(diff_ptr, src_ptr, pred_ptr);

    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
    ++row;
  }
}
|
||||
|
||||
// Residual for a 64-wide block: each row is covered by two 32-pixel
// subtract32_avx2() calls (columns 0..31 and 32..63).
static INLINE void aom_subtract_block_64xn_avx2(
    int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
    ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  for (int32_t row = 0; row < rows; ++row) {
    for (int col = 0; col < 64; col += 32) {
      subtract32_avx2(diff_ptr + col, src_ptr + col, pred_ptr + col);
    }

    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
  }
}
|
||||
|
||||
// Residual for a 128-wide block: each row is covered by four 32-pixel
// subtract32_avx2() calls (columns 0, 32, 64 and 96).
static INLINE void aom_subtract_block_128xn_avx2(
    int rows, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr,
    ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  for (int32_t row = 0; row < rows; ++row) {
    for (int col = 0; col < 128; col += 32) {
      subtract32_avx2(diff_ptr + col, src_ptr + col, pred_ptr + col);
    }

    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
  }
}
|
||||
|
||||
// Computes the residual diff = src - pred for a rows x cols block.
//
// Widths 16, 32, 64 and 128 are handled by the AVX2 row kernels in this
// file; every other width is delegated unchanged to
// aom_subtract_block_sse2().
void aom_subtract_block_avx2(int rows, int cols, int16_t *diff_ptr,
                             ptrdiff_t diff_stride, const uint8_t *src_ptr,
                             ptrdiff_t src_stride, const uint8_t *pred_ptr,
                             ptrdiff_t pred_stride) {
  if (cols == 16) {
    aom_subtract_block_16xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
                                 src_stride, pred_ptr, pred_stride);
  } else if (cols == 32) {
    aom_subtract_block_32xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
                                 src_stride, pred_ptr, pred_stride);
  } else if (cols == 64) {
    aom_subtract_block_64xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
                                 src_stride, pred_ptr, pred_stride);
  } else if (cols == 128) {
    aom_subtract_block_128xn_avx2(rows, diff_ptr, diff_stride, src_ptr,
                                  src_stride, pred_ptr, pred_stride);
  } else {
    // No AVX2 kernel for this width.
    aom_subtract_block_sse2(rows, cols, diff_ptr, diff_stride, src_ptr,
                            src_stride, pred_ptr, pred_stride);
  }
}
|
|
@ -0,0 +1,199 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AOM_DSP_X86_TXFM_COMMON_AVX2_H_
|
||||
#define AOM_DSP_X86_TXFM_COMMON_AVX2_H_
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include "aom/aom_integer.h"
|
||||
#include "aom_dsp/x86/synonyms.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Pointer to a 1-D transform kernel: reads vectors from `input`, writes
// vectors to `output`, and takes a `cos_bit` argument.
// NOTE(review): cos_bit presumably selects the precision of the cosine
// constants / rounding shift - confirm against the kernels.
typedef void (*transform_1d_avx2)(const __m256i *input,
                                  __m256i *output,
                                  int8_t cos_bit);
|
||||
|
||||
// Builds a vector in which every 32-bit lane holds the 16-bit pair (a, b):
// `a` in the low half and `b` in the high half.
static INLINE __m256i pair_set_w16_epi16(int16_t a, int16_t b) {
  const uint32_t low_half = (uint16_t)a;
  const uint32_t high_half = ((uint32_t)b) << 16;
  return _mm256_set1_epi32((int32_t)(low_half | high_half));
}
|
||||
|
||||
// In-place 16-bit butterfly on sixteen lanes.
//
// in0 and in1 are interleaved into 16-bit pairs; each pair is multiplied and
// summed against the 16-bit pairs in w0 (giving the new *in0) and in w1
// (giving the new *in1). Every 32-bit sum has `_r` added and is arithmetic
// shifted right by cos_bit before being packed back to 16 bits with signed
// saturation.
// NOTE(review): w0/w1 are presumably built with pair_set_w16_epi16() and _r
// is presumably the rounding constant 1 << (cos_bit - 1) - confirm at the
// call sites.
static INLINE void btf_16_w16_avx2(const __m256i w0, const __m256i w1,
                                   __m256i *in0, __m256i *in1, const __m256i _r,
                                   const int32_t cos_bit) {
  // Both inputs are read in full before either output is written.
  const __m256i pairs_lo = _mm256_unpacklo_epi16(*in0, *in1);
  const __m256i pairs_hi = _mm256_unpackhi_epi16(*in0, *in1);

  // First output: weights w0.
  const __m256i out0_lo = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_lo, w0), _r), cos_bit);
  const __m256i out0_hi = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_hi, w0), _r), cos_bit);

  // Second output: weights w1.
  const __m256i out1_lo = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_lo, w1), _r), cos_bit);
  const __m256i out1_hi = _mm256_srai_epi32(
      _mm256_add_epi32(_mm256_madd_epi16(pairs_hi, w1), _r), cos_bit);

  *in0 = _mm256_packs_epi32(out0_lo, out0_hi);
  *in1 = _mm256_packs_epi32(out1_lo, out1_hi);
}
|
||||
|
||||
// In-place 16-bit butterfly with signed saturation:
//   *in0 <- *in0 + *in1,  *in1 <- (old *in0) - *in1.
static INLINE void btf_16_adds_subs_avx2(__m256i *in0, __m256i *in1) {
  // Both results are computed before either input is overwritten.
  const __m256i sum = _mm256_adds_epi16(*in0, *in1);
  const __m256i diff = _mm256_subs_epi16(*in0, *in1);
  *in0 = sum;
  *in1 = diff;
}
|
||||
|
||||
// In-place 32-bit butterfly (wrapping arithmetic, no saturation):
//   *in0 <- *in0 + *in1,  *in1 <- (old *in0) - *in1.
static INLINE void btf_32_add_sub_avx2(__m256i *in0, __m256i *in1) {
  // Both results are computed before either input is overwritten.
  const __m256i sum = _mm256_add_epi32(*in0, *in1);
  const __m256i diff = _mm256_sub_epi32(*in0, *in1);
  *in0 = sum;
  *in1 = diff;
}
|
||||
|
||||
// Out-of-place 16-bit butterfly with signed saturation:
//   *out0 <- in0 + in1,  *out1 <- in0 - in1.
// The inputs are passed by value, so the outputs may refer to the caller's
// copies of in0 / in1 without affecting the result.
static INLINE void btf_16_adds_subs_out_avx2(__m256i *out0, __m256i *out1,
                                             __m256i in0, __m256i in1) {
  *out0 = _mm256_adds_epi16(in0, in1);
  *out1 = _mm256_subs_epi16(in0, in1);
}
|
||||
|
||||
// Out-of-place 32-bit butterfly (wrapping arithmetic, no saturation):
//   *out0 <- in0 + in1,  *out1 <- in0 - in1.
// The inputs are passed by value, so the outputs may refer to the caller's
// copies of in0 / in1 without affecting the result.
static INLINE void btf_32_add_sub_out_avx2(__m256i *out0, __m256i *out1,
                                           __m256i in0, __m256i in1) {
  *out0 = _mm256_add_epi32(in0, in1);
  *out1 = _mm256_sub_epi32(in0, in1);
}
|
||||
|
||||
// Loads sixteen consecutive int16 values starting at `a` into one vector.
// Uses the aligned-load intrinsic, so `a` must be 32-byte aligned.
static INLINE __m256i load_16bit_to_16bit_avx2(const int16_t *a) {
  const __m256i *const src = (const __m256i *)a;
  return _mm256_load_si256(src);
}
|
||||
|
||||
static INLINE void load_buffer_16bit_to_16bit_avx2(const int16_t *in,
|
||||
int stride, __m256i *out,
|
||||
int out_size) {
|
||||
for (int i = 0; i < out_size; ++i) {
|
||||
out[i] = load_16bit_to_16bit_avx2(in + i * stride);
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void load_buffer_16bit_to_16bit_flip_avx2(const int16_t *in,
|
||||
int stride,
|
||||
__m256i *out,
|
||||
int out_size) {
|
||||
for (int i = 0; i < out_size; ++i) {
|
||||
out[out_size - i - 1] = load_16bit_to_16bit_avx2(in + i * stride);
|
||||
}
|
||||
}
|
||||
|
||||
// Loads sixteen consecutive int32 values starting at `a` and narrows them to
// sixteen int16 values (signed saturation), preserving element order.
// `a` does not need to be 32-byte aligned: both halves are fetched with an
// explicit unaligned load. (Previously the upper half was read by
// dereferencing a cast __m256i pointer, which assumes 32-byte alignment and
// was inconsistent with the unaligned load used for the lower half.)
static INLINE __m256i load_32bit_to_16bit_w16_avx2(const int32_t *a) {
  const __m256i a_low = _mm256_lddqu_si256((const __m256i *)a);
  const __m256i a_high = _mm256_lddqu_si256((const __m256i *)(a + 8));
  // packs operates per 128-bit lane, yielding the 64-bit groups
  // { a[0..3], a[8..11], a[4..7], a[12..15] }.
  const __m256i b = _mm256_packs_epi32(a_low, a_high);
  // 0xD8 selects 64-bit groups 0, 2, 1, 3, restoring a[0..15] order.
  return _mm256_permute4x64_epi64(b, 0xD8);
}
|
||||
|
||||
static INLINE void load_buffer_32bit_to_16bit_w16_avx2(const int32_t *in,
|
||||
int stride, __m256i *out,
|
||||
int out_size) {
|
||||
for (int i = 0; i < out_size; ++i) {
|
||||
out[i] = load_32bit_to_16bit_w16_avx2(in + i * stride);
|
||||
}
|
||||
}
|
||||
|
||||
// Transposes a 16x16 block of 16-bit elements: in[r] holds row r (sixteen
// int16 values), and on return out[c] holds column c of the input.
//
// All sixteen input vectors are consumed before the first store to `out`.
//
// The work is done in three interleave stages followed by a lane shuffle:
//   a: 16-bit interleave of adjacent row pairs
//   b: 32-bit interleave of adjacent `a` pairs
//   c: 64-bit interleave of adjacent `b` pairs
// After stage c, each 128-bit lane of c[j] holds eight rows of one column;
// the final 128-bit permutes glue the two eight-row halves together.
static INLINE void transpose_16bit_16x16_avx2(const __m256i *const in,
                                              __m256i *const out) {
  // For output column k (k < 8), c[kSrc[k]] and c[kSrc[k] + 1] carry rows
  // 0-7 and rows 8-15 respectively; their low lanes hold column k and their
  // high lanes hold column k + 8.
  static const int kSrc[8] = { 0, 8, 4, 12, 2, 10, 6, 14 };

  __m256i a[16];
  for (int k = 0; k < 8; ++k) {
    a[k + 0] = _mm256_unpacklo_epi16(in[2 * k], in[2 * k + 1]);
    a[k + 8] = _mm256_unpackhi_epi16(in[2 * k], in[2 * k + 1]);
  }

  __m256i b[16];
  for (int k = 0; k < 8; ++k) {
    b[k + 0] = _mm256_unpacklo_epi32(a[2 * k], a[2 * k + 1]);
    b[k + 8] = _mm256_unpackhi_epi32(a[2 * k], a[2 * k + 1]);
  }

  __m256i c[16];
  for (int k = 0; k < 8; ++k) {
    c[k + 0] = _mm256_unpacklo_epi64(b[2 * k], b[2 * k + 1]);
    c[k + 8] = _mm256_unpackhi_epi64(b[2 * k], b[2 * k + 1]);
  }

  for (int k = 0; k < 8; ++k) {
    const __m256i rows_0_7 = c[kSrc[k]];
    const __m256i rows_8_15 = c[kSrc[k] + 1];
    // 0x20: low lanes of both operands; 0x31: high lanes of both operands.
    out[k + 0] = _mm256_permute2x128_si256(rows_0_7, rows_8_15, 0x20);
    out[k + 8] = _mm256_permute2x128_si256(rows_0_7, rows_8_15, 0x31);
  }
}
|
||||
|
||||
// Copies `size` vectors from `in` to `out` in reverse order:
// out[size - 1 - k] = in[k]. Elements are copied one at a time in ascending
// `in` order.
static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
  for (int src = 0, dst = size - 1; src < size; ++src, --dst) {
    out[dst] = in[src];
  }
}
|
||||
|
||||
// Shifts every int16 lane of in[0 .. size-1] in place.
//   bit > 0: logical left shift by `bit`.
//   bit < 0: adds 1 << (-bit - 1) with signed saturation, then arithmetic
//            right shift by -bit (rounding shift).
//   bit == 0: leaves the buffer untouched.
static INLINE void round_shift_16bit_w16_avx2(__m256i *in, int size, int bit) {
  if (bit == 0) return;

  if (bit > 0) {
    for (int k = 0; k < size; ++k) {
      in[k] = _mm256_slli_epi16(in[k], bit);
    }
    return;
  }

  const int shift = -bit;
  const __m256i half = _mm256_set1_epi16(1 << (shift - 1));
  for (int k = 0; k < size; ++k) {
    const __m256i biased = _mm256_adds_epi16(in[k], half);
    in[k] = _mm256_srai_epi16(biased, shift);
  }
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // AOM_DSP_X86_TXFM_COMMON_AVX2_H_
|
|
@ -324,6 +324,12 @@ static INLINE __m256i mm256_loadu2(const uint8_t *p0, const uint8_t *p1) {
|
|||
return _mm256_insertf128_si256(d, _mm_loadu_si128((const __m128i *)p0), 1);
|
||||
}
|
||||
|
||||
// Builds one 256-bit vector from two unaligned 128-bit loads of uint16 data:
// the eight values at `p1` fill the low lane and the eight values at `p0`
// fill the high lane.
static INLINE __m256i mm256_loadu2_16(const uint16_t *p0, const uint16_t *p1) {
  const __m128i low_lane = _mm_loadu_si128((const __m128i *)p1);
  const __m128i high_lane = _mm_loadu_si128((const __m128i *)p0);
  return _mm256_insertf128_si256(_mm256_castsi128_si256(low_lane), high_lane,
                                 1);
}
|
||||
|
||||
static INLINE void comp_mask_pred_line_avx2(const __m256i s0, const __m256i s1,
|
||||
const __m256i a,
|
||||
uint8_t *comp_pred) {
|
||||
|
@ -401,3 +407,110 @@ void aom_comp_mask_pred_avx2(uint8_t *comp_pred, const uint8_t *pred, int width,
|
|||
} while (i < height);
|
||||
}
|
||||
}
|
||||
|
||||
// Blends sixteen 16-bit samples from s0 and s1 using per-sample weights `a`
// (already widened to 16 bits):
//   out = (s0 * a + s1 * (M - a) + M / 2) >> AOM_BLEND_A64_ROUND_BITS
// with M = 1 << AOM_BLEND_A64_ROUND_BITS. The 32-bit results are narrowed
// back to 16 bits with signed saturation.
static INLINE __m256i highbd_comp_mask_pred_line_avx2(const __m256i s0,
                                                      const __m256i s1,
                                                      const __m256i a) {
  const __m256i max_alpha = _mm256_set1_epi16((1 << AOM_BLEND_A64_ROUND_BITS));
  const __m256i half = _mm256_set1_epi32((1 << AOM_BLEND_A64_ROUND_BITS) >> 1);
  const __m256i inv_a = _mm256_sub_epi16(max_alpha, a);

  // Pair each s0 sample with its s1 counterpart and each weight with its
  // complement so a single madd yields the weighted sum.
  const __m256i sum_lo = _mm256_madd_epi16(_mm256_unpacklo_epi16(s0, s1),
                                           _mm256_unpacklo_epi16(a, inv_a));
  const __m256i sum_hi = _mm256_madd_epi16(_mm256_unpackhi_epi16(s0, s1),
                                           _mm256_unpackhi_epi16(a, inv_a));

  const __m256i out_lo = _mm256_srai_epi32(_mm256_add_epi32(sum_lo, half),
                                           AOM_BLEND_A64_ROUND_BITS);
  const __m256i out_hi = _mm256_srai_epi32(_mm256_add_epi32(sum_hi, half),
                                           AOM_BLEND_A64_ROUND_BITS);

  return _mm256_packs_epi32(out_lo, out_hi);
}
|
||||
|
||||
void aom_highbd_comp_mask_pred_avx2(uint16_t *comp_pred, const uint8_t *pred8,
|
||||
int width, int height, const uint8_t *ref8,
|
||||
int ref_stride, const uint8_t *mask,
|
||||
int mask_stride, int invert_mask) {
|
||||
int i = 0;
|
||||
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
|
||||
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
|
||||
const uint16_t *src0 = invert_mask ? pred : ref;
|
||||
const uint16_t *src1 = invert_mask ? ref : pred;
|
||||
const int stride0 = invert_mask ? width : ref_stride;
|
||||
const int stride1 = invert_mask ? ref_stride : width;
|
||||
const __m256i zero = _mm256_setzero_si256();
|
||||
|
||||
if (width == 8) {
|
||||
do {
|
||||
const __m256i s0 = mm256_loadu2_16(src0 + stride0, src0);
|
||||
const __m256i s1 = mm256_loadu2_16(src1 + stride1, src1);
|
||||
|
||||
const __m128i m_l = _mm_loadl_epi64((const __m128i *)mask);
|
||||
const __m128i m_h = _mm_loadl_epi64((const __m128i *)(mask + 8));
|
||||
|
||||
__m256i m = _mm256_castsi128_si256(m_l);
|
||||
m = _mm256_insertf128_si256(m, m_h, 1);
|
||||
const __m256i m_16 = _mm256_unpacklo_epi8(m, zero);
|
||||
|
||||
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
|
||||
|
||||
_mm_storeu_si128((__m128i *)(comp_pred), _mm256_castsi256_si128(comp));
|
||||
|
||||
_mm_storeu_si128((__m128i *)(comp_pred + width),
|
||||
_mm256_extractf128_si256(comp, 1));
|
||||
|
||||
src0 += (stride0 << 1);
|
||||
src1 += (stride1 << 1);
|
||||
mask += (mask_stride << 1);
|
||||
comp_pred += (width << 1);
|
||||
i += 2;
|
||||
} while (i < height);
|
||||
} else if (width == 16) {
|
||||
do {
|
||||
const __m256i s0 = _mm256_loadu_si256((const __m256i *)(src0));
|
||||
const __m256i s1 = _mm256_loadu_si256((const __m256i *)(src1));
|
||||
const __m256i m_16 =
|
||||
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
|
||||
|
||||
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m_16);
|
||||
|
||||
_mm256_storeu_si256((__m256i *)comp_pred, comp);
|
||||
|
||||
src0 += stride0;
|
||||
src1 += stride1;
|
||||
mask += mask_stride;
|
||||
comp_pred += width;
|
||||
i += 1;
|
||||
} while (i < height);
|
||||
} else if (width == 32) {
|
||||
do {
|
||||
const __m256i s0 = _mm256_loadu_si256((const __m256i *)src0);
|
||||
const __m256i s2 = _mm256_loadu_si256((const __m256i *)(src0 + 16));
|
||||
const __m256i s1 = _mm256_loadu_si256((const __m256i *)src1);
|
||||
const __m256i s3 = _mm256_loadu_si256((const __m256i *)(src1 + 16));
|
||||
|
||||
const __m256i m01_16 =
|
||||
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)mask));
|
||||
const __m256i m23_16 =
|
||||
_mm256_cvtepu8_epi16(_mm_loadu_si128((const __m128i *)(mask + 16)));
|
||||
|
||||
const __m256i comp = highbd_comp_mask_pred_line_avx2(s0, s1, m01_16);
|
||||
const __m256i comp1 = highbd_comp_mask_pred_line_avx2(s2, s3, m23_16);
|
||||
|
||||
_mm256_storeu_si256((__m256i *)comp_pred, comp);
|
||||
_mm256_storeu_si256((__m256i *)(comp_pred + 16), comp1);
|
||||
|
||||
src0 += stride0;
|
||||
src1 += stride1;
|
||||
mask += mask_stride;
|
||||
comp_pred += width;
|
||||
i += 1;
|
||||
} while (i < height);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <tmmintrin.h>
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
#include "aom_dsp/x86/synonyms.h"
|
||||
|
||||
void aom_var_filter_block2d_bil_first_pass_ssse3(
|
||||
const uint8_t *a, uint16_t *b, unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step, unsigned int output_height,
|
||||
unsigned int output_width, const uint8_t *filter) {
|
||||
// Note: filter[0], filter[1] could be {128, 0}, where 128 will overflow
|
||||
// in computation using _mm_maddubs_epi16.
|
||||
// Change {128, 0} to {64, 0} and reduce FILTER_BITS by 1 to avoid overflow.
|
||||
const int16_t round = (1 << (FILTER_BITS - 1)) >> 1;
|
||||
const __m128i r = _mm_set1_epi16(round);
|
||||
const uint8_t f0 = filter[0] >> 1;
|
||||
const uint8_t f1 = filter[1] >> 1;
|
||||
const __m128i filters = _mm_setr_epi8(f0, f1, f0, f1, f0, f1, f0, f1, f0, f1,
|
||||
f0, f1, f0, f1, f0, f1);
|
||||
unsigned int i, j;
|
||||
(void)pixel_step;
|
||||
|
||||
if (output_width >= 8) {
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
for (j = 0; j < output_width; j += 8) {
|
||||
// load source
|
||||
__m128i source_low = xx_loadl_64(a);
|
||||
__m128i source_hi = xx_loadl_64(a + 1);
|
||||
|
||||
// unpack to:
|
||||
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
|
||||
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
|
||||
__m128i source = _mm_unpacklo_epi8(source_low, source_hi);
|
||||
|
||||
// b[i] = a[i] * filter[0] + a[i + 1] * filter[1]
|
||||
__m128i res = _mm_maddubs_epi16(source, filters);
|
||||
|
||||
// round
|
||||
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
|
||||
|
||||
xx_storeu_128(b, res);
|
||||
|
||||
a += 8;
|
||||
b += 8;
|
||||
}
|
||||
|
||||
a += src_pixels_per_line - output_width;
|
||||
}
|
||||
} else {
|
||||
const __m128i shuffle_mask =
|
||||
_mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
// load source, only first 5 values are meaningful:
|
||||
// { a[0], a[1], a[2], a[3], a[4], xxxx }
|
||||
__m128i source = xx_loadl_64(a);
|
||||
|
||||
// shuffle, up to the first 8 are useful
|
||||
// { a[0], a[1], a[1], a[2], a[2], a[3], a[3], a[4],
|
||||
// a[4], a[5], a[5], a[6], a[6], a[7], a[7], a[8] }
|
||||
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
|
||||
|
||||
__m128i res = _mm_maddubs_epi16(source_shuffle, filters);
|
||||
res = _mm_srai_epi16(_mm_add_epi16(res, r), FILTER_BITS - 1);
|
||||
|
||||
xx_storel_64(b, res);
|
||||
|
||||
a += src_pixels_per_line;
|
||||
b += output_width;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void aom_var_filter_block2d_bil_second_pass_ssse3(
|
||||
const uint16_t *a, uint8_t *b, unsigned int src_pixels_per_line,
|
||||
unsigned int pixel_step, unsigned int output_height,
|
||||
unsigned int output_width, const uint8_t *filter) {
|
||||
const int16_t round = (1 << FILTER_BITS) >> 1;
|
||||
const __m128i r = _mm_set1_epi32(round);
|
||||
const __m128i filters =
|
||||
_mm_setr_epi16(filter[0], filter[1], filter[0], filter[1], filter[0],
|
||||
filter[1], filter[0], filter[1]);
|
||||
const __m128i shuffle_mask =
|
||||
_mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15);
|
||||
const __m128i mask =
|
||||
_mm_setr_epi8(0, 4, 8, 12, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
|
||||
unsigned int i, j;
|
||||
|
||||
for (i = 0; i < output_height; ++i) {
|
||||
for (j = 0; j < output_width; j += 4) {
|
||||
// load source as:
|
||||
// { a[0], a[1], a[2], a[3], a[w], a[w+1], a[w+2], a[w+3] }
|
||||
__m128i source1 = xx_loadl_64(a);
|
||||
__m128i source2 = xx_loadl_64(a + pixel_step);
|
||||
__m128i source = _mm_unpacklo_epi64(source1, source2);
|
||||
|
||||
// shuffle source to:
|
||||
// { a[0], a[w], a[1], a[w+1], a[2], a[w+2], a[3], a[w+3] }
|
||||
__m128i source_shuffle = _mm_shuffle_epi8(source, shuffle_mask);
|
||||
|
||||
// b[i] = a[i] * filter[0] + a[w + i] * filter[1]
|
||||
__m128i res = _mm_madd_epi16(source_shuffle, filters);
|
||||
|
||||
// round
|
||||
res = _mm_srai_epi32(_mm_add_epi32(res, r), FILTER_BITS);
|
||||
|
||||
// shuffle to get each lower 8 bit of every 32 bit
|
||||
res = _mm_shuffle_epi8(res, mask);
|
||||
|
||||
xx_storel_32(b, res);
|
||||
|
||||
a += 4;
|
||||
b += 4;
|
||||
}
|
||||
|
||||
a += src_pixels_per_line - output_width;
|
||||
}
|
||||
}
|
|
@ -569,7 +569,7 @@ void aom_upsampled_pred_sse2(MACROBLOCKD *xd, const struct AV1Common *const cm,
|
|||
}
|
||||
}
|
||||
|
||||
const InterpFilterParams filter =
|
||||
const InterpFilterParams *filter =
|
||||
av1_get_interp_filter_params_with_block_size(EIGHTTAP_REGULAR, 8);
|
||||
|
||||
if (!subpel_x_q3 && !subpel_y_q3) {
|
||||
|
@ -633,12 +633,12 @@ void aom_upsampled_pred_sse2(MACROBLOCKD *xd, const struct AV1Common *const cm,
|
|||
const int16_t *const kernel_y =
|
||||
av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
|
||||
const int intermediate_height =
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
|
||||
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter->taps;
|
||||
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
|
||||
aom_convolve8_horiz(ref - ref_stride * ((filter.taps >> 1) - 1), ref_stride,
|
||||
temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
|
||||
intermediate_height);
|
||||
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1),
|
||||
aom_convolve8_horiz(ref - ref_stride * ((filter->taps >> 1) - 1),
|
||||
ref_stride, temp, MAX_SB_SIZE, kernel_x, 16, NULL, -1,
|
||||
width, intermediate_height);
|
||||
aom_convolve8_vert(temp + MAX_SB_SIZE * ((filter->taps >> 1) - 1),
|
||||
MAX_SB_SIZE, comp_pred, width, NULL, -1, kernel_y, 16,
|
||||
width, height);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
/* Implement a function wrapper to guarantee initialization
|
||||
* thread-safety for library singletons.
|
||||
*
|
||||
* NOTE: These functions use static locks, and can only be
|
||||
* NOTE: This function uses static locks, and can only be
|
||||
* used with one common argument per compilation unit. So
|
||||
*
|
||||
* file1.c:
|
||||
|
@ -25,8 +25,8 @@
|
|||
* ...
|
||||
* aom_once(foo);
|
||||
*
|
||||
* file2.c:
|
||||
* aom_once(bar);
|
||||
* file2.c:
|
||||
* aom_once(bar);
|
||||
*
|
||||
* will ensure foo() and bar() are each called only once, but in
|
||||
*
|
||||
|
@ -46,19 +46,19 @@
|
|||
* local initializers are not thread-safe in MSVC prior to Visual
|
||||
* Studio 2015.
|
||||
*
|
||||
* As a static, once_state will be zero-initialized as program start.
|
||||
* As a static, aom_once_state will be zero-initialized at program start.
|
||||
*/
|
||||
static LONG once_state;
|
||||
static void once(void (*func)(void)) {
|
||||
/* Try to advance once_state from its initial value of 0 to 1.
|
||||
static LONG aom_once_state;
|
||||
static void aom_once(void (*func)(void)) {
|
||||
/* Try to advance aom_once_state from its initial value of 0 to 1.
|
||||
* Only one thread can succeed in doing so.
|
||||
*/
|
||||
if (InterlockedCompareExchange(&once_state, 1, 0) == 0) {
|
||||
/* We're the winning thread, having set once_state to 1.
|
||||
if (InterlockedCompareExchange(&aom_once_state, 1, 0) == 0) {
|
||||
/* We're the winning thread, having set aom_once_state to 1.
|
||||
* Call our function. */
|
||||
func();
|
||||
/* Now advance once_state to 2, unblocking any other threads. */
|
||||
InterlockedIncrement(&once_state);
|
||||
/* Now advance aom_once_state to 2, unblocking any other threads. */
|
||||
InterlockedIncrement(&aom_once_state);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -66,10 +66,10 @@ static void once(void (*func)(void)) {
|
|||
* the state variable so we don't return before func()
|
||||
* has finished executing elsewhere.
|
||||
*
|
||||
* Try to advance once_state from 2 to 2, which is only possible
|
||||
* Try to advance aom_once_state from 2 to 2, which is only possible
|
||||
* after the winning thread advances it from 1 to 2.
|
||||
*/
|
||||
while (InterlockedCompareExchange(&once_state, 2, 2) != 2) {
|
||||
while (InterlockedCompareExchange(&aom_once_state, 2, 2) != 2) {
|
||||
/* State isn't yet 2. Try again.
|
||||
*
|
||||
* We are used for singleton initialization functions,
|
||||
|
@ -83,8 +83,8 @@ static void once(void (*func)(void)) {
|
|||
Sleep(0);
|
||||
}
|
||||
|
||||
/* We've seen once_state advance to 2, so we know func()
|
||||
* has been called. And we've left once_state as we found it,
|
||||
/* We've seen aom_once_state advance to 2, so we know func()
|
||||
* has been called. And we've left aom_once_state as we found it,
|
||||
* so other threads will have the same experience.
|
||||
*
|
||||
* It's safe to return now.
|
||||
|
@ -95,7 +95,7 @@ static void once(void (*func)(void)) {
|
|||
#elif CONFIG_MULTITHREAD && defined(__OS2__)
|
||||
#define INCL_DOS
|
||||
#include <os2.h>
|
||||
static void once(void (*func)(void)) {
|
||||
static void aom_once(void (*func)(void)) {
|
||||
static int done;
|
||||
|
||||
/* If the initialization is complete, return early. */
|
||||
|
@ -117,18 +117,15 @@ static void once(void (*func)(void)) {
|
|||
|
||||
#elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H
|
||||
#include <pthread.h>
|
||||
static void once(void (*func)(void)) {
|
||||
static void aom_once(void (*func)(void)) {
|
||||
static pthread_once_t lock = PTHREAD_ONCE_INIT;
|
||||
pthread_once(&lock, func);
|
||||
}
|
||||
|
||||
#else
|
||||
/* No-op version that performs no synchronization. *_rtcd() is idempotent,
|
||||
* so as long as your platform provides atomic loads/stores of pointers
|
||||
* no synchronization is strictly necessary.
|
||||
*/
|
||||
/* Default version that performs no synchronization. */
|
||||
|
||||
static void once(void (*func)(void)) {
|
||||
static void aom_once(void (*func)(void)) {
|
||||
static int done;
|
||||
|
||||
if (!done) {
|
||||
|
|
|
@ -15,4 +15,4 @@
|
|||
|
||||
#include "aom_ports/aom_once.h"
|
||||
|
||||
void aom_scale_rtcd() { once(setup_rtcd_internal); }
|
||||
void aom_scale_rtcd() { aom_once(setup_rtcd_internal); }
|
||||
|
|
|
@ -51,6 +51,10 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
|
|||
aom_codec_frame_buffer_t *fb,
|
||||
aom_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
|
||||
if (ybf) {
|
||||
#if CONFIG_SIZE_LIMIT
|
||||
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) return -1;
|
||||
#endif
|
||||
|
||||
const int aom_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
|
||||
const int aligned_width = (width + 7) & ~7;
|
||||
const int aligned_height = (height + 7) & ~7;
|
||||
|
@ -154,7 +158,7 @@ int aom_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
|
|||
(uv_border_h * uv_stride) + uv_border_w,
|
||||
aom_byte_align);
|
||||
|
||||
ybf->use_external_refernce_buffers = 0;
|
||||
ybf->use_external_reference_buffers = 0;
|
||||
|
||||
if (use_highbitdepth) {
|
||||
if (ybf->y_buffer_8bit) aom_free(ybf->y_buffer_8bit);
|
||||
|
|
|
@ -81,7 +81,7 @@ typedef struct yv12_buffer_config {
|
|||
|
||||
// Indicate whether y_buffer, u_buffer, and v_buffer points to the internally
|
||||
// allocated memory or external buffers.
|
||||
int use_external_refernce_buffers;
|
||||
int use_external_reference_buffers;
|
||||
// This is needed to store y_buffer, u_buffer, and v_buffer when set reference
|
||||
// uses an external reference, and restore those buffer pointers after the
|
||||
// external reference frame is no longer used.
|
||||
|
|
|
@ -369,7 +369,8 @@ typedef enum {
|
|||
} AVxWorkerStatus;
|
||||
|
||||
// Function to be called by the worker thread. Takes two opaque pointers as
|
||||
// arguments (data1 and data2), and should return false in case of error.
|
||||
// arguments (data1 and data2). Should return true on success and return false
|
||||
// in case of error.
|
||||
typedef int (*AVxWorkerHook)(void *, void *);
|
||||
|
||||
// Platform-dependent implementation details for the worker.
|
||||
|
@ -382,7 +383,7 @@ typedef struct {
|
|||
AVxWorkerHook hook; // hook to call
|
||||
void *data1; // first argument passed to 'hook'
|
||||
void *data2; // second argument passed to 'hook'
|
||||
int had_error; // return value of the last call to 'hook'
|
||||
int had_error; // true if a call to 'hook' returned false
|
||||
} AVxWorker;
|
||||
|
||||
// The interface for all thread-worker related functions. All these functions
|
||||
|
|
|
@ -83,6 +83,8 @@ static const arg_def_t outputfile =
|
|||
ARG_DEF("o", "output", 1, "Output file name pattern (see below)");
|
||||
static const arg_def_t threadsarg =
|
||||
ARG_DEF("t", "threads", 1, "Max threads to use");
|
||||
static const arg_def_t rowmtarg =
|
||||
ARG_DEF(NULL, "row-mt", 1, "Enable row based multi-threading");
|
||||
static const arg_def_t verbosearg =
|
||||
ARG_DEF("v", "verbose", 0, "Show version string");
|
||||
static const arg_def_t scalearg =
|
||||
|
@ -114,12 +116,12 @@ static const arg_def_t outallarg = ARG_DEF(
|
|||
NULL, "all-layers", 0, "Output all decoded frames of a scalable bitstream");
|
||||
|
||||
static const arg_def_t *all_args[] = {
|
||||
&help, &codecarg, &use_yv12, &use_i420, &flipuvarg,
|
||||
&rawvideo, &noblitarg, &progressarg, &limitarg, &skiparg,
|
||||
&postprocarg, &summaryarg, &outputfile, &threadsarg, &verbosearg,
|
||||
&scalearg, &fb_arg, &md5arg, &framestatsarg, &continuearg,
|
||||
&outbitdeptharg, &tilem, &tiler, &tilec, &isannexb,
|
||||
&oppointarg, &outallarg, NULL
|
||||
&help, &codecarg, &use_yv12, &use_i420, &flipuvarg,
|
||||
&rawvideo, &noblitarg, &progressarg, &limitarg, &skiparg,
|
||||
&postprocarg, &summaryarg, &outputfile, &threadsarg, &rowmtarg,
|
||||
&verbosearg, &scalearg, &fb_arg, &md5arg, &framestatsarg,
|
||||
&continuearg, &outbitdeptharg, &tilem, &tiler, &tilec,
|
||||
&isannexb, &oppointarg, &outallarg, NULL
|
||||
};
|
||||
|
||||
#if CONFIG_LIBYUV
|
||||
|
@ -512,6 +514,7 @@ static int main_loop(int argc, const char **argv_) {
|
|||
int do_scale = 0;
|
||||
int operating_point = 0;
|
||||
int output_all_layers = 0;
|
||||
unsigned int row_mt = 0;
|
||||
aom_image_t *scaled_img = NULL;
|
||||
aom_image_t *img_shifted = NULL;
|
||||
int frame_avail, got_data, flush_decoder = 0;
|
||||
|
@ -601,6 +604,15 @@ static int main_loop(int argc, const char **argv_) {
|
|||
summary = 1;
|
||||
} else if (arg_match(&arg, &threadsarg, argi)) {
|
||||
cfg.threads = arg_parse_uint(&arg);
|
||||
#if !CONFIG_MULTITHREAD
|
||||
if (cfg.threads > 1) {
|
||||
die("Error: --threads=%d is not supported when CONFIG_MULTITHREAD = "
|
||||
"0.\n",
|
||||
cfg.threads);
|
||||
}
|
||||
#endif
|
||||
} else if (arg_match(&arg, &rowmtarg, argi)) {
|
||||
row_mt = arg_parse_uint(&arg);
|
||||
} else if (arg_match(&arg, &verbosearg, argi)) {
|
||||
quiet = 0;
|
||||
} else if (arg_match(&arg, &scalearg, argi)) {
|
||||
|
@ -763,6 +775,11 @@ static int main_loop(int argc, const char **argv_) {
|
|||
aom_codec_error(&decoder));
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (aom_codec_control(&decoder, AV1D_SET_ROW_MT, row_mt)) {
|
||||
fprintf(stderr, "Failed to set row_mt: %s\n", aom_codec_error(&decoder));
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip);
|
||||
|
@ -910,9 +927,8 @@ static int main_loop(int argc, const char **argv_) {
|
|||
// Shift up or down if necessary
|
||||
if (output_bit_depth != 0) {
|
||||
const aom_img_fmt_t shifted_fmt =
|
||||
output_bit_depth == 8
|
||||
? img->fmt ^ (img->fmt & AOM_IMG_FMT_HIGHBITDEPTH)
|
||||
: img->fmt | AOM_IMG_FMT_HIGHBITDEPTH;
|
||||
output_bit_depth == 8 ? img->fmt & ~AOM_IMG_FMT_HIGHBITDEPTH
|
||||
: img->fmt | AOM_IMG_FMT_HIGHBITDEPTH;
|
||||
|
||||
if (shifted_fmt != img->fmt || output_bit_depth != img->bit_depth) {
|
||||
if (img_shifted &&
|
||||
|
|
|
@ -475,6 +475,13 @@ static const arg_def_t film_grain_test =
|
|||
static const arg_def_t film_grain_table =
|
||||
ARG_DEF(NULL, "film-grain-table", 1,
|
||||
"Path to file containing film grain parameters");
|
||||
#if CONFIG_DENOISE
|
||||
static const arg_def_t denoise_noise_level =
|
||||
ARG_DEF(NULL, "denoise-noise-level", 1,
|
||||
"Amount of noise (from 0 = don't denoise, to 50)");
|
||||
static const arg_def_t denoise_block_size =
|
||||
ARG_DEF(NULL, "denoise-block-size", 1, "Denoise block size (default = 32)");
|
||||
#endif
|
||||
static const arg_def_t enable_ref_frame_mvs =
|
||||
ARG_DEF(NULL, "enable-ref-frame-mvs", 1,
|
||||
"Enable temporal mv prediction (default is 1)");
|
||||
|
@ -656,6 +663,10 @@ static const arg_def_t *av1_args[] = { &cpu_used_av1,
|
|||
&timing_info,
|
||||
&film_grain_test,
|
||||
&film_grain_table,
|
||||
#if CONFIG_DENOISE
|
||||
&denoise_noise_level,
|
||||
&denoise_block_size,
|
||||
#endif
|
||||
&enable_ref_frame_mvs,
|
||||
&bitdeptharg,
|
||||
&inbitdeptharg,
|
||||
|
@ -708,6 +719,10 @@ static const int av1_arg_ctrl_map[] = { AOME_SET_CPUUSED,
|
|||
AV1E_SET_TIMING_INFO_TYPE,
|
||||
AV1E_SET_FILM_GRAIN_TEST_VECTOR,
|
||||
AV1E_SET_FILM_GRAIN_TABLE,
|
||||
#if CONFIG_DENOISE
|
||||
AV1E_SET_DENOISE_NOISE_LEVEL,
|
||||
AV1E_SET_DENOISE_BLOCK_SIZE,
|
||||
#endif
|
||||
AV1E_SET_ENABLE_REF_FRAME_MVS,
|
||||
AV1E_SET_ENABLE_DF,
|
||||
AV1E_SET_ENABLE_ORDER_HINT,
|
||||
|
|
|
@ -45,7 +45,6 @@ list(APPEND AOM_AV1_COMMON_SOURCES
|
|||
"${AOM_ROOT}/av1/common/entropymv.c"
|
||||
"${AOM_ROOT}/av1/common/entropymv.h"
|
||||
"${AOM_ROOT}/av1/common/enums.h"
|
||||
"${AOM_ROOT}/av1/common/filter.c"
|
||||
"${AOM_ROOT}/av1/common/filter.h"
|
||||
"${AOM_ROOT}/av1/common/frame_buffers.c"
|
||||
"${AOM_ROOT}/av1/common/frame_buffers.h"
|
||||
|
@ -274,7 +273,10 @@ list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_1
|
|||
list(APPEND AOM_AV1_ENCODER_INTRIN_AVX2
|
||||
"${AOM_ROOT}/av1/encoder/x86/av1_quantize_avx2.c"
|
||||
"${AOM_ROOT}/av1/encoder/x86/av1_highbd_quantize_avx2.c"
|
||||
"${AOM_ROOT}/av1/encoder/x86/error_intrin_avx2.c")
|
||||
"${AOM_ROOT}/av1/encoder/x86/error_intrin_avx2.c"
|
||||
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm_avx2.h"
|
||||
"${AOM_ROOT}/av1/encoder/x86/av1_fwd_txfm2d_avx2.c"
|
||||
"${AOM_ROOT}/av1/encoder/x86/wedge_utils_avx2.c")
|
||||
|
||||
list(APPEND AOM_AV1_ENCODER_INTRIN_NEON
|
||||
"${AOM_ROOT}/av1/encoder/arm/neon/quantize_neon.c")
|
||||
|
@ -296,7 +298,9 @@ list(APPEND AOM_AV1_COMMON_INTRIN_NEON
|
|||
"${AOM_ROOT}/av1/common/arm/blend_a64_vmask_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/reconinter_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/wiener_convolve_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/intrapred_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/selfguided_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.c"
|
||||
"${AOM_ROOT}/av1/common/arm/av1_inv_txfm_neon.h"
|
||||
"${AOM_ROOT}/av1/common/cdef_block_neon.c")
|
||||
|
||||
list(APPEND AOM_AV1_ENCODER_INTRIN_SSE4_2
|
||||
|
|
|
@ -94,6 +94,10 @@ struct av1_extracfg {
|
|||
int enable_warped_motion; // sequence level
|
||||
int allow_warped_motion; // frame level
|
||||
int enable_superres;
|
||||
#if CONFIG_DENOISE
|
||||
float noise_level;
|
||||
int noise_block_size;
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct av1_extracfg default_extra_cfg = {
|
||||
|
@ -160,6 +164,10 @@ static struct av1_extracfg default_extra_cfg = {
|
|||
1, // enable_warped_motion at sequence level
|
||||
1, // allow_warped_motion at frame level
|
||||
1, // superres
|
||||
#if CONFIG_DENOISE
|
||||
0, // noise_level
|
||||
32, // noise_block_size
|
||||
#endif
|
||||
};
|
||||
|
||||
struct aom_codec_alg_priv {
|
||||
|
@ -464,7 +472,7 @@ static aom_codec_err_t set_encoder_config(
|
|||
oxcf->buffer_model.num_units_in_decoding_tick = cfg->g_timebase.num;
|
||||
oxcf->timing_info.equal_picture_interval = 0;
|
||||
oxcf->decoder_model_info_present_flag = 1;
|
||||
oxcf->buffer_removal_delay_present = 1;
|
||||
oxcf->buffer_removal_time_present = 1;
|
||||
oxcf->display_model_info_present_flag = 1;
|
||||
}
|
||||
if (oxcf->init_framerate > 180) {
|
||||
|
@ -612,6 +620,10 @@ static aom_codec_err_t set_encoder_config(
|
|||
oxcf->film_grain_test_vector = extra_cfg->film_grain_test_vector;
|
||||
oxcf->film_grain_table_filename = extra_cfg->film_grain_table_filename;
|
||||
}
|
||||
#if CONFIG_DENOISE
|
||||
oxcf->noise_level = extra_cfg->noise_level;
|
||||
oxcf->noise_block_size = extra_cfg->noise_block_size;
|
||||
#endif
|
||||
oxcf->large_scale_tile = cfg->large_scale_tile;
|
||||
oxcf->single_tile_decoding =
|
||||
(oxcf->large_scale_tile) ? extra_cfg->single_tile_decoding : 0;
|
||||
|
@ -710,7 +722,7 @@ static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
|
|||
ctx->cfg = *cfg;
|
||||
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
|
||||
// On profile change, request a key frame
|
||||
force_key |= ctx->cpi->common.profile != ctx->oxcf.profile;
|
||||
force_key |= ctx->cpi->common.seq_params.profile != ctx->oxcf.profile;
|
||||
av1_change_config(ctx->cpi, &ctx->oxcf);
|
||||
}
|
||||
|
||||
|
@ -1055,6 +1067,23 @@ static aom_codec_err_t ctrl_set_film_grain_table(aom_codec_alg_priv_t *ctx,
|
|||
return update_extra_cfg(ctx, &extra_cfg);
|
||||
}
|
||||
|
||||
#if CONFIG_DENOISE
|
||||
// Control handler for AV1E_SET_DENOISE_NOISE_LEVEL. The control argument is
// divided by 10 and stored as noise_level in a copy of the extra config,
// which is then applied through update_extra_cfg().
static aom_codec_err_t ctrl_set_denoise_noise_level(aom_codec_alg_priv_t *ctx,
                                                    va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  const float requested_level =
      (float)CAST(AV1E_SET_DENOISE_NOISE_LEVEL, args);
  updated_cfg.noise_level = requested_level / 10.0f;
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
|
||||
// Control handler for AV1E_SET_DENOISE_BLOCK_SIZE. Copies the current extra
// config, overwrites noise_block_size with the control argument, and applies
// the result through update_extra_cfg().
static aom_codec_err_t ctrl_set_denoise_block_size(aom_codec_alg_priv_t *ctx,
                                                   va_list args) {
  struct av1_extracfg updated_cfg = ctx->extra_cfg;
  updated_cfg.noise_block_size = CAST(AV1E_SET_DENOISE_BLOCK_SIZE, args);
  return update_extra_cfg(ctx, &updated_cfg);
}
|
||||
#endif
|
||||
|
||||
static aom_codec_err_t ctrl_set_deltaq_mode(aom_codec_alg_priv_t *ctx,
|
||||
va_list args) {
|
||||
struct av1_extracfg extra_cfg = ctx->extra_cfg;
|
||||
|
@ -1119,7 +1148,7 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx,
|
|||
}
|
||||
|
||||
priv->extra_cfg = default_extra_cfg;
|
||||
once(av1_initialize_enc);
|
||||
aom_once(av1_initialize_enc);
|
||||
|
||||
res = validate_config(priv, &priv->cfg, &priv->extra_cfg);
|
||||
|
||||
|
@ -1200,6 +1229,9 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
|
||||
volatile aom_enc_frame_flags_t flags = enc_flags;
|
||||
|
||||
// The jmp_buf is valid only for the duration of the function that calls
|
||||
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
|
||||
// before it returns.
|
||||
if (setjmp(cpi->common.error.jmp)) {
|
||||
cpi->common.error.setjmp = 0;
|
||||
res = update_error_state(ctx, &cpi->common.error);
|
||||
|
@ -1259,7 +1291,6 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
if (cx_data_sz < ctx->cx_data_sz / 2) {
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
|
||||
"Compressed data buffer too small");
|
||||
return AOM_CODEC_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1275,8 +1306,8 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
!img, timebase)) {
|
||||
if (cpi->common.seq_params.frame_id_numbers_present_flag) {
|
||||
if (cpi->common.invalid_delta_frame_id_minus_1) {
|
||||
ctx->base.err_detail = "Invalid delta_frame_id_minus_1";
|
||||
return AOM_CODEC_ERROR;
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
|
||||
"Invalid delta_frame_id_minus_1");
|
||||
}
|
||||
}
|
||||
cpi->seq_params_locked = 1;
|
||||
|
@ -1305,7 +1336,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
// OBUs are preceded/succeeded by an unsigned leb128 coded integer.
|
||||
if (write_uleb_obu_size(obu_header_size, obu_payload_size,
|
||||
ctx->pending_cx_data) != AOM_CODEC_OK) {
|
||||
return AOM_CODEC_ERROR;
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
|
||||
}
|
||||
|
||||
frame_size += obu_header_size + obu_payload_size + length_field_size;
|
||||
|
@ -1315,7 +1346,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
size_t curr_frame_size = frame_size;
|
||||
if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
|
||||
AOM_CODEC_OK) {
|
||||
return AOM_CODEC_ERROR;
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
|
||||
}
|
||||
frame_size = curr_frame_size;
|
||||
|
||||
|
@ -1327,7 +1358,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
}
|
||||
if (write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
|
||||
AOM_CODEC_OK) {
|
||||
return AOM_CODEC_ERROR;
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
|
||||
}
|
||||
frame_size += length_field_size;
|
||||
}
|
||||
|
@ -1358,7 +1389,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
|
|||
}
|
||||
if (write_uleb_obu_size(0, (uint32_t)tu_size, ctx->pending_cx_data) !=
|
||||
AOM_CODEC_OK) {
|
||||
return AOM_CODEC_ERROR;
|
||||
aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
|
||||
}
|
||||
ctx->pending_cx_data_sz += length_field_size;
|
||||
}
|
||||
|
@ -1710,6 +1741,10 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
|
|||
{ AV1E_SET_SINGLE_TILE_DECODING, ctrl_set_single_tile_decoding },
|
||||
{ AV1E_SET_FILM_GRAIN_TEST_VECTOR, ctrl_set_film_grain_test_vector },
|
||||
{ AV1E_SET_FILM_GRAIN_TABLE, ctrl_set_film_grain_table },
|
||||
#if CONFIG_DENOISE
|
||||
{ AV1E_SET_DENOISE_NOISE_LEVEL, ctrl_set_denoise_noise_level },
|
||||
{ AV1E_SET_DENOISE_BLOCK_SIZE, ctrl_set_denoise_block_size },
|
||||
#endif  // CONFIG_DENOISE
|
||||
{ AV1E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
|
||||
|
||||
// Getters
|
||||
|
@ -1728,7 +1763,7 @@ static aom_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
|
|||
{
|
||||
// NOLINT
|
||||
0, // g_usage
|
||||
8, // g_threads
|
||||
0, // g_threads
|
||||
0, // g_profile
|
||||
|
||||
320, // g_width
|
||||
|
@ -1810,7 +1845,7 @@ CODEC_INTERFACE(aom_codec_av1_cx) = {
|
|||
NULL, // aom_codec_peek_si_fn_t
|
||||
NULL, // aom_codec_get_si_fn_t
|
||||
NULL, // aom_codec_decode_fn_t
|
||||
NULL, // aom_codec_frame_get_fn_t
|
||||
NULL, // aom_codec_get_frame_fn_t
|
||||
NULL // aom_codec_set_fb_fn_t
|
||||
},
|
||||
{
|
||||
|
|
|
@ -50,6 +50,7 @@ struct aom_codec_alg_priv {
|
|||
int decode_tile_col;
|
||||
unsigned int tile_mode;
|
||||
unsigned int ext_tile_debug;
|
||||
unsigned int row_mt;
|
||||
EXTERNAL_REFERENCES ext_refs;
|
||||
unsigned int is_annexb;
|
||||
int operating_point;
|
||||
|
@ -61,7 +62,7 @@ struct aom_codec_alg_priv {
|
|||
int last_submit_worker_id;
|
||||
int next_output_worker_id;
|
||||
int available_threads;
|
||||
aom_image_t *image_with_grain;
|
||||
aom_image_t *image_with_grain[MAX_NUM_SPATIAL_LAYERS];
|
||||
int need_resync; // wait for key/intra-only frame
|
||||
// BufferPool that holds all reference frames. Shared by all the FrameWorkers.
|
||||
BufferPool *buffer_pool;
|
||||
|
@ -101,7 +102,7 @@ static aom_codec_err_t decoder_init(aom_codec_ctx_t *ctx,
|
|||
// default values
|
||||
priv->cfg.cfg.ext_partition = 1;
|
||||
}
|
||||
priv->image_with_grain = NULL;
|
||||
av1_zero(priv->image_with_grain);
|
||||
}
|
||||
|
||||
return AOM_CODEC_OK;
|
||||
|
@ -139,7 +140,9 @@ static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
|
|||
|
||||
aom_free(ctx->frame_workers);
|
||||
aom_free(ctx->buffer_pool);
|
||||
if (ctx->image_with_grain) aom_img_free(ctx->image_with_grain);
|
||||
for (int i = 0; i < MAX_NUM_SPATIAL_LAYERS; i++) {
|
||||
if (ctx->image_with_grain[i]) aom_img_free(ctx->image_with_grain[i]);
|
||||
}
|
||||
aom_free(ctx);
|
||||
return AOM_CODEC_OK;
|
||||
}
|
||||
|
@ -339,16 +342,16 @@ static int frame_worker_hook(void *arg1, void *arg2) {
|
|||
const uint8_t *data = frame_worker_data->data;
|
||||
(void)arg2;
|
||||
|
||||
frame_worker_data->result = av1_receive_compressed_data(
|
||||
frame_worker_data->pbi, frame_worker_data->data_size, &data);
|
||||
int result = av1_receive_compressed_data(frame_worker_data->pbi,
|
||||
frame_worker_data->data_size, &data);
|
||||
frame_worker_data->data_end = data;
|
||||
|
||||
if (frame_worker_data->result != 0) {
|
||||
if (result != 0) {
|
||||
// Check decode result in serial decode.
|
||||
frame_worker_data->pbi->cur_buf->buf.corrupted = 1;
|
||||
frame_worker_data->pbi->need_resync = 1;
|
||||
}
|
||||
return !frame_worker_data->result;
|
||||
return !result;
|
||||
}
|
||||
|
||||
static aom_codec_err_t init_decoder(aom_codec_alg_priv_t *ctx) {
|
||||
|
@ -429,6 +432,7 @@ static aom_codec_err_t init_decoder(aom_codec_alg_priv_t *ctx) {
|
|||
frame_worker_data->pbi->operating_point = ctx->operating_point;
|
||||
frame_worker_data->pbi->output_all_layers = ctx->output_all_layers;
|
||||
frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
|
||||
frame_worker_data->pbi->row_mt = ctx->row_mt;
|
||||
|
||||
worker->hook = (AVxWorkerHook)frame_worker_hook;
|
||||
if (!winterface->reset(worker)) {
|
||||
|
@ -489,6 +493,7 @@ static aom_codec_err_t decode_one(aom_codec_alg_priv_t *ctx,
|
|||
frame_worker_data->pbi->dec_tile_row = ctx->decode_tile_row;
|
||||
frame_worker_data->pbi->dec_tile_col = ctx->decode_tile_col;
|
||||
frame_worker_data->pbi->ext_tile_debug = ctx->ext_tile_debug;
|
||||
frame_worker_data->pbi->row_mt = ctx->row_mt;
|
||||
frame_worker_data->pbi->ext_refs = ctx->ext_refs;
|
||||
|
||||
frame_worker_data->pbi->common.is_annexb = ctx->is_annexb;
|
||||
|
@ -592,21 +597,31 @@ static aom_codec_err_t decoder_decode(aom_codec_alg_priv_t *ctx,
|
|||
return res;
|
||||
}
|
||||
|
||||
aom_image_t *add_grain_if_needed(aom_image_t *img, aom_image_t *grain_img_buf,
|
||||
aom_film_grain_t *grain_params) {
|
||||
// If grain_params->apply_grain is false, returns img. Otherwise, adds film
|
||||
// grain to img, saves the result in *grain_img_ptr (allocating *grain_img_ptr
|
||||
// if necessary), and returns *grain_img_ptr.
|
||||
static aom_image_t *add_grain_if_needed(aom_image_t *img,
|
||||
aom_image_t **grain_img_ptr,
|
||||
aom_film_grain_t *grain_params) {
|
||||
if (!grain_params->apply_grain) return img;
|
||||
|
||||
if (grain_img_buf &&
|
||||
(img->d_w != grain_img_buf->d_w || img->d_h != grain_img_buf->d_h ||
|
||||
img->fmt != grain_img_buf->fmt || !(img->d_h % 2) || !(img->d_w % 2))) {
|
||||
aom_img_free(grain_img_buf);
|
||||
grain_img_buf = NULL;
|
||||
aom_image_t *grain_img_buf = *grain_img_ptr;
|
||||
|
||||
const int w_even = ALIGN_POWER_OF_TWO(img->d_w, 1);
|
||||
const int h_even = ALIGN_POWER_OF_TWO(img->d_h, 1);
|
||||
|
||||
if (grain_img_buf) {
|
||||
const int alloc_w = ALIGN_POWER_OF_TWO(grain_img_buf->d_w, 1);
|
||||
const int alloc_h = ALIGN_POWER_OF_TWO(grain_img_buf->d_h, 1);
|
||||
if (w_even != alloc_w || h_even != alloc_h ||
|
||||
img->fmt != grain_img_buf->fmt) {
|
||||
aom_img_free(grain_img_buf);
|
||||
grain_img_buf = NULL;
|
||||
}
|
||||
}
|
||||
if (!grain_img_buf) {
|
||||
int w_even = img->d_w % 2 ? img->d_w + 1 : img->d_w;
|
||||
int h_even = img->d_h % 2 ? img->d_h + 1 : img->d_h;
|
||||
grain_img_buf = aom_img_alloc(NULL, img->fmt, w_even, h_even, 16);
|
||||
grain_img_buf->bit_depth = img->bit_depth;
|
||||
*grain_img_ptr = grain_img_buf;
|
||||
}
|
||||
|
||||
av1_add_film_grain(grain_params, img, grain_img_buf);
|
||||
|
@ -649,8 +664,6 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
|
|||
aom_film_grain_t *grain_params;
|
||||
if (av1_get_raw_frame(frame_worker_data->pbi, *index, &sd,
|
||||
&grain_params) == 0) {
|
||||
*index += 1; // Advance the iterator to point to the next image
|
||||
|
||||
AV1Decoder *const pbi = frame_worker_data->pbi;
|
||||
AV1_COMMON *const cm = &pbi->common;
|
||||
RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
|
||||
|
@ -659,6 +672,7 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
|
|||
yuvconfig2image(&ctx->img, sd, frame_worker_data->user_priv);
|
||||
|
||||
if (!pbi->ext_tile_debug && cm->large_scale_tile) {
|
||||
*index += 1; // Advance the iterator to point to the next image
|
||||
img = &ctx->img;
|
||||
img->img_data = pbi->tile_list_output;
|
||||
img->sz = pbi->tile_list_size;
|
||||
|
@ -688,11 +702,14 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
|
|||
const int tile_col = AOMMIN(pbi->dec_tile_col, cm->tile_cols - 1);
|
||||
const int mi_col = tile_col * cm->tile_width;
|
||||
const int ssx = ctx->img.x_chroma_shift;
|
||||
const int is_hbd =
|
||||
(ctx->img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) ? 1 : 0;
|
||||
int plane;
|
||||
ctx->img.planes[0] += mi_col * MI_SIZE;
|
||||
ctx->img.planes[0] += mi_col * MI_SIZE * (1 + is_hbd);
|
||||
if (num_planes > 1) {
|
||||
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
|
||||
ctx->img.planes[plane] += mi_col * (MI_SIZE >> ssx);
|
||||
ctx->img.planes[plane] +=
|
||||
mi_col * (MI_SIZE >> ssx) * (1 + is_hbd);
|
||||
}
|
||||
}
|
||||
ctx->img.d_w =
|
||||
|
@ -703,7 +720,10 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
|
|||
img = &ctx->img;
|
||||
img->temporal_id = cm->temporal_layer_id;
|
||||
img->spatial_id = cm->spatial_layer_id;
|
||||
return add_grain_if_needed(img, ctx->image_with_grain, grain_params);
|
||||
aom_image_t *res = add_grain_if_needed(
|
||||
img, &ctx->image_with_grain[*index], grain_params);
|
||||
*index += 1; // Advance the iterator to point to the next image
|
||||
return res;
|
||||
}
|
||||
} else {
|
||||
// Decoding failed. Release the worker thread.
|
||||
|
@ -999,7 +1019,7 @@ static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
|
|||
FrameWorkerData *const frame_worker_data =
|
||||
(FrameWorkerData *)worker->data1;
|
||||
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
|
||||
*bit_depth = cm->bit_depth;
|
||||
*bit_depth = cm->seq_params.bit_depth;
|
||||
return AOM_CODEC_OK;
|
||||
} else {
|
||||
return AOM_CODEC_ERROR;
|
||||
|
@ -1009,6 +1029,64 @@ static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
|
|||
return AOM_CODEC_INVALID_PARAM;
|
||||
}
|
||||
|
||||
static aom_img_fmt_t get_img_format(int subsampling_x, int subsampling_y,
|
||||
int use_highbitdepth) {
|
||||
aom_img_fmt_t fmt = 0;
|
||||
|
||||
if (subsampling_x == 0 && subsampling_y == 0)
|
||||
fmt = AOM_IMG_FMT_I444;
|
||||
else if (subsampling_x == 1 && subsampling_y == 0)
|
||||
fmt = AOM_IMG_FMT_I422;
|
||||
else if (subsampling_x == 1 && subsampling_y == 1)
|
||||
fmt = AOM_IMG_FMT_I420;
|
||||
|
||||
if (use_highbitdepth) fmt |= AOM_IMG_FMT_HIGHBITDEPTH;
|
||||
return fmt;
|
||||
}
|
||||
|
||||
// Control handler for AV1D_GET_IMG_FORMAT. Writes the image format derived
// from the sequence parameters of the next output worker's decoder into the
// caller-supplied pointer.
// Returns AOM_CODEC_INVALID_PARAM when the pointer is NULL, AOM_CODEC_ERROR
// when the worker is NULL, and AOM_CODEC_OK otherwise.
static aom_codec_err_t ctrl_get_img_format(aom_codec_alg_priv_t *ctx,
                                           va_list args) {
  aom_img_fmt_t *const img_fmt = va_arg(args, aom_img_fmt_t *);
  AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];

  if (!img_fmt) return AOM_CODEC_INVALID_PARAM;
  if (!worker) return AOM_CODEC_ERROR;

  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
  const AV1_COMMON *const cm = &frame_worker_data->pbi->common;

  *img_fmt =
      get_img_format(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
                     cm->seq_params.use_highbitdepth);
  return AOM_CODEC_OK;
}
|
||||
|
||||
// Control handler for AV1D_GET_TILE_SIZE. Packs the tile dimensions of the
// next output worker's decoder into one unsigned int: tile_width * MI_SIZE in
// the bits above 16, plus tile_height * MI_SIZE.
// Returns AOM_CODEC_INVALID_PARAM when the output pointer is NULL,
// AOM_CODEC_ERROR when the worker is NULL, and AOM_CODEC_OK otherwise.
static aom_codec_err_t ctrl_get_tile_size(aom_codec_alg_priv_t *ctx,
                                          va_list args) {
  unsigned int *const tile_size = va_arg(args, unsigned int *);
  AVxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];

  if (!tile_size) return AOM_CODEC_INVALID_PARAM;
  if (!worker) return AOM_CODEC_ERROR;

  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
  const AV1_COMMON *const cm = &frame_worker_data->pbi->common;

  *tile_size = ((cm->tile_width * MI_SIZE) << 16) + cm->tile_height * MI_SIZE;
  return AOM_CODEC_OK;
}
|
||||
|
||||
static aom_codec_err_t ctrl_set_invert_tile_order(aom_codec_alg_priv_t *ctx,
|
||||
va_list args) {
|
||||
ctx->invert_tile_order = va_arg(args, int);
|
||||
|
@ -1124,6 +1202,12 @@ static aom_codec_err_t ctrl_ext_tile_debug(aom_codec_alg_priv_t *ctx,
|
|||
return AOM_CODEC_OK;
|
||||
}
|
||||
|
||||
// Control handler for AV1D_SET_ROW_MT: stores the unsigned int control
// argument in ctx->row_mt and always reports success.
static aom_codec_err_t ctrl_set_row_mt(aom_codec_alg_priv_t *ctx,
                                       va_list args) {
  const unsigned int requested = va_arg(args, unsigned int);
  ctx->row_mt = requested;
  return AOM_CODEC_OK;
}
|
||||
|
||||
static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
|
||||
{ AV1_COPY_REFERENCE, ctrl_copy_reference },
|
||||
|
||||
|
@ -1145,6 +1229,7 @@ static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
|
|||
{ AV1D_SET_OUTPUT_ALL_LAYERS, ctrl_set_output_all_layers },
|
||||
{ AV1_SET_INSPECTION_CALLBACK, ctrl_set_inspection_callback },
|
||||
{ AV1D_EXT_TILE_DEBUG, ctrl_ext_tile_debug },
|
||||
{ AV1D_SET_ROW_MT, ctrl_set_row_mt },
|
||||
{ AV1D_SET_EXT_REF_PTR, ctrl_set_ext_ref_ptr },
|
||||
|
||||
// Getters
|
||||
|
@ -1152,6 +1237,8 @@ static aom_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
|
|||
{ AOMD_GET_LAST_QUANTIZER, ctrl_get_last_quantizer },
|
||||
{ AOMD_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates },
|
||||
{ AV1D_GET_BIT_DEPTH, ctrl_get_bit_depth },
|
||||
{ AV1D_GET_IMG_FORMAT, ctrl_get_img_format },
|
||||
{ AV1D_GET_TILE_SIZE, ctrl_get_tile_size },
|
||||
{ AV1D_GET_DISPLAY_SIZE, ctrl_get_render_size },
|
||||
{ AV1D_GET_FRAME_SIZE, ctrl_get_frame_size },
|
||||
{ AV1_GET_ACCOUNTING, ctrl_get_accounting },
|
||||
|
@ -1180,7 +1267,7 @@ CODEC_INTERFACE(aom_codec_av1_dx) = {
|
|||
decoder_peek_si, // aom_codec_peek_si_fn_t
|
||||
decoder_get_si, // aom_codec_get_si_fn_t
|
||||
decoder_decode, // aom_codec_decode_fn_t
|
||||
decoder_get_frame, // aom_codec_frame_get_fn_t
|
||||
decoder_get_frame, // aom_codec_get_frame_fn_t
|
||||
decoder_set_fb_fn, // aom_codec_set_fb_fn_t
|
||||
},
|
||||
{
|
||||
|
|
|
@ -137,11 +137,11 @@ void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
|
|||
// Now we need to allocate enough space to store the line buffers for the
|
||||
// stripes
|
||||
const int frame_w = cm->superres_upscaled_width;
|
||||
const int use_highbd = cm->use_highbitdepth ? 1 : 0;
|
||||
const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0;
|
||||
|
||||
for (int p = 0; p < num_planes; ++p) {
|
||||
const int is_uv = p > 0;
|
||||
const int ss_x = is_uv && cm->subsampling_x;
|
||||
const int ss_x = is_uv && cm->seq_params.subsampling_x;
|
||||
const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
|
||||
const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
|
||||
const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT
|
||||
|
|
|
@ -0,0 +1,844 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
#include "config/av1_rtcd.h"
|
||||
|
||||
#include "av1/common/av1_inv_txfm1d.h"
|
||||
#include "av1/common/av1_inv_txfm1d_cfg.h"
|
||||
#include "av1/common/av1_txfm.h"
|
||||
#include "av1/common/enums.h"
|
||||
#include "av1/common/idct.h"
|
||||
#include "av1/common/arm/av1_inv_txfm_neon.h"
|
||||
|
||||
// Selects the transform set for a transform size, based on the size that
// txsize_sqr_up_map assigns to it:
//   above TX_32X32 -> EXT_TX_SET_DCTONLY
//   TX_32X32       -> EXT_TX_SET_DCT_IDTX
//   otherwise      -> EXT_TX_SET_ALL16
static INLINE TxSetType find_TxSetType(TX_SIZE tx_size) {
  const TX_SIZE sqr_up = txsize_sqr_up_map[tx_size];

  if (sqr_up > TX_32X32) return EXT_TX_SET_DCTONLY;
  if (sqr_up == TX_32X32) return EXT_TX_SET_DCT_IDTX;
  return EXT_TX_SET_ALL16;
}
|
||||
|
||||
// 1D itx types
|
||||
// 1D inverse transform kinds, used as the second index of the 1D function
// tables below. IFLIPADST_1D deliberately shares IADST_1D's value, so both
// select the same table column.
typedef enum ATTRIBUTE_PACKED {
  IDCT_1D = 0,
  IADST_1D = 1,
  IFLIPADST_1D = IADST_1D,
  IIDENTITY_1D = 2,
  ITX_TYPES_1D = 3,  // number of distinct columns
} ITX_TYPE_1D;
|
||||
|
||||
static const ITX_TYPE_1D vitx_1d_tab[TX_TYPES] = {
|
||||
IDCT_1D, IADST_1D, IDCT_1D, IADST_1D,
|
||||
IFLIPADST_1D, IDCT_1D, IFLIPADST_1D, IADST_1D,
|
||||
IFLIPADST_1D, IIDENTITY_1D, IDCT_1D, IIDENTITY_1D,
|
||||
IADST_1D, IIDENTITY_1D, IFLIPADST_1D, IIDENTITY_1D,
|
||||
};
|
||||
|
||||
static const ITX_TYPE_1D hitx_1d_tab[TX_TYPES] = {
|
||||
IDCT_1D, IDCT_1D, IADST_1D, IADST_1D,
|
||||
IDCT_1D, IFLIPADST_1D, IFLIPADST_1D, IFLIPADST_1D,
|
||||
IADST_1D, IIDENTITY_1D, IIDENTITY_1D, IDCT_1D,
|
||||
IIDENTITY_1D, IADST_1D, IIDENTITY_1D, IFLIPADST_1D,
|
||||
};
|
||||
|
||||
// 1D functions
|
||||
static const transform_1d_neon lowbd_txfm_all_1d_arr[TX_SIZES][ITX_TYPES_1D] = {
|
||||
{ av1_idct4_new, av1_iadst4_new, av1_iidentity4_c },
|
||||
{ av1_idct8_new, av1_iadst8_new, av1_iidentity8_c },
|
||||
{ av1_idct16_new, av1_iadst16_new, av1_iidentity16_c },
|
||||
{ av1_idct32_new, NULL, NULL },
|
||||
{ av1_idct64_new, NULL, NULL },
|
||||
};
|
||||
|
||||
// Functions for blocks with eob at DC and within
|
||||
// topleft 8x8, 16x16, 32x32 corner
|
||||
static const transform_1d_neon
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[TX_SIZES][ITX_TYPES_1D][4] = {
|
||||
{
|
||||
{ av1_idct4_new, av1_idct4_new, NULL, NULL },
|
||||
{ av1_iadst4_new, av1_iadst4_new, NULL, NULL },
|
||||
{ av1_iidentity4_c, av1_iidentity4_c, NULL, NULL },
|
||||
},
|
||||
{ { av1_idct8_new, av1_idct8_new, NULL, NULL },
|
||||
{ av1_iadst8_new, av1_iadst8_new, NULL, NULL },
|
||||
{ av1_iidentity8_c, av1_iidentity8_c, NULL, NULL } },
|
||||
{
|
||||
{ av1_idct16_new, av1_idct16_new, av1_idct16_new, NULL },
|
||||
{ av1_iadst16_new, av1_iadst16_new, av1_iadst16_new, NULL },
|
||||
{ av1_iidentity16_c, av1_iidentity16_c, av1_iidentity16_c, NULL },
|
||||
},
|
||||
{ { av1_idct32_new, av1_idct32_new, av1_idct32_new, av1_idct32_new },
|
||||
{ NULL, NULL, NULL, NULL },
|
||||
{ av1_iidentity32_c, av1_iidentity32_c, av1_iidentity32_c,
|
||||
av1_iidentity32_c } },
|
||||
{ { av1_idct64_new, av1_idct64_new, av1_idct64_new, av1_idct64_new },
|
||||
{ NULL, NULL, NULL, NULL },
|
||||
{ NULL, NULL, NULL, NULL } }
|
||||
};
|
||||
static INLINE void lowbd_inv_txfm2d_add_idtx_neon(const int32_t *input,
|
||||
uint8_t *output, int stride,
|
||||
TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
int eobx, eoby;
|
||||
get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
|
||||
|
||||
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
|
||||
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
|
||||
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
|
||||
|
||||
assert(col_txfm != NULL);
|
||||
assert(row_txfm != NULL);
|
||||
|
||||
// row tx
|
||||
int row_start = (buf_size_nonzero_h_div8 * 8);
|
||||
for (int i = 0; i < row_start; i++) {
|
||||
if (abs(rect_type) == 1) {
|
||||
for (int j = 0; j < txfm_size_col; j++)
|
||||
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
|
||||
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
|
||||
} else {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
}
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
|
||||
// Doing memset for the rows which are not processed in row transform.
|
||||
memset(buf_ptr, 0,
|
||||
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
|
||||
|
||||
// col tx
|
||||
for (int c = 0; c < txfm_size_col; c++) {
|
||||
for (r = 0; r < txfm_size_row; ++r) temp_in[r] = buf[r * txfm_size_col + c];
|
||||
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void lowbd_inv_txfm2d_add_v_identity_neon(
|
||||
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
int eobx, eoby;
|
||||
get_eobx_eoby_scan_v_identity(&eobx, &eoby, tx_size, eob);
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
|
||||
|
||||
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
|
||||
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
|
||||
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
|
||||
|
||||
assert(col_txfm != NULL);
|
||||
assert(row_txfm != NULL);
|
||||
int ud_flip, lr_flip;
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
|
||||
// row tx
|
||||
int row_start = (buf_size_nonzero_h_div8 * 8);
|
||||
for (int i = 0; i < row_start; i++) {
|
||||
if (abs(rect_type) == 1) {
|
||||
for (int j = 0; j < txfm_size_col; j++)
|
||||
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
|
||||
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
|
||||
} else {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
}
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
// Doing memset for the rows which are not processed in row transform.
|
||||
memset(buf_ptr, 0,
|
||||
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
|
||||
|
||||
// col tx
|
||||
for (int c = 0; c < txfm_size_col; c++) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void lowbd_inv_txfm2d_add_h_identity_neon(
|
||||
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[32 * 32 + 32 + 32]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
int eobx, eoby;
|
||||
get_eobx_eoby_scan_h_identity(&eobx, &eoby, tx_size, eob);
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
|
||||
|
||||
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
|
||||
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
|
||||
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
|
||||
|
||||
assert(col_txfm != NULL);
|
||||
assert(row_txfm != NULL);
|
||||
int ud_flip, lr_flip;
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
|
||||
// row tx
|
||||
int row_start = (buf_size_nonzero_h_div8 * 8);
|
||||
for (int i = 0; i < row_start; i++) {
|
||||
if (abs(rect_type) == 1) {
|
||||
for (int j = 0; j < txfm_size_col; j++)
|
||||
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
|
||||
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
|
||||
} else {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
}
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
// Doing memset for the rows which are not processed in row transform.
|
||||
memset(buf_ptr, 0,
|
||||
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
|
||||
|
||||
// col tx
|
||||
for (int c = 0; c < txfm_size_col; c++) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void lowbd_inv_txfm2d_add_4x4_neon(const int32_t *input,
|
||||
uint8_t *output, int stride,
|
||||
TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
(void)eob;
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[4 * 4 + 8 + 8]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
|
||||
|
||||
int ud_flip, lr_flip;
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
|
||||
for (int i = 0; i < txfm_size_row; i++) {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
|
||||
for (int c = 0; c < txfm_size_col; ++c) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inverse 2-D transform of a 4x8 coefficient block, accumulated into the
// 8-bit destination.
//
// Every input row is first multiplied by NewInvSqrt2 and passed through
// round_shift(..., NewSqrt2Bits), then fed to the 1-D row transform. Columns
// of the intermediate result go through the 1-D column transform, are
// round-shifted by -shift[1], and are added to `output` through
// highbd_clip_pixel_add() with a bit depth of 8.
//
// input   - coefficients, row-major with a row pitch of the transform width.
// output  - destination pixels, updated in place.
// stride  - row pitch of `output`, in pixels.
// tx_type - selects the row/column 1-D kernels and the flip configuration.
// tx_size - selects the dimensions, shift table and cos-bit values.
// eob     - unused by this function.
void lowbd_inv_txfm2d_add_4x8_neon(const int32_t *input, uint8_t *output,
                                   int stride, TX_TYPE tx_type, TX_SIZE tx_size,
                                   int eob) {
  (void)eob;
  DECLARE_ALIGNED(32, int, txfm_buf[4 * 8 + 8 + 8]);

  const int w_idx = get_txw_idx(tx_size);
  const int h_idx = get_txh_idx(tx_size);
  const int width = tx_size_wide[tx_size];
  const int height = tx_size_high[tx_size];
  const int scratch_len = AOMMAX(height, width);
  const int8_t *shift = inv_txfm_shift_ls[tx_size];
  const int row_cos_bit = inv_cos_bit_row[w_idx][h_idx];
  const int col_cos_bit = inv_cos_bit_col[w_idx][h_idx];
  const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
  const int bd = 8;

  // Scratch layout: [1-D input line | 1-D output line | row-pass result].
  int32_t *line_in = txfm_buf;
  int32_t *line_out = line_in + scratch_len;
  int32_t *rows_out = line_out + scratch_len;

  const transform_1d_neon row_txfm =
      lowbd_txfm_all_1d_arr[w_idx][hitx_1d_tab[tx_type]];
  const transform_1d_neon col_txfm =
      lowbd_txfm_all_1d_arr[h_idx][vitx_1d_tab[tx_type]];

  int ud_flip, lr_flip;
  get_flip_cfg(tx_type, &ud_flip, &lr_flip);

  // Row pass: pre-scale each coefficient, then transform the scaled row.
  for (int y = 0; y < height; ++y) {
    const int32_t *src_row = input + y * width;
    for (int x = 0; x < width; ++x) {
      line_in[x] =
          round_shift((int64_t)src_row[x] * NewInvSqrt2, NewSqrt2Bits);
    }
    row_txfm(line_in, rows_out + y * width, row_cos_bit, stage_range);
  }

  // Column pass: gather one column (mirrored when lr_flip is set), transform
  // it, then accumulate into the destination (reversed when ud_flip is set).
  for (int x = 0; x < width; ++x) {
    const int src_x = lr_flip ? (width - x - 1) : x;
    for (int y = 0; y < height; ++y) line_in[y] = rows_out[y * width + src_x];

    col_txfm(line_in, line_out, col_cos_bit, stage_range);
    av1_round_shift_array(line_out, height, -shift[1]);

    for (int y = 0; y < height; ++y) {
      const int src_y = ud_flip ? (height - y - 1) : y;
      uint8_t *px = &output[y * stride + x];
      *px = highbd_clip_pixel_add(*px, line_out[src_y], bd);
    }
  }
}
|
||||
|
||||
// Inverse 2-D transform of an 8x4 coefficient block, accumulated into the
// 8-bit destination.
//
// Every input row is first multiplied by NewInvSqrt2 and passed through
// round_shift(..., NewSqrt2Bits), then fed to the 1-D row transform. Columns
// of the intermediate result go through the 1-D column transform, are
// round-shifted by -shift[1], and are added to `output` through
// highbd_clip_pixel_add() with a bit depth of 8.
//
// input   - coefficients, row-major with a row pitch of the transform width.
// output  - destination pixels, updated in place.
// stride  - row pitch of `output`, in pixels.
// tx_type - selects the row/column 1-D kernels and the flip configuration.
// tx_size - selects the dimensions, shift table and cos-bit values.
// eob     - unused by this function.
void lowbd_inv_txfm2d_add_8x4_neon(const int32_t *input, uint8_t *output,
                                   int stride, TX_TYPE tx_type, TX_SIZE tx_size,
                                   int eob) {
  (void)eob;
  DECLARE_ALIGNED(32, int, txfm_buf[8 * 4 + 8 + 8]);

  const int w_idx = get_txw_idx(tx_size);
  const int h_idx = get_txh_idx(tx_size);
  const int width = tx_size_wide[tx_size];
  const int height = tx_size_high[tx_size];
  const int scratch_len = AOMMAX(height, width);
  const int8_t *shift = inv_txfm_shift_ls[tx_size];
  const int row_cos_bit = inv_cos_bit_row[w_idx][h_idx];
  const int col_cos_bit = inv_cos_bit_col[w_idx][h_idx];
  const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
  const int bd = 8;

  // Scratch layout: [1-D input line | 1-D output line | row-pass result].
  int32_t *line_in = txfm_buf;
  int32_t *line_out = line_in + scratch_len;
  int32_t *rows_out = line_out + scratch_len;

  const transform_1d_neon row_txfm =
      lowbd_txfm_all_1d_arr[w_idx][hitx_1d_tab[tx_type]];
  const transform_1d_neon col_txfm =
      lowbd_txfm_all_1d_arr[h_idx][vitx_1d_tab[tx_type]];

  int ud_flip, lr_flip;
  get_flip_cfg(tx_type, &ud_flip, &lr_flip);

  // Row pass: pre-scale each coefficient, then transform the scaled row.
  for (int y = 0; y < height; ++y) {
    const int32_t *src_row = input + y * width;
    for (int x = 0; x < width; ++x) {
      line_in[x] =
          round_shift((int64_t)src_row[x] * NewInvSqrt2, NewSqrt2Bits);
    }
    row_txfm(line_in, rows_out + y * width, row_cos_bit, stage_range);
  }

  // Column pass: gather one column (mirrored when lr_flip is set), transform
  // it, then accumulate into the destination (reversed when ud_flip is set).
  for (int x = 0; x < width; ++x) {
    const int src_x = lr_flip ? (width - x - 1) : x;
    for (int y = 0; y < height; ++y) line_in[y] = rows_out[y * width + src_x];

    col_txfm(line_in, line_out, col_cos_bit, stage_range);
    av1_round_shift_array(line_out, height, -shift[1]);

    for (int y = 0; y < height; ++y) {
      const int src_y = ud_flip ? (height - y - 1) : y;
      uint8_t *px = &output[y * stride + x];
      *px = highbd_clip_pixel_add(*px, line_out[src_y], bd);
    }
  }
}
|
||||
|
||||
void lowbd_inv_txfm2d_add_4x16_neon(const int32_t *input, uint8_t *output,
|
||||
int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
(void)eob;
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[4 * 16 + 16 + 16]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
|
||||
|
||||
int ud_flip, lr_flip;
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
|
||||
for (int i = 0; i < txfm_size_row; i++) {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
|
||||
for (int c = 0; c < txfm_size_col; ++c) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void lowbd_inv_txfm2d_add_16x4_neon(const int32_t *input, uint8_t *output,
|
||||
int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
(void)eob;
|
||||
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[16 * 4 + 16 + 16]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
int r, bd = 8;
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_arr[txw_idx][hitx_1d_tab[tx_type]];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_arr[txh_idx][vitx_1d_tab[tx_type]];
|
||||
|
||||
int ud_flip, lr_flip;
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
|
||||
for (int i = 0; i < txfm_size_row; i++) {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
|
||||
for (int c = 0; c < txfm_size_col; ++c) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void lowbd_inv_txfm2d_add_no_identity_neon(
|
||||
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
DECLARE_ALIGNED(32, int, txfm_buf[64 * 64 + 64 + 64]);
|
||||
int32_t *temp_in = txfm_buf;
|
||||
|
||||
int eobx, eoby, ud_flip, lr_flip, row_start;
|
||||
get_eobx_eoby_scan_default(&eobx, &eoby, tx_size, eob);
|
||||
const int8_t *shift = inv_txfm_shift_ls[tx_size];
|
||||
const int txw_idx = get_txw_idx(tx_size);
|
||||
const int txh_idx = get_txh_idx(tx_size);
|
||||
const int cos_bit_col = inv_cos_bit_col[txw_idx][txh_idx];
|
||||
const int cos_bit_row = inv_cos_bit_row[txw_idx][txh_idx];
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int buf_size_nonzero_h_div8 = (eoby + 8) >> 3;
|
||||
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
|
||||
const int buf_offset = AOMMAX(txfm_size_row, txfm_size_col);
|
||||
|
||||
int32_t *temp_out = temp_in + buf_offset;
|
||||
int32_t *buf = temp_out + buf_offset;
|
||||
int32_t *buf_ptr = buf;
|
||||
const int8_t stage_range[MAX_TXFM_STAGE_NUM] = { 16 };
|
||||
const int bd = 8;
|
||||
int r;
|
||||
|
||||
const int fun_idx_x = lowbd_txfm_all_1d_zeros_idx[eobx];
|
||||
const int fun_idx_y = lowbd_txfm_all_1d_zeros_idx[eoby];
|
||||
const transform_1d_neon row_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txw_idx][hitx_1d_tab[tx_type]][fun_idx_x];
|
||||
const transform_1d_neon col_txfm =
|
||||
lowbd_txfm_all_1d_zeros_w8_arr[txh_idx][vitx_1d_tab[tx_type]][fun_idx_y];
|
||||
|
||||
assert(col_txfm != NULL);
|
||||
assert(row_txfm != NULL);
|
||||
|
||||
get_flip_cfg(tx_type, &ud_flip, &lr_flip);
|
||||
row_start = (buf_size_nonzero_h_div8 << 3);
|
||||
|
||||
for (int i = 0; i < row_start; i++) {
|
||||
if (abs(rect_type) == 1) {
|
||||
for (int j = 0; j < txfm_size_col; j++)
|
||||
temp_in[j] = round_shift((int64_t)input[j] * NewInvSqrt2, NewSqrt2Bits);
|
||||
row_txfm(temp_in, buf_ptr, cos_bit_row, stage_range);
|
||||
} else {
|
||||
row_txfm(input, buf_ptr, cos_bit_row, stage_range);
|
||||
}
|
||||
av1_round_shift_array(buf_ptr, txfm_size_col, -shift[0]);
|
||||
input += txfm_size_col;
|
||||
buf_ptr += txfm_size_col;
|
||||
}
|
||||
|
||||
// Doing memset for the rows which are not processed in row transform.
|
||||
memset(buf_ptr, 0,
|
||||
sizeof(int32_t) * txfm_size_col * (txfm_size_row - row_start));
|
||||
|
||||
for (int c = 0; c < txfm_size_col; c++) {
|
||||
if (lr_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + c];
|
||||
} else {
|
||||
// flip left right
|
||||
for (r = 0; r < txfm_size_row; ++r)
|
||||
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
|
||||
}
|
||||
col_txfm(temp_in, temp_out, cos_bit_col, stage_range);
|
||||
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
|
||||
|
||||
if (ud_flip == 0) {
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] =
|
||||
highbd_clip_pixel_add(output[r * stride + c], temp_out[r], bd);
|
||||
}
|
||||
} else {
|
||||
// flip upside down
|
||||
for (r = 0; r < txfm_size_row; ++r) {
|
||||
output[r * stride + c] = highbd_clip_pixel_add(
|
||||
output[r * stride + c], temp_out[txfm_size_row - r - 1], bd);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static INLINE void lowbd_inv_txfm2d_add_universe_neon(
|
||||
const int32_t *input, uint8_t *output, int stride, TX_TYPE tx_type,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
switch (tx_type) {
|
||||
case IDTX:
|
||||
lowbd_inv_txfm2d_add_idtx_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case H_DCT:
|
||||
case H_ADST:
|
||||
case H_FLIPADST:
|
||||
lowbd_inv_txfm2d_add_v_identity_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
break;
|
||||
|
||||
case V_DCT:
|
||||
case V_ADST:
|
||||
case V_FLIPADST:
|
||||
lowbd_inv_txfm2d_add_h_identity_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
break;
|
||||
|
||||
default:
|
||||
lowbd_inv_txfm2d_add_no_identity_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
break;
|
||||
}
|
||||
}
|
||||
void av1_lowbd_inv_txfm2d_add_neon(const int32_t *input, uint8_t *output,
|
||||
int stride, TX_TYPE tx_type, TX_SIZE tx_size,
|
||||
int eob) {
|
||||
int row;
|
||||
switch (tx_size) {
|
||||
case TX_4X4:
|
||||
lowbd_inv_txfm2d_add_4x4_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case TX_4X8:
|
||||
lowbd_inv_txfm2d_add_4x8_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case TX_8X4:
|
||||
lowbd_inv_txfm2d_add_8x4_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case TX_4X16:
|
||||
lowbd_inv_txfm2d_add_4x16_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case TX_16X4:
|
||||
lowbd_inv_txfm2d_add_16x4_neon(input, output, stride, tx_type, tx_size,
|
||||
eob);
|
||||
break;
|
||||
|
||||
case TX_16X64: {
|
||||
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
} break;
|
||||
|
||||
case TX_64X16: {
|
||||
int32_t mod_input[64 * 16];
|
||||
for (row = 0; row < 16; ++row) {
|
||||
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
|
||||
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
|
||||
}
|
||||
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
} break;
|
||||
|
||||
case TX_32X64: {
|
||||
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
} break;
|
||||
|
||||
case TX_64X32: {
|
||||
int32_t mod_input[64 * 32];
|
||||
for (row = 0; row < 32; ++row) {
|
||||
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
|
||||
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
|
||||
}
|
||||
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
} break;
|
||||
|
||||
case TX_64X64: {
|
||||
int32_t mod_input[64 * 64];
|
||||
for (row = 0; row < 32; ++row) {
|
||||
memcpy(mod_input + row * 64, input + row * 32, 32 * sizeof(*mod_input));
|
||||
memset(mod_input + row * 64 + 32, 0, 32 * sizeof(*mod_input));
|
||||
}
|
||||
lowbd_inv_txfm2d_add_universe_neon(mod_input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
} break;
|
||||
|
||||
default:
|
||||
lowbd_inv_txfm2d_add_universe_neon(input, output, stride, tx_type,
|
||||
tx_size, eob);
|
||||
break;
|
||||
}
|
||||
}
|
||||
void av1_inv_txfm_add_neon(const tran_low_t *dqcoeff, uint8_t *dst, int stride,
|
||||
const TxfmParam *txfm_param) {
|
||||
const TX_TYPE tx_type = txfm_param->tx_type;
|
||||
if (!txfm_param->lossless) {
|
||||
av1_lowbd_inv_txfm2d_add_neon(dqcoeff, dst, stride, tx_type,
|
||||
txfm_param->tx_size, txfm_param->eob);
|
||||
} else {
|
||||
av1_inv_txfm_add_c(dqcoeff, dst, stride, txfm_param);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#ifndef AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
|
||||
#define AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
|
||||
|
||||
#include "config/aom_config.h"
|
||||
#include "config/av1_rtcd.h"
|
||||
|
||||
#include "aom/aom_integer.h"
|
||||
#include "av1/common/enums.h"
|
||||
#include "av1/common/av1_inv_txfm1d.h"
|
||||
#include "av1/common/av1_inv_txfm1d_cfg.h"
|
||||
#include "av1/common/av1_txfm.h"
|
||||
|
||||
// Signature shared by the 1-D inverse transform kernels: reads one line of
// coefficients from `input` and writes the transformed line to `output`.
// `cos_bit` and `stage_ptr` are per-call parameters supplied by the 2-D
// driver.
typedef void (*transform_1d_neon)(const int32_t *input, int32_t *output,
|
||||
const int8_t cos_bit,
|
||||
const int8_t *stage_ptr);
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 8x8 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x8_default[8]) = {
|
||||
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0707,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 16x16 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t,
|
||||
av1_eob_to_eobxy_16x16_default[16]) = {
|
||||
0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
|
||||
0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 32x32 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t,
|
||||
av1_eob_to_eobxy_32x32_default[32]) = {
|
||||
0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
|
||||
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
|
||||
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
|
||||
0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f, 0x1f1f,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 8x16 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x16_default[16]) = {
|
||||
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
|
||||
0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 16x8 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_16x8_default[8]) = {
|
||||
0x0707, 0x0707, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f, 0x070f,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 16x32 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t,
|
||||
av1_eob_to_eobxy_16x32_default[32]) = {
|
||||
0x0707, 0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f0f,
|
||||
0x0f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
|
||||
0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
|
||||
0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f, 0x1f0f,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 32x16 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t,
|
||||
av1_eob_to_eobxy_32x16_default[16]) = {
|
||||
0x0707, 0x0f0f, 0x0f0f, 0x0f0f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
|
||||
0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f, 0x0f1f,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 8x32 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_8x32_default[32]) = {
|
||||
0x0707, 0x0707, 0x0707, 0x0707, 0x0707, 0x0f07, 0x0f07, 0x0f07,
|
||||
0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x0f07, 0x1f07, 0x1f07, 0x1f07,
|
||||
0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
|
||||
0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07, 0x1f07,
|
||||
};
|
||||
|
||||
// Packed (eoby << 8) | eobx values for the 32x8 table, indexed by
// (eob - 1) >> log2(width) as computed in get_eobx_eoby_scan_default().
DECLARE_ALIGNED(16, static const int16_t, av1_eob_to_eobxy_32x8_default[8]) = {
|
||||
0x0707, 0x070f, 0x070f, 0x071f, 0x071f, 0x071f, 0x071f, 0x071f,
|
||||
};
|
||||
|
||||
// Per-transform-size pointer to the packed eobx/eoby table, indexed by the
// tx_size value passed to get_eobx_eoby_scan_default(). NULL entries have no
// table, so that function must not be called for those sizes.
// NOTE(review): the entry order presumably follows the TX_SIZE enum
// (TX_4X4, TX_8X8, ... TX_64X16) - confirm against the enum definition.
DECLARE_ALIGNED(16, static const int16_t *,
|
||||
av1_eob_to_eobxy_default[TX_SIZES_ALL]) = {
|
||||
NULL,
|
||||
av1_eob_to_eobxy_8x8_default,
|
||||
av1_eob_to_eobxy_16x16_default,
|
||||
av1_eob_to_eobxy_32x32_default,
|
||||
av1_eob_to_eobxy_32x32_default,
|
||||
NULL,
|
||||
NULL,
|
||||
av1_eob_to_eobxy_8x16_default,
|
||||
av1_eob_to_eobxy_16x8_default,
|
||||
av1_eob_to_eobxy_16x32_default,
|
||||
av1_eob_to_eobxy_32x16_default,
|
||||
av1_eob_to_eobxy_32x32_default,
|
||||
av1_eob_to_eobxy_32x32_default,
|
||||
NULL,
|
||||
NULL,
|
||||
av1_eob_to_eobxy_8x32_default,
|
||||
av1_eob_to_eobxy_32x8_default,
|
||||
av1_eob_to_eobxy_16x32_default,
|
||||
av1_eob_to_eobxy_32x16_default,
|
||||
};
|
||||
|
||||
// Maps an eobx/eoby value (0..31) to the index of the 1-D kernel variant in
// the last dimension of lowbd_txfm_all_1d_zeros_w8_arr:
// 0 -> 0, 1..7 -> 1, 8..15 -> 2, 16..31 -> 3.
static const int lowbd_txfm_all_1d_zeros_idx[32] = {
|
||||
0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
|
||||
};
|
||||
|
||||
// Log2 of the transform block width used when turning an eob into a row
// index, one entry per transform size (widths of 64 are mapped to 32).
|
||||
static const int tx_size_wide_log2_eob[TX_SIZES_ALL] = {
|
||||
2, 3, 4, 5, 5, 2, 3, 3, 4, 4, 5, 5, 5, 2, 4, 3, 5, 4, 5,
|
||||
};
|
||||
|
||||
// Rounds a zero-based position (0..31) up to the last index of the enclosing
// 8/16/32-wide group: 0 -> 0, 1..7 -> 7, 8..15 -> 15, 16..31 -> 31.
// Read-only lookup table, so it is declared const like the other tables in
// this header.
static const int eob_fill[32] = {
  0,  7,  7,  7,  7,  7,  7,  7,  15, 15, 15, 15, 15, 15, 15, 15,
  31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31,
};
|
||||
|
||||
static INLINE void get_eobx_eoby_scan_default(int *eobx, int *eoby,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
if (eob == 1) {
|
||||
*eobx = 0;
|
||||
*eoby = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
const int tx_w_log2 = tx_size_wide_log2_eob[tx_size];
|
||||
const int eob_row = (eob - 1) >> tx_w_log2;
|
||||
const int eobxy = av1_eob_to_eobxy_default[tx_size][eob_row];
|
||||
*eobx = eobxy & 0xFF;
|
||||
*eoby = eobxy >> 8;
|
||||
}
|
||||
|
||||
static INLINE void get_eobx_eoby_scan_v_identity(int *eobx, int *eoby,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
eob -= 1;
|
||||
const int txfm_size_row = tx_size_high[tx_size];
|
||||
const int eoby_max = AOMMIN(32, txfm_size_row) - 1;
|
||||
*eobx = eob / (eoby_max + 1);
|
||||
*eoby = (eob >= eoby_max) ? eoby_max : eob_fill[eob];
|
||||
}
|
||||
|
||||
static INLINE void get_eobx_eoby_scan_h_identity(int *eobx, int *eoby,
|
||||
TX_SIZE tx_size, int eob) {
|
||||
eob -= 1;
|
||||
const int txfm_size_col = tx_size_wide[tx_size];
|
||||
const int eobx_max = AOMMIN(32, txfm_size_col) - 1;
|
||||
*eobx = (eob >= eobx_max) ? eobx_max : eob_fill[eob];
|
||||
const int temp_eoby = eob / (eobx_max + 1);
|
||||
assert(temp_eoby < 32);
|
||||
*eoby = eob_fill[temp_eoby];
|
||||
}
|
||||
|
||||
#endif // AV1_COMMON_ARM_AV1_INV_TXFM_NEON_H_
|
|
@ -164,8 +164,8 @@ static INLINE uint8x8_t convolve8_vert_8x4_s32(
|
|||
|
||||
void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
const uint8_t horiz_offset = filter_params_x->taps / 2 - 1;
|
||||
|
@ -182,7 +182,7 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
((conv_params->round_0 + conv_params->round_1) == 2 * FILTER_BITS));
|
||||
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
|
||||
const int16x8_t shift_round_0 = vdupq_n_s16(-conv_params->round_0);
|
||||
const int16x8_t shift_by_bits = vdupq_n_s16(-bits);
|
||||
|
@ -485,8 +485,8 @@ void av1_convolve_x_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
const int vert_offset = filter_params_y->taps / 2 - 1;
|
||||
|
@ -502,7 +502,7 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
|
||||
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
|
||||
if (w <= 4) {
|
||||
uint8x8_t d01, d23;
|
||||
|
@ -680,8 +680,8 @@ void av1_convolve_y_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
int im_dst_stride;
|
||||
|
@ -711,7 +711,7 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
const int16x8_t vec_round_bits = vdupq_n_s16(-round_bits);
|
||||
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
|
||||
int16_t x_filter_tmp[8];
|
||||
int16x8_t filter_x_coef = vld1q_s16(x_filter);
|
||||
|
@ -896,7 +896,7 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
const int32_t sub_const = (1 << (offset_bits - conv_params->round_1)) +
|
||||
(1 << (offset_bits - conv_params->round_1 - 1));
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
|
||||
const int32x4_t round_shift_vec = vdupq_n_s32(-(conv_params->round_1));
|
||||
const int32x4_t offset_const = vdupq_n_s32(1 << offset_bits);
|
||||
|
@ -1086,8 +1086,8 @@ void av1_convolve_2d_sr_neon(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
}
|
||||
void av1_convolve_2d_copy_sr_neon(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
(void)filter_params_x;
|
||||
|
|
|
@ -1,79 +0,0 @@
|
|||
/*
|
||||
*
|
||||
* Copyright (c) 2018, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "aom_mem/aom_mem.h"
|
||||
#include "aom_ports/mem.h"
|
||||
#include "av1/common/arm/mem_neon.h"
|
||||
#include "config/aom_dsp_rtcd.h"
|
||||
|
||||
// DC intra prediction for a square bw x bw block of 16-bit samples.
//
// Sums the first `bw` samples of `above` and `left`, computes the rounded
// average over the 2 * bw samples, and writes that value to every sample of
// the bw x bw block at `dst` (row pitch `stride`, in samples).
// `bw` must be a power of two and at least 4.
static INLINE void highbd_dc_predictor_neon(uint16_t *dst, ptrdiff_t stride,
                                            int bw, const uint16_t *above,
                                            const uint16_t *left) {
  assert(bw >= 4);
  assert(IS_POWER_OF_TWO(bw));

  const int num_samples = bw * 2;
  const int is_4x4 = bw < 8;

  // Accumulate the edge samples into four 32-bit lanes.
  uint32x4_t acc;
  if (is_4x4) {
    acc = vaddl_u16(vld1_u16(above), vld1_u16(left));
  } else {
    acc = vdupq_n_u32(0);
    for (int x = 0; x < bw; x += 8) {
      acc = vpadalq_u16(acc, vld1q_u16(above + x));
      acc = vpadalq_u16(acc, vld1q_u16(left + x));
    }
  }

  // Reduce the four lanes to a single sum and take the rounded average.
  const uint32x2_t pair = vadd_u32(vget_low_u32(acc), vget_high_u32(acc));
  const int total = vget_lane_s32(vreinterpret_s32_u64(vpaddl_u32(pair)), 0);
  const int dc_value = (total + (num_samples >> 1)) / num_samples;

  if (is_4x4) {
    const uint16x4_t dc = vdup_n_u16((uint16_t)dc_value);
    for (int y = 0; y < bw; ++y) {
      vst1_u16(dst, dc);
      dst += stride;
    }
    return;
  }

  const uint16x8_t dc = vdupq_n_u16((uint16_t)dc_value);
  for (int y = 0; y < bw; ++y) {
    for (int x = 0; x < bw; x += 8) vst1q_u16(dst + x, dc);
    dst += stride;
  }
}
|
||||
|
||||
// Defines the public entry point
// aom_highbd_<type>_predictor_<width>x<width>_neon, which forwards to
// highbd_<type>_predictor_neon with the block width; `bd` is unused.
// The expansion is a complete function definition, so invocations must not
// be followed by ';' (a lone ';' at file scope is not valid ISO C).
#define intra_pred_highbd_sized(type, width)                         \
  void aom_highbd_##type##_predictor_##width##x##width##_neon(       \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,        \
      const uint16_t *left, int bd) {                                \
    (void)bd;                                                        \
    highbd_##type##_predictor_neon(dst, stride, width, above, left); \
  }

// Instantiates the predictor for every square block size from 4x4 to 64x64.
#define intra_pred_square(type)     \
  intra_pred_highbd_sized(type, 4)  \
  intra_pred_highbd_sized(type, 8)  \
  intra_pred_highbd_sized(type, 16) \
  intra_pred_highbd_sized(type, 32) \
  intra_pred_highbd_sized(type, 64)

intra_pred_square(dc)

// Both helper macros are only needed for the instantiation above.
#undef intra_pred_square
#undef intra_pred_highbd_sized
|
|
@ -515,8 +515,8 @@ static INLINE void jnt_convolve_2d_vert_neon(
|
|||
|
||||
void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
assert(!(w % 4));
|
||||
|
@ -532,9 +532,9 @@ void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
const int round_0 = conv_params->round_0 - 1;
|
||||
const uint8_t *src_ptr = src - vert_offset * src_stride - horiz_offset;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
|
||||
int16_t x_filter_tmp[8];
|
||||
int16x8_t filter_x_coef = vld1q_s16(x_filter);
|
||||
|
@ -553,8 +553,8 @@ void av1_jnt_convolve_2d_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst8, int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
uint8x8_t res0_8, res1_8, res2_8, res3_8, tmp_shift0, tmp_shift1, tmp_shift2,
|
||||
|
@ -679,8 +679,8 @@ void av1_jnt_convolve_2d_copy_neon(const uint8_t *src, int src_stride,
|
|||
|
||||
void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
assert(!(w % 4));
|
||||
|
@ -705,7 +705,7 @@ void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
// horizontal filter
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
|
||||
const uint8_t *src_ptr = src - horiz_offset;
|
||||
|
||||
|
@ -1013,8 +1013,8 @@ void av1_jnt_convolve_x_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
assert(!(w % 4));
|
||||
|
@ -1040,7 +1040,7 @@ void av1_jnt_convolve_y_neon(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
// vertical filter
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
|
||||
const uint8_t *src_ptr = src - (vert_offset * src_stride);
|
||||
|
||||
|
|
|
@ -22,6 +22,14 @@ static INLINE void store_row2_u8_8x8(uint8_t *s, int p, const uint8x8_t s0,
|
|||
s += p;
|
||||
}
|
||||
|
||||
/* Loads 4 bytes from `s` into 32-bit lane `lane` of the 8-byte vector `*s0`;
   the other lane of `*s0` keeps its previous contents.
   The lane index must be an immediate (compile-time constant), so this has to
   be a macro rather than an inline function.
   The source is only read, so it is accessed through a const-qualified
   pointer; this lets callers pass `const uint8_t *` without the cast
   discarding the qualifier.
   NOTE(review): the uint32_t access presumes `s` is suitable for a 32-bit
   load on the target; confirm against callers. */
#define load_u8_4x1(s, s0, lane)                                      \
  do {                                                                \
    *(s0) = vreinterpret_u8_u32(vld1_lane_u32(                        \
        (const uint32_t *)(s), vreinterpret_u32_u8(*(s0)), (lane)));  \
  } while (0)
|
||||
|
||||
static INLINE void load_u8_8x8(const uint8_t *s, ptrdiff_t p,
|
||||
uint8x8_t *const s0, uint8x8_t *const s1,
|
||||
uint8x8_t *const s2, uint8x8_t *const s3,
|
||||
|
@ -128,6 +136,13 @@ static INLINE void load_s16_4x4(const int16_t *s, ptrdiff_t p,
|
|||
*s3 = vld1_s16(s);
|
||||
}
|
||||
|
||||
/* Writes 32-bit lane `lane` of the 8-byte vector `s0` to the 4 bytes at `s`.
   The lane index has to be an immediate, which is why this is a macro and
   not an inline function. */
#define store_u8_4x1(s, s0, lane)                                       \
  do {                                                                  \
    vst1_lane_u32((uint32_t *)(s), vreinterpret_u32_u8((s0)), (lane));  \
  } while (0)
|
||||
|
||||
static INLINE void store_u8_8x8(uint8_t *s, ptrdiff_t p, const uint8x8_t s0,
|
||||
const uint8x8_t s1, const uint8x8_t s2,
|
||||
const uint8x8_t s3, const uint8x8_t s4,
|
||||
|
@ -242,6 +257,30 @@ static INLINE void store_s16_8x8(int16_t *s, ptrdiff_t dst_stride,
|
|||
vst1q_s16(s, s7);
|
||||
}
|
||||
|
||||
// Stores four rows of four int16 values; consecutive rows start
// `dst_stride` elements apart, beginning at `s`.
static INLINE void store_s16_4x4(int16_t *s, ptrdiff_t dst_stride,
                                 const int16x4_t s0, const int16x4_t s1,
                                 const int16x4_t s2, const int16x4_t s3) {
  int16_t *const row0 = s;
  int16_t *const row1 = row0 + dst_stride;
  int16_t *const row2 = row1 + dst_stride;
  int16_t *const row3 = row2 + dst_stride;

  vst1_s16(row0, s0);
  vst1_s16(row1, s1);
  vst1_s16(row2, s2);
  vst1_s16(row3, s3);
}
|
||||
|
||||
// Stores four rows of eight int16 values; consecutive rows start
// `dst_stride` elements apart, beginning at `s`.
static INLINE void store_s16_8x4(int16_t *s, ptrdiff_t dst_stride,
                                 const int16x8_t s0, const int16x8_t s1,
                                 const int16x8_t s2, const int16x8_t s3) {
  int16_t *const row0 = s;
  int16_t *const row1 = row0 + dst_stride;
  int16_t *const row2 = row1 + dst_stride;
  int16_t *const row3 = row2 + dst_stride;

  vst1q_s16(row0, s0);
  vst1q_s16(row1, s1);
  vst1q_s16(row2, s2);
  vst1q_s16(row3, s3);
}
|
||||
|
||||
static INLINE void load_s16_8x8(const int16_t *s, ptrdiff_t p,
|
||||
int16x8_t *const s0, int16x8_t *const s1,
|
||||
int16x8_t *const s2, int16x8_t *const s3,
|
||||
|
@ -398,4 +437,49 @@ static INLINE void load_unaligned_u16_4x4(const uint16_t *buf, uint32_t stride,
|
|||
*tu1 = vsetq_lane_u64(a, *tu1, 1);
|
||||
}
|
||||
|
||||
// Loads four rows of four int32 values into *s1..*s4. Rows are read starting
// at `s`, with consecutive rows `p` elements apart.
// The source buffer is never written, so it is taken as a pointer to const,
// matching the other load_* helpers in this header; callers holding a
// non-const pointer are unaffected.
static INLINE void load_s32_4x4(const int32_t *s, int32_t p, int32x4_t *s1,
                                int32x4_t *s2, int32x4_t *s3, int32x4_t *s4) {
  *s1 = vld1q_s32(s);
  s += p;
  *s2 = vld1q_s32(s);
  s += p;
  *s3 = vld1q_s32(s);
  s += p;
  *s4 = vld1q_s32(s);
}
|
||||
|
||||
// Stores the four int32 vectors s1..s4 as four rows beginning at `s`, with
// consecutive rows `p` elements apart.
static INLINE void store_s32_4x4(int32_t *s, int32_t p, int32x4_t s1,
                                 int32x4_t s2, int32x4_t s3, int32x4_t s4) {
  int32_t *const row0 = s;
  int32_t *const row1 = row0 + p;
  int32_t *const row2 = row1 + p;
  int32_t *const row3 = row2 + p;

  vst1q_s32(row0, s1);
  vst1q_s32(row1, s2);
  vst1q_s32(row2, s3);
  vst1q_s32(row3, s4);
}
|
||||
|
||||
// Loads four rows of four uint32 values into *s1..*s4. Rows are read starting
// at `s`, with consecutive rows `p` elements apart.
// The source buffer is never written, so it is taken as a pointer to const,
// matching the other load_* helpers in this header; callers holding a
// non-const pointer are unaffected.
static INLINE void load_u32_4x4(const uint32_t *s, int32_t p, uint32x4_t *s1,
                                uint32x4_t *s2, uint32x4_t *s3,
                                uint32x4_t *s4) {
  *s1 = vld1q_u32(s);
  s += p;
  *s2 = vld1q_u32(s);
  s += p;
  *s3 = vld1q_u32(s);
  s += p;
  *s4 = vld1q_u32(s);
}
|
||||
|
||||
// Stores the four uint32 vectors s1..s4 as four rows beginning at `s`, with
// consecutive rows `p` elements apart.
static INLINE void store_u32_4x4(uint32_t *s, int32_t p, uint32x4_t s1,
                                 uint32x4_t s2, uint32x4_t s3, uint32x4_t s4) {
  uint32_t *const row0 = s;
  uint32_t *const row1 = row0 + p;
  uint32_t *const row2 = row1 + p;
  uint32_t *const row3 = row2 + p;

  vst1q_u32(row0, s1);
  vst1q_u32(row1, s2);
  vst1q_u32(row2, s3);
  vst1q_u32(row3, s4);
}
|
||||
|
||||
#endif // AV1_COMMON_ARM_MEM_NEON_H_
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -419,4 +419,42 @@ static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
|
|||
*a3 = vreinterpret_s16_s32(c1.val[1]);
|
||||
}
|
||||
|
||||
// Treats each input as two 64-bit halves and regroups them:
//   result.val[0] = { low half of a0,  low half of a1 }
//   result.val[1] = { high half of a0, high half of a1 }
static INLINE int32x4x2_t aom_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
  const int32x4_t lows = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1));
  const int32x4_t highs = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1));

  int32x4x2_t result;
  result.val[0] = lows;
  result.val[1] = highs;
  return result;
}
|
||||
|
||||
// Transposes, in place, the 4x4 matrix of 32-bit elements whose rows are
// *a0..*a3:
//   a0: 00 01 02 03        a0: 00 10 20 30
//   a1: 10 11 12 13   ->   a1: 01 11 21 31
//   a2: 20 21 22 23        a2: 02 12 22 32
//   a3: 30 31 32 33        a3: 03 13 23 33
static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1,
                                     int32x4_t *a2, int32x4_t *a3) {
  // Step 1: interleave 32-bit elements within each pair of rows.
  //   top.val[0]: 00 10 02 12    top.val[1]: 01 11 03 13
  //   bot.val[0]: 20 30 22 32    bot.val[1]: 21 31 23 33
  const int32x4x2_t top = vtrnq_s32(*a0, *a1);
  const int32x4x2_t bot = vtrnq_s32(*a2, *a3);

  // Step 2: pair up matching 64-bit halves from the two intermediate sets.
  // All inputs were read above, so the outputs can be written directly.
  *a0 = vcombine_s32(vget_low_s32(top.val[0]), vget_low_s32(bot.val[0]));
  *a1 = vcombine_s32(vget_low_s32(top.val[1]), vget_low_s32(bot.val[1]));
  *a2 = vcombine_s32(vget_high_s32(top.val[0]), vget_high_s32(bot.val[0]));
  *a3 = vcombine_s32(vget_high_s32(top.val[1]), vget_high_s32(bot.val[1]));
}
|
||||
|
||||
#endif // AV1_COMMON_ARM_TRANSPOSE_NEON_H_
|
||||
|
|
|
@ -1308,7 +1308,7 @@ static int compare_ref_dst(AV1_COMMON *const cm, uint8_t *ref_buf,
|
|||
end <<= MI_SIZE_LOG2;
|
||||
uint8_t *ref0 = ref_buf;
|
||||
uint8_t *dst0 = dst_buf;
|
||||
if (cm->use_highbitdepth) {
|
||||
if (cm->seq_params.use_highbitdepth) {
|
||||
const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref_buf);
|
||||
const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst_buf);
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
|
@ -1404,11 +1404,11 @@ void av1_filter_block_plane_ver(AV1_COMMON *const cm,
|
|||
uint64_t mask_8x8_1 = (mask_8x8 >> shift_next) & mask_cutoff;
|
||||
uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
|
||||
|
||||
if (cm->use_highbitdepth)
|
||||
if (cm->seq_params.use_highbitdepth)
|
||||
highbd_filter_selectively_vert_row2(
|
||||
ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
|
||||
mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
|
||||
&cm->lf_info, lfl, lfl2, (int)cm->bit_depth);
|
||||
&cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
|
||||
else
|
||||
filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
|
||||
mask_16x16_0, mask_8x8_0, mask_4x4_0,
|
||||
|
@ -1474,10 +1474,11 @@ void av1_filter_block_plane_hor(AV1_COMMON *const cm,
|
|||
mask_8x8 = (mask_8x8 >> shift) & mask_cutoff;
|
||||
mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
|
||||
|
||||
if (cm->use_highbitdepth)
|
||||
highbd_filter_selectively_horiz(
|
||||
CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
|
||||
mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->bit_depth);
|
||||
if (cm->seq_params.use_highbitdepth)
|
||||
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
|
||||
dst->stride, pl, ssx, mask_16x16,
|
||||
mask_8x8, mask_4x4, &cm->lf_info, lfl,
|
||||
(int)cm->seq_params.bit_depth);
|
||||
else
|
||||
filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
|
||||
mask_8x8, mask_4x4, &cm->lf_info, lfl);
|
||||
|
@ -1652,6 +1653,8 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
|
|||
const int dst_stride = plane_ptr->dst.stride;
|
||||
const int y_range = (MAX_MIB_SIZE >> scale_vert);
|
||||
const int x_range = (MAX_MIB_SIZE >> scale_horz);
|
||||
const int use_highbitdepth = cm->seq_params.use_highbitdepth;
|
||||
const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
|
||||
for (int y = 0; y < y_range; y += row_step) {
|
||||
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
|
||||
for (int x = 0; x < x_range;) {
|
||||
|
@ -1677,40 +1680,40 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
|
|||
switch (params.filter_length) {
|
||||
// apply 4-tap filtering
|
||||
case 4:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_vertical_4(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim, params.hev_thr,
|
||||
cm->bit_depth);
|
||||
bit_depth);
|
||||
else
|
||||
aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
break;
|
||||
case 6: // apply 6-tap filter for chroma plane only
|
||||
assert(plane != 0);
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_vertical_6(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim, params.hev_thr,
|
||||
cm->bit_depth);
|
||||
bit_depth);
|
||||
else
|
||||
aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
break;
|
||||
// apply 8-tap filtering
|
||||
case 8:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_vertical_8(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim, params.hev_thr,
|
||||
cm->bit_depth);
|
||||
bit_depth);
|
||||
else
|
||||
aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
break;
|
||||
// apply 14-tap filtering
|
||||
case 14:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_vertical_14(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim, params.hev_thr,
|
||||
cm->bit_depth);
|
||||
bit_depth);
|
||||
else
|
||||
aom_lpf_vertical_14(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
|
@ -1737,6 +1740,8 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
|
|||
const int dst_stride = plane_ptr->dst.stride;
|
||||
const int y_range = (MAX_MIB_SIZE >> scale_vert);
|
||||
const int x_range = (MAX_MIB_SIZE >> scale_horz);
|
||||
const int use_highbitdepth = cm->seq_params.use_highbitdepth;
|
||||
const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
|
||||
for (int x = 0; x < x_range; x += col_step) {
|
||||
uint8_t *p = dst_ptr + x * MI_SIZE;
|
||||
for (int y = 0; y < y_range;) {
|
||||
|
@ -1762,10 +1767,10 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
|
|||
switch (params.filter_length) {
|
||||
// apply 4-tap filtering
|
||||
case 4:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_horizontal_4(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim,
|
||||
params.hev_thr, cm->bit_depth);
|
||||
params.hev_thr, bit_depth);
|
||||
else
|
||||
aom_lpf_horizontal_4(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
|
@ -1773,30 +1778,30 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
|
|||
// apply 6-tap filtering
|
||||
case 6:
|
||||
assert(plane != 0);
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_horizontal_6(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim,
|
||||
params.hev_thr, cm->bit_depth);
|
||||
params.hev_thr, bit_depth);
|
||||
else
|
||||
aom_lpf_horizontal_6(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
break;
|
||||
// apply 8-tap filtering
|
||||
case 8:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_horizontal_8(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim,
|
||||
params.hev_thr, cm->bit_depth);
|
||||
params.hev_thr, bit_depth);
|
||||
else
|
||||
aom_lpf_horizontal_8(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
break;
|
||||
// apply 14-tap filtering
|
||||
case 14:
|
||||
if (cm->use_highbitdepth)
|
||||
if (use_highbitdepth)
|
||||
aom_highbd_lpf_horizontal_14(CONVERT_TO_SHORTPTR(p), dst_stride,
|
||||
params.mblim, params.lim,
|
||||
params.hev_thr, cm->bit_depth);
|
||||
params.hev_thr, bit_depth);
|
||||
else
|
||||
aom_lpf_horizontal_14(p, dst_stride, params.mblim, params.lim,
|
||||
params.hev_thr);
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#include "aom_ports/aom_once.h"
|
||||
|
||||
void av1_rtcd() {
|
||||
// TODO(JBB): Remove this once, by ensuring that both the encoder and
|
||||
// decoder setup functions are protected by once();
|
||||
once(setup_rtcd_internal);
|
||||
// TODO(JBB): Remove this aom_once, by ensuring that both the encoder and
|
||||
// decoder setup functions are protected by aom_once();
|
||||
aom_once(setup_rtcd_internal);
|
||||
}
|
||||
|
|
|
@ -106,7 +106,7 @@ specialize qw/av1_highbd_convolve8_vert/, "$sse2_x86_64";
|
|||
|
||||
#inv txfm
|
||||
add_proto qw/void av1_inv_txfm_add/, "const tran_low_t *dqcoeff, uint8_t *dst, int stride, const TxfmParam *txfm_param";
|
||||
specialize qw/av1_inv_txfm_add ssse3 avx2/;
|
||||
specialize qw/av1_inv_txfm_add ssse3 avx2 neon/;
|
||||
|
||||
add_proto qw/void av1_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
add_proto qw/void av1_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
|
||||
|
@ -181,7 +181,7 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
|
||||
#fwd txfm
|
||||
add_proto qw/void av1_lowbd_fwd_txfm/, "const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TxfmParam *txfm_param";
|
||||
specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1/;
|
||||
specialize qw/av1_lowbd_fwd_txfm sse2 sse4_1 avx2/;
|
||||
|
||||
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
|
||||
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, TX_TYPE tx_type, int bd";
|
||||
|
@ -241,11 +241,11 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
specialize qw/av1_txb_init_levels sse4_1/;
|
||||
|
||||
add_proto qw/uint64_t av1_wedge_sse_from_residuals/, "const int16_t *r1, const int16_t *d, const uint8_t *m, int N";
|
||||
specialize qw/av1_wedge_sse_from_residuals sse2/;
|
||||
specialize qw/av1_wedge_sse_from_residuals sse2 avx2/;
|
||||
add_proto qw/int av1_wedge_sign_from_residuals/, "const int16_t *ds, const uint8_t *m, int N, int64_t limit";
|
||||
specialize qw/av1_wedge_sign_from_residuals sse2/;
|
||||
specialize qw/av1_wedge_sign_from_residuals sse2 avx2/;
|
||||
add_proto qw/void av1_wedge_compute_delta_squares/, "int16_t *d, const int16_t *a, const int16_t *b, int N";
|
||||
specialize qw/av1_wedge_compute_delta_squares sse2/;
|
||||
specialize qw/av1_wedge_compute_delta_squares sse2 avx2/;
|
||||
|
||||
# hash
|
||||
add_proto qw/uint32_t av1_get_crc32c_value/, "void *crc_calculator, uint8_t *p, int length";
|
||||
|
@ -288,34 +288,34 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
|
|||
# LOOP_RESTORATION functions
|
||||
|
||||
add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
|
||||
specialize qw/apply_selfguided_restoration sse4_1 avx2/;
|
||||
specialize qw/apply_selfguided_restoration sse4_1 avx2 neon/;
|
||||
|
||||
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd8, int width, int height,
|
||||
int dgd_stride, int32_t *flt0, int32_t *flt1, int flt_stride,
|
||||
int sgr_params_idx, int bit_depth, int highbd";
|
||||
specialize qw/av1_selfguided_restoration sse4_1 avx2/;
|
||||
specialize qw/av1_selfguided_restoration sse4_1 avx2 neon/;
|
||||
|
||||
# CONVOLVE_ROUND/COMPOUND_ROUND functions
|
||||
|
||||
add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_convolve_2d_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_jnt_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_highbd_convolve_2d_copy_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_2d_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_x_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_convolve_y_sr/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_2d/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_x/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_y/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_highbd_jnt_convolve_2d_copy/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params, int bd";
|
||||
|
||||
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
|
||||
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
|
||||
add_proto qw/void av1_highbd_convolve_2d_scale/, "const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w, int h, const InterpFilterParams *filter_params_x, const InterpFilterParams *filter_params_y, const int subpel_x_q4, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params, int bd";
|
||||
|
||||
specialize qw/av1_convolve_2d_sr sse2 avx2 neon/;
|
||||
specialize qw/av1_convolve_2d_copy_sr sse2 avx2 neon/;
|
||||
|
|
|
@ -171,53 +171,6 @@ static INLINE void set_flip_cfg(TX_TYPE tx_type, TXFM_2D_FLIP_CFG *cfg) {
|
|||
get_flip_cfg(tx_type, &cfg->ud_flip, &cfg->lr_flip);
|
||||
}
|
||||
|
||||
// Maps a transform size to the size with its two dimensions exchanged
// (e.g. TX_4X8 -> TX_8X4). Square sizes map to themselves. Any other value
// asserts and yields TX_INVALID.
static INLINE TX_SIZE av1_rotate_tx_size(TX_SIZE tx_size) {
  switch (tx_size) {
    // Square sizes are returned unchanged.
    case TX_4X4:
    case TX_8X8:
    case TX_16X16:
    case TX_32X32:
    case TX_64X64: return tx_size;

    // 1:2 and 2:1 rectangles.
    case TX_4X8: return TX_8X4;
    case TX_8X4: return TX_4X8;
    case TX_8X16: return TX_16X8;
    case TX_16X8: return TX_8X16;
    case TX_16X32: return TX_32X16;
    case TX_32X16: return TX_16X32;
    case TX_32X64: return TX_64X32;
    case TX_64X32: return TX_32X64;

    // 1:4 and 4:1 rectangles.
    case TX_4X16: return TX_16X4;
    case TX_16X4: return TX_4X16;
    case TX_8X32: return TX_32X8;
    case TX_32X8: return TX_8X32;
    case TX_16X64: return TX_64X16;
    case TX_64X16: return TX_16X64;

    default: break;
  }
  assert(0);
  return TX_INVALID;
}
|
||||
|
||||
// Maps a transform type to its counterpart with the two component names
// swapped: X_Y becomes Y_X (e.g. ADST_DCT <-> DCT_ADST) and each V_* type is
// exchanged with the matching H_* type. Types whose two components are the
// same, and IDTX, map to themselves. Any value not listed hits assert(0) and,
// when asserts are compiled out, returns TX_TYPES.
static INLINE TX_TYPE av1_rotate_tx_type(TX_TYPE tx_type) {
|
||||
switch (tx_type) {
|
||||
case DCT_DCT: return DCT_DCT;
|
||||
case ADST_DCT: return DCT_ADST;
|
||||
case DCT_ADST: return ADST_DCT;
|
||||
case ADST_ADST: return ADST_ADST;
|
||||
case FLIPADST_DCT: return DCT_FLIPADST;
|
||||
case DCT_FLIPADST: return FLIPADST_DCT;
|
||||
case FLIPADST_FLIPADST: return FLIPADST_FLIPADST;
|
||||
case ADST_FLIPADST: return FLIPADST_ADST;
|
||||
case FLIPADST_ADST: return ADST_FLIPADST;
|
||||
case IDTX: return IDTX;
|
||||
case V_DCT: return H_DCT;
|
||||
case H_DCT: return V_DCT;
|
||||
case V_ADST: return H_ADST;
|
||||
case H_ADST: return V_ADST;
|
||||
case V_FLIPADST: return H_FLIPADST;
|
||||
case H_FLIPADST: return V_FLIPADST;
|
||||
// Unlisted type: fail loudly in debug builds, return a sentinel otherwise.
default: assert(0); return TX_TYPES;
|
||||
}
|
||||
}
|
||||
|
||||
// Utility function that returns the log of the ratio of the col and row
|
||||
// sizes.
|
||||
static INLINE int get_rect_tx_log_ratio(int col, int row) {
|
||||
|
|
|
@ -605,6 +605,12 @@ static INLINE int get_bitdepth_data_path_index(const MACROBLOCKD *xd) {
|
|||
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
|
||||
}
|
||||
|
||||
// Returns the pointer to use for `buf16` given the current frame's bit-depth
// flag: when xd->cur_buf->flags has YV12_FLAG_HIGHBITDEPTH set, the pointer is
// passed through CONVERT_TO_BYTEPTR(); otherwise it is returned unchanged.
// NOTE(review): presumably callers hand in a buffer that holds 16-bit samples
// in the high-bitdepth case -- confirm against call sites.
static INLINE uint8_t *get_buf_by_bd(const MACROBLOCKD *xd, uint8_t *buf16) {
|
||||
return (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
|
||||
? CONVERT_TO_BYTEPTR(buf16)
|
||||
: buf16;
|
||||
}
|
||||
|
||||
static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) {
|
||||
switch (bsize) {
|
||||
case BLOCK_4X4: return 0;
|
||||
|
@ -674,6 +680,15 @@ static const int av1_ext_tx_used[EXT_TX_SET_TYPES][TX_TYPES] = {
|
|||
{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
|
||||
};
|
||||
|
||||
// One 16-bit mask per EXT_TX_SET_TYPES entry; the trailing comment on each row
// spells the same value out in binary.
// NOTE(review): presumably bit t of entry s mirrors av1_ext_tx_used[s][t]
// (a bitmask form of that table) -- confirm against av1_ext_tx_used.
static const uint16_t av1_ext_tx_used_flag[EXT_TX_SET_TYPES] = {
|
||||
0x0001, // 0000 0000 0000 0001
|
||||
0x0201, // 0000 0010 0000 0001
|
||||
0x020F, // 0000 0010 0000 1111
|
||||
0x0E0F, // 0000 1110 0000 1111
|
||||
0x0FFF, // 0000 1111 1111 1111
|
||||
0xFFFF, // 1111 1111 1111 1111
|
||||
};
|
||||
|
||||
static INLINE TxSetType av1_get_ext_tx_set_type(TX_SIZE tx_size, int is_inter,
|
||||
int use_reduced_set) {
|
||||
const TX_SIZE tx_size_sqr_up = txsize_sqr_up_map[tx_size];
|
||||
|
@ -1145,38 +1160,6 @@ static INLINE PLANE_TYPE get_plane_type(int plane) {
|
|||
return (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
|
||||
}
|
||||
|
||||
// Transposes a block of uint8_t samples: for every row r in [0, h) and column
// c in [0, w) of `src`, writes src[r * src_stride + c] to
// dst[c * dst_stride + r]. `dst` therefore receives w rows of h samples.
// Strides are in elements. No overlap handling is done between src and dst.
static INLINE void transpose_uint8(uint8_t *dst, int dst_stride,
|
||||
const uint8_t *src, int src_stride, int w,
|
||||
int h) {
|
||||
int r, c;
|
||||
for (r = 0; r < h; ++r)
|
||||
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
|
||||
}
|
||||
|
||||
// Transposes a block of uint16_t samples: for every row r in [0, h) and column
// c in [0, w) of `src`, writes src[r * src_stride + c] to
// dst[c * dst_stride + r]. `dst` therefore receives w rows of h samples.
// Strides are in elements. No overlap handling is done between src and dst.
static INLINE void transpose_uint16(uint16_t *dst, int dst_stride,
|
||||
const uint16_t *src, int src_stride, int w,
|
||||
int h) {
|
||||
int r, c;
|
||||
for (r = 0; r < h; ++r)
|
||||
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
|
||||
}
|
||||
|
||||
// Transposes a block of int16_t values: for every row r in [0, h) and column
// c in [0, w) of `src`, writes src[r * src_stride + c] to
// dst[c * dst_stride + r]. `dst` therefore receives w rows of h values.
// Strides are in elements. No overlap handling is done between src and dst.
static INLINE void transpose_int16(int16_t *dst, int dst_stride,
|
||||
const int16_t *src, int src_stride, int w,
|
||||
int h) {
|
||||
int r, c;
|
||||
for (r = 0; r < h; ++r)
|
||||
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
|
||||
}
|
||||
|
||||
// Transposes a block of int32_t values: for every row r in [0, h) and column
// c in [0, w) of `src`, writes src[r * src_stride + c] to
// dst[c * dst_stride + r]. `dst` therefore receives w rows of h values.
// Strides are in elements. No overlap handling is done between src and dst.
static INLINE void transpose_int32(int32_t *dst, int dst_stride,
|
||||
const int32_t *src, int src_stride, int w,
|
||||
int h) {
|
||||
int r, c;
|
||||
for (r = 0; r < h; ++r)
|
||||
for (c = 0; c < w; ++c) dst[c * dst_stride + r] = src[r * src_stride + c];
|
||||
}
|
||||
|
||||
static INLINE int av1_get_max_eob(TX_SIZE tx_size) {
|
||||
if (tx_size == TX_64X64 || tx_size == TX_64X32 || tx_size == TX_32X64) {
|
||||
return 1024;
|
||||
|
|
|
@ -110,7 +110,7 @@ void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
|
|||
static void copy_sb8_16(AOM_UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
|
||||
const uint8_t *src, int src_voffset, int src_hoffset,
|
||||
int sstride, int vsize, int hsize) {
|
||||
if (cm->use_highbitdepth) {
|
||||
if (cm->seq_params.use_highbitdepth) {
|
||||
const uint16_t *base =
|
||||
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
|
||||
copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
|
||||
|
@ -153,7 +153,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
int mi_high_l2[3];
|
||||
int xdec[3];
|
||||
int ydec[3];
|
||||
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
|
||||
int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
|
||||
const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
|
||||
const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
|
||||
av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
|
||||
|
@ -363,7 +363,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
|
||||
}
|
||||
|
||||
if (cm->use_highbitdepth) {
|
||||
if (cm->seq_params.use_highbitdepth) {
|
||||
cdef_filter_fb(
|
||||
NULL,
|
||||
&CONVERT_TO_SHORTPTR(
|
||||
|
|
|
@ -15,21 +15,14 @@
|
|||
|
||||
#include "config/av1_rtcd.h"
|
||||
|
||||
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
|
||||
void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params) {
|
||||
assert(block_size_wide[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
|
||||
assert(block_size_high[CFL_MAX_BLOCK_SIZE] == CFL_BUF_LINE);
|
||||
if (!(cm->subsampling_x == 0 && cm->subsampling_y == 0) &&
|
||||
!(cm->subsampling_x == 1 && cm->subsampling_y == 1) &&
|
||||
!(cm->subsampling_x == 1 && cm->subsampling_y == 0)) {
|
||||
aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
|
||||
"Only 4:4:4, 4:2:2 and 4:2:0 are currently supported by "
|
||||
"CfL, %d %d subsampling is not supported.\n",
|
||||
cm->subsampling_x, cm->subsampling_y);
|
||||
}
|
||||
|
||||
memset(&cfl->recon_buf_q3, 0, sizeof(cfl->recon_buf_q3));
|
||||
memset(&cfl->ac_buf_q3, 0, sizeof(cfl->ac_buf_q3));
|
||||
cfl->subsampling_x = cm->subsampling_x;
|
||||
cfl->subsampling_y = cm->subsampling_y;
|
||||
cfl->subsampling_x = seq_params->subsampling_x;
|
||||
cfl->subsampling_y = seq_params->subsampling_y;
|
||||
cfl->are_parameters_computed = 0;
|
||||
cfl->store_y = 0;
|
||||
// The DC_PRED cache is disabled by default and is only enabled in
|
||||
|
|
|
@ -75,8 +75,8 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
|
||||
|
@ -91,7 +91,7 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
// horizontal filter
|
||||
const uint8_t *src_horiz = src - fo_vert * src_stride;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < im_h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
|
@ -107,7 +107,7 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
// vertical filter
|
||||
int16_t *src_vert = im_block + fo_vert * im_stride;
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
|
@ -126,8 +126,8 @@ void av1_convolve_2d_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
const int fo_vert = filter_params_y->taps / 2 - 1;
|
||||
|
@ -141,7 +141,7 @@ void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
// vertical filter
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -156,8 +156,8 @@ void av1_convolve_y_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
const int fo_horiz = filter_params_x->taps / 2 - 1;
|
||||
|
@ -172,7 +172,7 @@ void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
// horizontal filter
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -187,8 +187,8 @@ void av1_convolve_x_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
||||
int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
(void)filter_params_x;
|
||||
|
@ -204,8 +204,8 @@ void av1_convolve_2d_copy_sr_c(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
|
||||
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -222,7 +222,7 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
// horizontal filter
|
||||
const uint8_t *src_horiz = src - fo_vert * src_stride;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < im_h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
|
@ -238,7 +238,7 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
// vertical filter
|
||||
int16_t *src_vert = im_block + fo_vert * im_stride;
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
|
@ -270,8 +270,8 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -289,7 +289,7 @@ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
// vertical filter
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -320,8 +320,8 @@ void av1_jnt_convolve_y_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -339,7 +339,7 @@ void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
// horizontal filter
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -370,8 +370,8 @@ void av1_jnt_convolve_x_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst8, int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -412,8 +412,8 @@ void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
|
|||
|
||||
void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
||||
int dst8_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_qn, const int x_step_qn,
|
||||
const int subpel_y_qn, const int y_step_qn,
|
||||
ConvolveParams *conv_params) {
|
||||
|
@ -439,7 +439,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
|
||||
assert(x_filter_idx < SUBPEL_SHIFTS);
|
||||
const int16_t *x_filter =
|
||||
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
|
||||
av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
for (int k = 0; k < filter_params_x->taps; ++k) {
|
||||
sum += x_filter[k] * src_x[k - fo_horiz];
|
||||
|
@ -461,7 +461,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
|
||||
assert(y_filter_idx < SUBPEL_SHIFTS);
|
||||
const int16_t *y_filter =
|
||||
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
|
||||
av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
|
||||
int32_t sum = 1 << offset_bits;
|
||||
for (int k = 0; k < filter_params_y->taps; ++k) {
|
||||
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
|
||||
|
@ -498,8 +498,8 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, uint8_t *dst8,
|
|||
|
||||
static void convolve_2d_scale_wrapper(
|
||||
const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y, const int subpel_x_qn,
|
||||
int h, const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y, const int subpel_x_qn,
|
||||
const int x_step_qn, const int subpel_y_qn, const int y_step_qn,
|
||||
ConvolveParams *conv_params) {
|
||||
if (conv_params->is_compound) {
|
||||
|
@ -520,25 +520,27 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
|
|||
(void)y_step_q4;
|
||||
(void)dst;
|
||||
(void)dst_stride;
|
||||
|
||||
InterpFilterParams filter_params_x, filter_params_y;
|
||||
av1_get_convolve_filter_params(interp_filters, &filter_params_x,
|
||||
&filter_params_y, w, h);
|
||||
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
|
||||
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
|
||||
const InterpFilterParams *filter_params_x =
|
||||
av1_get_interp_filter_params_with_block_size(filter_x, w);
|
||||
const InterpFilterParams *filter_params_y =
|
||||
av1_get_interp_filter_params_with_block_size(filter_y, h);
|
||||
|
||||
if (scaled)
|
||||
convolve_2d_scale_wrapper(src, src_stride, dst, dst_stride, w, h,
|
||||
&filter_params_x, &filter_params_y, subpel_x_q4,
|
||||
filter_params_x, filter_params_y, subpel_x_q4,
|
||||
x_step_q4, subpel_y_q4, y_step_q4, conv_params);
|
||||
else
|
||||
sf->convolve[subpel_x_q4 != 0][subpel_y_q4 != 0][conv_params->is_compound](
|
||||
src, src_stride, dst, dst_stride, w, h, &filter_params_x,
|
||||
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
|
||||
src, src_stride, dst, dst_stride, w, h, filter_params_x,
|
||||
filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
|
||||
}
|
||||
|
||||
void av1_highbd_convolve_2d_copy_sr_c(
|
||||
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
int h, const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
|
||||
(void)filter_params_x;
|
||||
(void)filter_params_y;
|
||||
|
@ -554,8 +556,8 @@ void av1_highbd_convolve_2d_copy_sr_c(
|
|||
|
||||
void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
const int fo_horiz = filter_params_x->taps / 2 - 1;
|
||||
|
@ -569,7 +571,7 @@ void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
|
|||
|
||||
// horizontal filter
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -585,8 +587,8 @@ void av1_highbd_convolve_x_sr_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
const int fo_vert = filter_params_y->taps / 2 - 1;
|
||||
|
@ -599,7 +601,7 @@ void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
|
|||
((conv_params->round_0 + conv_params->round_1) == (2 * FILTER_BITS)));
|
||||
// vertical filter
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -614,8 +616,8 @@ void av1_highbd_convolve_y_sr_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
int16_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
|
||||
|
@ -630,7 +632,7 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
|
|||
// horizontal filter
|
||||
const uint16_t *src_horiz = src - fo_vert * src_stride;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < im_h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
|
@ -646,7 +648,7 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
|
|||
// vertical filter
|
||||
int16_t *src_vert = im_block + fo_vert * im_stride;
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
|
@ -666,8 +668,9 @@ void av1_highbd_convolve_2d_sr_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst16, int dst16_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
int h,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
int x, y, k;
|
||||
|
@ -685,7 +688,7 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
|
|||
// horizontal filter
|
||||
const uint16_t *src_horiz = src - fo_vert * src_stride;
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (y = 0; y < im_h; ++y) {
|
||||
for (x = 0; x < w; ++x) {
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
|
@ -703,7 +706,7 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
|
|||
int16_t *src_vert = im_block + fo_vert * im_stride;
|
||||
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
for (y = 0; y < h; ++y) {
|
||||
for (x = 0; x < w; ++x) {
|
||||
int32_t sum = 1 << offset_bits;
|
||||
|
@ -734,8 +737,9 @@ void av1_highbd_jnt_convolve_2d_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst16, int dst16_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
int h,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -753,7 +757,7 @@ void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
|
|||
assert(bits >= 0);
|
||||
// horizontal filter
|
||||
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
filter_params_x, subpel_x_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -784,8 +788,9 @@ void av1_highbd_jnt_convolve_x_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst16, int dst16_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
int h,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
|
@ -803,7 +808,7 @@ void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
|
|||
assert(bits >= 0);
|
||||
// vertical filter
|
||||
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
|
||||
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
filter_params_y, subpel_y_q4 & SUBPEL_MASK);
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int32_t res = 0;
|
||||
|
@ -834,8 +839,8 @@ void av1_highbd_jnt_convolve_y_c(const uint16_t *src, int src_stride,
|
|||
|
||||
void av1_highbd_jnt_convolve_2d_copy_c(
|
||||
const uint16_t *src, int src_stride, uint16_t *dst16, int dst16_stride,
|
||||
int w, int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
int w, int h, const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
const int subpel_y_q4, ConvolveParams *conv_params, int bd) {
|
||||
CONV_BUF_TYPE *dst = conv_params->dst;
|
||||
int dst_stride = conv_params->dst_stride;
|
||||
|
@ -875,8 +880,8 @@ void av1_highbd_jnt_convolve_2d_copy_c(
|
|||
|
||||
void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
|
||||
uint16_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_qn, const int x_step_qn,
|
||||
const int subpel_y_qn, const int y_step_qn,
|
||||
ConvolveParams *conv_params, int bd) {
|
||||
|
@ -900,7 +905,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
|
|||
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
|
||||
assert(x_filter_idx < SUBPEL_SHIFTS);
|
||||
const int16_t *x_filter =
|
||||
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
|
||||
av1_get_interp_filter_subpel_kernel(filter_params_x, x_filter_idx);
|
||||
int32_t sum = (1 << (bd + FILTER_BITS - 1));
|
||||
for (int k = 0; k < filter_params_x->taps; ++k) {
|
||||
sum += x_filter[k] * src_x[k - fo_horiz];
|
||||
|
@ -922,7 +927,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
|
|||
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
|
||||
assert(y_filter_idx < SUBPEL_SHIFTS);
|
||||
const int16_t *y_filter =
|
||||
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
|
||||
av1_get_interp_filter_subpel_kernel(filter_params_y, y_filter_idx);
|
||||
int32_t sum = 1 << offset_bits;
|
||||
for (int k = 0; k < filter_params_y->taps; ++k) {
|
||||
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
|
||||
|
@ -971,9 +976,12 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
|
|||
(void)dst_stride;
|
||||
|
||||
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
|
||||
InterpFilterParams filter_params_x, filter_params_y;
|
||||
av1_get_convolve_filter_params(interp_filters, &filter_params_x,
|
||||
&filter_params_y, w, h);
|
||||
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
|
||||
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
|
||||
const InterpFilterParams *filter_params_x =
|
||||
av1_get_interp_filter_params_with_block_size(filter_x, w);
|
||||
const InterpFilterParams *filter_params_y =
|
||||
av1_get_interp_filter_params_with_block_size(filter_y, h);
|
||||
|
||||
if (scaled) {
|
||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
|
||||
|
@ -981,16 +989,16 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
|
|||
assert(conv_params->dst != NULL);
|
||||
}
|
||||
av1_highbd_convolve_2d_scale(src, src_stride, dst, dst_stride, w, h,
|
||||
&filter_params_x, &filter_params_y,
|
||||
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4,
|
||||
conv_params, bd);
|
||||
filter_params_x, filter_params_y, subpel_x_q4,
|
||||
x_step_q4, subpel_y_q4, y_step_q4, conv_params,
|
||||
bd);
|
||||
} else {
|
||||
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
|
||||
|
||||
sf->highbd_convolve[subpel_x_q4 != 0][subpel_y_q4 !=
|
||||
0][conv_params->is_compound](
|
||||
src, src_stride, dst, dst_stride, w, h, &filter_params_x,
|
||||
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
|
||||
src, src_stride, dst, dst_stride, w, h, filter_params_x,
|
||||
filter_params_y, subpel_x_q4, subpel_y_q4, conv_params, bd);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,27 +40,17 @@ typedef struct ConvolveParams {
|
|||
|
||||
typedef void (*aom_convolve_fn_t)(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride, int w, int h,
|
||||
InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y,
|
||||
const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y,
|
||||
const int subpel_x_q4, const int subpel_y_q4,
|
||||
ConvolveParams *conv_params);
|
||||
|
||||
typedef void (*aom_highbd_convolve_fn_t)(
|
||||
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, int w,
|
||||
int h, InterpFilterParams *filter_params_x,
|
||||
InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
int h, const InterpFilterParams *filter_params_x,
|
||||
const InterpFilterParams *filter_params_y, const int subpel_x_q4,
|
||||
const int subpel_y_q4, ConvolveParams *conv_params, int bd);
|
||||
|
||||
// Fills *params_x and *params_y with the interpolation filter parameters for
// a w x h block. The x filter is extracted from `interp_filters` with index 1
// and looked up using the block width `w`; the y filter is extracted with
// index 0 and looked up using the block height `h`. The results are stored by
// value through the two output pointers.
static INLINE void av1_get_convolve_filter_params(InterpFilters interp_filters,
|
||||
InterpFilterParams *params_x,
|
||||
InterpFilterParams *params_y,
|
||||
int w, int h) {
|
||||
InterpFilter filter_x = av1_extract_interp_filter(interp_filters, 1);
|
||||
InterpFilter filter_y = av1_extract_interp_filter(interp_filters, 0);
|
||||
*params_x = av1_get_interp_filter_params_with_block_size(filter_x, w);
|
||||
*params_y = av1_get_interp_filter_params_with_block_size(filter_y, h);
|
||||
}
|
||||
|
||||
struct AV1Common;
|
||||
struct scale_factors;
|
||||
|
||||
|
|
|
@ -557,6 +557,7 @@ typedef uint8_t TXFM_CONTEXT;
|
|||
#define BWDREF_FRAME 5
|
||||
#define ALTREF2_FRAME 6
|
||||
#define ALTREF_FRAME 7
|
||||
#define EXTREF_FRAME REF_FRAMES
|
||||
#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
|
||||
|
||||
#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
|
||||
|
@ -607,6 +608,7 @@ typedef enum ATTRIBUTE_PACKED {
|
|||
|
||||
// In large_scale_tile coding, external references are used.
|
||||
#define MAX_EXTERNAL_REFERENCES 128
|
||||
#define MAX_TILES 512
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
|
|
@ -1,120 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "av1/common/filter.h"
|
||||
|
||||
// Bilinear kernels, one 8-entry row per subpel position (16 rows listed).
// Only entries 3 and 4 are non-zero: they move in steps of 8 from {128, 0}
// to {8, 120}, so every row sums to 128.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
bilinear_filters[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
|
||||
{ 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
|
||||
{ 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
|
||||
{ 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
|
||||
{ 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
|
||||
{ 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
|
||||
{ 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
|
||||
{ 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
|
||||
};
|
||||
|
||||
// Kernel table used by the EIGHTTAP_REGULAR entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel (a single
// coefficient of 128); entries 0 and 7 are zero in every row, and each row
// sums to 128.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
sub_pel_filters_8[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
|
||||
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
|
||||
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
|
||||
{ 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
|
||||
{ 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
|
||||
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
|
||||
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
|
||||
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
|
||||
};
|
||||
|
||||
// Kernel table used by the MULTITAP_SHARP entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel; most other
// rows carry non-zero coefficients in all eight positions, including the
// outermost entries 0 and 7.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
|
||||
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
|
||||
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
|
||||
{ -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
|
||||
{ -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
|
||||
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
|
||||
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
|
||||
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
|
||||
};
|
||||
|
||||
// Kernel table used by the EIGHTTAP_SMOOTH entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel; entries 0
// and 7 are zero in every row.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
|
||||
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
|
||||
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
|
||||
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
|
||||
{ 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
|
||||
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
|
||||
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
|
||||
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
|
||||
};
|
||||
|
||||
// One parameter record per filter. Each initializer gives, in order: the
// kernel table (as a flat int16_t pointer), SUBPEL_TAPS, SUBPEL_SHIFTS, and
// the filter's enum value. Records are listed as EIGHTTAP_REGULAR,
// EIGHTTAP_SMOOTH, MULTITAP_SHARP, BILINEAR, and the array is indexed
// directly by an InterpFilter value elsewhere in this file.
// NOTE(review): that indexing presumes the enum values match this order --
// confirm against the InterpFilter definition.
static const InterpFilterParams
|
||||
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
|
||||
{ (const int16_t *)sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_REGULAR },
|
||||
{ (const int16_t *)sub_pel_filters_8smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_SMOOTH },
|
||||
{ (const int16_t *)sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
MULTITAP_SHARP },
|
||||
{ (const int16_t *)bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
BILINEAR }
|
||||
};
|
||||
|
||||
// Kernel table referenced by av1_interp_4tap[0]. Rows are still 8 entries
// wide, but non-zero coefficients occur only in positions 2..5; row 0 is the
// pass-through kernel.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
sub_pel_filters_4[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
|
||||
{ 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
|
||||
{ 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
|
||||
{ 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
|
||||
{ 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
|
||||
{ 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
|
||||
{ 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
|
||||
{ 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
|
||||
};
|
||||
// Kernel table referenced by av1_interp_4tap[1]. Rows are 8 entries wide with
// non-zero coefficients only in positions 2..5; row 0 is the pass-through
// kernel.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
|
||||
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
|
||||
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
|
||||
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
|
||||
{ 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
|
||||
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
|
||||
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
|
||||
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
|
||||
};
|
||||
|
||||
// Parameter records for the two narrow-block kernel tables: index 0 points at
// sub_pel_filters_4 (tagged EIGHTTAP_REGULAR), index 1 at
// sub_pel_filters_4smooth (tagged EIGHTTAP_SMOOTH). The tap count stays
// SUBPEL_TAPS, the same value used for the 8-tap records.
static const InterpFilterParams av1_interp_4tap[2] = {
|
||||
{ (const int16_t *)sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_REGULAR },
|
||||
{ (const int16_t *)sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_SMOOTH },
|
||||
};
|
||||
|
||||
// Returns, by value, the parameter record for interp_filter given a block
// dimension w. When w <= 4, MULTITAP_SHARP and EIGHTTAP_REGULAR both yield
// av1_interp_4tap[0] and EIGHTTAP_SMOOTH yields av1_interp_4tap[1]. Every
// other case (w > 4, or any other filter) falls through to
// av1_interp_filter_params_list[interp_filter].
InterpFilterParams av1_get_interp_filter_params_with_block_size(
|
||||
const InterpFilter interp_filter, const int w) {
|
||||
if (w <= 4 &&
|
||||
(interp_filter == MULTITAP_SHARP || interp_filter == EIGHTTAP_REGULAR))
|
||||
return av1_interp_4tap[0];
|
||||
else if (w <= 4 && interp_filter == EIGHTTAP_SMOOTH)
|
||||
return av1_interp_4tap[1];
|
||||
|
||||
return av1_interp_filter_params_list[interp_filter];
|
||||
}
|
||||
|
||||
// Returns the filter_ptr stored for interp_filter in
// av1_interp_filter_params_list. The block size is not considered here, and
// interp_filter is used as the array index without a range check.
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter) {
|
||||
return (const int16_t *)av1_interp_filter_params_list[interp_filter]
|
||||
.filter_ptr;
|
||||
}
|
|
@ -64,8 +64,8 @@ static INLINE InterpFilter av1_unswitchable_filter(InterpFilter filter) {
|
|||
return filter == SWITCHABLE ? EIGHTTAP_REGULAR : filter;
|
||||
}
|
||||
|
||||
// NOTE(review): LOG_SWITCHABLE_FILTERS is spelled out twice below with the
// same value (2): first as a line-continued define with a trailing comment,
// then as a comment followed by a single-line define. This looks like both
// sides of a diff hunk rendered together -- confirm which form is current.
#define LOG_SWITCHABLE_FILTERS \
|
||||
2 /* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
|
||||
/* (1 << LOG_SWITCHABLE_FILTERS) > SWITCHABLE_FILTERS */
|
||||
#define LOG_SWITCHABLE_FILTERS 2
|
||||
|
||||
#define MAX_SUBPEL_TAPS 12
|
||||
#define SWITCHABLE_FILTER_CONTEXTS ((SWITCHABLE_FILTERS + 1) * 4)
|
||||
|
@ -79,14 +79,116 @@ typedef struct InterpFilterParams {
|
|||
InterpFilter interp_filter;
|
||||
} InterpFilterParams;
|
||||
|
||||
const int16_t *av1_get_interp_filter_kernel(const InterpFilter interp_filter);
|
||||
// Bilinear kernels, one 8-entry row per subpel position (16 rows listed).
// Only entries 3 and 4 are non-zero: they move in steps of 8 from {128, 0}
// to {8, 120}, so every row sums to 128.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_bilinear_filters[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 },
|
||||
{ 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 },
|
||||
{ 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 },
|
||||
{ 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 },
|
||||
{ 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 },
|
||||
{ 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 },
|
||||
{ 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 },
|
||||
{ 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 }
|
||||
};
|
||||
|
||||
InterpFilterParams av1_get_interp_filter_params_with_block_size(
|
||||
const InterpFilter interp_filter, const int w);
|
||||
// Kernel table used by the EIGHTTAP_REGULAR entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel (a single
// coefficient of 128); entries 0 and 7 are zero in every row, and each row
// sums to 128.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_sub_pel_filters_8[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, -6, 126, 8, -2, 0, 0 },
|
||||
{ 0, 2, -10, 122, 18, -4, 0, 0 }, { 0, 2, -12, 116, 28, -8, 2, 0 },
|
||||
{ 0, 2, -14, 110, 38, -10, 2, 0 }, { 0, 2, -14, 102, 48, -12, 2, 0 },
|
||||
{ 0, 2, -16, 94, 58, -12, 2, 0 }, { 0, 2, -14, 84, 66, -12, 2, 0 },
|
||||
{ 0, 2, -14, 76, 76, -14, 2, 0 }, { 0, 2, -12, 66, 84, -14, 2, 0 },
|
||||
{ 0, 2, -12, 58, 94, -16, 2, 0 }, { 0, 2, -12, 48, 102, -14, 2, 0 },
|
||||
{ 0, 2, -10, 38, 110, -14, 2, 0 }, { 0, 2, -8, 28, 116, -12, 2, 0 },
|
||||
{ 0, 0, -4, 18, 122, -10, 2, 0 }, { 0, 0, -2, 8, 126, -6, 2, 0 }
|
||||
};
|
||||
|
||||
// Kernel table used by the MULTITAP_SHARP entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel; most other
// rows carry non-zero coefficients in all eight positions, including the
// outermost entries 0 and 7.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_sub_pel_filters_8sharp[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { -2, 2, -6, 126, 8, -2, 2, 0 },
|
||||
{ -2, 6, -12, 124, 16, -6, 4, -2 }, { -2, 8, -18, 120, 26, -10, 6, -2 },
|
||||
{ -4, 10, -22, 116, 38, -14, 6, -2 }, { -4, 10, -22, 108, 48, -18, 8, -2 },
|
||||
{ -4, 10, -24, 100, 60, -20, 8, -2 }, { -4, 10, -24, 90, 70, -22, 10, -2 },
|
||||
{ -4, 12, -24, 80, 80, -24, 12, -4 }, { -2, 10, -22, 70, 90, -24, 10, -4 },
|
||||
{ -2, 8, -20, 60, 100, -24, 10, -4 }, { -2, 8, -18, 48, 108, -22, 10, -4 },
|
||||
{ -2, 6, -14, 38, 116, -22, 10, -4 }, { -2, 6, -10, 26, 120, -18, 8, -2 },
|
||||
{ -2, 4, -6, 16, 124, -12, 6, -2 }, { 0, 2, -2, 8, 126, -6, 2, -2 }
|
||||
};
|
||||
|
||||
// Kernel table used by the EIGHTTAP_SMOOTH entry of
// av1_interp_filter_params_list. Row 0 is the pass-through kernel; entries 0
// and 7 are zero in every row.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_sub_pel_filters_8smooth[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 2, 28, 62, 34, 2, 0, 0 },
|
||||
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
|
||||
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
|
||||
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, -2, 16, 54, 48, 12, 0, 0 },
|
||||
{ 0, -2, 14, 52, 52, 14, -2, 0 }, { 0, 0, 12, 48, 54, 16, -2, 0 },
|
||||
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
|
||||
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
|
||||
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 28, 2, 0 }
|
||||
};
|
||||
|
||||
// One parameter record per filter. Each initializer gives, in order: the
// kernel table (as a flat int16_t pointer), SUBPEL_TAPS, SUBPEL_SHIFTS, and
// the filter's enum value. Records are listed as EIGHTTAP_REGULAR,
// EIGHTTAP_SMOOTH, MULTITAP_SHARP, BILINEAR, and the array is indexed
// directly by an InterpFilter value by the lookup helpers in this file.
// NOTE(review): that indexing presumes the enum values match this order --
// confirm against the InterpFilter definition.
static const InterpFilterParams
|
||||
av1_interp_filter_params_list[SWITCHABLE_FILTERS + 1] = {
|
||||
{ (const int16_t *)av1_sub_pel_filters_8, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_REGULAR },
|
||||
{ (const int16_t *)av1_sub_pel_filters_8smooth, SUBPEL_TAPS,
|
||||
SUBPEL_SHIFTS, EIGHTTAP_SMOOTH },
|
||||
{ (const int16_t *)av1_sub_pel_filters_8sharp, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
MULTITAP_SHARP },
|
||||
{ (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
BILINEAR }
|
||||
};
|
||||
|
||||
// Kernel table referenced by two records of av1_interp_4tap. Rows are still 8
// entries wide, but non-zero coefficients occur only in positions 2..5; row 0
// is the pass-through kernel.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_sub_pel_filters_4[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
|
||||
{ 0, 0, -8, 122, 18, -4, 0, 0 }, { 0, 0, -10, 116, 28, -6, 0, 0 },
|
||||
{ 0, 0, -12, 110, 38, -8, 0, 0 }, { 0, 0, -12, 102, 48, -10, 0, 0 },
|
||||
{ 0, 0, -14, 94, 58, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
|
||||
{ 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
|
||||
{ 0, 0, -10, 58, 94, -14, 0, 0 }, { 0, 0, -10, 48, 102, -12, 0, 0 },
|
||||
{ 0, 0, -8, 38, 110, -12, 0, 0 }, { 0, 0, -6, 28, 116, -10, 0, 0 },
|
||||
{ 0, 0, -4, 18, 122, -8, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
|
||||
};
|
||||
// Kernel table referenced by the EIGHTTAP_SMOOTH record of av1_interp_4tap.
// Rows are 8 entries wide with non-zero coefficients only in positions 2..5;
// row 0 is the pass-through kernel.
DECLARE_ALIGNED(256, static const InterpKernel,
|
||||
av1_sub_pel_filters_4smooth[SUBPEL_SHIFTS]) = {
|
||||
{ 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 30, 62, 34, 2, 0, 0 },
|
||||
{ 0, 0, 26, 62, 36, 4, 0, 0 }, { 0, 0, 22, 62, 40, 4, 0, 0 },
|
||||
{ 0, 0, 20, 60, 42, 6, 0, 0 }, { 0, 0, 18, 58, 44, 8, 0, 0 },
|
||||
{ 0, 0, 16, 56, 46, 10, 0, 0 }, { 0, 0, 14, 54, 48, 12, 0, 0 },
|
||||
{ 0, 0, 12, 52, 52, 12, 0, 0 }, { 0, 0, 12, 48, 54, 14, 0, 0 },
|
||||
{ 0, 0, 10, 46, 56, 16, 0, 0 }, { 0, 0, 8, 44, 58, 18, 0, 0 },
|
||||
{ 0, 0, 6, 42, 60, 20, 0, 0 }, { 0, 0, 4, 40, 62, 22, 0, 0 },
|
||||
{ 0, 0, 4, 36, 62, 26, 0, 0 }, { 0, 0, 2, 34, 62, 30, 0, 0 }
|
||||
};
|
||||
|
||||
// Parameter records used when the block dimension is <= 4. The array has the
// same declared length as av1_interp_filter_params_list, and the lookup
// helper indexes it with the same InterpFilter value. The third record (the
// slot MULTITAP_SHARP occupies in av1_interp_filter_params_list) reuses
// av1_sub_pel_filters_4 and is tagged EIGHTTAP_REGULAR; the fourth record is
// the bilinear table.
// For w<=4, MULTITAP_SHARP is the same as EIGHTTAP_REGULAR
|
||||
static const InterpFilterParams av1_interp_4tap[SWITCHABLE_FILTERS + 1] = {
|
||||
{ (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_REGULAR },
|
||||
{ (const int16_t *)av1_sub_pel_filters_4smooth, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_SMOOTH },
|
||||
{ (const int16_t *)av1_sub_pel_filters_4, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
EIGHTTAP_REGULAR },
|
||||
{ (const int16_t *)av1_bilinear_filters, SUBPEL_TAPS, SUBPEL_SHIFTS,
|
||||
BILINEAR },
|
||||
};
|
||||
|
||||
// Returns a pointer to the parameter record for interp_filter: taken from
// av1_interp_4tap when w <= 4, otherwise from av1_interp_filter_params_list.
// interp_filter is used directly as the index into either array, with no
// range check in this function. The result points into static const storage.
static INLINE const InterpFilterParams *
|
||||
av1_get_interp_filter_params_with_block_size(const InterpFilter interp_filter,
|
||||
const int w) {
|
||||
if (w <= 4) return &av1_interp_4tap[interp_filter];
|
||||
return &av1_interp_filter_params_list[interp_filter];
|
||||
}
|
||||
|
||||
// Returns the filter_ptr stored for interp_filter in
// av1_interp_filter_params_list. The block size is not considered here, and
// interp_filter is used as the array index without a range check.
static INLINE const int16_t *av1_get_interp_filter_kernel(
|
||||
const InterpFilter interp_filter) {
|
||||
return av1_interp_filter_params_list[interp_filter].filter_ptr;
|
||||
}
|
||||
|
||||
// Returns a pointer to the coefficients for one subpel position inside a
// filter's kernel table: filter_ptr advanced by taps * subpel elements.
// NOTE(review): two parameter lines and two return statements appear below
// (filter_params passed by value, then by const pointer). This looks like
// both sides of a diff hunk rendered together -- confirm which is current.
static INLINE const int16_t *av1_get_interp_filter_subpel_kernel(
|
||||
const InterpFilterParams filter_params, const int subpel) {
|
||||
return filter_params.filter_ptr + filter_params.taps * subpel;
|
||||
const InterpFilterParams *const filter_params, const int subpel) {
|
||||
return filter_params->filter_ptr + filter_params->taps * subpel;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
|
|
@ -294,9 +294,6 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
|
|||
mv->row = clamp(mv->row, min_row, max_row);
|
||||
}
|
||||
|
||||
// Returns non-zero when either the row or the col component of *mv has any
// bit set under SUBPEL_MASK, and 0 otherwise.
static INLINE int mv_has_subpel(const MV *mv) {
|
||||
return (mv->row & SUBPEL_MASK) || (mv->col & SUBPEL_MASK);
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче