Fwd txfm and quantizer HBD/LBD data paths co-exist

Change-Id: Iaae46d0735539b8b8daf9faac81c2a3434838020
This commit is contained in:
Yi Luo 2017-06-27 16:07:28 -07:00
Родитель 8ab5b5f613
Коммит 0f4195c218
13 изменённых файлов: 98 добавлений и 148 удалений

Просмотреть файл

@ -290,10 +290,10 @@ DSP_SRCS-yes += quantize.c
DSP_SRCS-yes += quantize.h
DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
DSP_SRCS-$(HAVE_AVX2) += x86/highbd_quantize_intrin_avx2.c
endif
ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm

Просмотреть файл

@ -525,13 +525,12 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} # CONFIG_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER
} else {
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
@ -543,15 +542,14 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b sse2 avx2/;
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b sse2 avx2/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
} # CONFIG_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER
} # CONFIG_AOM_QM
if (aom_config("CONFIG_AV1") eq "yes") {

Просмотреть файл

@ -256,7 +256,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
}
#endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr,
@ -523,7 +522,6 @@ void aom_highbd_quantize_b_64x64_c(
*eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
#else // CONFIG_AOM_QM
@ -602,7 +600,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
}
#endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr,
@ -825,5 +822,4 @@ void aom_highbd_quantize_b_64x64_c(
*eob_ptr = eob + 1;
}
#endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AOM_QM

Просмотреть файл

@ -15,7 +15,6 @@
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr,
@ -152,4 +151,3 @@ void aom_highbd_quantize_b_32x32_sse2(
}
*eob_ptr = eob + 1;
}
#endif

Просмотреть файл

@ -125,10 +125,10 @@ endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/av1_quantize_sse2.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_quantize_avx2.c
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_highbd_quantize_avx2.c
endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm
@ -140,10 +140,10 @@ endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
endif
ifeq ($(CONFIG_EXT_INTER),yes)
AV1_CX_SRCS-yes += encoder/wedge_utils.c

Просмотреть файл

@ -417,25 +417,23 @@ if (aom_config("CONFIG_DPCM_INTRA") eq "yes") {
}
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
#fwd txfm
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
}
#fwd txfm
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
#
# Motion search
@ -480,33 +478,34 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/av1_highbd_block_error sse2/;
if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
}
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
} else {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
}
# fdct functions
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
}
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
}
if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
}
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
} else {
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
}
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
# End av1_high encoder functions
if (aom_config("CONFIG_EXT_INTER") eq "yes") {

Просмотреть файл

@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
}
#endif // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr,
@ -951,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
}
}
#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
@ -980,7 +978,6 @@ static INLINE void highbd_quantize_dc(
}
*eob_ptr = eob + 1;
}
#endif // CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
@ -1519,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade(
}
}
#endif // CONFIG_NEW_QUANT
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr,
@ -1579,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
*eob_ptr = eob + 1;
}
#endif // CONFIG_HIGHBITDEPTH
static void invert_quant(int16_t *quant, int16_t *shift, int d) {
uint32_t t;
int l, m;

Просмотреть файл

@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr,
@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade(
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc,
const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT
#endif // CONFIG_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"

Просмотреть файл

@ -2020,12 +2020,10 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
}
}
#if CONFIG_HIGHBITDEPTH
// C implementation behind av1_highbd_fwht4x4: it does no work of its own and
// forwards input, output and stride unchanged to av1_fwht4x4_c.
void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
int stride) {
av1_fwht4x4_c(input, output, stride);
}
#endif // CONFIG_HIGHBITDEPTH
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {

Просмотреть файл

@ -494,7 +494,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
}
#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
typedef enum QUANT_FUNC {
QUANT_FUNC_LOWBD = 0,
QUANT_FUNC_HIGHBD = 1,
@ -514,29 +513,12 @@ static AV1_QUANT_FACADE
#endif // !CONFIG_NEW_QUANT
{ NULL, NULL }
};
#endif // !CONFIG_PVQ
#else
// Variant of the quantizer dispatch table found in the #else branch of the
// CONFIG_HIGHBITDEPTH conditional: it has a single column, QUANT_FUNC_LOWBD.
typedef enum QUANT_FUNC {
QUANT_FUNC_LOWBD = 0,
QUANT_FUNC_TYPES = 1
} QUANT_FUNC;
// One row per AV1_XFORM_QUANT type. CONFIG_NEW_QUANT selects between the
// plain facades and their *_nuq counterparts.
// NOTE(review): the trailing { NULL } row presumably belongs to
// AV1_XFORM_QUANT_SKIP_QUANT, which the caller checks before indexing the
// table -- confirm against the enum definition.
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
[QUANT_FUNC_TYPES] = {
#if !CONFIG_NEW_QUANT
{ av1_quantize_fp_facade },
{ av1_quantize_b_facade },
{ av1_quantize_dc_facade },
#else // !CONFIG_NEW_QUANT
{ av1_quantize_fp_nuq_facade },
{ av1_quantize_b_nuq_facade },
{ av1_quantize_dc_nuq_facade },
#endif // !CONFIG_NEW_QUANT
{ NULL }
};
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_PVQ
// Common signature of av1_fwd_txfm and av1_highbd_fwd_txfm, so that either can
// be called through the same function pointer.
typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride,
FWD_TXFM_PARAM *param);
// Forward-transform dispatch table: entry 0 is av1_fwd_txfm, entry 1 is
// av1_highbd_fwd_txfm. av1_xform_quant indexes it with the value returned by
// get_bitdepth_data_path_index(xd).
static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm,
av1_highbd_fwd_txfm };
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
@ -668,29 +650,13 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
#if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
}
#if CONFIG_LV_MAP
p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif // CONFIG_LV_MAP
return;
}
#endif // CONFIG_HIGHBITDEPTH
av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
const int is_hbd = get_bitdepth_data_path_index(xd);
fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &fwd_txfm_param);
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD](
quant_func_list[xform_quant_idx][is_hbd](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
@ -700,7 +666,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif // CONFIG_LV_MAP
#else // #if !CONFIG_PVQ
return;
#else // CONFIG_PVQ
(void)xform_quant_idx;
#if CONFIG_HIGHBITDEPTH
fwd_txfm_param.bd = xd->bd;

Просмотреть файл

@ -203,7 +203,6 @@ static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
}
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
#if CONFIG_HIGHBITDEPTH
#if CONFIG_CHROMA_2X2
static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
@ -237,6 +236,7 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless,
const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
if (lossless) {
assert(tx_type == DCT_DCT);
av1_highbd_fwht4x4(src_diff, coeff, diff_stride);
@ -248,7 +248,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case DCT_ADST:
case ADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@ -257,7 +257,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case ADST_FLIPADST:
case FLIPADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
// use the c version for anything including identity for now
case V_DCT:
@ -268,7 +268,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST:
case IDTX:
// fallthrough intended
av1_fwd_txfm2d_4x4_c(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -279,47 +279,54 @@ static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_4x8_c(src_diff, coeff, diff_stride, tx_type, bd);
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
// Thin wrapper that always dispatches to the C kernel av1_fwd_txfm2d_8x4_c;
// fwd_txfm_opt is accepted for signature compatibility but unused.
// NOTE(review): this text comes from a diff rendered without +/- markers. The
// call taking `coeff` appears to be the removed line and the dst_coeff
// declaration plus the call taking `dst_coeff` its replacement, so the kernel
// is not meant to run twice -- confirm against the committed file.
static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_8x4_c(src_diff, coeff, diff_stride, tx_type, bd);
// The av1_fwd_txfm2d_8x4 prototype declares `int32_t *output`, hence the cast.
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
// Thin wrapper that always dispatches to the C kernel av1_fwd_txfm2d_8x16_c;
// fwd_txfm_opt is accepted for signature compatibility but unused.
// NOTE(review): this text comes from a diff rendered without +/- markers. The
// call taking `coeff` appears to be the removed line and the dst_coeff
// declaration plus the call taking `dst_coeff` its replacement, so the kernel
// is not meant to run twice -- confirm against the committed file.
static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_8x16_c(src_diff, coeff, diff_stride, tx_type, bd);
// The av1_fwd_txfm2d_8x16 prototype declares `int32_t *output`, hence the cast.
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
// Thin wrapper that always dispatches to the C kernel av1_fwd_txfm2d_16x8_c;
// fwd_txfm_opt is accepted for signature compatibility but unused.
// NOTE(review): this text comes from a diff rendered without +/- markers. The
// call taking `coeff` appears to be the removed line and the dst_coeff
// declaration plus the call taking `dst_coeff` its replacement, so the kernel
// is not meant to run twice -- confirm against the committed file.
static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_16x8_c(src_diff, coeff, diff_stride, tx_type, bd);
// The av1_fwd_txfm2d_16x8 prototype declares `int32_t *output`, hence the cast.
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
// Thin wrapper that always dispatches to the C kernel av1_fwd_txfm2d_16x32_c;
// fwd_txfm_opt is accepted for signature compatibility but unused.
// NOTE(review): this text comes from a diff rendered without +/- markers. The
// call taking `coeff` appears to be the removed line and the dst_coeff
// declaration plus the call taking `dst_coeff` its replacement, so the kernel
// is not meant to run twice -- confirm against the committed file.
static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_16x32_c(src_diff, coeff, diff_stride, tx_type, bd);
// The av1_fwd_txfm2d_16x32 prototype declares `int32_t *output`, hence the cast.
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
// Thin wrapper that always dispatches to the C kernel av1_fwd_txfm2d_32x16_c;
// fwd_txfm_opt is accepted for signature compatibility but unused.
// NOTE(review): this text comes from a diff rendered without +/- markers. The
// call taking `coeff` appears to be the removed line and the dst_coeff
// declaration plus the call taking `dst_coeff` its replacement, so the kernel
// is not meant to run twice -- confirm against the committed file.
static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt;
av1_fwd_txfm2d_32x16_c(src_diff, coeff, diff_stride, tx_type, bd);
// The av1_fwd_txfm2d_32x16 prototype declares `int32_t *output`, hence the cast.
int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
}
static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
@ -327,7 +334,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case DCT_ADST:
case ADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@ -336,7 +343,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case ADST_FLIPADST:
case FLIPADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
// use the c version for anything including identity for now
case V_DCT:
@ -347,7 +354,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST:
case IDTX:
// fallthrough intended
av1_fwd_txfm2d_8x8_c(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -357,6 +364,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
@ -364,7 +372,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
case DCT_ADST:
case ADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@ -373,7 +381,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
case ADST_FLIPADST:
case FLIPADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
// use the c version for anything including identity for now
case V_DCT:
@ -384,7 +392,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST:
case IDTX:
// fallthrough intended
av1_fwd_txfm2d_16x16_c(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -394,6 +402,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
(void)fwd_txfm_opt;
switch (tx_type) {
case DCT_DCT:
@ -401,7 +410,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
case DCT_ADST:
case ADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case FLIPADST_DCT:
@ -410,7 +419,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
case ADST_FLIPADST:
case FLIPADST_ADST:
// fallthrough intended
av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
// use the c version for anything including identity for now
case V_DCT:
@ -421,7 +430,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST:
case IDTX:
// fallthrough intended
av1_fwd_txfm2d_32x32_c(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#endif // CONFIG_EXT_TX
default: assert(0);
@ -432,11 +441,12 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
(void)fwd_txfm_opt;
(void)bd;
switch (tx_type) {
case DCT_DCT:
av1_fwd_txfm2d_64x64(src_diff, coeff, diff_stride, tx_type, bd);
av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, tx_type, bd);
break;
#if CONFIG_EXT_TX
case ADST_DCT:
@ -459,7 +469,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
// in a later change. This shouldn't impact performance since
// DCT_DCT is the only extended type currently allowed for 64x64,
// as dictated by get_ext_tx_set_type in blockd.h.
av1_fwd_txfm2d_64x64_c(src_diff, coeff, diff_stride, DCT_DCT, bd);
av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd);
break;
case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break;
#endif // CONFIG_EXT_TX
@ -467,7 +477,6 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff,
}
}
#endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
FWD_TXFM_PARAM *fwd_txfm_param) {
@ -534,7 +543,6 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
}
}
#if CONFIG_HIGHBITDEPTH
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) {
const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL;
@ -596,4 +604,3 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
default: assert(0); break;
}
}
#endif // CONFIG_HIGHBITDEPTH

Просмотреть файл

@ -20,9 +20,7 @@ typedef struct FWD_TXFM_PARAM {
TX_TYPE tx_type;
TX_SIZE tx_size;
int lossless;
#if CONFIG_HIGHBITDEPTH
int bd;
#endif // CONFIG_HIGHBITDEPTH
} FWD_TXFM_PARAM;
#ifdef __cplusplus
@ -32,10 +30,8 @@ extern "C" {
// Forward-transform entry point taking an int16_t source (src_diff), a
// tran_low_t destination (coeff), a stride and a FWD_TXFM_PARAM descriptor.
// NOTE(review): presumably src_diff holds residuals read with row stride
// diff_stride and coeff receives the transform output -- confirm against the
// definition.
void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
FWD_TXFM_PARAM *fwd_txfm_param);
#if CONFIG_HIGHBITDEPTH
// Counterpart of av1_fwd_txfm with an identical parameter list, which allows
// both functions to be selected through one function-pointer type.
// NOTE(review): presumably the high-bitdepth variant that also consumes
// fwd_txfm_param->bd -- confirm against the definition.
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param);
#endif // CONFIG_HIGHBITDEPTH
#ifdef __cplusplus
} // extern "C"

Просмотреть файл

@ -113,7 +113,7 @@ static void fdct4x4_sse4_1(__m128i *in, int bit) {
in[3] = _mm_unpackhi_epi64(v1, v3);
}
static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) {
static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
_mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
@ -404,7 +404,7 @@ static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) {
in[15] = _mm_srai_epi32(in[15], shift);
}
static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) {
static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) {
_mm_store_si128((__m128i *)(output + 0 * 4), res[0]);
_mm_store_si128((__m128i *)(output + 1 * 4), res[1]);
_mm_store_si128((__m128i *)(output + 2 * 4), res[2]);
@ -1788,7 +1788,7 @@ static void col_txfm_16x16_rounding(__m128i *in, int shift) {
col_txfm_8x8_rounding(&in[48], shift);
}
static void write_buffer_16x16(const __m128i *in, tran_low_t *output) {
static void write_buffer_16x16(const __m128i *in, int32_t *output) {
const int size_8x8 = 16 * 4;
write_buffer_8x8(&in[0], output);
output += size_8x8;