diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 94e25870e..7b2b5fa85 100644 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -1027,13 +1027,23 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_AOM_QM") eq "yes") { if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { add_proto qw/void aom_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_quantize_b/; add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_quantize_b_32x32/; + + add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_quantize_b_64x64/; if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const 
int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_highbd_quantize_b/; add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_highbd_quantize_b_32x32/; + + add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + specialize qw/aom_highbd_quantize_b_64x64/; } # CONFIG_AOM_HIGHBITDEPTH } # CONFIG_AV1_ENCODER } else { @@ -1044,12 +1054,18 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { add_proto qw/void aom_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/aom_quantize_b_32x32/, "$ssse3_x86_64", "$avx_x86_64"; + add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t 
*iscan"; + specialize qw/aom_quantize_b_64x64/; + if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/aom_highbd_quantize_b sse2/; add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/aom_highbd_quantize_b_32x32 sse2/; + + add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/aom_highbd_quantize_b_64x64/; } # CONFIG_AOM_HIGHBITDEPTH } # CONFIG_AV1_ENCODER } # CONFIG_AOM_QM diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c index 1b9bbdc62..f7870ca03 100644 --- a/aom_dsp/quantize.c +++ b/aom_dsp/quantize.c @@ -99,6 +99,38 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { + const int n_coeffs = 4096; /* 64 * 64 coefficients */ + const int rc = 0; + const int 
coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int64_t tmp, eob = -1; + int32_t tmp32; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2), + INT16_MIN, INT16_MAX); + tmp32 = (int32_t)((tmp * qm_ptr[rc] * quant) >> (14 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dequant = + (dequant_ptr * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; + if (tmp32) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + #if CONFIG_AOM_HIGHBITDEPTH void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, @@ -129,6 +161,38 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, } *eob_ptr = eob + 1; } + +#if CONFIG_TX64X64 +void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { + const int n_coeffs = 4096; /* 64 * 64 coefficients */ + int eob = -1; + int dequant; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int coeff = coeff_ptr[0]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2); + const uint32_t abs_qcoeff = + (uint32_t)((tmp * qm_ptr[0] * quant) >> (14 + AOM_QM_BITS)); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dequant = + (dequant_ptr * iqm_ptr[0] + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; + 
dqcoeff_ptr[0] = (qcoeff_ptr[0] * dequant) / 4; + if (abs_qcoeff) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 #endif void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -316,6 +380,72 @@ void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr) { + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), + ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + + int idx = 0; + int idx_arr[4096]; + int i, eob = -1; + int dequant; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff = coeff_ptr[rc] * wt; + + // If the coefficient is out of the base ZBIN range, keep it for + // quantization. + if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) || + coeff <= (nzbins[rc != 0] << AOM_QM_BITS)) + idx_arr[idx++] = i; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. 
+ for (i = 0; i < idx; i++) { + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const qm_val_t wt = qm_ptr[rc]; + int64_t tmp; + int tmp32; + int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); + tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); + tmp = tmp * wt; + tmp32 = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * + quant_shift_ptr[rc != 0]) >> + (14 + AOM_QM_BITS); + + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dequant = + (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; + + if (tmp32) eob = idx_arr[i]; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + #if CONFIG_AOM_HIGHBITDEPTH void aom_highbd_quantize_b_32x32_c( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, @@ -375,8 +505,71 @@ void aom_highbd_quantize_b_32x32_c( } *eob_ptr = eob + 1; } -#endif + +#if CONFIG_TX64X64 +void aom_highbd_quantize_b_64x64_c( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr) { + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), + ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + + int idx = 0; + int idx_arr[4096]; + int i, eob = -1; + int dequant; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const qm_val_t wt = qm_ptr[rc]; + const int coeff = coeff_ptr[rc] * wt; + + // If the 
coefficient is out of the base ZBIN range, keep it for + // quantization. + if (coeff >= (zbins[rc != 0] << AOM_QM_BITS) || + coeff <= (nzbins[rc != 0] << AOM_QM_BITS)) + idx_arr[idx++] = i; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. + for (i = 0; i < idx; i++) { + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const qm_val_t wt = qm_ptr[rc]; + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp1 = + abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); + const int64_t tmpw = tmp1 * wt; + const int64_t tmp2 = ((tmpw * quant_ptr[rc != 0]) >> 16) + tmpw; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> (14 + AOM_QM_BITS)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dequant = + (dequant_ptr[rc != 0] * iqm_ptr[rc] + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; + if (abs_qcoeff) eob = idx_arr[i]; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 +#endif // CONFIG_AOM_HIGHBITDEPTH + #else + void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, @@ -450,6 +643,33 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr) { + const int n_coeffs = 4096; + const int rc = 0; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + int tmp, eob = -1; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + 
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2), + INT16_MIN, INT16_MAX); + tmp = (tmp * quant) >> 14; + qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 4; + if (tmp) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + #if CONFIG_AOM_HIGHBITDEPTH void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, @@ -475,6 +695,33 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, } *eob_ptr = eob + 1; } + +#if CONFIG_TX64X64 +void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, + uint16_t *eob_ptr) { + const int n_coeffs = 4096; + int eob = -1; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + const int coeff = coeff_ptr[0]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 2); + const uint32_t abs_qcoeff = (uint32_t)((tmp * quant) >> 14); + qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant_ptr / 4; + if (abs_qcoeff) eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 #endif void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, @@ -632,6 +879,62 @@ void aom_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void aom_quantize_b_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t 
*quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), + ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + + int idx = 0; + int idx_arr[4096]; + int i, eob = -1; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; + + // If the coefficient is out of the base ZBIN range, keep it for + // quantization. + if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) + idx_arr[idx++] = i; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. + for (i = 0; i < idx; i++) { + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + int tmp; + int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); + abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); + tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) * + quant_shift_ptr[rc != 0]) >> + 14; + + qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4; + + if (tmp) eob = idx_arr[i]; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + #if CONFIG_AOM_HIGHBITDEPTH void aom_highbd_quantize_b_32x32_c( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, @@ -682,5 +985,57 @@ void aom_highbd_quantize_b_32x32_c( } *eob_ptr = eob + 1; } + +#if CONFIG_TX64X64 +void aom_highbd_quantize_b_64x64_c( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t 
*quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan) { + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 2), + ROUND_POWER_OF_TWO(zbin_ptr[1], 2) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; + + int idx = 0; + int idx_arr[4096]; + int i, eob = -1; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; + + // If the coefficient is out of the base ZBIN range, keep it for + // quantization. + if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) + idx_arr[idx++] = i; + } + + // Quantization pass: only process the coefficients selected in + // pre-scan pass. Note: idx can be zero. + for (i = 0; i < idx; i++) { + const int rc = scan[idx_arr[i]]; + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + const int64_t tmp1 = + abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 2); + const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 14); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 4; + if (abs_qcoeff) eob = idx_arr[i]; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 #endif #endif diff --git a/aom_dsp/quantize.h b/aom_dsp/quantize.h index 45ed6780d..67e3b5ece 100644 --- a/aom_dsp/quantize.h +++ b/aom_dsp/quantize.h @@ -30,6 +30,13 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +#if 
CONFIG_TX64X64 +void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +#endif // CONFIG_TX64X64 void aom_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, @@ -50,6 +57,13 @@ void aom_highbd_quantize_dc_32x32( const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); +#if CONFIG_TX64X64 +void aom_highbd_quantize_dc_64x64( + const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, + const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr, const qm_val_t *qm_ptr, + const qm_val_t *iqm_ptr); +#endif // CONFIG_TX64X64 void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, @@ -58,8 +72,10 @@ void aom_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr); -#endif -#else +#endif // CONFIG_AOM_HIGHBITDEPTH + +#else // CONFIG_AOM_QM + void aom_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, @@ -68,7 +84,12 @@ void aom_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); - +#if CONFIG_TX64X64 +void aom_quantize_dc_64x64(const 
tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 #if CONFIG_AOM_HIGHBITDEPTH void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, @@ -81,8 +102,16 @@ void aom_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr); -#endif -#endif +#if CONFIG_TX64X64 +void aom_highbd_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, + const int16_t *round_ptr, + const int16_t quant_ptr, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, + const int16_t dequant_ptr, uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 +#endif // CONFIG_AOM_HIGHBITDEPTH +#endif // CONFIG_AOM_QM #ifdef __cplusplus } // extern "C" diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index f96dcf260..fce07f91b 100644 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl @@ -206,6 +206,14 @@ if (aom_config("CONFIG_NEW_QUANT") eq "yes") { add_proto qw/void quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band"; specialize qw/quantize_32x32_fp_nuq/; + + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const 
uint8_t *band"; + specialize qw/quantize_64x64_nuq/; + + add_proto qw/void quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band"; + specialize qw/quantize_64x64_fp_nuq/; + } } # FILTER_INTRA predictor functions @@ -332,8 +340,15 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { specialize qw/av1_block_error/; add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp/; add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp_32x32/; + + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp_64x64/; + } add_proto qw/void 
av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; specialize qw/av1_fdct8x8_quant/; @@ -345,10 +360,18 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { specialize qw/av1_block_error_fp neon/, "$sse2_x86inc"; add_proto qw/void av1_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp/; add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp_32x32/; + + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_quantize_fp_64x64/; + } add_proto qw/void av1_fdct8x8_quant/, "const 
int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t *iqm_ptr"; + specialize qw/av1_fdct8x8_quant/; } } else { if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { @@ -363,6 +386,11 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/av1_quantize_fp_32x32/; + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/av1_quantize_fp_64x64/; + } + add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/av1_fdct8x8_quant/; } else { @@ -378,6 +406,11 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { add_proto qw/void av1_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const 
int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/av1_quantize_fp_32x32/, "$ssse3_x86_64"; + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/av1_quantize_fp_64x64/; + } + add_proto qw/void av1_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/av1_fdct8x8_quant sse2 ssse3 neon/; } @@ -478,6 +511,14 @@ specialize qw/av1_full_range_search/; add_proto qw/void av1_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; specialize qw/av1_temporal_filter_apply sse2 msa/; +if (aom_config("CONFIG_AOM_QM") eq "yes") { + add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; + specialize 
qw/av1_quantize_b/; +} else { + add_proto qw/void av1_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; + specialize qw/av1_quantize_b/; +} + if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { # ENCODEMB INVOKE @@ -493,6 +534,14 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { add_proto qw/void highbd_quantize_32x32_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band"; specialize qw/highbd_quantize_32x32_fp_nuq/; + + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void highbd_quantize_64x64_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band"; + specialize qw/highbd_quantize_64x64_nuq/; + + add_proto qw/void highbd_quantize_64x64_fp_nuq/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, const cuml_bins_type_nuq *cuml_bins_ptr, const dequant_val_type_nuq *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const int16_t *scan, const uint8_t *band"; + specialize qw/highbd_quantize_64x64_fp_nuq/; + } } add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t 
block_size, int64_t *ssz, int bd"; @@ -505,6 +554,11 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; specialize qw/av1_highbd_quantize_fp_32x32/; + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; + specialize qw/av1_highbd_quantize_fp_64x64/; + } + add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; specialize qw/av1_highbd_quantize_b/; } else { diff --git a/av1/common/common_data.h b/av1/common/common_data.h index cedab2f8d..7877106d0 100644 --- a/av1/common/common_data.h +++ b/av1/common/common_data.h @@ -478,7 +478,11 @@ static const int tx_size_2d[TX_SIZES_ALL] = { #if CONFIG_CB4X4 4, #endif - 16, 64, 256, 1024, 32, 32, 128, 128, 512, 512, + 16, 64, 256, 1024, +#if CONFIG_TX64X64 + 4096, +#endif // CONFIG_TX64X64 + 32, 32, 128, 128, 512, 512, }; 
static const uint8_t tx_size_1d_log2[TX_SIZES] = { 2, 3, 4, 5 }; diff --git a/av1/common/idct.c b/av1/common/idct.c index 2663d2d36..156fc96af 100644 --- a/av1/common/idct.c +++ b/av1/common/idct.c @@ -20,10 +20,7 @@ #include "av1/common/enums.h" #include "av1/common/idct.h" -int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type, - const TX_SIZE tx_size) { - (void)tx_type; - (void)xd; +int get_tx_scale(const TX_SIZE tx_size) { if (txsize_sqr_up_map[tx_size] == TX_32X32) return 1; #if CONFIG_TX64X64 else if (txsize_sqr_up_map[tx_size] == TX_64X64) diff --git a/av1/common/idct.h b/av1/common/idct.h index db9a6e2df..8f1eea145 100644 --- a/av1/common/idct.h +++ b/av1/common/idct.h @@ -51,8 +51,7 @@ typedef struct { #endif // CONFIG_AOM_HIGHBITDEPTH #define MAX_TX_SCALE 1 -int get_tx_scale(const MACROBLOCKD *const xd, const TX_TYPE tx_type, - const TX_SIZE tx_size); +int get_tx_scale(const TX_SIZE tx_size); void av1_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); diff --git a/av1/decoder/detokenize.c b/av1/decoder/detokenize.c index 0f183f27f..024006c86 100644 --- a/av1/decoder/detokenize.c +++ b/av1/decoder/detokenize.c @@ -98,6 +98,7 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, const uint8_t *cat4_prob; const uint8_t *cat5_prob; const uint8_t *cat6_prob; + (void)tx_type; if (counts) { coef_counts = counts->coef[tx_size_ctx][type][ref]; @@ -138,7 +139,7 @@ static int decode_coefs(MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, cat6_prob = av1_cat6_prob; #endif - dq_shift = get_tx_scale(xd, tx_type, tx_size); + dq_shift = get_tx_scale(tx_size); while (c < max_eob) { int val = -1; diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c index c137760b9..600acbe56 100644 --- a/av1/encoder/dct.c +++ b/av1/encoder/dct.c @@ -2000,6 +2000,7 @@ void av1_fwd_idtx_c(const int16_t *src_diff, tran_low_t *coeff, int stride, } } } +#endif // CONFIG_EXT_TX #if CONFIG_AOM_HIGHBITDEPTH void 
av1_highbd_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, @@ -2014,4 +2015,3 @@ void av1_highbd_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, } #endif // CONFIG_TX64X64 #endif // CONFIG_AOM_HIGHBITDEPTH -#endif // CONFIG_EXT_TX diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index a0bf9bfdd..a0fa37ad1 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -99,7 +99,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, int seg_id = xd->mi[0]->mbmi.segment_id; const qm_val_t *iqmatrix = pd->seg_iqmatrix[seg_id][!ref][tx_size]; #endif - const int shift = get_tx_scale(xd, tx_type, tx_size); + const int shift = get_tx_scale(tx_size); #if CONFIG_NEW_QUANT int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type); const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq]; @@ -471,7 +471,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, const int16_t *src_diff; src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; - qparam.log_scale = get_tx_scale(xd, tx_type, tx_size); + qparam.log_scale = get_tx_scale(tx_size); #else MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; tran_low_t *ref_coeff = BLOCK_OFFSET(pd->pvq_ref_coeff, block); @@ -578,6 +578,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, } #if CONFIG_NEW_QUANT +// TODO(debargha, sarah): Unify these functions with the ones above void av1_xform_quant_nuq(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int ctx) { @@ -615,36 +616,60 @@ void av1_xform_quant_nuq(const AV1_COMMON *cm, MACROBLOCK *x, int plane, fwd_txfm_param.bd = xd->bd; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - highbd_quantize_32x32_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant, 
p->quant_shift, - pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, - dqcoeff, eob, scan_order->scan, band); - } else { - highbd_quantize_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant, - p->quant_shift, pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], - qcoeff, dqcoeff, eob, scan_order->scan, band); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + highbd_quantize_64x64_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant, p->quant_shift, + pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; +#endif // CONFIG_TX64X64 + case 1: + highbd_quantize_32x32_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant, p->quant_shift, + pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; + default: + highbd_quantize_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant, p->quant_shift, + pd->dequant, (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; } return; } #endif // CONFIG_AOM_HIGHBITDEPTH fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - quantize_32x32_nuq(coeff, 1024, x->skip_block, p->quant, p->quant_shift, - pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], - qcoeff, dqcoeff, eob, scan_order->scan, band); - } else { - quantize_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant, - p->quant_shift, pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq 
*)pd->dequant_val_nuq[dq], qcoeff, - dqcoeff, eob, scan_order->scan, band); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + quantize_64x64_nuq(coeff, tx_size_2d[tx_size], x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], + qcoeff, dqcoeff, eob, scan_order->scan, band); + break; +#endif // CONFIG_TX64X64 + case 1: + quantize_32x32_nuq(coeff, tx_size_2d[tx_size], x->skip_block, + p->quant, p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], + qcoeff, dqcoeff, eob, scan_order->scan, band); + break; + default: + quantize_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant, + p->quant_shift, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], + qcoeff, dqcoeff, eob, scan_order->scan, band); + break; } } @@ -685,36 +710,59 @@ void av1_xform_quant_fp_nuq(const AV1_COMMON *cm, MACROBLOCK *x, int plane, fwd_txfm_param.bd = xd->bd; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - highbd_quantize_32x32_fp_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, - dqcoeff, eob, scan_order->scan, band); - } else { - highbd_quantize_fp_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, - dqcoeff, eob, scan_order->scan, band); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + highbd_quantize_64x64_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq 
*)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; +#endif // CONFIG_TX64X64 + case 1: + highbd_quantize_32x32_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; + default: + highbd_quantize_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); } return; } #endif // CONFIG_AOM_HIGHBITDEPTH fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - quantize_32x32_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, - p->quant_fp, pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], - qcoeff, dqcoeff, eob, scan_order->scan, band); - } else { - quantize_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, - pd->dequant, - (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], - (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], - qcoeff, dqcoeff, eob, scan_order->scan, band); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + quantize_64x64_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; +#endif // CONFIG_TX64X64 + case 1: + quantize_32x32_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], qcoeff, + dqcoeff, eob, scan_order->scan, band); + break; + default: + 
quantize_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp, + pd->dequant, + (const cuml_bins_type_nuq *)p->cuml_bins_nuq[dq], + (const dequant_val_type_nuq *)pd->dequant_val_nuq[dq], + qcoeff, dqcoeff, eob, scan_order->scan, band); + break; } } @@ -753,31 +801,54 @@ void av1_xform_quant_dc_nuq(MACROBLOCK *x, int plane, int block, int blk_row, fwd_txfm_param.bd = xd->bd; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - highbd_quantize_dc_32x32_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], - p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], - pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); - } else { - highbd_quantize_dc_nuq(coeff, tx_size_2d[tx_size], x->skip_block, - p->quant[0], p->quant_shift[0], pd->dequant[0], - p->cuml_bins_nuq[dq][0], - pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + highbd_quantize_dc_64x64_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], + p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], + pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + break; +#endif // CONFIG_TX64X64 + case 1: + highbd_quantize_dc_32x32_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], + p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], + pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + break; + default: + highbd_quantize_dc_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], + p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], + pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + break; } return; } #endif // CONFIG_AOM_HIGHBITDEPTH fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - quantize_dc_32x32_nuq(coeff, tx_size_2d[tx_size], x->skip_block, - p->quant[0], p->quant_shift[0], pd->dequant[0], - p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], - 
qcoeff, dqcoeff, eob); - } else { - quantize_dc_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], - p->quant_shift[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], - pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + quantize_dc_64x64_nuq(coeff, tx_size_2d[tx_size], x->skip_block, + p->quant[0], p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; +#endif // CONFIG_TX64X64 + case 1: + quantize_dc_32x32_nuq(coeff, tx_size_2d[tx_size], x->skip_block, + p->quant[0], p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; + default: + quantize_dc_nuq(coeff, tx_size_2d[tx_size], x->skip_block, p->quant[0], + p->quant_shift[0], pd->dequant[0], + p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; } } @@ -816,31 +887,54 @@ void av1_xform_quant_dc_fp_nuq(MACROBLOCK *x, int plane, int block, int blk_row, fwd_txfm_param.bd = xd->bd; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - highbd_quantize_dc_32x32_fp_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], - pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], - qcoeff, dqcoeff, eob); - } else { - highbd_quantize_dc_fp_nuq( - coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], - pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], - qcoeff, dqcoeff, eob); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + highbd_quantize_dc_64x64_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], + pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; +#endif // CONFIG_TX64X64 + case 1: + highbd_quantize_dc_32x32_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, 
p->quant_fp[0], + pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; + default: + highbd_quantize_dc_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], + pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; } return; } #endif // CONFIG_AOM_HIGHBITDEPTH fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (tx_size == TX_32X32) { - quantize_dc_32x32_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, - p->quant_fp[0], pd->dequant[0], - p->cuml_bins_nuq[dq][0], - pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); - } else { - quantize_dc_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, - p->quant_fp[0], pd->dequant[0], p->cuml_bins_nuq[dq][0], - pd->dequant_val_nuq[dq][0], qcoeff, dqcoeff, eob); + switch (get_tx_scale(tx_size)) { +#if CONFIG_TX64X64 + case 2: + quantize_dc_64x64_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], + pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; +#endif // CONFIG_TX64X64 + case 1: + quantize_dc_32x32_fp_nuq( + coeff, tx_size_2d[tx_size], x->skip_block, p->quant_fp[0], + pd->dequant[0], p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; + default: + quantize_dc_fp_nuq(coeff, tx_size_2d[tx_size], x->skip_block, + p->quant_fp[0], pd->dequant[0], + p->cuml_bins_nuq[dq][0], pd->dequant_val_nuq[dq][0], + qcoeff, dqcoeff, eob); + break; } } #endif // CONFIG_NEW_QUANT diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c index a88c88435..4ada078c0 100644 --- a/av1/encoder/hybrid_fwd_txfm.c +++ b/av1/encoder/hybrid_fwd_txfm.c @@ -441,7 +441,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, (void)bd; switch (tx_type) { case DCT_DCT: - av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type); + av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type); 
break; #if CONFIG_EXT_TX case ADST_DCT: @@ -458,7 +458,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, case H_ADST: case V_FLIPADST: case H_FLIPADST: - av1_highbd_fht64x64_c(src_diff, coeff, diff_stride, tx_type); + av1_highbd_fht64x64(src_diff, coeff, diff_stride, tx_type); break; case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break; #endif // CONFIG_EXT_TX diff --git a/av1/encoder/quantize.c b/av1/encoder/quantize.c index 771f94b36..9dc1b134b 100644 --- a/av1/encoder/quantize.c +++ b/av1/encoder/quantize.c @@ -59,28 +59,28 @@ static INLINE int quantize_coeff_bigtx_nuq( const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift, const int16_t dequant, const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, int logsizeby32) { + tran_low_t *dqcoeff_ptr, int logsizeby16) { const int coeff = coeffv; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int i, q; int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); for (i = 0; i < NUQ_KNOTS; i++) { - if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], logsizeby16)) { q = i; break; } } if (i == NUQ_KNOTS) { - tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32); + tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], logsizeby16); q = NUQ_KNOTS + - (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (15 - logsizeby32)); + (((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - logsizeby16)); } if (q) { *dqcoeff_ptr = ROUND_POWER_OF_TWO( - av1_dequant_abscoeff_nuq(q, dequant, dequant_val), 1 + logsizeby32); + av1_dequant_abscoeff_nuq(q, dequant, dequant_val), logsizeby16); // *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val) >> - // (1 + logsizeby32); + // (logsizeby16); *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; } else { @@ -123,14 +123,14 @@ static INLINE int quantize_coeff_fp_nuq( static INLINE int quantize_coeff_bigtx_fp_nuq( const tran_low_t coeffv, const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby32) { + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby16) { const int coeff = coeffv; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int i, q; int tmp = clamp(abs_coeff, INT16_MIN, INT16_MAX); for (i = 0; i < NUQ_KNOTS; i++) { - if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], logsizeby16)) { q = i; break; } @@ -138,15 +138,15 @@ static INLINE int quantize_coeff_bigtx_fp_nuq( if (i == NUQ_KNOTS) { q = NUQ_KNOTS + ((((int64_t)tmp - - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32)) * + ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], logsizeby16)) * quant) >> - (15 - logsizeby32)); + (16 - logsizeby16)); } if (q) { *dqcoeff_ptr = ROUND_POWER_OF_TWO( - av1_dequant_abscoeff_nuq(q, dequant, dequant_val), 1 + logsizeby32); + av1_dequant_abscoeff_nuq(q, dequant, dequant_val), logsizeby16); // *dqcoeff_ptr = av1_dequant_abscoeff_nuq(q, dequant, dequant_val) >> - // (1 + logsizeby32); + // (logsizeby16); *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; } else { @@ -205,7 +205,7 @@ void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int rc = 0; if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val, qcoeff_ptr, - dqcoeff_ptr, 0)) + dqcoeff_ptr, get_tx_scale(TX_32X32))) eob = 0; } *eob_ptr = eob + 1; @@ -225,12 +225,54 @@ void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int rc = 0; if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant, cuml_bins_ptr, dequant_val, qcoeff_ptr, - dqcoeff_ptr, 0)) + dqcoeff_ptr, get_tx_scale(TX_32X32))) eob = 0; } *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t quant, + const int16_t quant_shift, const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift, dequant, + cuml_bins_ptr, dequant_val, qcoeff_ptr, + dqcoeff_ptr, get_tx_scale(TX_64X64))) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant, + cuml_bins_ptr, dequant_val, qcoeff_ptr, + dqcoeff_ptr, get_tx_scale(TX_64X64))) + eob = 0; + } + *eob_ptr 
= eob + 1; +} +#endif // CONFIG_TX64X64 + void quantize_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, const int16_t *dequant_ptr, @@ -300,7 +342,8 @@ void quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, if (quantize_coeff_bigtx_nuq( coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], - dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], 0)) + dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], + get_tx_scale(TX_32X32))) eob = i; } } @@ -325,12 +368,66 @@ void quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, if (quantize_coeff_bigtx_fp_nuq( coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], dequant_val[band[i]], &qcoeff_ptr[rc], - &dqcoeff_ptr[rc], 0)) + &dqcoeff_ptr[rc], get_tx_scale(TX_32X32))) eob = i; } } *eob_ptr = eob + 1; } + +#if CONFIG_TX64X64 +void quantize_64x64_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_bigtx_nuq( + coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], + dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], + get_tx_scale(TX_64X64))) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void quantize_64x64_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *quant_ptr, + const 
int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (quantize_coeff_bigtx_fp_nuq( + coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], dequant_val[band[i]], &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], get_tx_scale(TX_64X64))) + eob = i; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 #endif // CONFIG_NEW_QUANT void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, @@ -353,24 +450,42 @@ void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, // obsolete skip_block const int skip_block = 0; - if (qparam->log_scale == 0) { - av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan + switch (qparam->log_scale) { + case 0: + av1_quantize_fp(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, + p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); - } else { - av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round_fp, - p->quant_fp, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan + ); + break; + case 1: + av1_quantize_fp_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); + ); + break; +#if CONFIG_TX64X64 + case 2: + 
av1_quantize_fp_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, + p->round_fp, p->quant_fp, p->quant_shift, + qcoeff_ptr, dqcoeff_ptr, pd->dequant, eob_ptr, + sc->scan, sc->iscan +#if CONFIG_AOM_QM + , + qm_ptr, iqm_ptr +#endif + ); + break; +#endif // CONFIG_TX64X64 + default: assert(0); } } @@ -387,24 +502,40 @@ void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, // obsolete skip_block const int skip_block = 0; - if (qparam->log_scale == 0) { - aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, p->quant, - p->quant_shift, qcoeff_ptr, dqcoeff_ptr, pd->dequant, - eob_ptr, sc->scan, sc->iscan + switch (qparam->log_scale) { + case 0: + aom_quantize_b(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); - } else { - aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, - p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, - pd->dequant, eob_ptr, sc->scan, sc->iscan + ); + break; + case 1: + aom_quantize_b_32x32(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); + ); + break; +#if CONFIG_TX64X64 + case 2: + aom_quantize_b_64x64(coeff_ptr, n_coeffs, skip_block, p->zbin, p->round, + p->quant, p->quant_shift, qcoeff_ptr, dqcoeff_ptr, + pd->dequant, eob_ptr, sc->scan, sc->iscan +#if CONFIG_AOM_QM + , + qm_ptr, iqm_ptr +#endif + ); + break; +#endif // CONFIG_TX64X64 + default: assert(0); } } @@ -421,23 +552,39 @@ void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, // obsolete skip_block const int skip_block = 0; (void)sc; - if (qparam->log_scale == 0) { - aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, - p->quant_fp[0], qcoeff_ptr, 
dqcoeff_ptr, pd->dequant[0], - eob_ptr + + switch (qparam->log_scale) { + case 0: + aom_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round, + p->quant_fp[0], qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], + eob_ptr #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); - } else { - aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0], - qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr + ); + break; + case 1: + aom_quantize_dc_32x32(coeff_ptr, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr #if CONFIG_AOM_QM - , - qm_ptr, iqm_ptr + , + qm_ptr, iqm_ptr #endif - ); + ); + break; +#if CONFIG_TX64X64 + case 2: + aom_quantize_dc_64x64(coeff_ptr, skip_block, p->round, p->quant_fp[0], + qcoeff_ptr, dqcoeff_ptr, pd->dequant[0], eob_ptr +#if CONFIG_AOM_QM + , + qm_ptr, iqm_ptr +#endif + ); + break; +#endif // CONFIG_TX64X64 + default: assert(0); } } @@ -574,28 +720,28 @@ static INLINE int highbd_quantize_coeff_fp_nuq( static INLINE int highbd_quantize_coeff_bigtx_fp_nuq( const tran_low_t coeffv, const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby32) { + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, int logsizeby16) { const int coeff = coeffv; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int i, q; int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); for (i = 0; i < NUQ_KNOTS; i++) { - if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], logsizeby16)) { q = i; break; } } if (i == NUQ_KNOTS) { q = NUQ_KNOTS + - (int)(((tmp - ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], - 1 + logsizeby32)) * + (int)(((tmp - + ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], logsizeby16)) * quant) >> - (15 - logsizeby32)); + (16 - logsizeby16)); } if (q) { *dqcoeff_ptr = 
ROUND_POWER_OF_TWO( - av1_dequant_abscoeff_nuq(q, dequant, dequant_val), 1 + logsizeby32); + av1_dequant_abscoeff_nuq(q, dequant, dequant_val), logsizeby16); *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; *dqcoeff_ptr = *qcoeff_ptr < 0 ? -*dqcoeff_ptr : *dqcoeff_ptr; } else { @@ -609,26 +755,26 @@ static INLINE int highbd_quantize_coeff_bigtx_nuq( const tran_low_t coeffv, const int16_t quant, const int16_t quant_shift, const int16_t dequant, const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, int logsizeby32) { + tran_low_t *dqcoeff_ptr, int logsizeby16) { const int coeff = coeffv; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int i, q; int64_t tmp = clamp(abs_coeff, INT32_MIN, INT32_MAX); for (i = 0; i < NUQ_KNOTS; i++) { - if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], 1 + logsizeby32)) { + if (tmp < ROUND_POWER_OF_TWO(cuml_bins_ptr[i], logsizeby16)) { q = i; break; } } if (i == NUQ_KNOTS) { - tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], 1 + logsizeby32); + tmp -= ROUND_POWER_OF_TWO(cuml_bins_ptr[NUQ_KNOTS - 1], logsizeby16); q = NUQ_KNOTS + (int)(((((tmp * quant) >> 16) + tmp) * quant_shift) >> - (15 - logsizeby32)); + (16 - logsizeby16)); } if (q) { *dqcoeff_ptr = ROUND_POWER_OF_TWO( - av1_dequant_abscoeff_nuq(q, dequant, dequant_val), 1 + logsizeby32); + av1_dequant_abscoeff_nuq(q, dequant, dequant_val), logsizeby16); *qcoeff_ptr = (q ^ coeff_sign) - coeff_sign; *dqcoeff_ptr = *qcoeff_ptr < 0 ? 
-*dqcoeff_ptr : *dqcoeff_ptr; } else { @@ -723,7 +869,8 @@ void highbd_quantize_32x32_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, if (highbd_quantize_coeff_bigtx_nuq( coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], - dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], 0)) + dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], + get_tx_scale(TX_32X32))) eob = i; } } @@ -749,13 +896,68 @@ void highbd_quantize_32x32_fp_nuq_c(const tran_low_t *coeff_ptr, if (highbd_quantize_coeff_bigtx_fp_nuq( coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], dequant_val[band[i]], &qcoeff_ptr[rc], - &dqcoeff_ptr[rc], 0)) + &dqcoeff_ptr[rc], get_tx_scale(TX_32X32))) eob = i; } } *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void highbd_quantize_64x64_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const int16_t *scan, const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_bigtx_nuq( + coeff_ptr[rc], quant_ptr[rc != 0], quant_shift_ptr[rc != 0], + dequant_ptr[rc != 0], cuml_bins_ptr[band[i]], + dequant_val[band[i]], &qcoeff_ptr[rc], &dqcoeff_ptr[rc], + get_tx_scale(TX_64X64))) + eob = i; + } + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_64x64_fp_nuq_c(const tran_low_t *coeff_ptr, + intptr_t n_coeffs, int skip_block, + const int16_t *quant_ptr, + const int16_t *dequant_ptr, + const cuml_bins_type_nuq *cuml_bins_ptr, + const dequant_val_type_nuq *dequant_val, + tran_low_t *qcoeff_ptr, + 
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, + const int16_t *scan, const uint8_t *band) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + int i; + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + if (highbd_quantize_coeff_bigtx_fp_nuq( + coeff_ptr[rc], quant_ptr[rc != 0], dequant_ptr[rc != 0], + cuml_bins_ptr[band[i]], dequant_val[band[i]], &qcoeff_ptr[rc], + &dqcoeff_ptr[rc], get_tx_scale(TX_64X64))) + eob = i; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + void highbd_quantize_fp_nuq_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *quant_ptr, const int16_t *dequant_ptr, @@ -793,7 +995,8 @@ void highbd_quantize_dc_32x32_nuq( const int rc = 0; if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift, dequant, cuml_bins_ptr, dequant_val, - qcoeff_ptr, dqcoeff_ptr, 0)) + qcoeff_ptr, dqcoeff_ptr, + get_tx_scale(TX_32X32))) eob = 0; } *eob_ptr = eob + 1; @@ -811,11 +1014,52 @@ void highbd_quantize_dc_32x32_fp_nuq( const int rc = 0; if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant, cuml_bins_ptr, dequant_val, - qcoeff_ptr, dqcoeff_ptr, 0)) + qcoeff_ptr, dqcoeff_ptr, + get_tx_scale(TX_32X32))) eob = 0; } *eob_ptr = eob + 1; } + +#if CONFIG_TX64X64 +void highbd_quantize_dc_64x64_nuq( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t quant, const int16_t quant_shift, const int16_t dequant, + const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_bigtx_nuq(coeff_ptr[rc], quant, quant_shift, + dequant, cuml_bins_ptr, dequant_val, + qcoeff_ptr, dqcoeff_ptr, + 
get_tx_scale(TX_64X64))) + eob = 0; + } + *eob_ptr = eob + 1; +} + +void highbd_quantize_dc_64x64_fp_nuq( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { + int eob = -1; + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + if (!skip_block) { + const int rc = 0; + if (highbd_quantize_coeff_bigtx_fp_nuq(coeff_ptr[rc], quant, dequant, + cuml_bins_ptr, dequant_val, + qcoeff_ptr, dqcoeff_ptr, + get_tx_scale(TX_64X64))) + eob = 0; + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 #endif // CONFIG_NEW_QUANT #endif // CONFIG_AOM_HIGHBITDEPTH @@ -999,6 +1243,154 @@ void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, *eob_ptr = eob + 1; } +#if CONFIG_TX64X64 +void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan, const int16_t *iscan +#if CONFIG_AOM_QM + , + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr +#endif + ) { + int i, eob = -1; + (void)zbin_ptr; + (void)quant_shift_ptr; + (void)iscan; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + for (i = 0; i < n_coeffs; i++) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; + int64_t tmp = 0; +#endif + const int coeff_sign = (coeff >> 31); + int tmp32 = 0; + int abs_coeff = 
(coeff ^ coeff_sign) - coeff_sign; + +#if CONFIG_AOM_QM + if (abs_coeff * wt >= (dequant_ptr[rc != 0] << (AOM_QM_BITS - 3))) { +#else + if (abs_coeff >= (dequant_ptr[rc != 0] >> 3)) { +#endif + abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); + abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); +#if CONFIG_AOM_QM + tmp = abs_coeff * wt; + tmp32 = (int)(tmp * quant_ptr[rc != 0]) >> (AOM_QM_BITS + 14); + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / 4; +#else + tmp32 = (abs_coeff * quant_ptr[rc != 0]) >> 15; + qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant_ptr[rc != 0]) / 4; +#endif + } + + if (tmp32) eob = i; + } + } + *eob_ptr = eob + 1; +} +#endif // CONFIG_TX64X64 + +void av1_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t *zbin_ptr, + const int16_t *round_ptr, const int16_t *quant_ptr, + const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + uint16_t *eob_ptr, const int16_t *scan, + const int16_t *iscan, +#if CONFIG_AOM_QM + const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, +#endif + int log_scale) { + int i, non_zero_count = (int)n_coeffs, eob = -1; + int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; + int round[2] = { round_ptr[0], round_ptr[1] }; + int nzbins[2]; + int scale = 1; + int shift = 16; + (void)iscan; + + if (log_scale > 0) { + zbins[0] = ROUND_POWER_OF_TWO(zbin_ptr[0], log_scale); + zbins[1] = ROUND_POWER_OF_TWO(zbin_ptr[1], log_scale); + round[0] = ROUND_POWER_OF_TWO(round_ptr[0], log_scale); + round[1] = ROUND_POWER_OF_TWO(round_ptr[1], log_scale); + scale = 1 << log_scale; + shift = 16 - log_scale; + } + + nzbins[0] = zbins[0] * -1; + nzbins[1] = zbins[1] * -1; + + memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); + memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); + + if (!skip_block) { + // Pre-scan pass + for (i = 
(int)n_coeffs - 1; i >= 0; i--) { + const int rc = scan[i]; + const int coeff = coeff_ptr[rc]; + if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) + non_zero_count--; + else + break; + } + + // Quantization pass: All coefficients with index >= zero_flag are + // skippable. Note: zero_flag can be zero. + for (i = 0; i < non_zero_count; i++) { + const int rc = scan[i]; +#if CONFIG_AOM_QM + const qm_val_t wt = qm_ptr[rc]; + const qm_val_t iwt = iqm_ptr[rc]; + const int dequant = + (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> + AOM_QM_BITS; +#endif + const int coeff = coeff_ptr[rc]; + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; +#if CONFIG_AOM_QM + if (abs_coeff * wt >= (zbins[rc != 0] << AOM_QM_BITS)) { +#else + + if (abs_coeff >= zbins[rc != 0]) { +#endif + const int64_t tmp1 = abs_coeff + round[rc != 0]; + const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; +#if CONFIG_AOM_QM + const uint32_t abs_qcoeff = (uint32_t)( + (tmp2 * wt * quant_shift_ptr[rc != 0]) >> (AOM_QM_BITS + shift)); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = (qcoeff_ptr[rc] * dequant) / scale; +#else + const uint32_t abs_qcoeff = + (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> shift); + qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); + dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / scale; +#endif // CONFIG_AOM_QM + if (abs_qcoeff) eob = i; + } + } + } + *eob_ptr = eob + 1; +} + #if CONFIG_AOM_HIGHBITDEPTH void av1_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, diff --git a/av1/encoder/quantize.h b/av1/encoder/quantize.h index f5f045eb6..b13af5a73 100644 --- a/av1/encoder/quantize.h +++ b/av1/encoder/quantize.h @@ -127,6 +127,15 @@ void quantize_dc_32x32_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t 
*dqcoeff_ptr, uint16_t *eob_ptr); +#if CONFIG_TX64X64 +void quantize_dc_64x64_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t quant, + const int16_t quant_shift, const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 void quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr, @@ -139,6 +148,15 @@ void quantize_dc_32x32_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); +#if CONFIG_TX64X64 +void quantize_dc_64x64_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, + int skip_block, const int16_t quant, + const int16_t dequant, + const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 #endif // CONFIG_NEW_QUANT #if CONFIG_AOM_HIGHBITDEPTH @@ -197,6 +215,13 @@ void highbd_quantize_dc_32x32_nuq( const int16_t quant, const int16_t quant_shift, const int16_t dequant, const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); +#if CONFIG_TX64X64 +void highbd_quantize_dc_64x64_nuq( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t quant, const int16_t quant_shift, const int16_t dequant, + const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 void highbd_quantize_dc_fp_nuq(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t quant, const int16_t dequant, @@ -209,7 +234,13 @@ void highbd_quantize_dc_32x32_fp_nuq( const int16_t quant, const int16_t dequant, 
const tran_low_t *cuml_bins_ptr, const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); - +#if CONFIG_TX64X64 +void highbd_quantize_dc_64x64_fp_nuq( + const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, + const int16_t quant, const int16_t dequant, const tran_low_t *cuml_bins_ptr, + const tran_low_t *dequant_val, tran_low_t *qcoeff_ptr, + tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); +#endif // CONFIG_TX64X64 #endif // CONFIG_NEW_QUANT #endif // CONFIG_AOM_HIGHBITDEPTH diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index b354c7d0e..2b8547292 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c @@ -1022,8 +1022,7 @@ static void dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block, // not involve an inverse transform, but it is less accurate. const int buffer_length = tx_size_2d[tx_size]; int64_t this_sse; - int tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); - int shift = (MAX_TX_SCALE - get_tx_scale(xd, tx_type, tx_size)) * 2; + int shift = (MAX_TX_SCALE - get_tx_scale(tx_size)) * 2; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); #if CONFIG_PVQ @@ -7854,8 +7853,8 @@ static int64_t handle_inter_mode( #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION continue; #else - restore_dst_buf(xd, orig_dst, orig_dst_stride); - return INT64_MAX; + restore_dst_buf(xd, orig_dst, orig_dst_stride); + return INT64_MAX; #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION } /* clang-format on */