diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc index 862edbe80..c30b82763 100644 --- a/test/vp9_quantize_test.cc +++ b/test/vp9_quantize_test.cc @@ -34,7 +34,7 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count, const int16_t *round, const int16_t *quant, const int16_t *quant_shift, tran_low_t *qcoeff, tran_low_t *dqcoeff, - const int16_t *dequant, + const int16_t *dequant, int zbin_oq_value, uint16_t *eob, const int16_t *scan, const int16_t *iscan); typedef std::tr1::tuple @@ -80,6 +80,7 @@ class VP9Quantize32Test : public ::testing::TestWithParam { TEST_P(VP9QuantizeTest, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); + int zbin_oq_value = 0; DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256); DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2); DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2); @@ -115,12 +116,13 @@ TEST_P(VP9QuantizeTest, OperationCheck) { } ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, ref_qcoeff_ptr, - ref_dqcoeff_ptr, dequant_ptr, + ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value, ref_eob_ptr, scan_order->scan, scan_order->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, - dqcoeff_ptr, dequant_ptr, eob_ptr, + dqcoeff_ptr, dequant_ptr, + zbin_oq_value, eob_ptr, scan_order->scan, scan_order->iscan)); for (int j = 0; j < sz; ++j) { err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) | @@ -139,6 +141,7 @@ TEST_P(VP9QuantizeTest, OperationCheck) { TEST_P(VP9Quantize32Test, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); + int zbin_oq_value = 0; DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024); DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2); DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2); @@ -174,12 +177,13 @@ TEST_P(VP9Quantize32Test, OperationCheck) { } ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, 
quant_shift_ptr, ref_qcoeff_ptr, - ref_dqcoeff_ptr, dequant_ptr, + ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value, ref_eob_ptr, scan_order->scan, scan_order->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, - dqcoeff_ptr, dequant_ptr, eob_ptr, + dqcoeff_ptr, dequant_ptr, + zbin_oq_value, eob_ptr, scan_order->scan, scan_order->iscan)); for (int j = 0; j < sz; ++j) { err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) | @@ -198,6 +202,7 @@ TEST_P(VP9Quantize32Test, OperationCheck) { TEST_P(VP9QuantizeTest, EOBCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); + int zbin_oq_value = 0; DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256); DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2); DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2); @@ -237,12 +242,13 @@ TEST_P(VP9QuantizeTest, EOBCheck) { ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, ref_qcoeff_ptr, - ref_dqcoeff_ptr, dequant_ptr, + ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value, ref_eob_ptr, scan_order->scan, scan_order->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, - dqcoeff_ptr, dequant_ptr, eob_ptr, + dqcoeff_ptr, dequant_ptr, + zbin_oq_value, eob_ptr, scan_order->scan, scan_order->iscan)); for (int j = 0; j < sz; ++j) { @@ -262,6 +268,7 @@ TEST_P(VP9QuantizeTest, EOBCheck) { TEST_P(VP9Quantize32Test, EOBCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); + int zbin_oq_value = 0; DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024); DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2); DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2); @@ -301,12 +308,13 @@ TEST_P(VP9Quantize32Test, EOBCheck) { ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, ref_qcoeff_ptr, - ref_dqcoeff_ptr, dequant_ptr, + ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value, 
ref_eob_ptr, scan_order->scan, scan_order->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, qcoeff_ptr, - dqcoeff_ptr, dequant_ptr, eob_ptr, + dqcoeff_ptr, dequant_ptr, + zbin_oq_value, eob_ptr, scan_order->scan, scan_order->iscan)); for (int j = 0; j < sz; ++j) { diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index d2ab875e9..df3db505f 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1125,37 +1125,37 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_block_error/; - add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp/; - add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, 
const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_32x32/; - add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b/; - add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_32x32/; - add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const 
int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_fdct8x8_quant/; } else { add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz"; specialize qw/vp9_block_error avx2/, "$sse2_x86inc"; - add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void 
vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b sse2/, "$ssse3_x86_64"; - add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_quantize_b_32x32/, "$ssse3_x86_64"; - add_proto qw/void 
vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_fdct8x8_quant sse2 ssse3/; } @@ -1850,16 +1850,16 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vp9_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride, int bd"; specialize qw/vp9_highbd_subtract_block/; - add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp/; - add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, 
intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_fp_32x32/; - add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_b sse2/; - add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vp9_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, 
const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/vp9_highbd_quantize_b_32x32 sse2/; # diff --git a/vp9/encoder/arm/neon/vp9_quantize_neon.c b/vp9/encoder/arm/neon/vp9_quantize_neon.c index 9cf1e5e2c..8c13d0da6 100644 --- a/vp9/encoder/arm/neon/vp9_quantize_neon.c +++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c @@ -26,12 +26,13 @@ void vp9_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)scan; if (!skip_block) { diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 68174a6cc..2ffc7ea67 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -40,6 +40,8 @@ struct macroblock_plane { int16_t *round; int64_t quant_thred[2]; + // Zbin Over Quant value + int16_t zbin_extra; }; /* The [2] dimension is for whether we skip the EOB node (i.e. 
if previous diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 506f6de84..020a95196 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -339,7 +339,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int eob = -1; @@ -416,6 +416,7 @@ void vp9_fdct8x8_quant_c(const int16_t *input, int stride, // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 4c948237d..535cc30c7 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3893,6 +3893,8 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); + vp9_update_zbin_extra(x); + if (!is_inter_block(mbmi)) { int plane; mbmi->skip = 1; diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 0b48bdee9..9b2165be6 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -391,28 +391,28 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, pd->dequant, - eob, scan_order->scan, + p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vp9_highbd_fdct16x16(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_highbd_fdct8x8(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 64, 
x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; default: @@ -427,28 +427,28 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vp9_fdct16x16(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 256, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_fdct8x8_quant(src_diff, diff_stride, coeff, 64, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->zbin, p->round_fp, p->quant_fp, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; default: @@ -561,28 +561,28 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, - dqcoeff, pd->dequant, eob, + dqcoeff, pd->dequant, p->zbin_extra, eob, scan_order->scan, 
scan_order->iscan); break; case TX_16X16: vp9_highbd_fdct16x16(src_diff, coeff, diff_stride); vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_highbd_fdct8x8(src_diff, coeff, diff_stride); vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; default: @@ -597,28 +597,28 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vp9_fdct16x16(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vp9_fdct8x8(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); break; case TX_4X4: x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, 
scan_order->iscan); break; default: @@ -849,7 +849,8 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, - qcoeff, dqcoeff, pd->dequant, eob, + qcoeff, dqcoeff, pd->dequant, + p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -870,7 +871,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -892,7 +893,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) { @@ -918,7 +919,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } @@ -957,7 +958,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ 
-977,7 +978,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -997,7 +998,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } if (!x->skip_encode && *eob) @@ -1021,7 +1022,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, x->fwd_txm4x4(src_diff, coeff, diff_stride); vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, - pd->dequant, eob, scan_order->scan, + pd->dequant, p->zbin_extra, eob, scan_order->scan, scan_order->iscan); } diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 389dc87e0..63242a922 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -122,13 +122,14 @@ void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; // TODO(jingning) Decide the need of these arguments after the // quantization process is completed. 
(void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -167,6 +168,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -176,6 +178,7 @@ void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, // quantization process is completed. (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); @@ -214,11 +217,12 @@ void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -257,11 +261,12 @@ void vp9_highbd_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -297,11 +302,13 @@ void vp9_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]}; - const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; + 
const int zbins[2] = { zbin_ptr[0] + zbin_oq_value, + zbin_ptr[1] + zbin_oq_value }; + const int nzbins[2] = { zbins[0] * -1, + zbins[1] * -1 }; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -348,12 +355,14 @@ void vp9_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, + const int16_t *dequant_ptr, int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; - const int zbins[2] = {zbin_ptr[0], zbin_ptr[1]}; - const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; + const int zbins[2] = { zbin_ptr[0] + zbin_oq_value, + zbin_ptr[1] + zbin_oq_value }; + const int nzbins[2] = { zbins[0] * -1, + zbins[1] * -1 }; (void)iscan; vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); @@ -403,10 +412,10 @@ void vp9_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1), - ROUND_POWER_OF_TWO(zbin_ptr[1], 1)}; + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1), + ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) }; const int nzbins[2] = {zbins[0] * -1, zbins[1] * -1}; int idx = 0; @@ -462,11 +471,11 @@ void vp9_highbd_quantize_b_32x32_c(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { - const int zbins[2] = {ROUND_POWER_OF_TWO(zbin_ptr[0], 1), - ROUND_POWER_OF_TWO(zbin_ptr[1], 1)}; - const int nzbins[2] = {zbins[0] * -1, zbins[1] * 
-1}; + const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1), + ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1) }; + const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; int idx = 0; int idx_arr[1024]; @@ -525,7 +534,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->zbin, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, &p->eobs[block], + pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan); return; } @@ -535,7 +544,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, p->zbin, p->round, p->quant, p->quant_shift, BLOCK_OFFSET(p->qcoeff, block), BLOCK_OFFSET(pd->dqcoeff, block), - pd->dequant, &p->eobs[block], scan, iscan); + pd->dequant, p->zbin_extra, &p->eobs[block], scan, iscan); } static void invert_quant(int16_t *quant, int16_t *shift, int d) { @@ -632,6 +641,8 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const int segment_id = xd->mi[0].src_mi->mbmi.segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); + // TODO(paulwilkins): 0 value for zbin for now pending follow on patch. 
+ const int zbin = 0; int i; // Y @@ -641,10 +652,13 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; + x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7); xd->plane[0].dequant = cm->y_dequant[qindex]; - x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; - x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1]; + x->plane[0].quant_thred[0] = (x->plane[0].zbin[0] + x->plane[0].zbin_extra) * + (x->plane[0].zbin[0] + x->plane[0].zbin_extra); + x->plane[0].quant_thred[1] = (x->plane[0].zbin[1] + x->plane[0].zbin_extra) * + (x->plane[0].zbin[1] + x->plane[0].zbin_extra); // UV for (i = 1; i < 3; i++) { @@ -654,10 +668,15 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; + x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7); xd->plane[i].dequant = cm->uv_dequant[qindex]; - x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; - x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; + x->plane[i].quant_thred[0] = + (x->plane[i].zbin[0] + x->plane[i].zbin_extra) * + (x->plane[i].zbin[0] + x->plane[i].zbin_extra); + x->plane[i].quant_thred[1] = + (x->plane[i].zbin[1] + x->plane[i].zbin_extra) * + (x->plane[i].zbin[1] + x->plane[i].zbin_extra); } x->skip_block = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); @@ -669,6 +688,15 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { vp9_initialize_me_consts(cpi, x->q_index); } +void vp9_update_zbin_extra(MACROBLOCK *x) { + const int y_zbin_extra = 0; + const int uv_zbin_extra = 0; + + x->plane[0].zbin_extra = (int16_t)y_zbin_extra; + x->plane[1].zbin_extra = 
(int16_t)uv_zbin_extra; + x->plane[2].zbin_extra = (int16_t)uv_zbin_extra; +} + void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->td.mb); } diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index de2839f5b..9aeb5f05b 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -68,6 +68,8 @@ struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); +void vp9_update_zbin_extra(MACROBLOCK *x); + void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); diff --git a/vp9/encoder/x86/vp9_dct_sse2.c b/vp9/encoder/x86/vp9_dct_sse2.c index ae22a0b32..e671f3998 100644 --- a/vp9/encoder/x86/vp9_dct_sse2.c +++ b/vp9/encoder/x86/vp9_dct_sse2.c @@ -254,7 +254,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - uint16_t* eob_ptr, + int zbin_oq_value, uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -287,6 +287,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride, (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; (void)coeff_ptr; // Pre-condition input (shift by two) diff --git a/vp9/encoder/x86/vp9_dct_ssse3.c b/vp9/encoder/x86/vp9_dct_ssse3.c index 5c0ad7892..237c5e278 100644 --- a/vp9/encoder/x86/vp9_dct_ssse3.c +++ b/vp9/encoder/x86/vp9_dct_ssse3.c @@ -23,7 +23,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - uint16_t* eob_ptr, + int zbin_oq_value, uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -57,6 +57,7 @@ void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; + 
(void)zbin_oq_value; (void)coeff_ptr; // Pre-condition input (shift by two) diff --git a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c index 0bce9c321..55c6ed71f 100644 --- a/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c +++ b/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c @@ -24,6 +24,7 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -31,11 +32,11 @@ void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, __m128i zbins[2]; __m128i nzbins[2]; - zbins[0] = _mm_set_epi32((int)zbin_ptr[1], - (int)zbin_ptr[1], - (int)zbin_ptr[1], - (int)zbin_ptr[0]); - zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]); + zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value), + (int)(zbin_ptr[1] + zbin_oq_value), + (int)(zbin_ptr[1] + zbin_oq_value), + (int)(zbin_ptr[0] + zbin_oq_value)); + zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value)); nzbins[0] = _mm_setzero_si128(); nzbins[1] = _mm_setzero_si128(); @@ -110,6 +111,7 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, + int zbin_oq_value, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { @@ -118,14 +120,15 @@ void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, int idx = 0; int idx_arr[1024]; int i, eob = -1; - const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); - const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); + const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); + const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); (void)scan; + /* zbin0_tmp/zbin1_tmp already include zbin_oq_value; do not re-add it. */ zbins[0] = _mm_set_epi32(zbin1_tmp,
zbin1_tmp, zbin1_tmp, zbin0_tmp); zbins[1] = _mm_set1_epi32(zbin1_tmp); nzbins[0] = _mm_setzero_si128(); nzbins[1] = _mm_setzero_si128(); diff --git a/vp9/encoder/x86/vp9_quantize_sse2.c b/vp9/encoder/x86/vp9_quantize_sse2.c index 679c66e30..e06eb2f15 100644 --- a/vp9/encoder/x86/vp9_quantize_sse2.c +++ b/vp9/encoder/x86/vp9_quantize_sse2.c @@ -18,7 +18,7 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - uint16_t* eob_ptr, + int zbin_oq_value, uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; @@ -39,10 +39,13 @@ void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, // Setup global values { + __m128i zbin_oq; __m128i pw_1; + zbin_oq = _mm_set1_epi16(zbin_oq_value); zbin = _mm_load_si128((const __m128i*)zbin_ptr); round = _mm_load_si128((const __m128i*)round_ptr); quant = _mm_load_si128((const __m128i*)quant_ptr); + zbin = _mm_add_epi16(zbin, zbin_oq); pw_1 = _mm_set1_epi16(1); zbin = _mm_sub_epi16(zbin, pw_1); dequant = _mm_load_si128((const __m128i*)dequant_ptr); @@ -226,13 +229,14 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, const int16_t* round_ptr, const int16_t* quant_ptr, const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, - uint16_t* eob_ptr, + int zbin_oq_value, uint16_t* eob_ptr, const int16_t* scan_ptr, const int16_t* iscan_ptr) { __m128i zero; (void)scan_ptr; (void)zbin_ptr; (void)quant_shift_ptr; + (void)zbin_oq_value; coeff_ptr += n_coeffs; iscan_ptr += n_coeffs;