diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index bd9dc559e..7c5f77e33 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -58,6 +58,7 @@ typedef struct vp10_token_state { int next; int16_t token; tran_low_t qc; + tran_low_t dqc; } vp10_token_state; // These numbers are empirically obtained. @@ -99,17 +100,18 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, const int eob = p->eobs[block]; const PLANE_TYPE type = pd->plane_type; const int default_eob = 16 << (tx_size << 1); - const int16_t *dequant_ptr = pd->dequant; - const uint8_t *const band_translate = get_band_translate(tx_size); + const int16_t* const dequant_ptr = pd->dequant; + const uint8_t* const band_translate = get_band_translate(tx_size); TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size); - const scan_order *const so = + const scan_order* const so = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); - const int16_t *const scan = so->scan; - const int16_t *const nb = so->neighbors; + const int16_t* const scan = so->scan; + const int16_t* const nb = so->neighbors; #if CONFIG_NEW_QUANT const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq; #endif // CONFIG_NEW_QUANT - int shift = get_tx_scale(xd, tx_type, tx_size); + const int shift = get_tx_scale(xd, tx_type, tx_size); + const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift }; int next = eob, sz = 0; const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1; const int64_t rddiv = mb->rddiv; @@ -179,6 +181,7 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, tokens[i][0].next = next; tokens[i][0].token = t0; tokens[i][0].qc = x; + tokens[i][0].dqc = dqcoeff[rc]; best_index[i][0] = best; /* Evaluate the second possibility for this state. */ @@ -271,6 +274,24 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, tokens[i][1].next = next; tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; + + if (x) { + tran_low_t offset = dq_step[rc != 0]; + // The 32x32 transform coefficient uses half quantization step size. + // Account for the rounding difference in the dequantized coefficeint + // value when the quantization index is dropped from an even number + // to an odd number. + if (shift & x) + offset += (dequant_ptr[rc != 0] & 0x01); + + if (sz == 0) + tokens[i][1].dqc = dqcoeff[rc] - offset; + else + tokens[i][1].dqc = dqcoeff[rc] + offset; + } else { + tokens[i][1].dqc = 0; + } + best_index[i][1] = best; /* Finally, make this the new head of the trellis. */ next = i; @@ -309,24 +330,22 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block, rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1]; UPDATE_RD_COST(); best = rd_cost1 < rd_cost0; + final_eob = -1; - memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2))); - memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2))); + for (i = next; i < eob; i = next) { const int x = tokens[i][best].qc; const int rc = scan[i]; - if (x) { - final_eob = i; - } + + if (x) final_eob = i; qcoeff[rc] = x; + dqcoeff[rc] = tokens[i][best].dqc; + #if CONFIG_NEW_QUANT dqcoeff[rc] = dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0], dequant_val[band_translate[i]]); if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift); if (x < 0) dqcoeff[rc] = -dqcoeff[rc]; -#else - dqcoeff[rc] = (abs(x * dequant_ptr[rc != 0]) >> shift); - if (x < 0) dqcoeff[rc] = -dqcoeff[rc]; #endif // CONFIG_NEW_QUANT next = tokens[i][best].next;