Refactor trellis optimization process
This commit refactors the trellis coefficient optimization process. It saves multiplications used to generate the final dequantized coefficients. It also removes two memset operations on quantized and dequantized coefficient sets. The trellis coefficient optimization is on average running over 10% faster. Change-Id: If3aa26d2a706c3012bf2b7ac059bf1825250e81f
This commit is contained in:
Родитель
1faf288798
Коммит
e9c44a76a2
|
@ -58,6 +58,7 @@ typedef struct vp10_token_state {
|
|||
int next;
|
||||
int16_t token;
|
||||
tran_low_t qc;
|
||||
tran_low_t dqc;
|
||||
} vp10_token_state;
|
||||
|
||||
// These numbers are empirically obtained.
|
||||
|
@ -99,17 +100,18 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
|
|||
const int eob = p->eobs[block];
|
||||
const PLANE_TYPE type = pd->plane_type;
|
||||
const int default_eob = 16 << (tx_size << 1);
|
||||
const int16_t *dequant_ptr = pd->dequant;
|
||||
const uint8_t *const band_translate = get_band_translate(tx_size);
|
||||
const int16_t* const dequant_ptr = pd->dequant;
|
||||
const uint8_t* const band_translate = get_band_translate(tx_size);
|
||||
TX_TYPE tx_type = get_tx_type(type, xd, block, tx_size);
|
||||
const scan_order *const so =
|
||||
const scan_order* const so =
|
||||
get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi));
|
||||
const int16_t *const scan = so->scan;
|
||||
const int16_t *const nb = so->neighbors;
|
||||
const int16_t* const scan = so->scan;
|
||||
const int16_t* const nb = so->neighbors;
|
||||
#if CONFIG_NEW_QUANT
|
||||
const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq;
|
||||
#endif // CONFIG_NEW_QUANT
|
||||
int shift = get_tx_scale(xd, tx_type, tx_size);
|
||||
const int shift = get_tx_scale(xd, tx_type, tx_size);
|
||||
const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift };
|
||||
int next = eob, sz = 0;
|
||||
const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][type]) >> 1;
|
||||
const int64_t rddiv = mb->rddiv;
|
||||
|
@ -179,6 +181,7 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
|
|||
tokens[i][0].next = next;
|
||||
tokens[i][0].token = t0;
|
||||
tokens[i][0].qc = x;
|
||||
tokens[i][0].dqc = dqcoeff[rc];
|
||||
best_index[i][0] = best;
|
||||
|
||||
/* Evaluate the second possibility for this state. */
|
||||
|
@ -271,6 +274,24 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
|
|||
tokens[i][1].next = next;
|
||||
tokens[i][1].token = best ? t1 : t0;
|
||||
tokens[i][1].qc = x;
|
||||
|
||||
if (x) {
|
||||
tran_low_t offset = dq_step[rc != 0];
|
||||
// The 32x32 transform coefficient uses half quantization step size.
|
||||
// Account for the rounding difference in the dequantized coefficeint
|
||||
// value when the quantization index is dropped from an even number
|
||||
// to an odd number.
|
||||
if (shift & x)
|
||||
offset += (dequant_ptr[rc != 0] & 0x01);
|
||||
|
||||
if (sz == 0)
|
||||
tokens[i][1].dqc = dqcoeff[rc] - offset;
|
||||
else
|
||||
tokens[i][1].dqc = dqcoeff[rc] + offset;
|
||||
} else {
|
||||
tokens[i][1].dqc = 0;
|
||||
}
|
||||
|
||||
best_index[i][1] = best;
|
||||
/* Finally, make this the new head of the trellis. */
|
||||
next = i;
|
||||
|
@ -309,24 +330,22 @@ int vp10_optimize_b(MACROBLOCK *mb, int plane, int block,
|
|||
rate1 += mb->token_costs[tx_size][type][ref][band][0][ctx][t1];
|
||||
UPDATE_RD_COST();
|
||||
best = rd_cost1 < rd_cost0;
|
||||
|
||||
final_eob = -1;
|
||||
memset(qcoeff, 0, sizeof(*qcoeff) * (16 << (tx_size * 2)));
|
||||
memset(dqcoeff, 0, sizeof(*dqcoeff) * (16 << (tx_size * 2)));
|
||||
|
||||
for (i = next; i < eob; i = next) {
|
||||
const int x = tokens[i][best].qc;
|
||||
const int rc = scan[i];
|
||||
if (x) {
|
||||
final_eob = i;
|
||||
}
|
||||
|
||||
if (x) final_eob = i;
|
||||
qcoeff[rc] = x;
|
||||
dqcoeff[rc] = tokens[i][best].dqc;
|
||||
|
||||
#if CONFIG_NEW_QUANT
|
||||
dqcoeff[rc] = dequant_abscoeff_nuq(abs(x), dequant_ptr[rc != 0],
|
||||
dequant_val[band_translate[i]]);
|
||||
if (shift) dqcoeff[rc] = ROUND_POWER_OF_TWO(dqcoeff[rc], shift);
|
||||
if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
|
||||
#else
|
||||
dqcoeff[rc] = (abs(x * dequant_ptr[rc != 0]) >> shift);
|
||||
if (x < 0) dqcoeff[rc] = -dqcoeff[rc];
|
||||
#endif // CONFIG_NEW_QUANT
|
||||
|
||||
next = tokens[i][best].next;
|
||||
|
|
Загрузка…
Ссылка в новой задаче