Inline vp9_quantize() in xform_quant().
Cycle times:
- 4x4: 151 to 131 cycles (15% faster)
- 8x8: 334 to 306 cycles (9% faster)
- 16x16: 1401 to 1368 cycles (2.5% faster)
- 32x32: 7403 to 7367 cycles (0.5% faster)

Total encode time of the first 50 frames of bus @ 1500kbps (speed 0) goes from 1min39.2 to 1min38.6, i.e. a 0.67% overall speedup.

Change-Id: I799a49460e5e3fcab01725564dd49c629bfe935f
This commit is contained in:
Parent
7e684e2009
Commit
1ff94fea56
|
@ -432,48 +432,86 @@ void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
|
|||
struct encode_b_args* const args = arg;
|
||||
MACROBLOCK* const x = args->x;
|
||||
MACROBLOCKD* const xd = &x->e_mbd;
|
||||
const int bw = plane_block_width(bsize, &xd->plane[plane]);
|
||||
const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane,
|
||||
block, ss_txfrm_size);
|
||||
int16_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block, 16);
|
||||
int16_t *const src_diff = raster_block_offset_int16(xd, bsize, plane,
|
||||
raster_block,
|
||||
x->plane[plane].src_diff);
|
||||
TX_TYPE tx_type = DCT_DCT;
|
||||
struct macroblock_plane *const p = &x->plane[plane];
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
int16_t *coeff = BLOCK_OFFSET(p->coeff, block, 16);
|
||||
int16_t *qcoeff = BLOCK_OFFSET(pd->qcoeff, block, 16);
|
||||
int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block, 16);
|
||||
const TX_SIZE tx_size = (TX_SIZE)(ss_txfrm_size / 2);
|
||||
TX_TYPE tx_type;
|
||||
const int16_t *scan, *iscan;
|
||||
uint16_t *eob = &pd->eobs[block];
|
||||
const int bwl = b_width_log2(bsize) - pd->subsampling_x, bw = 1 << bwl;
|
||||
const int twl = bwl - tx_size, twmask = (1 << twl) - 1;
|
||||
int xoff, yoff;
|
||||
int16_t *src_diff;
|
||||
|
||||
switch (ss_txfrm_size / 2) {
|
||||
switch (tx_size) {
|
||||
case TX_32X32:
|
||||
scan = vp9_default_scan_32x32;
|
||||
iscan = vp9_default_iscan_32x32;
|
||||
block >>= 6;
|
||||
xoff = 32 * (block & twmask);
|
||||
yoff = 32 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (x->rd_search)
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 2);
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
|
||||
else
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 2);
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
|
||||
vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
break;
|
||||
case TX_16X16:
|
||||
tx_type = plane == 0 ? get_tx_type_16x16(xd) : DCT_DCT;
|
||||
scan = get_scan_16x16(tx_type);
|
||||
iscan = get_iscan_16x16(tx_type);
|
||||
block >>= 4;
|
||||
xoff = 16 * (block & twmask);
|
||||
yoff = 16 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_short_fht16x16(src_diff, coeff, bw, tx_type);
|
||||
vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
|
||||
else
|
||||
x->fwd_txm16x16(src_diff, coeff, bw * 2);
|
||||
x->fwd_txm16x16(src_diff, coeff, bw * 8);
|
||||
vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
break;
|
||||
case TX_8X8:
|
||||
tx_type = plane == 0 ? get_tx_type_8x8(xd) : DCT_DCT;
|
||||
scan = get_scan_8x8(tx_type);
|
||||
iscan = get_iscan_8x8(tx_type);
|
||||
block >>= 2;
|
||||
xoff = 8 * (block & twmask);
|
||||
yoff = 8 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_short_fht8x8(src_diff, coeff, bw, tx_type);
|
||||
vp9_short_fht8x8(src_diff, coeff, bw * 4, tx_type);
|
||||
else
|
||||
x->fwd_txm8x8(src_diff, coeff, bw * 2);
|
||||
x->fwd_txm8x8(src_diff, coeff, bw * 8);
|
||||
vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
break;
|
||||
case TX_4X4:
|
||||
tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT;
|
||||
tx_type = plane == 0 ? get_tx_type_4x4(xd, block) : DCT_DCT;
|
||||
scan = get_scan_4x4(tx_type);
|
||||
iscan = get_iscan_4x4(tx_type);
|
||||
xoff = 4 * (block & twmask);
|
||||
yoff = 4 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (tx_type != DCT_DCT)
|
||||
vp9_short_fht4x4(src_diff, coeff, bw, tx_type);
|
||||
vp9_short_fht4x4(src_diff, coeff, bw * 4, tx_type);
|
||||
else
|
||||
x->fwd_txm4x4(src_diff, coeff, bw * 2);
|
||||
x->fwd_txm4x4(src_diff, coeff, bw * 8);
|
||||
vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
|
||||
p->quant, p->quant_shift, qcoeff, dqcoeff,
|
||||
pd->dequant, p->zbin_extra, eob, scan, iscan);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type);
|
||||
}
|
||||
|
||||
static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize,
|
||||
|
|
|
@ -152,63 +152,6 @@ void vp9_quantize_b_32x32_c(int16_t *coeff_ptr, intptr_t n_coeffs,
|
|||
*eob_ptr = eob + 1;
|
||||
}
|
||||
|
||||
// Dispatches quantization for one block of a plane.
//
// Picks the scan/iscan tables from n_coeffs and tx_type, then calls
// vp9_quantize_b_32x32() when n_coeffs >= 1024 and vp9_quantize_b()
// otherwise. Both calls receive the same arguments: the plane's coeff,
// qcoeff and dqcoeff buffers offset via BLOCK_OFFSET(..., block, 16), the
// plane's zbin/round/quant/quant_shift/zbin_extra values from mb, the
// dequant table from xd, and a pointer to eobs[block].
//
// mb       encoder macroblock; mb->e_mbd supplies the MACROBLOCKD planes.
// plane    index into mb->plane[] and xd->plane[].
// block    block index used for the buffer offsets and for eobs[].
// n_coeffs coefficient count; 16, 64 and 256 select the 4x4, 8x8 and 16x16
//          scan tables, any other value selects the 32x32 default tables.
// tx_type  transform type passed to the get_scan_*/get_iscan_* lookups
//          (not used for the 32x32 default tables).
void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coeffs,
|
||||
TX_TYPE tx_type) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
const int16_t *scan, *iscan;
|
||||
|
||||
// These contexts may be available in the caller
|
||||
switch (n_coeffs) {
|
||||
case 4 * 4:
|
||||
scan = get_scan_4x4(tx_type);
|
||||
iscan = get_iscan_4x4(tx_type);
|
||||
break;
|
||||
case 8 * 8:
|
||||
scan = get_scan_8x8(tx_type);
|
||||
iscan = get_iscan_8x8(tx_type);
|
||||
break;
|
||||
case 16 * 16:
|
||||
scan = get_scan_16x16(tx_type);
|
||||
iscan = get_iscan_16x16(tx_type);
|
||||
break;
|
||||
// Every other coefficient count falls back to the default 32x32 tables;
// tx_type is ignored on this path.
default:
|
||||
scan = vp9_default_scan_32x32;
|
||||
iscan = vp9_default_iscan_32x32;
|
||||
break;
|
||||
}
|
||||
|
||||
// Call different quantization for different transform size.
|
||||
if (n_coeffs >= 1024) {
|
||||
// Save index of picked coefficient in pre-scan pass.
|
||||
vp9_quantize_b_32x32(BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
|
||||
n_coeffs, mb->skip_block,
|
||||
mb->plane[plane].zbin,
|
||||
mb->plane[plane].round,
|
||||
mb->plane[plane].quant,
|
||||
mb->plane[plane].quant_shift,
|
||||
BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16),
|
||||
BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
|
||||
xd->plane[plane].dequant,
|
||||
mb->plane[plane].zbin_extra,
|
||||
&xd->plane[plane].eobs[block],
|
||||
scan, iscan);
|
||||
}
|
||||
else {
|
||||
// Smaller coefficient counts take the generic quantizer with the same
// argument list as the 32x32 call above.
vp9_quantize_b(BLOCK_OFFSET(mb->plane[plane].coeff, block, 16),
|
||||
n_coeffs, mb->skip_block,
|
||||
mb->plane[plane].zbin,
|
||||
mb->plane[plane].round,
|
||||
mb->plane[plane].quant,
|
||||
mb->plane[plane].quant_shift,
|
||||
BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16),
|
||||
BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16),
|
||||
xd->plane[plane].dequant,
|
||||
mb->plane[plane].zbin_extra,
|
||||
&xd->plane[plane].eobs[block],
|
||||
scan, iscan);
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
int y_blocks) {
|
||||
MACROBLOCKD *const xd = &mb->e_mbd;
|
||||
|
|
|
@ -22,9 +22,6 @@
|
|||
#define prototype_quantize_mb(sym) \
|
||||
void (sym)(MACROBLOCK *x)
|
||||
|
||||
void vp9_quantize(MACROBLOCK *mb, int plane, int block, int n_coefs,
|
||||
TX_TYPE tx_type);
|
||||
|
||||
void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *mb, int b_idx1, int b_idx2,
|
||||
int y_blocks);
|
||||
void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type,
|
||||
|
|
Loading…
Reference in new issue