Use low-precision 32x32 fdct for encodemb at speed 1
The low-precision 32x32 fdct keeps all of its intermediate steps within 16-bit depth, which allows a faster SSE2 implementation at the expense of a larger round-trip error. It was previously used only in the rate-distortion optimization search loop. Using the low-precision version in place of the high-precision one affects the compression performance by about 0.7% (derf, stdhd) at speed 0. At speed 1, the derf set goes down by only 0.017%. Change-Id: I4e7d18fac5bea5317b91c8e7dabae143bc6b5c8b
This commit is contained in:
Родитель
78182538d6
Коммит
debb9c68c8
|
@ -144,7 +144,7 @@ struct macroblock {
|
|||
int optimize;
|
||||
|
||||
// indicate if it is in the rd search loop or encoding process
|
||||
int rd_search;
|
||||
int use_lp32x32fdct;
|
||||
int skip_encode;
|
||||
|
||||
// Used to store sub partition's choices.
|
||||
|
|
|
@ -565,7 +565,7 @@ static void pick_sb_modes(VP9_COMP *cpi, int mi_row, int mi_col,
|
|||
MACROBLOCK *const x = &cpi->mb;
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
|
||||
x->rd_search = 1;
|
||||
x->use_lp32x32fdct = 1;
|
||||
|
||||
if (bsize < BLOCK_8X8) {
|
||||
// When ab_index = 0 all sub-blocks are handled, so for ab_index != 0
|
||||
|
@ -2546,7 +2546,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
|||
const int mis = cm->mode_info_stride;
|
||||
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
|
||||
const int mi_height = num_8x8_blocks_high_lookup[bsize];
|
||||
x->rd_search = 0;
|
||||
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
|
||||
x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
|
||||
xd->q_index < QIDX_SKIP_THRESH);
|
||||
if (x->skip_encode)
|
||||
|
|
|
@ -475,7 +475,7 @@ void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize,
|
|||
xoff = 32 * (block & twmask);
|
||||
yoff = 32 * (block >> twl);
|
||||
src_diff = p->src_diff + 4 * bw * yoff + xoff;
|
||||
if (x->rd_search)
|
||||
if (x->use_lp32x32fdct)
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
|
||||
else
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
|
||||
|
@ -670,7 +670,7 @@ void encode_block_intra(int plane, int block, BLOCK_SIZE_TYPE bsize,
|
|||
dst, pd->dst.stride, dst, pd->dst.stride);
|
||||
vp9_subtract_block(32, 32, src_diff, bw * 4,
|
||||
src, p->src.stride, dst, pd->dst.stride);
|
||||
if (x->rd_search)
|
||||
if (x->use_lp32x32fdct)
|
||||
vp9_short_fdct32x32_rd(src_diff, coeff, bw * 8);
|
||||
else
|
||||
vp9_short_fdct32x32(src_diff, coeff, bw * 8);
|
||||
|
|
|
@ -723,6 +723,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
|||
sf->adaptive_rd_thresh = 0;
|
||||
sf->use_lastframe_partitioning = 0;
|
||||
sf->tx_size_search_method = USE_FULL_RD;
|
||||
sf->use_lp32x32fdct = 0;
|
||||
sf->use_8tap_always = 0;
|
||||
sf->use_avoid_tested_higherror = 0;
|
||||
sf->reference_masking = 0;
|
||||
|
@ -794,6 +795,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
|||
sf->use_uv_intra_rd_estimate = 1;
|
||||
sf->use_rd_breakout = 1;
|
||||
sf->skip_encode_sb = 1;
|
||||
sf->use_lp32x32fdct = 1;
|
||||
sf->auto_mv_step_size = 1;
|
||||
|
||||
sf->auto_min_max_partition_size = 1;
|
||||
|
@ -825,6 +827,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
|||
sf->use_uv_intra_rd_estimate = 1;
|
||||
sf->use_rd_breakout = 1;
|
||||
sf->skip_encode_sb = 1;
|
||||
sf->use_lp32x32fdct = 1;
|
||||
sf->using_small_partition_info = 1;
|
||||
sf->disable_splitmv =
|
||||
(MIN(cpi->common.width, cpi->common.height) >= 720)? 1 : 0;
|
||||
|
@ -848,6 +851,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
|||
FLAG_EARLY_TERMINATE;
|
||||
sf->use_rd_breakout = 1;
|
||||
sf->skip_encode_sb = 1;
|
||||
sf->use_lp32x32fdct = 1;
|
||||
sf->disable_splitmv = 1;
|
||||
sf->auto_mv_step_size = 1;
|
||||
sf->search_method = BIGDIA;
|
||||
|
@ -869,6 +873,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
|
|||
FLAG_SKIP_INTRA_LOWVAR |
|
||||
FLAG_EARLY_TERMINATE;
|
||||
sf->use_rd_breakout = 1;
|
||||
sf->use_lp32x32fdct = 1;
|
||||
sf->optimize_coefficients = 0;
|
||||
sf->auto_mv_step_size = 1;
|
||||
// sf->reduce_first_step_size = 1;
|
||||
|
|
|
@ -257,6 +257,7 @@ typedef struct {
|
|||
int skip_encode_frame;
|
||||
int use_lastframe_partitioning;
|
||||
TX_SIZE_SEARCH_METHOD tx_size_search_method;
|
||||
int use_lp32x32fdct;
|
||||
int use_8tap_always;
|
||||
int use_avoid_tested_higherror;
|
||||
int skip_lots_of_modes;
|
||||
|
|
Загрузка…
Ссылка в новой задаче