Rework pred pixel buffer system in non-RD coding mode
This commit reworks the inter prediction buffer system so that it supports hybrid partition search. It reduces the runtime of speed -5 by about 3%, with no change in compression performance. Measured encode times: vidyo1 720p 1000 kbps: 11831 ms -> 11497 ms; nik 720p 1000 kbps: 10919 ms -> 10645 ms. Change-Id: I5b2da747c6395c253cd074d3907f5402e1840c36
This commit is contained in:
Родитель
9349a28e80
Коммит
7bea8c59f9
|
@ -2678,6 +2678,22 @@ static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
|
|||
}
|
||||
}
|
||||
|
||||
// Reset the prediction pixel ready flag recursively.
|
||||
static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
|
||||
pc_tree->none.pred_pixel_ready = 0;
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 0;
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 0;
|
||||
pc_tree->vertical[0].pred_pixel_ready = 0;
|
||||
pc_tree->vertical[1].pred_pixel_ready = 0;
|
||||
|
||||
if (bsize > BLOCK_8X8) {
|
||||
BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i)
|
||||
pred_pixel_ready_reset(pc_tree->split[i], subsize);
|
||||
}
|
||||
}
|
||||
|
||||
static void nonrd_pick_partition(VP9_COMP *cpi,
|
||||
TileDataEnc *tile_data,
|
||||
TOKENEXTRA **tp, int mi_row,
|
||||
|
@ -2736,6 +2752,10 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
partition_vert_allowed &= force_vert_split;
|
||||
}
|
||||
|
||||
ctx->pred_pixel_ready = !(partition_vert_allowed ||
|
||||
partition_horz_allowed ||
|
||||
do_split);
|
||||
|
||||
// PARTITION_NONE
|
||||
if (partition_none_allowed) {
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col,
|
||||
|
@ -2743,7 +2763,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
ctx->mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
ctx->skip_txfm[0] = x->skip_txfm[0];
|
||||
ctx->skip = x->skip;
|
||||
ctx->pred_pixel_ready = 0;
|
||||
|
||||
if (this_rdc.rate != INT_MAX) {
|
||||
int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
|
||||
|
@ -2819,17 +2838,17 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
subsize = get_subsize(bsize, PARTITION_HORZ);
|
||||
if (sf->adaptive_motion_search)
|
||||
load_pred_mv(x, ctx);
|
||||
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
|
||||
&pc_tree->horizontal[0]);
|
||||
|
||||
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->horizontal[0].skip = x->skip;
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 0;
|
||||
|
||||
if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
|
||||
load_pred_mv(x, ctx);
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col,
|
||||
&this_rdc, subsize,
|
||||
&pc_tree->horizontal[1]);
|
||||
|
@ -2837,7 +2856,6 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->horizontal[1].skip = x->skip;
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 0;
|
||||
|
||||
if (this_rdc.rate == INT_MAX) {
|
||||
vp9_rd_cost_reset(&sum_rdc);
|
||||
|
@ -2854,32 +2872,32 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
if (sum_rdc.rdcost < best_rdc.rdcost) {
|
||||
best_rdc = sum_rdc;
|
||||
pc_tree->partitioning = PARTITION_HORZ;
|
||||
} else {
|
||||
pred_pixel_ready_reset(pc_tree, bsize);
|
||||
}
|
||||
}
|
||||
|
||||
// PARTITION_VERT
|
||||
if (partition_vert_allowed && do_rect) {
|
||||
subsize = get_subsize(bsize, PARTITION_VERT);
|
||||
|
||||
if (sf->adaptive_motion_search)
|
||||
load_pred_mv(x, ctx);
|
||||
|
||||
pc_tree->vertical[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize,
|
||||
&pc_tree->vertical[0]);
|
||||
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->vertical[0].skip = x->skip;
|
||||
pc_tree->vertical[0].pred_pixel_ready = 0;
|
||||
|
||||
if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
|
||||
load_pred_mv(x, ctx);
|
||||
pc_tree->vertical[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms,
|
||||
&this_rdc, subsize,
|
||||
&pc_tree->vertical[1]);
|
||||
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->vertical[1].skip = x->skip;
|
||||
pc_tree->vertical[1].pred_pixel_ready = 0;
|
||||
|
||||
if (this_rdc.rate == INT_MAX) {
|
||||
vp9_rd_cost_reset(&sum_rdc);
|
||||
|
@ -2896,6 +2914,8 @@ static void nonrd_pick_partition(VP9_COMP *cpi,
|
|||
if (sum_rdc.rdcost < best_rdc.rdcost) {
|
||||
best_rdc = sum_rdc;
|
||||
pc_tree->partitioning = PARTITION_VERT;
|
||||
} else {
|
||||
pred_pixel_ready_reset(pc_tree, bsize);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2977,27 +2997,27 @@ static void nonrd_select_partition(VP9_COMP *cpi,
|
|||
} else {
|
||||
switch (partition) {
|
||||
case PARTITION_NONE:
|
||||
pc_tree->none.pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->none);
|
||||
pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->none.skip = x->skip;
|
||||
pc_tree->none.pred_pixel_ready = 1;
|
||||
break;
|
||||
case PARTITION_VERT:
|
||||
pc_tree->vertical[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->vertical[0]);
|
||||
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->vertical[0].skip = x->skip;
|
||||
pc_tree->vertical[0].pred_pixel_ready = 1;
|
||||
if (mi_col + hbs < cm->mi_cols) {
|
||||
pc_tree->vertical[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
|
||||
&this_rdc, subsize, &pc_tree->vertical[1]);
|
||||
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->vertical[1].skip = x->skip;
|
||||
pc_tree->vertical[1].pred_pixel_ready = 1;
|
||||
if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
|
||||
rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
|
||||
rd_cost->rate += this_rdc.rate;
|
||||
|
@ -3006,19 +3026,19 @@ static void nonrd_select_partition(VP9_COMP *cpi,
|
|||
}
|
||||
break;
|
||||
case PARTITION_HORZ:
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->horizontal[0]);
|
||||
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->horizontal[0].skip = x->skip;
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 1;
|
||||
if (mi_row + hbs < cm->mi_rows) {
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
|
||||
&this_rdc, subsize, &pc_tree->horizontal[0]);
|
||||
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->horizontal[1].skip = x->skip;
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 1;
|
||||
if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX &&
|
||||
rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) {
|
||||
rd_cost->rate += this_rdc.rate;
|
||||
|
@ -3096,6 +3116,7 @@ static void nonrd_use_partition(VP9_COMP *cpi,
|
|||
|
||||
switch (partition) {
|
||||
case PARTITION_NONE:
|
||||
pc_tree->none.pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->none);
|
||||
pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
|
@ -3103,12 +3124,14 @@ static void nonrd_use_partition(VP9_COMP *cpi,
|
|||
pc_tree->none.skip = x->skip;
|
||||
break;
|
||||
case PARTITION_VERT:
|
||||
pc_tree->vertical[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->vertical[0]);
|
||||
pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->vertical[0].skip = x->skip;
|
||||
if (mi_col + hbs < cm->mi_cols) {
|
||||
pc_tree->vertical[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs,
|
||||
&this_rdc, subsize, &pc_tree->vertical[1]);
|
||||
pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
|
@ -3122,12 +3145,14 @@ static void nonrd_use_partition(VP9_COMP *cpi,
|
|||
}
|
||||
break;
|
||||
case PARTITION_HORZ:
|
||||
pc_tree->horizontal[0].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost,
|
||||
subsize, &pc_tree->horizontal[0]);
|
||||
pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
|
||||
pc_tree->horizontal[0].skip = x->skip;
|
||||
if (mi_row + hbs < cm->mi_rows) {
|
||||
pc_tree->horizontal[1].pred_pixel_ready = 1;
|
||||
nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col,
|
||||
&this_rdc, subsize, &pc_tree->horizontal[0]);
|
||||
pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi;
|
||||
|
|
|
@ -515,8 +515,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
PRED_BUFFER *best_pred = NULL;
|
||||
PRED_BUFFER *this_mode_pred = NULL;
|
||||
const int pixels_in_block = bh * bw;
|
||||
int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
if (reuse_inter_pred) {
|
||||
int i;
|
||||
for (i = 0; i < 3; i++) {
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -639,7 +640,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
// Search for the best prediction filter type, when the resulting
|
||||
// motion vector is at sub-pixel accuracy level for luma component, i.e.,
|
||||
// the last three bits are all zeros.
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
if (reuse_inter_pred) {
|
||||
if (!this_mode_pred) {
|
||||
this_mode_pred = &tmp[3];
|
||||
} else {
|
||||
|
@ -677,7 +678,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
best_cost = cost;
|
||||
skip_txfm = x->skip_txfm[0];
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
if (reuse_inter_pred) {
|
||||
if (this_mode_pred != current_pred) {
|
||||
free_pred_buffer(this_mode_pred);
|
||||
this_mode_pred = current_pred;
|
||||
|
@ -692,7 +693,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
}
|
||||
}
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby && this_mode_pred != current_pred)
|
||||
if (reuse_inter_pred && this_mode_pred != current_pred)
|
||||
free_pred_buffer(current_pred);
|
||||
|
||||
mbmi->interp_filter = best_filter;
|
||||
|
@ -744,13 +745,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
best_ref_frame = ref_frame;
|
||||
skip_txfm = x->skip_txfm[0];
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
if (reuse_inter_pred) {
|
||||
free_pred_buffer(best_pred);
|
||||
|
||||
best_pred = this_mode_pred;
|
||||
}
|
||||
} else {
|
||||
if (cpi->sf.reuse_inter_pred_sby)
|
||||
if (reuse_inter_pred)
|
||||
free_pred_buffer(this_mode_pred);
|
||||
}
|
||||
|
||||
|
@ -764,7 +764,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
|
||||
// If best prediction is not in dst buf, then copy the prediction block from
|
||||
// temp buf to dst buf.
|
||||
if (best_pred != NULL && cpi->sf.reuse_inter_pred_sby &&
|
||||
if (best_pred != NULL && reuse_inter_pred &&
|
||||
best_pred->data != orig_dst.buf) {
|
||||
pd->dst = orig_dst;
|
||||
#if CONFIG_VP9_HIGHBITDEPTH
|
||||
|
@ -799,7 +799,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
MIN(max_txsize_lookup[bsize],
|
||||
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
|
||||
|
||||
if (cpi->sf.reuse_inter_pred_sby) {
|
||||
if (reuse_inter_pred) {
|
||||
pd->dst.buf = tmp[0].data;
|
||||
pd->dst.stride = bw;
|
||||
}
|
||||
|
@ -831,7 +831,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
x->skip_txfm[0] = skip_txfm;
|
||||
}
|
||||
}
|
||||
if (cpi->sf.reuse_inter_pred_sby)
|
||||
if (reuse_inter_pred)
|
||||
pd->dst = orig_dst;
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче