Rework the tx type speed feature

This commit re-works the transform type speed feature. It moves
the transform type selection outside of the coding mode loop. This
avoids repeated motion search if the best prediction mode is
chosen as NEWMV. It improves the speed performance for clips that
contain more motion activities.

For mobile_cif at 1000 kbps, this makes the baseline encoding 7%
faster and makes the encoding with dynamic motion vector referencing
scheme enabled 10% faster.

Change-Id: I93e2714b3e461303372c4b66a4134ee212faffd1
This commit is contained in:
Jingning Han 2016-06-06 14:58:50 -07:00
Родитель 3713949b6d
Коммит 9a858e868c
1 изменённых файлов: 100 добавлений и 19 удалений

Просмотреть файл

@ -8358,12 +8358,12 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
VP9_ALT_FLAG
};
int64_t best_rd = best_rd_so_far;
int best_rate_y = INT_MAX, best_rate_uv = INT_MAX;
int64_t best_pred_diff[REFERENCE_MODES];
int64_t best_pred_rd[REFERENCE_MODES];
MB_MODE_INFO best_mbmode;
int best_mode_skippable = 0;
int midx, best_mode_index = -1;
const int FINAL_MODE_SEARCH = MAX_MODES;
unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
vpx_prob comp_mode_p;
int64_t best_intra_rd = INT64_MAX;
@ -8653,17 +8653,17 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
midx = end_pos;
}
if (cpi->sf.tx_type_search.fast_intra_tx_type_search)
x->use_default_intra_tx_type = 1;
else
x->use_default_intra_tx_type = 0;
if (cpi->sf.tx_type_search.fast_inter_tx_type_search)
x->use_default_inter_tx_type = 1;
else
x->use_default_inter_tx_type = 0;
for (midx = 0; midx <= FINAL_MODE_SEARCH; ++midx) {
for (midx = 0; midx < MAX_MODES; ++midx) {
int mode_index;
int mode_excluded = 0;
int64_t this_rd = INT64_MAX;
@ -8683,22 +8683,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
uint8_t ref_frame_type;
#endif
if (midx == FINAL_MODE_SEARCH) {
if (best_mode_index < 0)
break;
mode_index = best_mode_index;
if (!is_inter_mode(best_mbmode.mode) &&
x->use_default_intra_tx_type == 1) {
x->use_default_intra_tx_type = 0;
} else if (is_inter_mode(best_mbmode.mode) &&
x->use_default_inter_tx_type == 1) {
x->use_default_inter_tx_type = 0;
} else {
break;
}
} else {
mode_index = mode_map[midx];
}
mode_index = mode_map[midx];
this_mode = vp10_mode_order[mode_index].mode;
ref_frame = vp10_mode_order[mode_index].ref_frame[0];
second_ref_frame = vp10_mode_order[mode_index].ref_frame[1];
@ -9465,6 +9450,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_mode_skippable = skippable;
best_rate_y = rate_y +
vp10_cost_bit(vp10_get_skip_prob(cm, xd), this_skip2 || skippable);
best_rate_uv = rate_uv;
if (!x->select_tx_size)
swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
@ -9535,6 +9523,99 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
break;
}
if (sf->tx_type_search.fast_inter_tx_type_search == 1 &&
xd->lossless[mbmi->segment_id] == 0 &&
best_mode_index >= 0) {
int rate_y = 0, rate_uv = 0;
int64_t dist_y = 0, dist_uv = 0;
int skip_y = 0, skip_uv = 0, skip_blk = 0;
int64_t sse_y = 0, sse_uv = 0;
x->use_default_inter_tx_type = 0;
x->use_default_intra_tx_type = 0;
*mbmi = best_mbmode;
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
// Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
if (has_second_ref(mbmi))
xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
}
if (is_inter_mode(mbmi->mode)) {
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
vp10_subtract_plane(x, bsize, 0);
#if CONFIG_VAR_TX
if (cm->tx_mode == TX_MODE_SELECT || xd->lossless[mbmi->segment_id]) {
select_tx_type_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y,
bsize, INT64_MAX);
} else {
int idx, idy;
super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y,
bsize, INT64_MAX);
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
memset(x->blk_skip[0], skip_y,
sizeof(uint8_t) * xd->n8_h * xd->n8_w * 4);
}
inter_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv,
&sse_uv, bsize, INT64_MAX);
#else
super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y,
bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv,
&sse_uv, bsize, INT64_MAX);
#endif // CONFIG_VAR_TX
} else {
super_block_yrd(cpi, x, &rate_y, &dist_y, &skip_y, &sse_y,
bsize, INT64_MAX);
super_block_uvrd(cpi, x, &rate_uv, &dist_uv, &skip_uv,
&sse_uv, bsize, INT64_MAX);
}
if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, (dist_y + dist_uv)) >
RDCOST(x->rdmult, x->rddiv, 0, (sse_y + sse_uv))) {
skip_blk = 1;
rate_y = vp10_cost_bit(vp10_get_skip_prob(cm, xd), 1);
rate_uv = 0;
dist_y = sse_y;
dist_uv = sse_uv;
} else {
skip_blk = 0;
rate_y += vp10_cost_bit(vp10_get_skip_prob(cm, xd), 0);
}
if (RDCOST(x->rdmult, x->rddiv,
best_rate_y + best_rate_uv, rd_cost->dist) >
RDCOST(x->rdmult, x->rddiv,
rate_y + rate_uv, (dist_y + dist_uv))) {
#if CONFIG_VAR_TX
int idx, idy;
#endif
best_mbmode.tx_type = mbmi->tx_type;
best_mbmode.tx_size = mbmi->tx_size;
#if CONFIG_VAR_TX
for (idy = 0; idy < xd->n8_h; ++idy)
for (idx = 0; idx < xd->n8_w; ++idx)
best_mbmode.inter_tx_size[idy][idx] = mbmi->inter_tx_size[idy][idx];
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(ctx->blk_skip[i], x->blk_skip[i],
sizeof(uint8_t) * ctx->num_4x4_blk);
#endif
rd_cost->rate += (rate_y + rate_uv - best_rate_y - best_rate_uv);
rd_cost->dist = dist_y + dist_uv;
rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv,
rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
}
}
// Only try palette mode when the best mode so far is an intra mode.
if (cm->allow_screen_content_tools && !is_inter_mode(best_mbmode.mode)) {
PREDICTION_MODE mode_selected;