Merge scale_factors and scale_factors_uv.
This avoids a duplicate memcpy of a 128-byte struct every time set_scale_factors() is called (which happens very frequently), reducing struct copying from 3.7 MB to 1.85 MB per 64x64 block RD/partition loop. Overall, this decreases the encoding time of the first 50 frames of bus @ 1500kbps (speed 0) from 1min5.9s to 1min4.9s, i.e. about a 1.5% overall speedup. We can likely get further gains by also removing the copy of the other struct (and replacing it with indexing). Change-Id: I3dceb7e79f71e6fe911b11cc994cf89a869dde7a
This commit is contained in:
Родитель
df4b4fab26
Коммит
5ebe503f04
|
@ -219,7 +219,6 @@ typedef struct macroblockd {
|
|||
struct macroblockd_plane plane[MAX_MB_PLANE];
|
||||
|
||||
struct scale_factors scale_factor[2];
|
||||
struct scale_factors scale_factor_uv[2];
|
||||
|
||||
MODE_INFO *prev_mode_info_context;
|
||||
MODE_INFO *mode_info_context;
|
||||
|
|
|
@ -296,8 +296,7 @@ static void build_inter_predictors(int plane, int block,
|
|||
const int pre_stride = arg->pre_stride[which_mv][plane];
|
||||
const uint8_t *const pre = base_pre +
|
||||
scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]);
|
||||
struct scale_factors * const scale =
|
||||
plane == 0 ? &xd->scale_factor[which_mv] : &xd->scale_factor_uv[which_mv];
|
||||
struct scale_factors * const scale = &xd->scale_factor[which_mv];
|
||||
|
||||
// dest
|
||||
uint8_t *const dst = arg->dst[plane] + arg->dst_stride[plane] * y + x;
|
||||
|
|
|
@ -83,8 +83,7 @@ static void setup_dst_planes(MACROBLOCKD *xd,
|
|||
static void setup_pre_planes(MACROBLOCKD *xd, int i,
|
||||
const YV12_BUFFER_CONFIG *src,
|
||||
int mi_row, int mi_col,
|
||||
const struct scale_factors *scale,
|
||||
const struct scale_factors *scale_uv) {
|
||||
const struct scale_factors *sf) {
|
||||
if (src) {
|
||||
int j;
|
||||
uint8_t* buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,
|
||||
|
@ -94,7 +93,6 @@ static void setup_pre_planes(MACROBLOCKD *xd, int i,
|
|||
|
||||
for (j = 0; j < MAX_MB_PLANE; ++j) {
|
||||
struct macroblockd_plane *pd = &xd->plane[j];
|
||||
const struct scale_factors *sf = j ? scale_uv : scale;
|
||||
setup_pred_plane(&pd->pre[i], buffers[j], strides[j],
|
||||
mi_row, mi_col, sf, pd->subsampling_x, pd->subsampling_y);
|
||||
}
|
||||
|
@ -103,8 +101,8 @@ static void setup_pre_planes(MACROBLOCKD *xd, int i,
|
|||
|
||||
static void set_scale_factors(MACROBLOCKD *xd, int ref0, int ref1,
|
||||
struct scale_factors sf[MAX_REF_FRAMES]) {
|
||||
xd->scale_factor[0] = xd->scale_factor_uv[0] = sf[ref0 >= 0 ? ref0 : 0];
|
||||
xd->scale_factor[1] = xd->scale_factor_uv[1] = sf[ref1 >= 0 ? ref1 : 0];
|
||||
xd->scale_factor[0] = sf[ref0 >= 0 ? ref0 : 0];
|
||||
xd->scale_factor[1] = sf[ref1 >= 0 ? ref1 : 0];
|
||||
}
|
||||
|
||||
void vp9_setup_scale_factors(VP9_COMMON *cm, int i);
|
||||
|
|
|
@ -215,9 +215,7 @@ static void set_ref(VP9D_COMP *pbi, int i, int mi_row, int mi_col) {
|
|||
|
||||
const YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[ref]];
|
||||
xd->scale_factor[i] = cm->active_ref_scale[ref];
|
||||
xd->scale_factor_uv[i] = cm->active_ref_scale[ref];
|
||||
setup_pre_planes(xd, i, cfg, mi_row, mi_col,
|
||||
&xd->scale_factor[i], &xd->scale_factor_uv[i]);
|
||||
setup_pre_planes(xd, i, cfg, mi_row, mi_col, &xd->scale_factor[i]);
|
||||
xd->corrupted |= cfg->corrupted;
|
||||
}
|
||||
|
||||
|
@ -969,8 +967,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
|
|||
update_frame_context(&pc->fc);
|
||||
|
||||
// Initialize xd pointers. Any reference should do for xd->pre, so use 0.
|
||||
setup_pre_planes(xd, 0, &pc->yv12_fb[pc->active_ref_idx[0]], 0, 0,
|
||||
NULL, NULL);
|
||||
setup_pre_planes(xd, 0, &pc->yv12_fb[pc->active_ref_idx[0]], 0, 0, NULL);
|
||||
setup_dst_planes(xd, new_fb, 0, 0);
|
||||
|
||||
new_fb->corrupted |= read_compressed_header(pbi, data, first_partition_size);
|
||||
|
|
|
@ -1078,9 +1078,9 @@ static void choose_partitioning(VP9_COMP *cpi, MODE_INFO *m, int mi_row,
|
|||
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
|
||||
|
||||
setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
|
||||
&xd->scale_factor[0], &xd->scale_factor_uv[0]);
|
||||
&xd->scale_factor[0]);
|
||||
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
|
||||
&xd->scale_factor[1], &xd->scale_factor_uv[1]);
|
||||
&xd->scale_factor[1]);
|
||||
xd->mode_info_context->mbmi.ref_frame[0] = LAST_FRAME;
|
||||
xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
|
||||
vp9_find_best_ref_mvs(xd, m->mbmi.ref_mvs[m->mbmi.ref_frame[0]],
|
||||
|
@ -1917,7 +1917,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) {
|
|||
|
||||
// TODO(jkoleszar): are these initializations required?
|
||||
setup_pre_planes(xd, 0, &cm->yv12_fb[cm->ref_frame_map[cpi->lst_fb_idx]],
|
||||
0, 0, NULL, NULL);
|
||||
0, 0, NULL);
|
||||
setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], 0, 0);
|
||||
|
||||
setup_block_dptrs(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
|
||||
|
@ -2551,9 +2551,9 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
|
|||
assert(cm->frame_type != KEY_FRAME);
|
||||
|
||||
setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
|
||||
&xd->scale_factor[0], &xd->scale_factor_uv[0]);
|
||||
&xd->scale_factor[0]);
|
||||
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
|
||||
&xd->scale_factor[1], &xd->scale_factor_uv[1]);
|
||||
&xd->scale_factor[1]);
|
||||
|
||||
|
||||
vp9_build_inter_predictors_sb(
|
||||
|
|
|
@ -501,7 +501,7 @@ void vp9_first_pass(VP9_COMP *cpi) {
|
|||
vp9_clear_system_state(); // __asm emms;
|
||||
|
||||
vp9_setup_src_planes(x, cpi->Source, 0, 0);
|
||||
setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL, NULL);
|
||||
setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL);
|
||||
setup_dst_planes(xd, new_yv12, 0, 0);
|
||||
|
||||
x->partition_info = x->pi;
|
||||
|
|
|
@ -2420,7 +2420,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
for (i = 0; i < MAX_MB_PLANE; i++)
|
||||
backup_yv12[i] = xd->plane[i].pre[0];
|
||||
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL, NULL);
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
|
||||
}
|
||||
|
||||
vp9_clamp_mv_min_max(x, &ref_mv);
|
||||
|
@ -2528,7 +2528,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
// motion search code to be used without additional modifications.
|
||||
for (i = 0; i < MAX_MB_PLANE; i++)
|
||||
backup_yv12[i] = xd->plane[i].pre[0];
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL, NULL);
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame[0], mi_row, mi_col, NULL);
|
||||
}
|
||||
|
||||
if (scaled_ref_frame[1]) {
|
||||
|
@ -2536,7 +2536,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
|
|||
for (i = 0; i < MAX_MB_PLANE; i++)
|
||||
backup_second_yv12[i] = xd->plane[i].pre[1];
|
||||
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL, NULL);
|
||||
setup_pre_planes(xd, 0, scaled_ref_frame[1], mi_row, mi_col, NULL);
|
||||
}
|
||||
|
||||
xd->scale_factor[0].set_scaled_offsets(&xd->scale_factor[0],
|
||||
|
|
|
@ -58,7 +58,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
|
|||
vp9_build_inter_predictor(u_mb_ptr, stride,
|
||||
&pred[256], 8,
|
||||
&mv,
|
||||
&xd->scale_factor_uv[which_mv],
|
||||
&xd->scale_factor[which_mv],
|
||||
8, 8,
|
||||
which_mv,
|
||||
&xd->subpix, MV_PRECISION_Q4);
|
||||
|
@ -66,7 +66,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
|
|||
vp9_build_inter_predictor(v_mb_ptr, stride,
|
||||
&pred[320], 8,
|
||||
&mv,
|
||||
&xd->scale_factor_uv[which_mv],
|
||||
&xd->scale_factor[which_mv],
|
||||
8, 8,
|
||||
which_mv,
|
||||
&xd->subpix, MV_PRECISION_Q4);
|
||||
|
@ -443,7 +443,6 @@ void vp9_temporal_filter_prepare(VP9_COMP *cpi, int distance) {
|
|||
cm->yv12_fb[cm->new_fb_idx].y_crop_width,
|
||||
cm->yv12_fb[cm->new_fb_idx].y_crop_height,
|
||||
cm->width, cm->height);
|
||||
cpi->mb.e_mbd.scale_factor_uv[0] = cpi->mb.e_mbd.scale_factor[0];
|
||||
|
||||
// Setup frame pointers, NULL indicates frame not included in filter
|
||||
vpx_memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
|
||||
|
|
Загрузка…
Ссылка в новой задаче