Alter Speed 3.
This patch deletes the variance-based speed-3 partitioning. Speed 3 now uses the same partitioning method as speed 2, but with stricter conditions. Speed and quality now fall between speeds 2 and 4, whereas previously speed 3 was worse than speed 4 on both counts. Change-Id: Ia142e7007299d79db3ceee6ca8670540db6f7a41
This commit is contained in:
Parent: 8e45778eaf
Commit: a76caa7ff4
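For context, the partitioning path removed below stopped splitting a block whenever a scaled variance of the 8x8 prediction-error statistics fell under a quantizer-derived threshold. The standalone C sketch below reproduces that metric as defined in the deleted fill_variance() and the threshold used by the non-random set_vt_partitioning(); the sample statistics, the base_qindex value, and the main() driver are illustrative only and not part of the patch.

#include <stdint.h>
#include <stdio.h>

/* Scaled sample variance from the deleted fill_variance():
 * 256 * (sum_square_error - sum_error^2 / count) / count. */
static int block_variance(int64_t sum_square_error, int64_t sum_error, int count) {
  if (count <= 0)
    return 0;
  return (int)(256 * (sum_square_error - sum_error * sum_error / count) / count);
}

int main(void) {
  const int64_t sse = 5000;   /* hypothetical 8x8 sum of squared errors */
  const int64_t sum = 320;    /* hypothetical 8x8 sum of errors */
  const int count = 64;       /* samples in an 8x8 block */
  const int base_qindex = 60; /* illustrative quantizer index */
  /* The deleted set_vt_partitioning() compared against 50 * base_qindex. */
  const int64_t threshold = 50 * base_qindex;
  const int variance = block_variance(sse, sum, count);

  printf("variance = %d -> %s\n", variance,
         variance < threshold ? "keep block whole" : "split further");
  return 0;
}

With these numbers the variance is 13600 against a threshold of 3000, so this block would keep splitting; the patch drops this heuristic for speed 3 in favor of the speed-2 style last-frame partitioning enabled further down.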
@@ -951,323 +951,6 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
  }
}

static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8,
                           BLOCK_SIZE bsize, int mis, int mi_row,
                           int mi_col) {
  int r, c;
  const int bs = MAX(num_8x8_blocks_wide_lookup[bsize],
                     num_8x8_blocks_high_lookup[bsize]);
  const int idx_str = mis * mi_row + mi_col;
  MODE_INFO **const mi2 = &mi_8x8[idx_str];

  mi2[0] = cm->mi + idx_str;
  mi2[0]->mbmi.sb_type = bsize;

  for (r = 0; r < bs; r++)
    for (c = 0; c < bs; c++)
      if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols)
        mi2[r * mis + c] = mi2[0];
}

typedef struct {
  int64_t sum_square_error;
  int64_t sum_error;
  int count;
  int variance;
} var;

typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

#define VT(TYPE, BLOCKSIZE) \
  typedef struct { \
    partition_variance vt; \
    BLOCKSIZE split[4]; } TYPE;

VT(v8x8, var)
VT(v16x16, v8x8)
VT(v32x32, v16x16)
VT(v64x64, v32x32)

typedef struct {
  partition_variance *vt;
  var *split[4];
} vt_node;

typedef enum {
  V16X16,
  V32X32,
  V64X64,
} TREE_LEVEL;

static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) {
  int i;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *vt = (v64x64 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *vt = (v32x32 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *vt = (v16x16 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i].vt.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *vt = (v8x8 *) data;
      node->vt = &vt->vt;
      for (i = 0; i < 4; i++)
        node->split[i] = &vt->split[i];
      break;
    }
    default:
      node->vt = 0;
      for (i = 0; i < 4; i++)
        node->split[i] = 0;
      assert(-1);
  }
}

// Set variance values given sum square error, sum error, count.
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
  v->sum_square_error = s2;
  v->sum_error = s;
  v->count = c;
  if (c > 0)
    v->variance = (int)(256
        * (v->sum_square_error - v->sum_error * v->sum_error / v->count)
        / v->count);
  else
    v->variance = 0;
}

// Combine 2 variance structures by summing the sum_error, sum_square_error,
// and counts and then calculating the new variance.
void sum_2_variances(var *r, var *a, var*b) {
  fill_variance(r, a->sum_square_error + b->sum_square_error,
                a->sum_error + b->sum_error, a->count + b->count);
}

static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  vt_node node;
  tree_to_node(data, bsize, &node);
  sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
  sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
  sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
  sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
  sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
}

#if PERFORM_RANDOM_PARTITIONING
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO *m,
                               BLOCK_SIZE block_size, int mi_row,
                               int mi_col, int mi_size) {
  VP9_COMMON * const cm = &cpi->common;
  vt_node vt;
  const int mis = cm->mode_info_stride;
  int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;

  tree_to_node(data, block_size, &vt);

  // split none is available only if we have more than half a block size
  // in width and height inside the visible image
  if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
      (rand() & 3) < 1) {
    set_block_size(cm, m, block_size, mis, mi_row, mi_col);
    return 1;
  }

  // vertical split is available on all but the bottom border
  if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
      && (rand() & 3) < 1) {
    set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
                   mi_col);
    return 1;
  }

  // horizontal split is available on all but the right border
  if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
      && (rand() & 3) < 1) {
    set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
                   mi_col);
    return 1;
  }

  return 0;
}

#else // !PERFORM_RANDOM_PARTITIONING

static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
                               BLOCK_SIZE bsize, int mi_row,
                               int mi_col, int mi_size) {
  VP9_COMMON * const cm = &cpi->common;
  vt_node vt;
  const int mis = cm->mode_info_stride;
  int64_t threshold = 50 * cpi->common.base_qindex;

  tree_to_node(data, bsize, &vt);

  // split none is available only if we have more than half a block size
  // in width and height inside the visible image
  if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
      && vt.vt->none.variance < threshold) {
    set_block_size(cm, m, bsize, mis, mi_row, mi_col);
    return 1;
  }

  // vertical split is available on all but the bottom border
  if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
      && vt.vt->vert[1].variance < threshold) {
    set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row,
                   mi_col);
    return 1;
  }

  // horizontal split is available on all but the right border
  if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
      && vt.vt->horz[1].variance < threshold) {
    set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row,
                   mi_col);
    return 1;
  }

  return 0;
}
#endif // PERFORM_RANDOM_PARTITIONING

static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
                                int mi_row, int mi_col) {
  VP9_COMMON * const cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  MACROBLOCKD *xd = &cpi->mb.e_mbd;
  const int mis = cm->mode_info_stride;
  // TODO(JBB): More experimentation or testing of this threshold;
  int64_t threshold = 4;
  int i, j, k;
  v64x64 vt;
  unsigned char * s;
  int sp;
  const unsigned char * d;
  int dp;
  int pixels_wide = 64, pixels_high = 64;

  vp9_zero(vt);
  set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);

  if (xd->mb_to_right_edge < 0)
    pixels_wide += (xd->mb_to_right_edge >> 3);

  if (xd->mb_to_bottom_edge < 0)
    pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
  // but this needs more experimentation.
  threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;

  d = vp9_64x64_zeros;
  dp = 64;
  if (cm->frame_type != KEY_FRAME) {
    int_mv nearest_mv, near_mv;
    const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
    YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
    YV12_BUFFER_CONFIG *second_ref_fb = NULL;

    setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
                     &xd->scale_factor[0]);
    setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
                     &xd->scale_factor[1]);

    xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME;
    xd->this_mi->mbmi.sb_type = BLOCK_64X64;
    vp9_find_best_ref_mvs(xd,
                          mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]],
                          &nearest_mv, &near_mv);

    xd->this_mi->mbmi.mv[0] = nearest_mv;
    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);

    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;
  }

  // Fill in the entire tree of 8x8 variances for splits.
  for (i = 0; i < 4; i++) {
    const int x32_idx = ((i & 1) << 5);
    const int y32_idx = ((i >> 1) << 5);
    for (j = 0; j < 4; j++) {
      const int x16_idx = x32_idx + ((j & 1) << 4);
      const int y16_idx = y32_idx + ((j >> 1) << 4);
      v16x16 *vst = &vt.split[i].split[j];
      for (k = 0; k < 4; k++) {
        int x_idx = x16_idx + ((k & 1) << 3);
        int y_idx = y16_idx + ((k >> 1) << 3);
        unsigned int sse = 0;
        int sum = 0;
        if (x_idx < pixels_wide && y_idx < pixels_high)
          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
                              d + y_idx * dp + x_idx, dp, &sse, &sum);
        fill_variance(&vst->split[k].vt.none, sse, sum, 64);
      }
    }
  }
  // Fill the rest of the variance tree by summing the split partition
  // values.
  for (i = 0; i < 4; i++) {
    for (j = 0; j < 4; j++) {
      fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
    }
    fill_variance_tree(&vt.split[i], BLOCK_32X32);
  }
  fill_variance_tree(&vt, BLOCK_64X64);
  // Now go through the entire structure, splitting every block size until
  // we get to one that's got a variance lower than our threshold, or we
  // hit 8x8.
  if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col,
                           4)) {
    for (i = 0; i < 4; ++i) {
      const int x32_idx = ((i & 1) << 2);
      const int y32_idx = ((i >> 1) << 2);
      if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32,
                               (mi_row + y32_idx), (mi_col + x32_idx), 2)) {
        for (j = 0; j < 4; ++j) {
          const int x16_idx = ((j & 1) << 1);
          const int y16_idx = ((j >> 1) << 1);
          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8,
                                   BLOCK_16X16,
                                   (mi_row + y32_idx + y16_idx),
                                   (mi_col + x32_idx + x16_idx), 1)) {
            for (k = 0; k < 4; ++k) {
              const int x8_idx = (k & 1);
              const int y8_idx = (k >> 1);
              set_block_size(cm, mi_8x8, BLOCK_8X8, mis,
                             (mi_row + y32_idx + y16_idx + y8_idx),
                             (mi_col + x32_idx + x16_idx + x8_idx));
            }
          }
        }
      }
    }
  }
}

static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,

@@ -2060,7 +1743,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
    if (cpi->sf.reference_masking)
      rd_pick_reference_frame(cpi, mi_row, mi_col);

    if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
    if (cpi->sf.use_lastframe_partitioning ||
        cpi->sf.use_one_partition_size_always ) {
      const int idx_str = cm->mode_info_stride * mi_row + mi_col;
      MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;

@@ -2072,10 +1755,6 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
        set_partitioning(cpi, mi_8x8, mi_row, mi_col);
        rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else if (cpi->sf.partition_by_variance) {
        choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col);
        rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                         &dummy_rate, &dummy_dist, 1);
      } else {
        if ((cpi->common.current_video_frame
            % cpi->sf.last_partitioning_redo_frequency) == 0

@@ -703,7 +703,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
  sf->adaptive_motion_search = 0;
  sf->use_avoid_tested_higherror = 0;
  sf->reference_masking = 0;
  sf->partition_by_variance = 0;
  sf->use_one_partition_size_always = 0;
  sf->less_rectangular_check = 0;
  sf->use_square_partition_only = 0;

@@ -826,8 +825,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
      sf->mode_skip_start = 6;
    }
    if (speed == 3) {
      sf->less_rectangular_check = 1;
      sf->use_square_partition_only = 1;
      sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
      sf->partition_by_variance = 1;
      sf->use_lastframe_partitioning = 1;
      sf->adjust_partitioning_from_last_frame = 1;
      sf->last_partitioning_redo_frequency = 3;
      sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
                                    cpi->common.intra_only ||
                                    cpi->common.show_frame == 0) ?

@@ -839,17 +842,23 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
                                   FLAG_SKIP_COMP_REFMISMATCH |
                                   FLAG_SKIP_INTRA_LOWVAR |
                                   FLAG_EARLY_TERMINATE;
      sf->intra_y_mode_mask = INTRA_DC_ONLY;
      sf->intra_uv_mode_mask = INTRA_DC_ONLY;
      sf->use_uv_intra_rd_estimate = 1;
      sf->use_rd_breakout = 1;
      sf->skip_encode_sb = 1;
      sf->use_lp32x32fdct = 1;
      sf->adaptive_motion_search = 1;
      sf->using_small_partition_info = 0;
      sf->disable_splitmv = 1;
      sf->auto_mv_step_size = 1;
      sf->search_method = BIGDIA;
      sf->subpel_iters_per_step = 1;
      sf->use_fast_lpf_pick = 1;
      sf->auto_min_max_partition_size = 1;
      sf->auto_min_max_partition_interval = 2;
      sf->disable_split_var_thresh = 64;
      sf->disable_filter_search_var_thresh = 64;
      sf->intra_y_mode_mask = INTRA_DC_ONLY;
      sf->intra_uv_mode_mask = INTRA_DC_ONLY;
      sf->use_fast_coef_updates = 2;
      sf->mode_skip_start = 6;
    }

@@ -250,7 +250,6 @@ typedef struct {
  TX_SIZE_SEARCH_METHOD tx_size_search_method;
  int use_lp32x32fdct;
  int use_avoid_tested_higherror;
  int partition_by_variance;
  int use_one_partition_size_always;
  int less_rectangular_check;
  int use_square_partition_only;