This patch deletes the variance-based speed-3 partitioning.
Speed 3 now uses the same partitioning method as speed 2,
but with some stricter conditions.

Speed and quality now fall between speeds 2 and 4,
whereas before speed 3 was worse than speed 4 on both counts.

Change-Id: Ia142e7007299d79db3ceee6ca8670540db6f7a41
Paul Wilkins 2013-09-19 18:20:18 +01:00
Parent 8e45778eaf
Commit a76caa7ff4
3 changed files with 14 additions and 327 deletions

View file

@@ -951,323 +951,6 @@ static void copy_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
}
}
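// Note: set_block_size() records a chosen partition by pointing every
// in-picture 8x8 mode-info entry covered by the block at a single
// MODE_INFO and stamping its sb_type with the chosen block size.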
static void set_block_size(VP9_COMMON * const cm, MODE_INFO **mi_8x8,
BLOCK_SIZE bsize, int mis, int mi_row,
int mi_col) {
int r, c;
const int bs = MAX(num_8x8_blocks_wide_lookup[bsize],
num_8x8_blocks_high_lookup[bsize]);
const int idx_str = mis * mi_row + mi_col;
MODE_INFO **const mi2 = &mi_8x8[idx_str];
mi2[0] = cm->mi + idx_str;
mi2[0]->mbmi.sb_type = bsize;
for (r = 0; r < bs; r++)
for (c = 0; c < bs; c++)
if (mi_row + r < cm->mi_rows && mi_col + c < cm->mi_cols)
mi2[r * mis + c] = mi2[0];
}
typedef struct {
int64_t sum_square_error;
int64_t sum_error;
int count;
int variance;
} var;
typedef struct {
var none;
var horz[2];
var vert[2];
} partition_variance;
#define VT(TYPE, BLOCKSIZE) \
typedef struct { \
partition_variance vt; \
BLOCKSIZE split[4]; } TYPE;
VT(v8x8, var)
VT(v16x16, v8x8)
VT(v32x32, v16x16)
VT(v64x64, v32x32)
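// For illustration, VT(v16x16, v8x8) expands to
//   typedef struct { partition_variance vt; v8x8 split[4]; } v16x16;
// so a single v64x64 instance carries the whole 8x8-to-64x64 variance
// pyramid for one superblock.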
typedef struct {
partition_variance *vt;
var *split[4];
} vt_node;
typedef enum {
V16X16,
V32X32,
V64X64,
} TREE_LEVEL;
static void tree_to_node(void *data, BLOCK_SIZE bsize, vt_node *node) {
int i;
switch (bsize) {
case BLOCK_64X64: {
v64x64 *vt = (v64x64 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_32X32: {
v32x32 *vt = (v32x32 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_16X16: {
v16x16 *vt = (v16x16 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i].vt.none;
break;
}
case BLOCK_8X8: {
v8x8 *vt = (v8x8 *) data;
node->vt = &vt->vt;
for (i = 0; i < 4; i++)
node->split[i] = &vt->split[i];
break;
}
default:
node->vt = 0;
for (i = 0; i < 4; i++)
node->split[i] = 0;
assert(0);  // unreachable: unhandled block size
}
}
// Set variance values given sum square error, sum error, count.
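// The fixed-point result is 256 * (s2 - s * s / c) / c, i.e. the usual
// E[x^2] - E[x]^2 scaled by 256 to preserve integer precision.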
static void fill_variance(var *v, int64_t s2, int64_t s, int c) {
v->sum_square_error = s2;
v->sum_error = s;
v->count = c;
if (c > 0)
v->variance = (int)(256
* (v->sum_square_error - v->sum_error * v->sum_error / v->count)
/ v->count);
else
v->variance = 0;
}
// Combine 2 variance structures by summing the sum_error, sum_square_error,
// and counts and then calculating the new variance.
void sum_2_variances(var *r, var *a, var *b) {
fill_variance(r, a->sum_square_error + b->sum_square_error,
a->sum_error + b->sum_error, a->count + b->count);
}
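// The split nodes are in raster order: split[0] is the top-left
// quadrant, split[1] top-right, split[2] bottom-left and split[3]
// bottom-right, so horz[] accumulates the top/bottom halves and
// vert[] the left/right halves.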
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
vt_node node;
tree_to_node(data, bsize, &node);
sum_2_variances(&node.vt->horz[0], node.split[0], node.split[1]);
sum_2_variances(&node.vt->horz[1], node.split[2], node.split[3]);
sum_2_variances(&node.vt->vert[0], node.split[0], node.split[2]);
sum_2_variances(&node.vt->vert[1], node.split[1], node.split[3]);
sum_2_variances(&node.vt->none, &node.vt->vert[0], &node.vt->vert[1]);
}
#if PERFORM_RANDOM_PARTITIONING
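// Debug variant: each split type is accepted pseudo-randomly;
// (rand() & 3) < 1 fires with probability 1/4, which exercises the
// partition plumbing independently of the measured variances.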
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
BLOCK_SIZE block_size, int mi_row,
int mi_col, int mi_size) {
VP9_COMMON * const cm = &cpi->common;
vt_node vt;
const int mis = cm->mode_info_stride;
int64_t threshold = 4 * cpi->common.base_qindex * cpi->common.base_qindex;
tree_to_node(data, block_size, &vt);
// split none is available only if we have more than half a block size
// in width and height inside the visible image
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows &&
(rand() & 3) < 1) {
set_block_size(cm, m, block_size, mis, mi_row, mi_col);
return 1;
}
// vertical split is available on all but the bottom border
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
&& (rand() & 3) < 1) {
set_block_size(cm, m, get_subsize(block_size, PARTITION_VERT), mis, mi_row,
mi_col);
return 1;
}
// horizontal split is available on all but the right border
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
&& (rand() & 3) < 1) {
set_block_size(cm, m, get_subsize(block_size, PARTITION_HORZ), mis, mi_row,
mi_col);
return 1;
}
return 0;
}
#else // !PERFORM_RANDOM_PARTITIONING
static int set_vt_partitioning(VP9_COMP *cpi, void *data, MODE_INFO **m,
BLOCK_SIZE bsize, int mi_row,
int mi_col, int mi_size) {
VP9_COMMON * const cm = &cpi->common;
vt_node vt;
const int mis = cm->mode_info_stride;
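// The cutoff scales with the quantizer: coarser quantization hides
// more residual variance inside a single large block, so bigger
// partitions are accepted more readily at high base_qindex.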
int64_t threshold = 50 * cpi->common.base_qindex;
tree_to_node(data, bsize, &vt);
// split none is available only if we have more than half a block size
// in width and height inside the visible image
if (mi_col + mi_size < cm->mi_cols && mi_row + mi_size < cm->mi_rows
&& vt.vt->none.variance < threshold) {
set_block_size(cm, m, bsize, mis, mi_row, mi_col);
return 1;
}
// vertical split is available on all but the bottom border
if (mi_row + mi_size < cm->mi_rows && vt.vt->vert[0].variance < threshold
&& vt.vt->vert[1].variance < threshold) {
set_block_size(cm, m, get_subsize(bsize, PARTITION_VERT), mis, mi_row,
mi_col);
return 1;
}
// horizontal split is available on all but the right border
if (mi_col + mi_size < cm->mi_cols && vt.vt->horz[0].variance < threshold
&& vt.vt->horz[1].variance < threshold) {
set_block_size(cm, m, get_subsize(bsize, PARTITION_HORZ), mis, mi_row,
mi_col);
return 1;
}
return 0;
}
#endif // PERFORM_RANDOM_PARTITIONING
static void choose_partitioning(VP9_COMP *cpi, MODE_INFO **mi_8x8,
int mi_row, int mi_col) {
VP9_COMMON * const cm = &cpi->common;
MACROBLOCK *x = &cpi->mb;
MACROBLOCKD *xd = &cpi->mb.e_mbd;
const int mis = cm->mode_info_stride;
// TODO(JBB): More experimentation or testing of this threshold.
int64_t threshold = 4;
int i, j, k;
v64x64 vt;
unsigned char * s;
int sp;
const unsigned char * d;
int dp;
int pixels_wide = 64, pixels_high = 64;
vp9_zero(vt);
set_offsets(cpi, mi_row, mi_col, BLOCK_64X64);
if (xd->mb_to_right_edge < 0)
pixels_wide += (xd->mb_to_right_edge >> 3);
if (xd->mb_to_bottom_edge < 0)
pixels_high += (xd->mb_to_bottom_edge >> 3);
s = x->plane[0].src.buf;
sp = x->plane[0].src.stride;
// TODO(JBB): Clearly the higher the quantizer the fewer partitions we want
// but this needs more experimentation.
threshold = threshold * cpi->common.base_qindex * cpi->common.base_qindex;
d = vp9_64x64_zeros;
dp = 64;
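// Measured against vp9_64x64_zeros, the "variance" of each 8x8 block
// is simply its source energy; for inter frames the reference is
// replaced below with a LAST_FRAME prediction so the tree measures
// residual variance instead.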
if (cm->frame_type != KEY_FRAME) {
int_mv nearest_mv, near_mv;
const int idx = cm->ref_frame_map[get_ref_frame_idx(cpi, LAST_FRAME)];
YV12_BUFFER_CONFIG *ref_fb = &cm->yv12_fb[idx];
YV12_BUFFER_CONFIG *second_ref_fb = NULL;
setup_pre_planes(xd, 0, ref_fb, mi_row, mi_col,
&xd->scale_factor[0]);
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
xd->this_mi->mbmi.ref_frame[0] = LAST_FRAME;
xd->this_mi->mbmi.sb_type = BLOCK_64X64;
vp9_find_best_ref_mvs(xd,
mi_8x8[0]->mbmi.ref_mvs[mi_8x8[0]->mbmi.ref_frame[0]],
&nearest_mv, &near_mv);
xd->this_mi->mbmi.mv[0] = nearest_mv;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
}
// Fill in the entire tree of 8x8 variances for splits.
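// (i & 1) selects the left/right half and (i >> 1) the top/bottom
// half, giving pixel offsets of 0 or 32 within the 64x64 superblock;
// the same pattern recurses with shifts of 4 and 3 at the 16x16 and
// 8x8 levels.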
for (i = 0; i < 4; i++) {
const int x32_idx = ((i & 1) << 5);
const int y32_idx = ((i >> 1) << 5);
for (j = 0; j < 4; j++) {
const int x16_idx = x32_idx + ((j & 1) << 4);
const int y16_idx = y32_idx + ((j >> 1) << 4);
v16x16 *vst = &vt.split[i].split[j];
for (k = 0; k < 4; k++) {
int x_idx = x16_idx + ((k & 1) << 3);
int y_idx = y16_idx + ((k >> 1) << 3);
unsigned int sse = 0;
int sum = 0;
if (x_idx < pixels_wide && y_idx < pixels_high)
vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
d + y_idx * dp + x_idx, dp, &sse, &sum);
fill_variance(&vst->split[k].vt.none, sse, sum, 64);
}
}
}
// Fill the rest of the variance tree by summing the split partition
// values.
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
}
fill_variance_tree(&vt.split[i], BLOCK_32X32);
}
fill_variance_tree(&vt, BLOCK_64X64);
// Now go through the entire structure, splitting every block size until
// we get to one that's got a variance lower than our threshold, or we
// hit 8x8.
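// The mi_size arguments (4, 2, 1) are half of each block's width in
// 8x8 mode-info units, matching the "more than half a block size
// inside the visible image" test in set_vt_partitioning().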
if (!set_vt_partitioning(cpi, &vt, mi_8x8, BLOCK_64X64, mi_row, mi_col,
4)) {
for (i = 0; i < 4; ++i) {
const int x32_idx = ((i & 1) << 2);
const int y32_idx = ((i >> 1) << 2);
if (!set_vt_partitioning(cpi, &vt.split[i], mi_8x8, BLOCK_32X32,
(mi_row + y32_idx), (mi_col + x32_idx), 2)) {
for (j = 0; j < 4; ++j) {
const int x16_idx = ((j & 1) << 1);
const int y16_idx = ((j >> 1) << 1);
if (!set_vt_partitioning(cpi, &vt.split[i].split[j], mi_8x8,
BLOCK_16X16,
(mi_row + y32_idx + y16_idx),
(mi_col + x32_idx + x16_idx), 1)) {
for (k = 0; k < 4; ++k) {
const int x8_idx = (k & 1);
const int y8_idx = (k >> 1);
set_block_size(cm, mi_8x8, BLOCK_8X8, mis,
(mi_row + y32_idx + y16_idx + y8_idx),
(mi_col + x32_idx + x16_idx + x8_idx));
}
}
}
}
}
}
}
static void rd_use_partition(VP9_COMP *cpi, MODE_INFO **mi_8x8,
TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist,
@@ -2060,7 +1743,7 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
if (cpi->sf.reference_masking)
rd_pick_reference_frame(cpi, mi_row, mi_col);
if (cpi->sf.partition_by_variance || cpi->sf.use_lastframe_partitioning ||
if (cpi->sf.use_lastframe_partitioning ||
cpi->sf.use_one_partition_size_always) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
@@ -2072,10 +1755,6 @@ static void encode_sb_row(VP9_COMP *cpi, int mi_row, TOKENEXTRA **tp,
set_partitioning(cpi, mi_8x8, mi_row, mi_col);
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else if (cpi->sf.partition_by_variance) {
choose_partitioning(cpi, cm->mi_grid_visible, mi_row, mi_col);
rd_use_partition(cpi, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else {
if ((cpi->common.current_video_frame
% cpi->sf.last_partitioning_redo_frequency) == 0

View file

@@ -703,7 +703,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_motion_search = 0;
sf->use_avoid_tested_higherror = 0;
sf->reference_masking = 0;
sf->partition_by_variance = 0;
sf->use_one_partition_size_always = 0;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
@@ -826,8 +825,12 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->mode_skip_start = 6;
}
if (speed == 3) {
sf->less_rectangular_check = 1;
sf->use_square_partition_only = 1;
sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->partition_by_variance = 1;
sf->use_lastframe_partitioning = 1;
sf->adjust_partitioning_from_last_frame = 1;
sf->last_partitioning_redo_frequency = 3;
sf->tx_size_search_method = ((cpi->common.frame_type == KEY_FRAME ||
cpi->common.intra_only ||
cpi->common.show_frame == 0) ?
@@ -839,17 +842,23 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
FLAG_SKIP_COMP_REFMISMATCH |
FLAG_SKIP_INTRA_LOWVAR |
FLAG_EARLY_TERMINATE;
sf->intra_y_mode_mask = INTRA_DC_ONLY;
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
sf->use_uv_intra_rd_estimate = 1;
sf->use_rd_breakout = 1;
sf->skip_encode_sb = 1;
sf->use_lp32x32fdct = 1;
sf->adaptive_motion_search = 1;
sf->using_small_partition_info = 0;
sf->disable_splitmv = 1;
sf->auto_mv_step_size = 1;
sf->search_method = BIGDIA;
sf->subpel_iters_per_step = 1;
sf->use_fast_lpf_pick = 1;
sf->auto_min_max_partition_size = 1;
sf->auto_min_max_partition_interval = 2;
sf->disable_split_var_thresh = 64;
sf->disable_filter_search_var_thresh = 64;
sf->intra_y_mode_mask = INTRA_DC_ONLY;
sf->intra_uv_mode_mask = INTRA_DC_ONLY;
sf->use_fast_coef_updates = 2;
sf->mode_skip_start = 6;
}
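For context, these per-speed presets are selected at run time through the encoder's cpu-used control. A minimal sketch against the public libvpx API (hypothetical helper name; error handling omitted; assumes an already-initialized encoder context):

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

/* Request the speed-3 preset: VP8E_SET_CPUUSED is the control that
 * feeds the speed level consumed by vp9_set_speed_features(). */
static vpx_codec_err_t use_speed3(vpx_codec_ctx_t *codec) {
  return vpx_codec_control(codec, VP8E_SET_CPUUSED, 3);
}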

View file

@@ -250,7 +250,6 @@ typedef struct {
TX_SIZE_SEARCH_METHOD tx_size_search_method;
int use_lp32x32fdct;
int use_avoid_tested_higherror;
int partition_by_variance;
int use_one_partition_size_always;
int less_rectangular_check;
int use_square_partition_only;