Adds variance-based fixed-size partitioning

Adds a method for determining a fixed-size partition based on
the variance of a 64x64 SB. This method is added to RTC speed 6.
Also fixes a bug in rtc_use_partition() and includes some
refactoring related to the partitioning search, plus some cosmetic changes.

Currently, in CBR mode, the coding efficiency relative to speed 5
is -19% at speed 6 and -55% at speed 7.

Change-Id: I057e04125a8b765906bb7d4bf7a36d1e575de7c6
This commit is contained in:
Deb Mukherjee 2014-02-24 15:21:13 -08:00
Родитель 8cc54d576f
Коммит 10bae82510
5 изменённых файлов: 109 добавлений и 51 удалений

Просмотреть файл

@ -102,6 +102,24 @@ static unsigned int get_sby_perpixel_variance(VP9_COMP *cpi, MACROBLOCK *x,
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
} }
// Chooses a fixed partition size from the value that
// get_sby_perpixel_variance() reports for cpi->mb at BLOCK_64X64:
// a result below 256 keeps BLOCK_64X64, anything else yields BLOCK_32X32.
static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi) {
  const unsigned int sb_var =
      get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
  return (sb_var < 256) ? BLOCK_64X64 : BLOCK_32X32;
}
// Chooses a fixed partition size from the value that
// get_sby_perpixel_variance() reports for cpi->mb at BLOCK_64X64:
//   result < 1024          -> BLOCK_32X32
//   1024 <= result < 4096  -> BLOCK_16X16
//   result >= 4096         -> BLOCK_8X8
static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi) {
  const unsigned int sb_var =
      get_sby_perpixel_variance(cpi, &cpi->mb, BLOCK_64X64);
  // Test the thresholds from the top down so each case is a plain guard.
  if (sb_var >= 4096)
    return BLOCK_8X8;
  if (sb_var >= 1024)
    return BLOCK_16X16;
  return BLOCK_32X32;
}
// Original activity measure from Tim T's code. // Original activity measure from Tim T's code.
static unsigned int tt_activity_measure(MACROBLOCK *x) { static unsigned int tt_activity_measure(MACROBLOCK *x) {
unsigned int sse; unsigned int sse;
@ -994,7 +1012,7 @@ static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
int index = block_row * mis + block_col; int index = block_row * mis + block_col;
// Find a partition size that fits // Find a partition size that fits
bsize = find_partition_size(cpi->sf.always_this_block_size, bsize = find_partition_size(bsize,
(row8x8_remaining - block_row), (row8x8_remaining - block_row),
(col8x8_remaining - block_col), &bh, &bw); (col8x8_remaining - block_col), &bh, &bw);
mi_8x8[index] = mi_upper_left + index; mi_8x8[index] = mi_upper_left + index;
@ -1918,8 +1936,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
} }
} }
static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) { int mi_row, TOKENEXTRA **tp) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
int mi_col; int mi_col;
@ -1947,19 +1965,32 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
vp9_zero(cpi->mb.pred_mv); vp9_zero(cpi->mb.pred_mv);
if (cpi->sf.use_lastframe_partitioning || if ((cpi->sf.partition_search_type == SEARCH_PARTITION &&
cpi->sf.use_one_partition_size_always ) { cpi->sf.use_lastframe_partitioning) ||
cpi->sf.partition_search_type == FIXED_PARTITION ||
cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
const int idx_str = cm->mode_info_stride * mi_row + mi_col; const int idx_str = cm->mode_info_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
cpi->mb.source_variance = UINT_MAX; cpi->mb.source_variance = UINT_MAX;
if (cpi->sf.use_one_partition_size_always) { if (cpi->sf.partition_search_type == FIXED_PARTITION) {
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
cpi->sf.always_this_block_size); cpi->sf.always_this_block_size);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64, rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1); &dummy_rate, &dummy_dist, 1);
} else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
// TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
// Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
// map to the same thing.
BLOCK_SIZE bsize;
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
bsize = get_rd_var_based_fixed_partition(cpi);
set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1);
} else { } else {
if ((cm->current_video_frame if ((cm->current_video_frame
% cpi->sf.last_partitioning_redo_frequency) == 0 % cpi->sf.last_partitioning_redo_frequency) == 0
@ -2253,12 +2284,12 @@ static INLINE int get_block_col(int b32i, int b16i, int b8i) {
return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1); return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
} }
static void rtc_use_partition(VP9_COMP *cpi, static void nonrd_use_partition(VP9_COMP *cpi,
const TileInfo *const tile, const TileInfo *const tile,
MODE_INFO **mi_8x8, MODE_INFO **mi_8x8,
TOKENEXTRA **tp, int mi_row, int mi_col, TOKENEXTRA **tp, int mi_row, int mi_col,
BLOCK_SIZE bsize, int *rate, int64_t *dist, BLOCK_SIZE bsize, int *rate, int64_t *dist,
int do_recon) { int do_recon) {
VP9_COMMON *const cm = &cpi->common; VP9_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->mb; MACROBLOCK *const x = &cpi->mb;
MACROBLOCKD *const xd = &cpi->mb.e_mbd; MACROBLOCKD *const xd = &cpi->mb.e_mbd;
@ -2271,8 +2302,8 @@ static void rtc_use_partition(VP9_COMP *cpi,
int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row); int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col); int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
int mi_8x8_width = num_8x8_blocks_wide_lookup[bsize]; int bw = num_8x8_blocks_wide_lookup[bsize];
int mi_8x8_hight = num_8x8_blocks_high_lookup[bsize]; int bh = num_8x8_blocks_high_lookup[bsize];
int brate; int brate;
int64_t bdist; int64_t bdist;
@ -2280,14 +2311,13 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist = 0; *dist = 0;
// find prediction mode for each 8x8 block // find prediction mode for each 8x8 block
for (br = 0; br < rows; br += mi_8x8_hight) { for (br = 0; br < rows; br += bh) {
for (bc = 0; bc < cols; bc += mi_8x8_width) { for (bc = 0; bc < cols; bc += bw) {
int row = mi_row + br; int row = mi_row + br;
int col = mi_col + bc; int col = mi_col + bc;
int bh = 0, bw = 0;
BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc, BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
&bh, &bw); &bh, &bw);
set_offsets(cpi, tile, row, col, bs); set_offsets(cpi, tile, row, col, bs);
if (cm->frame_type != KEY_FRAME) if (cm->frame_type != KEY_FRAME)
@ -2299,8 +2329,9 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist += bdist; *dist += bdist;
for (j = 0; j < bh; ++j) for (j = 0; j < bh; ++j)
for (i = 0; i < bw; ++i) for (i = 0; i < bw; ++i) {
xd->mi_8x8[j * mis + i] = xd->mi_8x8[0]; xd->mi_8x8[j * mis + i] = xd->mi_8x8[0];
}
} }
} }
@ -2310,8 +2341,8 @@ static void rtc_use_partition(VP9_COMP *cpi,
*dist = chosen_dist; *dist = chosen_dist;
} }
static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile, static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
int mi_row, TOKENEXTRA **tp) { int mi_row, TOKENEXTRA **tp) {
VP9_COMMON * const cm = &cpi->common; VP9_COMMON * const cm = &cpi->common;
int mi_col; int mi_col;
@ -2329,9 +2360,21 @@ static void encode_rtc_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
cpi->mb.source_variance = UINT_MAX; cpi->mb.source_variance = UINT_MAX;
rtc_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, if (cpi->sf.partition_search_type == FIXED_PARTITION) {
cpi->sf.always_this_block_size, nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
&dummy_rate, &dummy_dist, 1); cpi->sf.always_this_block_size,
&dummy_rate, &dummy_dist, 1);
} else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
// TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
// Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
// map to the same thing.
BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
bsize, &dummy_rate, &dummy_dist, 1);
} else {
assert(0);
}
} }
} }
// end RTC play code // end RTC play code
@ -2387,7 +2430,7 @@ static void encode_frame_internal(VP9_COMP *cpi) {
set_prev_mi(cm); set_prev_mi(cm);
if (cpi->sf.use_pick_mode) { if (cpi->sf.use_nonrd_pick_mode) {
// Initialize internal buffer pointers for rtc coding, where non-RD // Initialize internal buffer pointers for rtc coding, where non-RD
// mode decision is used and hence no buffer pointer swap needed. // mode decision is used and hence no buffer pointer swap needed.
int i; int i;
@ -2423,10 +2466,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
vp9_tile_init(&tile, cm, tile_row, tile_col); vp9_tile_init(&tile, cm, tile_row, tile_col);
for (mi_row = tile.mi_row_start; for (mi_row = tile.mi_row_start;
mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) {
if (cpi->sf.use_pick_mode) if (cpi->sf.use_nonrd_pick_mode)
encode_rtc_sb_row(cpi, &tile, mi_row, &tp); encode_nonrd_sb_row(cpi, &tile, mi_row, &tp);
else else
encode_sb_row(cpi, &tile, mi_row, &tp); encode_rd_sb_row(cpi, &tile, mi_row, &tp);
} }
cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old); cpi->tok_count[tile_row][tile_col] = (unsigned int)(tp - tp_old);
assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols)); assert(tp - cpi->tok <= get_token_alloc(cm->mb_rows, cm->mb_cols));
@ -2689,7 +2732,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 && x->skip_recode = !x->select_txfm_size && mbmi->sb_type >= BLOCK_8X8 &&
(cpi->oxcf.aq_mode != COMPLEXITY_AQ) && (cpi->oxcf.aq_mode != COMPLEXITY_AQ) &&
!cpi->sf.use_pick_mode; !cpi->sf.use_nonrd_pick_mode;
x->skip_optimize = ctx->is_coded; x->skip_optimize = ctx->is_coded;
ctx->is_coded = 1; ctx->is_coded = 1;
x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;

Просмотреть файл

@ -717,7 +717,7 @@ static void set_good_speed_feature(VP9_COMMON *cm,
} }
if (speed >= 5) { if (speed >= 5) {
sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
sf->use_one_partition_size_always = 1; sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_16X16; sf->always_this_block_size = BLOCK_16X16;
sf->tx_size_search_method = frame_is_intra_only(cm) ? sf->tx_size_search_method = frame_is_intra_only(cm) ?
USE_FULL_RD : USE_LARGESTALL; USE_FULL_RD : USE_LARGESTALL;
@ -863,12 +863,12 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
sf->search_method = FAST_HEX; sf->search_method = FAST_HEX;
} }
if (speed >= 6) { if (speed >= 6) {
sf->use_one_partition_size_always = 1; sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
sf->always_this_block_size = BLOCK_32X32;
} }
if (speed >= 7) { if (speed >= 7) {
sf->partition_search_type = FIXED_PARTITION;
sf->always_this_block_size = BLOCK_16X16; sf->always_this_block_size = BLOCK_16X16;
sf->use_pick_mode = 1; sf->use_nonrd_pick_mode = 1;
} }
} }
@ -906,7 +906,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->adaptive_motion_search = 0; sf->adaptive_motion_search = 0;
sf->adaptive_pred_interp_filter = 0; sf->adaptive_pred_interp_filter = 0;
sf->reference_masking = 0; sf->reference_masking = 0;
sf->use_one_partition_size_always = 0; sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0; sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0; sf->use_square_partition_only = 0;
sf->auto_min_max_partition_size = NOT_IN_USE; sf->auto_min_max_partition_size = NOT_IN_USE;
@ -928,7 +928,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
sf->use_fast_lpf_pick = 0; sf->use_fast_lpf_pick = 0;
sf->use_fast_coef_updates = 0; sf->use_fast_coef_updates = 0;
sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set
sf->use_pick_mode = 0; sf->use_nonrd_pick_mode = 0;
sf->encode_breakout_thresh = 0; sf->encode_breakout_thresh = 0;
switch (cpi->oxcf.mode) { switch (cpi->oxcf.mode) {
@ -2900,7 +2900,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) {
vp9_save_coding_context(cpi); vp9_save_coding_context(cpi);
cpi->dummy_packing = 1; cpi->dummy_packing = 1;
if (!cpi->sf.use_pick_mode) if (!cpi->sf.use_nonrd_pick_mode)
vp9_pack_bitstream(cpi, dest, size); vp9_pack_bitstream(cpi, dest, size);
rc->projected_frame_size = (int)(*size) << 3; rc->projected_frame_size = (int)(*size) << 3;

Просмотреть файл

@ -218,6 +218,22 @@ typedef enum {
ENCODE_BREAKOUT_LIMITED = 2 ENCODE_BREAKOUT_LIMITED = 2
} ENCODE_BREAKOUT_TYPE; } ENCODE_BREAKOUT_TYPE;
// Strategy used to decide the partitioning.
typedef enum {
  // Partitions are searched using the RD/NONRD criterion.
  SEARCH_PARTITION = 0,

  // A single fixed partition size is always used.
  FIXED_PARTITION = 1,

  // One fixed partition size per 64X64 SB, with the size determined
  // from the source variance.
  VAR_BASED_FIXED_PARTITION = 2,

  // An arbitrary partitioning scheme within a 64X64 SB, based on the
  // source variance.
  VAR_BASED_PARTITION = 3
} PARTITION_SEARCH_TYPE;
typedef struct { typedef struct {
// Frame level coding parameter update // Frame level coding parameter update
int frame_parameter_update; int frame_parameter_update;
@ -304,16 +320,6 @@ typedef struct {
// TODO(JBB): remove this as its no longer used. // TODO(JBB): remove this as its no longer used.
// If set partition size will always be always_this_block_size.
int use_one_partition_size_always;
// Skip rectangular partition test when partition type none gives better
// rd than partition type split.
int less_rectangular_check;
// Disable testing non square partitions. (eg 16x32)
int use_square_partition_only;
// After looking at the first set of modes (set by index here), skip // After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame // checking modes for reference frames that don't match the reference frame
// of the best so far. // of the best so far.
@ -322,9 +328,18 @@ typedef struct {
// TODO(JBB): Remove this. // TODO(JBB): Remove this.
int reference_masking; int reference_masking;
// Used in conjunction with use_one_partition_size_always. PARTITION_SEARCH_TYPE partition_search_type;
// Used if partition_search_type = FIXED_PARTITION
BLOCK_SIZE always_this_block_size; BLOCK_SIZE always_this_block_size;
// Skip rectangular partition test when partition type none gives better
// rd than partition type split.
int less_rectangular_check;
// Disable testing non square partitions. (eg 16x32)
int use_square_partition_only;
// Sets min and max partition sizes for this 64x64 region based on the // Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor. // same 64x64 in last encoded frame, and the left and above neighbor.
AUTO_MIN_MAX_MODE auto_min_max_partition_size; AUTO_MIN_MAX_MODE auto_min_max_partition_size;
@ -396,7 +411,7 @@ typedef struct {
int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced int use_fast_coef_updates; // 0: 2-loop, 1: 1-loop, 2: 1-loop reduced
// This flag controls the use of non-RD mode decision. // This flag controls the use of non-RD mode decision.
int use_pick_mode; int use_nonrd_pick_mode;
// This variable sets the encode_breakout threshold. Currently, it is only // This variable sets the encode_breakout threshold. Currently, it is only
// enabled in real time mode. // enabled in real time mode.

Просмотреть файл

@ -1041,7 +1041,7 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi,
// JBB : This is realtime mode. In real time mode the first frame // JBB : This is realtime mode. In real time mode the first frame
// should be larger. Q of 0 is disabled because we force tx size to be // should be larger. Q of 0 is disabled because we force tx size to be
// 16x16... // 16x16...
if (cpi->sf.use_pick_mode) { if (cpi->sf.use_nonrd_pick_mode) {
if (cpi->common.current_video_frame == 0) if (cpi->common.current_video_frame == 0)
q /= 3; q /= 3;
if (q == 0) if (q == 0)

Просмотреть файл

@ -295,7 +295,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
set_block_thresholds(cpi); set_block_thresholds(cpi);
if (!cpi->sf.use_pick_mode) { if (!cpi->sf.use_nonrd_pick_mode) {
fill_token_costs(x->token_costs, cm->fc.coef_probs); fill_token_costs(x->token_costs, cm->fc.coef_probs);
for (i = 0; i < PARTITION_CONTEXTS; i++) for (i = 0; i < PARTITION_CONTEXTS; i++)
@ -303,7 +303,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
vp9_partition_tree); vp9_partition_tree);
} }
if (!cpi->sf.use_pick_mode || (cm->current_video_frame & 0x07) == 1) { if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
fill_mode_costs(cpi); fill_mode_costs(cpi);
if (!frame_is_intra_only(cm)) { if (!frame_is_intra_only(cm)) {