Refactor sub8x8 tx size RD for daala-dist

For a tx size RD search with partition size >= 8x8 and tx size < 8x8,
the daala-dist function is now applied once to the whole partition, after all
tx blocks are encoded, instead of to each 8x8 sub-block of the partition.

Change-Id: I27d9e2960aa641f550096e32ebcdf8dfb4de79a6
This commit is contained in:
Yushin Cho 2017-06-07 14:18:54 -07:00
Родитель 2751192a2b
Коммит 30a2c5f245
4 изменённых файлов: 54 добавлений и 158 удалений

Просмотреть файл

@ -178,49 +178,6 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
}
#endif
#if CONFIG_DAALA_DIST
// Walks the transform blocks of plane 0 row by row, calling `visit` on every
// transform block and additionally calling `mi_visit` each time the transform
// blocks that make up one 8x8 area have all been visited.
//
// xd:       macroblock descriptor; plane[0] and the plane-0 tx size are read.
// bsize:    block size; the derived plane-0 block size must be >= BLOCK_8X8.
// visit:    callback invoked for each transform block with its running block
//           index and its (row, column) position.
// mi_visit: callback invoked once per completed 8x8 area. It receives the
//           block index of the transform block that completed the area, and
//           a (row, column) stepped back by one transform unit in each
//           dimension in which the transform is not 8 samples long.
// arg:      opaque pointer forwarded unchanged to both callbacks.
//
// Only TX_4X4, TX_4X8 and TX_8X4 are accepted (checked by assert).
void av1_foreach_8x8_transformed_block_in_yplane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit,
foreach_transformed_block_visitor mi_visit, void *arg) {
const struct macroblockd_plane *const pd = &xd->plane[0];
// Transform size varies per plane; look it up for plane 0, together with the
// block size of that plane.
const TX_SIZE tx_size = get_tx_size(0, xd);
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
// Transform width and height in the units used for the row/column counters
// below; their product is the block-index increment per transform block.
const uint8_t txw_unit = tx_size_wide_unit[tx_size];
const uint8_t txh_unit = tx_size_high_unit[tx_size];
const int step = txw_unit * txh_unit;
int i = 0, r, c;
// If mb_to_right_edge is < 0 we are in a situation in which
// the current block size extends into the UMV and we won't
// visit the sub blocks that are wholly within the UMV.
const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
// When the transform is already 8 samples long in a dimension, a single
// transform block spans the 8x8 area in that dimension, so the parity test
// on r (or c) below is bypassed for it.
const int skip_check_r = tx_size_high[tx_size] == 8 ? 1 : 0;
const int skip_check_c = tx_size_wide[tx_size] == 8 ? 1 : 0;
assert(plane_bsize >= BLOCK_8X8);
assert(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4);
// Keep track of the row and column of the blocks we use so that we know
// if we are in the unrestricted motion border.
for (r = 0; r < max_blocks_high; r += txh_unit) {
// Skip visiting the sub blocks that are wholly within the UMV.
for (c = 0; c < max_blocks_wide; c += txw_unit) {
visit(0, i, r, c, plane_bsize, tx_size, arg);
// Call whenever each 8x8 tx block is done: the current transform block
// must be the second one in every dimension that needs two of them.
if (((r & txh_unit) || skip_check_r) && ((c & txw_unit) || skip_check_c))
mi_visit(0, i, r - (1 - skip_check_r) * txh_unit,
c - (1 - skip_check_c) * txw_unit, plane_bsize, tx_size, arg);
// Advance the running block index past the transform block just visited.
i += step;
}
}
}
#endif
#if !CONFIG_PVQ || CONFIG_VAR_TX
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
int plane, TX_SIZE tx_size, int has_eob, int aoff,

Просмотреть файл

@ -1166,13 +1166,6 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
void *arg);
#endif
#if CONFIG_DAALA_DIST
// Visits every transform block of plane 0 with `visit`, and calls `mi_visit`
// each time the transform blocks covering one 8x8 area have been visited.
// `arg` is forwarded unchanged to both callbacks.
void av1_foreach_8x8_transformed_block_in_yplane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit,
foreach_transformed_block_visitor mi_visit, void *arg);
#endif
#if CONFIG_COEF_INTERLEAVE
static INLINE int get_max_4x4_size(int num_4x4, int mb_to_edge,
int subsampling) {

Просмотреть файл

@ -207,11 +207,6 @@ struct macroblock {
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
#endif
#if CONFIG_DAALA_DIST
// Keep rate of each 4x4 block in the current macroblock during RDO
// This is needed when using the 8x8 Daala distortion metric during RDO,
// because it evaluates distortion in a different order than the underlying
// 4x4 blocks are coded.
int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
#if CONFIG_CB4X4
DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
#endif // CONFIG_CB4X4

Просмотреть файл

@ -1562,16 +1562,6 @@ CALCULATE_RD : {}
// TODO(jingning): temporarily enabled only for luma component
rd = AOMMIN(rd1, rd2);
#if CONFIG_DAALA_DIST
if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
this_rd_stats.dist = 0;
this_rd_stats.sse = 0;
rd = 0;
x->rate_4x4[block] = this_rd_stats.rate;
}
#endif // CONFIG_DAALA_DIST
#if !CONFIG_PVQ
this_rd_stats.skip &= !x->plane[plane].eobs[block];
#else
@ -1581,111 +1571,74 @@ CALCULATE_RD : {}
args->this_rd += rd;
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
#if CONFIG_DAALA_DIST
if (!(plane == 0 && plane_bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) {
#endif
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
}
#if CONFIG_DAALA_DIST
}
#endif
}
#if CONFIG_DAALA_DIST
static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
int blk_col, BLOCK_SIZE plane_bsize,
TX_SIZE tx_size, void *arg) {
struct rdcost_block_args *args = arg;
MACROBLOCK *const x = args->x;
static void daala_dist_sub8x8_txfm_rd(MACROBLOCK *x, BLOCK_SIZE bsize,
struct rdcost_block_args *args) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[0];
const struct macroblock_plane *const p = &x->plane[0];
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
int64_t rd, rd1, rd2;
RD_STATS this_rd_stats;
int qm = OD_HVS_QM;
int use_activity_masking = 0;
(void)tx_size;
assert(plane == 0);
assert(plane_bsize >= BLOCK_8X8);
#if CONFIG_PVQ
use_activity_masking = x->daala_enc.use_activity_masking;
#endif // CONFIG_PVQ
av1_init_rd_stats(&this_rd_stats);
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const int pred_stride = block_size_wide[bsize];
const uint8_t *src = &p->src.buf[0];
const uint8_t *dst = &pd->dst.buf[0];
const int16_t *pred = &pd->pred[0];
const int bw = block_size_wide[bsize];
const int bh = block_size_high[bsize];
if (args->exit_early) return;
int i, j;
int64_t rd, rd1, rd2;
int qm = OD_HVS_QM;
int use_activity_masking = 0;
unsigned int tmp1, tmp2;
int qindex = x->qindex;
{
const struct macroblock_plane *const p = &x->plane[plane];
struct macroblockd_plane *const pd = &xd->plane[plane];
assert((bw & 0x07) == 0);
assert((bh & 0x07) == 0);
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const int diff_stride = block_size_wide[plane_bsize];
DECLARE_ALIGNED(16, uint8_t, pred8[MAX_SB_SQUARE]);
const uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
const uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
// Pack the int16_t samples from pred into the uint8_t buffer pred8 using a
// row stride of bw, which is the stride passed for pred8 to av1_daala_dist()
// below. Indexing rows by bh instead would scramble the layout whenever the
// partition is not square (bw != bh).
for (j = 0; j < bh; j++)
for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * pred_stride + i];
unsigned int tmp1, tmp2;
int qindex = x->qindex;
const int pred_stride = block_size_wide[plane_bsize];
const int pred_idx = (blk_row * pred_stride + blk_col)
<< tx_size_wide_log2[0];
int16_t *pred = &pd->pred[pred_idx];
int i, j;
const int tx_blk_size = 8;
tmp1 = av1_daala_dist(src, src_stride, pred8, bw, bw, bh, qm,
use_activity_masking, qindex);
tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, bw, bh, qm,
use_activity_masking, qindex);
DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
for (j = 0; j < tx_blk_size; j++)
for (i = 0; i < tx_blk_size; i++)
pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
use_activity_masking, qindex);
tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
use_activity_masking, qindex);
if (!is_inter_block(mbmi)) {
this_rd_stats.sse = (int64_t)tmp1 * 16;
this_rd_stats.dist = (int64_t)tmp2 * 16;
} else {
// For inter mode, the decoded pixels are provided in pd->pred,
// while the predicted pixels are in dst.
this_rd_stats.sse = (int64_t)tmp2 * 16;
this_rd_stats.dist = (int64_t)tmp1 * 16;
}
if (!is_inter_block(mbmi)) {
args->rd_stats.sse = (int64_t)tmp1 * 16;
args->rd_stats.dist = (int64_t)tmp2 * 16;
} else {
// For inter mode, the decoded pixels are provided in pd->pred,
// while the predicted pixels are in dst.
args->rd_stats.sse = (int64_t)tmp2 * 16;
args->rd_stats.dist = (int64_t)tmp1 * 16;
}
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
{
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
const uint8_t txw_unit = tx_size_wide_unit[tx_size];
const uint8_t txh_unit = tx_size_high_unit[tx_size];
const int step = txw_unit * txh_unit;
int offset_h = tx_size_high_unit[TX_4X4];
// The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
this_rd_stats.rate =
x->rate_4x4[block - max_blocks_wide * offset_h - step] +
x->rate_4x4[block - max_blocks_wide * offset_h] +
x->rate_4x4[block - step] + x->rate_4x4[block];
}
rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
rd1 = RDCOST(x->rdmult, x->rddiv, args->rd_stats.rate, args->rd_stats.dist);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->rd_stats.sse);
rd = AOMMIN(rd1, rd2);
args->rd_stats.dist += this_rd_stats.dist;
args->rd_stats.sse += this_rd_stats.sse;
args->this_rd += rd;
if (args->this_rd > args->best_rd) {
args->exit_early = 1;
return;
}
args->rd_stats.rdcost = rd;
args->this_rd = rd;
}
#endif // CONFIG_DAALA_DIST
@ -1707,15 +1660,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
#if CONFIG_DAALA_DIST
if (plane == 0 && bsize >= BLOCK_8X8 &&
if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
av1_foreach_8x8_transformed_block_in_yplane(
xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
else
#endif // CONFIG_DAALA_DIST
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
&args);
daala_dist_sub8x8_txfm_rd(x, bsize, &args);
#endif
if (args.exit_early) {
av1_invalid_rd_stats(rd_stats);