Refactor sub8x8 tx size RD for daala-dist
For a tx size RD search with partition size >= 8x8 and tx size < 8x8, the daala-dist function is now applied once to the whole partition after all tx blocks are encoded, instead of to each 8x8 sub-block of the partition.

Change-Id: I27d9e2960aa641f550096e32ebcdf8dfb4de79a6
This commit is contained in:
Родитель
2751192a2b
Коммит
30a2c5f245
|
@ -178,49 +178,6 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
|
|||
}
|
||||
#endif
|
||||
|
||||
#if CONFIG_DAALA_DIST
|
||||
// Walks the transform blocks of plane 0 in raster order and invokes two
// callbacks:
//   visit    - called for every transform block at (r, c).
//   mi_visit - called right after visit() whenever the row/column test below
//              passes, i.e. (presumably) when the last transform block of an
//              8x8 area has just been visited; it receives the row/column
//              stepped back by one transform unit in each direction that is
//              not already 8 pixels long.
// Both callbacks receive plane 0, the running block index i, plane_bsize,
// tx_size and the opaque arg. Only valid for plane block sizes >= BLOCK_8X8
// with TX_4X4, TX_4X8 or TX_8X4 (enforced by the asserts below).
void av1_foreach_8x8_transformed_block_in_yplane(
|
||||
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit,
|
||||
foreach_transformed_block_visitor mi_visit, void *arg) {
|
||||
const struct macroblockd_plane *const pd = &xd->plane[0];
|
||||
// block and transform sizes, in number of 4x4 blocks log 2 ("*_b")
|
||||
// 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8
|
||||
// transform size varies per plane, look it up in a common way.
|
||||
const TX_SIZE tx_size = get_tx_size(0, xd);
|
||||
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
|
||||
const uint8_t txw_unit = tx_size_wide_unit[tx_size];
|
||||
const uint8_t txh_unit = tx_size_high_unit[tx_size];
|
||||
// Amount the block index i advances per visited transform block.
const int step = txw_unit * txh_unit;
|
||||
int i = 0, r, c;
|
||||
|
||||
// If mb_to_right_edge is < 0 we are in a situation in which
|
||||
// the current block size extends into the UMV and we won't
|
||||
// visit the sub blocks that are wholly within the UMV.
|
||||
const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0);
|
||||
const int max_blocks_high = max_block_high(xd, plane_bsize, 0);
|
||||
// 1 when the transform is already 8 pixels tall/wide; in that direction the
// parity test in the loop is bypassed and no coordinate step-back is applied.
const int skip_check_r = tx_size_high[tx_size] == 8 ? 1 : 0;
|
||||
const int skip_check_c = tx_size_wide[tx_size] == 8 ? 1 : 0;
|
||||
|
||||
assert(plane_bsize >= BLOCK_8X8);
|
||||
assert(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4);
|
||||
|
||||
// Keep track of the row and column of the blocks we use so that we know
|
||||
// if we are in the unrestricted motion border.
|
||||
for (r = 0; r < max_blocks_high; r += txh_unit) {
|
||||
// Skip visiting the sub blocks that are wholly within the UMV.
|
||||
for (c = 0; c < max_blocks_wide; c += txw_unit) {
|
||||
visit(0, i, r, c, plane_bsize, tx_size, arg);
|
||||
// Call whenever each 8x8 tx block is done
|
||||
if (((r & txh_unit) || skip_check_r) && ((c & txw_unit) || skip_check_c))
|
||||
mi_visit(0, i, r - (1 - skip_check_r) * txh_unit,
|
||||
c - (1 - skip_check_c) * txw_unit, plane_bsize, tx_size, arg);
|
||||
i += step;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !CONFIG_PVQ || CONFIG_VAR_TX
|
||||
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
|
||||
int plane, TX_SIZE tx_size, int has_eob, int aoff,
|
||||
|
|
|
@ -1166,13 +1166,6 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
|
|||
void *arg);
|
||||
#endif
|
||||
|
||||
#if CONFIG_DAALA_DIST
|
||||
// Declaration of the luma-plane block walker that takes two visitor
// callbacks (visit and mi_visit) plus an opaque arg forwarded to both.
// Guarded by CONFIG_DAALA_DIST.
void av1_foreach_8x8_transformed_block_in_yplane(
|
||||
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
|
||||
foreach_transformed_block_visitor visit,
|
||||
foreach_transformed_block_visitor mi_visit, void *arg);
|
||||
#endif
|
||||
|
||||
#if CONFIG_COEF_INTERLEAVE
|
||||
static INLINE int get_max_4x4_size(int num_4x4, int mb_to_edge,
|
||||
int subsampling) {
|
||||
|
|
|
@ -207,11 +207,6 @@ struct macroblock {
|
|||
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
|
||||
#endif
|
||||
#if CONFIG_DAALA_DIST
|
||||
// Keep rate of each 4x4 block in the current macroblock during RDO
|
||||
// This is needed when using the 8x8 Daala distortion metric during RDO,
|
||||
// because it evaluates distortion in a different order than the underlying
|
||||
// 4x4 blocks are coded.
|
||||
int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
|
||||
#if CONFIG_CB4X4
|
||||
DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
|
||||
#endif // CONFIG_CB4X4
|
||||
|
|
|
@ -1562,16 +1562,6 @@ CALCULATE_RD : {}
|
|||
// TODO(jingning): temporarily enabled only for luma component
|
||||
rd = AOMMIN(rd1, rd2);
|
||||
|
||||
#if CONFIG_DAALA_DIST
|
||||
if (plane == 0 && plane_bsize >= BLOCK_8X8 &&
|
||||
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) {
|
||||
this_rd_stats.dist = 0;
|
||||
this_rd_stats.sse = 0;
|
||||
rd = 0;
|
||||
x->rate_4x4[block] = this_rd_stats.rate;
|
||||
}
|
||||
#endif // CONFIG_DAALA_DIST
|
||||
|
||||
#if !CONFIG_PVQ
|
||||
this_rd_stats.skip &= !x->plane[plane].eobs[block];
|
||||
#else
|
||||
|
@ -1581,111 +1571,74 @@ CALCULATE_RD : {}
|
|||
|
||||
args->this_rd += rd;
|
||||
|
||||
if (args->this_rd > args->best_rd) {
|
||||
args->exit_early = 1;
|
||||
return;
|
||||
#if CONFIG_DAALA_DIST
|
||||
if (!(plane == 0 && plane_bsize >= BLOCK_8X8 &&
|
||||
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))) {
|
||||
#endif
|
||||
if (args->this_rd > args->best_rd) {
|
||||
args->exit_early = 1;
|
||||
return;
|
||||
}
|
||||
#if CONFIG_DAALA_DIST
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if CONFIG_DAALA_DIST
|
||||
static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row,
|
||||
int blk_col, BLOCK_SIZE plane_bsize,
|
||||
TX_SIZE tx_size, void *arg) {
|
||||
struct rdcost_block_args *args = arg;
|
||||
MACROBLOCK *const x = args->x;
|
||||
static void daala_dist_sub8x8_txfm_rd(MACROBLOCK *x, BLOCK_SIZE bsize,
|
||||
struct rdcost_block_args *args) {
|
||||
MACROBLOCKD *const xd = &x->e_mbd;
|
||||
const struct macroblockd_plane *const pd = &xd->plane[0];
|
||||
const struct macroblock_plane *const p = &x->plane[0];
|
||||
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
|
||||
int64_t rd, rd1, rd2;
|
||||
RD_STATS this_rd_stats;
|
||||
int qm = OD_HVS_QM;
|
||||
int use_activity_masking = 0;
|
||||
|
||||
(void)tx_size;
|
||||
|
||||
assert(plane == 0);
|
||||
assert(plane_bsize >= BLOCK_8X8);
|
||||
#if CONFIG_PVQ
|
||||
use_activity_masking = x->daala_enc.use_activity_masking;
|
||||
#endif // CONFIG_PVQ
|
||||
av1_init_rd_stats(&this_rd_stats);
|
||||
const int src_stride = p->src.stride;
|
||||
const int dst_stride = pd->dst.stride;
|
||||
const int pred_stride = block_size_wide[bsize];
|
||||
const uint8_t *src = &p->src.buf[0];
|
||||
const uint8_t *dst = &pd->dst.buf[0];
|
||||
const int16_t *pred = &pd->pred[0];
|
||||
const int bw = block_size_wide[bsize];
|
||||
const int bh = block_size_high[bsize];
|
||||
|
||||
if (args->exit_early) return;
|
||||
int i, j;
|
||||
int64_t rd, rd1, rd2;
|
||||
int qm = OD_HVS_QM;
|
||||
int use_activity_masking = 0;
|
||||
unsigned int tmp1, tmp2;
|
||||
int qindex = x->qindex;
|
||||
|
||||
{
|
||||
const struct macroblock_plane *const p = &x->plane[plane];
|
||||
struct macroblockd_plane *const pd = &xd->plane[plane];
|
||||
assert((bw & 0x07) == 0);
|
||||
assert((bh & 0x07) == 0);
|
||||
|
||||
const int src_stride = p->src.stride;
|
||||
const int dst_stride = pd->dst.stride;
|
||||
const int diff_stride = block_size_wide[plane_bsize];
|
||||
DECLARE_ALIGNED(16, uint8_t, pred8[MAX_SB_SQUARE]);
|
||||
|
||||
const uint8_t *src =
|
||||
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
|
||||
const uint8_t *dst =
|
||||
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
|
||||
// Pack the bw x bh region of pred (stride pred_stride) into pred8 with a row
// stride of bw: pred8 is handed to av1_daala_dist() with stride bw, so using
// bh as the row stride would scramble the rows for non-square partitions.
for (j = 0; j < bh; j++)
|
||||
for (i = 0; i < bw; i++) pred8[j * bw + i] = pred[j * pred_stride + i];
|
||||
|
||||
unsigned int tmp1, tmp2;
|
||||
int qindex = x->qindex;
|
||||
const int pred_stride = block_size_wide[plane_bsize];
|
||||
const int pred_idx = (blk_row * pred_stride + blk_col)
|
||||
<< tx_size_wide_log2[0];
|
||||
int16_t *pred = &pd->pred[pred_idx];
|
||||
int i, j;
|
||||
const int tx_blk_size = 8;
|
||||
tmp1 = av1_daala_dist(src, src_stride, pred8, bw, bw, bh, qm,
|
||||
use_activity_masking, qindex);
|
||||
tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, bw, bh, qm,
|
||||
use_activity_masking, qindex);
|
||||
|
||||
DECLARE_ALIGNED(16, uint8_t, pred8[8 * 8]);
|
||||
|
||||
for (j = 0; j < tx_blk_size; j++)
|
||||
for (i = 0; i < tx_blk_size; i++)
|
||||
pred8[j * tx_blk_size + i] = pred[j * diff_stride + i];
|
||||
|
||||
tmp1 = av1_daala_dist(src, src_stride, pred8, tx_blk_size, 8, 8, qm,
|
||||
use_activity_masking, qindex);
|
||||
tmp2 = av1_daala_dist(src, src_stride, dst, dst_stride, 8, 8, qm,
|
||||
use_activity_masking, qindex);
|
||||
|
||||
if (!is_inter_block(mbmi)) {
|
||||
this_rd_stats.sse = (int64_t)tmp1 * 16;
|
||||
this_rd_stats.dist = (int64_t)tmp2 * 16;
|
||||
} else {
|
||||
// For inter mode, the decoded pixels are provided in pd->pred,
|
||||
// while the predicted pixels are in dst.
|
||||
this_rd_stats.sse = (int64_t)tmp2 * 16;
|
||||
this_rd_stats.dist = (int64_t)tmp1 * 16;
|
||||
}
|
||||
if (!is_inter_block(mbmi)) {
|
||||
args->rd_stats.sse = (int64_t)tmp1 * 16;
|
||||
args->rd_stats.dist = (int64_t)tmp2 * 16;
|
||||
} else {
|
||||
// For inter mode, the decoded pixels are provided in pd->pred,
|
||||
// while the predicted pixels are in dst.
|
||||
args->rd_stats.sse = (int64_t)tmp2 * 16;
|
||||
args->rd_stats.dist = (int64_t)tmp1 * 16;
|
||||
}
|
||||
|
||||
rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist);
|
||||
if (args->this_rd + rd > args->best_rd) {
|
||||
args->exit_early = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
|
||||
const uint8_t txw_unit = tx_size_wide_unit[tx_size];
|
||||
const uint8_t txh_unit = tx_size_high_unit[tx_size];
|
||||
const int step = txw_unit * txh_unit;
|
||||
int offset_h = tx_size_high_unit[TX_4X4];
|
||||
// The rate of the current 8x8 block is the sum of four 4x4 blocks in it.
|
||||
this_rd_stats.rate =
|
||||
x->rate_4x4[block - max_blocks_wide * offset_h - step] +
|
||||
x->rate_4x4[block - max_blocks_wide * offset_h] +
|
||||
x->rate_4x4[block - step] + x->rate_4x4[block];
|
||||
}
|
||||
rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist);
|
||||
rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse);
|
||||
rd1 = RDCOST(x->rdmult, x->rddiv, args->rd_stats.rate, args->rd_stats.dist);
|
||||
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->rd_stats.sse);
|
||||
rd = AOMMIN(rd1, rd2);
|
||||
|
||||
args->rd_stats.dist += this_rd_stats.dist;
|
||||
args->rd_stats.sse += this_rd_stats.sse;
|
||||
|
||||
args->this_rd += rd;
|
||||
|
||||
if (args->this_rd > args->best_rd) {
|
||||
args->exit_early = 1;
|
||||
return;
|
||||
}
|
||||
args->rd_stats.rdcost = rd;
|
||||
args->this_rd = rd;
|
||||
}
|
||||
#endif // CONFIG_DAALA_DIST
|
||||
|
||||
|
@ -1707,15 +1660,13 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi,
|
|||
|
||||
av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
|
||||
|
||||
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
|
||||
&args);
|
||||
#if CONFIG_DAALA_DIST
|
||||
if (plane == 0 && bsize >= BLOCK_8X8 &&
|
||||
if (!args.exit_early && plane == 0 && bsize >= BLOCK_8X8 &&
|
||||
(tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4))
|
||||
av1_foreach_8x8_transformed_block_in_yplane(
|
||||
xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args);
|
||||
else
|
||||
#endif // CONFIG_DAALA_DIST
|
||||
av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
|
||||
&args);
|
||||
daala_dist_sub8x8_txfm_rd(x, bsize, &args);
|
||||
#endif
|
||||
|
||||
if (args.exit_early) {
|
||||
av1_invalid_rd_stats(rd_stats);
|
||||
|
|
Загрузка…
Ссылка в новой задаче