Check in the final code implementation of the parallel deblocking proposal from Intel. The code changes have been reviewed and approved.
Change-Id: I4a3cdb939b7b96a3aa27f6a00da7a0e73222f3f3
This commit is contained in:
Родитель
8234d97a40
Коммит
42ff3881ac
|
@ -1151,9 +1151,10 @@ static void highbd_filter_selectively_vert(
|
|||
}
|
||||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
|
||||
void av1_filter_block_plane_non420(AV1_COMMON *cm,
|
||||
void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
|
||||
struct macroblockd_plane *plane,
|
||||
MODE_INFO **mi_8x8, int mi_row, int mi_col) {
|
||||
MODE_INFO **mi_8x8, int mi_row,
|
||||
int mi_col) {
|
||||
const int ss_x = plane->subsampling_x;
|
||||
const int ss_y = plane->subsampling_y;
|
||||
const int row_step = 1 << ss_y;
|
||||
|
@ -1274,8 +1275,24 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
|
|||
mi_8x8 += row_step_stride;
|
||||
}
|
||||
|
||||
// Now do horizontal pass
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
}
|
||||
|
||||
void av1_filter_block_plane_non420_hor(AV1_COMMON *cm,
|
||||
struct macroblockd_plane *plane,
|
||||
int mi_row) {
|
||||
const int ss_y = plane->subsampling_y;
|
||||
const int row_step = 1 << ss_y;
|
||||
struct buf_2d *const dst = &plane->dst;
|
||||
uint8_t *const dst0 = dst->buf;
|
||||
unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
|
||||
unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
|
||||
unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
|
||||
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
|
||||
uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
|
||||
int r;
|
||||
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
|
||||
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
|
||||
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
|
||||
|
@ -1311,9 +1328,12 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
|
|||
#endif // CONFIG_AOM_HIGHBITDEPTH
|
||||
dst->buf += 8 * dst->stride;
|
||||
}
|
||||
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
}
|
||||
|
||||
void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
|
||||
void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm) {
|
||||
struct buf_2d *const dst = &plane->dst;
|
||||
|
@ -1356,13 +1376,22 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
|
|||
mask_4x4 >>= 16;
|
||||
mask_4x4_int >>= 16;
|
||||
}
|
||||
|
||||
// Horizontal pass
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
mask_16x16 = lfm->above_y[TX_16X16];
|
||||
mask_8x8 = lfm->above_y[TX_8X8];
|
||||
mask_4x4 = lfm->above_y[TX_4X4];
|
||||
mask_4x4_int = lfm->int_4x4_y;
|
||||
}
|
||||
|
||||
void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm) {
|
||||
struct buf_2d *const dst = &plane->dst;
|
||||
uint8_t *const dst0 = dst->buf;
|
||||
int r;
|
||||
uint64_t mask_16x16 = lfm->above_y[TX_16X16];
|
||||
uint64_t mask_8x8 = lfm->above_y[TX_8X8];
|
||||
uint64_t mask_4x4 = lfm->above_y[TX_4X4];
|
||||
uint64_t mask_4x4_int = lfm->int_4x4_y;
|
||||
|
||||
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
|
||||
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
|
||||
unsigned int mask_16x16_r;
|
||||
|
@ -1402,9 +1431,12 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
|
|||
mask_4x4 >>= 8;
|
||||
mask_4x4_int >>= 8;
|
||||
}
|
||||
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
}
|
||||
|
||||
void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
|
||||
void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm) {
|
||||
struct buf_2d *const dst = &plane->dst;
|
||||
|
@ -1464,17 +1496,27 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
|
|||
}
|
||||
}
|
||||
|
||||
// Horizontal pass
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
mask_16x16 = lfm->above_uv[TX_16X16];
|
||||
mask_8x8 = lfm->above_uv[TX_8X8];
|
||||
mask_4x4 = lfm->above_uv[TX_4X4];
|
||||
}
|
||||
|
||||
void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm) {
|
||||
struct buf_2d *const dst = &plane->dst;
|
||||
uint8_t *const dst0 = dst->buf;
|
||||
int r;
|
||||
uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
|
||||
uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
|
||||
uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
|
||||
#if CONFIG_MISC_FIXES
|
||||
mask_4x4_int = lfm->above_int_4x4_uv;
|
||||
uint64_t mask_4x4_int = lfm->above_int_4x4_uv;
|
||||
#else
|
||||
mask_4x4_int = lfm->int_4x4_uv;
|
||||
uint64_t mask_4x4_int = lfm->int_4x4_uv;
|
||||
#endif
|
||||
|
||||
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
|
||||
|
||||
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
|
||||
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
|
||||
const unsigned int mask_4x4_int_r =
|
||||
|
@ -1516,6 +1558,9 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
|
|||
mask_4x4 >>= 4;
|
||||
mask_4x4_int >>= 4;
|
||||
}
|
||||
|
||||
// restore the buf pointer in case there is additional filter pass.
|
||||
dst->buf = dst0;
|
||||
}
|
||||
|
||||
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
||||
|
@ -1535,6 +1580,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
|||
else
|
||||
path = LF_PATH_SLOW;
|
||||
|
||||
#if CONFIG_PARALLEL_DEBLOCKING
|
||||
// Filter all the vertical edges in the whole frame
|
||||
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
|
||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||
|
||||
|
@ -1542,27 +1589,84 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
|||
int plane;
|
||||
|
||||
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
|
||||
|
||||
// TODO(JBB): Make setup_mask work for non 420.
|
||||
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
|
||||
|
||||
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
|
||||
for (plane = 1; plane < num_planes; ++plane) {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
|
||||
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
|
||||
mi_row, mi_col);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Filter all the horizontal edges in the whole frame
|
||||
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
|
||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
int plane;
|
||||
|
||||
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
|
||||
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
|
||||
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
|
||||
for (plane = 1; plane < num_planes; ++plane) {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else // CONFIG_PARALLEL_DEBLOCKING
|
||||
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
|
||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
int plane;
|
||||
|
||||
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
|
||||
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
|
||||
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
|
||||
for (plane = 1; plane < num_planes; ++plane) {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
|
||||
mi_row, mi_col);
|
||||
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||
}
|
||||
|
||||
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
||||
|
|
|
@ -103,17 +103,27 @@ void av1_setup_mask(struct AV1Common *const cm, const int mi_row,
|
|||
const int mi_col, MODE_INFO **mi_8x8,
|
||||
const int mode_info_stride, LOOP_FILTER_MASK *lfm);
|
||||
|
||||
void av1_filter_block_plane_ss00(struct AV1Common *const cm,
|
||||
void av1_filter_block_plane_ss00_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm);
|
||||
void av1_filter_block_plane_ss00_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm);
|
||||
|
||||
void av1_filter_block_plane_ss11(struct AV1Common *const cm,
|
||||
void av1_filter_block_plane_ss11_ver(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm);
|
||||
void av1_filter_block_plane_ss11_hor(struct AV1Common *const cm,
|
||||
struct macroblockd_plane *const plane,
|
||||
int mi_row, LOOP_FILTER_MASK *lfm);
|
||||
|
||||
void av1_filter_block_plane_non420(struct AV1Common *cm,
|
||||
void av1_filter_block_plane_non420_ver(struct AV1Common *cm,
|
||||
struct macroblockd_plane *plane,
|
||||
MODE_INFO **mi_8x8, int mi_row, int mi_col);
|
||||
MODE_INFO **mi_8x8, int mi_row,
|
||||
int mi_col);
|
||||
void av1_filter_block_plane_non420_hor(struct AV1Common *cm,
|
||||
struct macroblockd_plane *plane,
|
||||
int mi_row);
|
||||
|
||||
void av1_loop_filter_init(struct AV1Common *cm);
|
||||
|
||||
|
|
|
@ -85,29 +85,142 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
|
|||
#endif // CONFIG_MULTITHREAD
|
||||
}
|
||||
|
||||
// Implement row loopfiltering for each thread.
|
||||
static INLINE void thread_loop_filter_rows(
|
||||
const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
|
||||
struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
|
||||
int y_only, AV1LfSync *const lf_sync) {
|
||||
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
|
||||
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
|
||||
int mi_row, mi_col;
|
||||
enum lf_path path;
|
||||
static INLINE enum lf_path get_loop_filter_path(
|
||||
int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
|
||||
if (y_only)
|
||||
path = LF_PATH_444;
|
||||
return LF_PATH_444;
|
||||
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
|
||||
path = LF_PATH_420;
|
||||
return LF_PATH_420;
|
||||
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
|
||||
path = LF_PATH_444;
|
||||
return LF_PATH_444;
|
||||
else
|
||||
path = LF_PATH_SLOW;
|
||||
return LF_PATH_SLOW;
|
||||
}
|
||||
|
||||
for (mi_row = start; mi_row < stop;
|
||||
// Runs the vertical-edge ("_ver") block filter for a single plane.
// Plane 0 always goes through av1_filter_block_plane_ss00_ver. Any other
// plane is dispatched on `path`:
//   LF_PATH_420  -> av1_filter_block_plane_ss11_ver
//   LF_PATH_444  -> av1_filter_block_plane_ss00_ver
//   LF_PATH_SLOW -> av1_filter_block_plane_non420_ver, the only variant that
//                   takes the mode-info pointer (mi + mi_col) and mi_col, and
//                   the only one that is not handed lfm.
static INLINE void loop_filter_block_plane_ver(
|
||||
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
|
||||
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
|
||||
LOOP_FILTER_MASK *lfm) {
|
||||
if (plane == 0) {
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, lfm);
|
||||
} else {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
|
||||
mi_row, mi_col);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the horizontal-edge ("_hor") block filter for a single plane.
// Plane 0 always goes through av1_filter_block_plane_ss00_hor. Any other
// plane is dispatched on `path`:
//   LF_PATH_420  -> av1_filter_block_plane_ss11_hor
//   LF_PATH_444  -> av1_filter_block_plane_ss00_hor
//   LF_PATH_SLOW -> av1_filter_block_plane_non420_hor, which is called with
//                   only cm, the plane and mi_row (lfm is not passed to it).
static INLINE void loop_filter_block_plane_hor(
|
||||
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
|
||||
int mi_row, enum lf_path path, LOOP_FILTER_MASK *lfm) {
|
||||
if (plane == 0) {
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
|
||||
} else {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Row-based multi-threaded loopfilter hook
|
||||
#if CONFIG_PARALLEL_DEBLOCKING
|
||||
static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
int mi_row, mi_col;
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
|
||||
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
LOOP_FILTER_MASK lfm;
|
||||
int plane;
|
||||
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
|
||||
mi_col);
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
|
||||
mi_row, mi_col, path, &lfm);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Worker hook for the horizontal-edge pass. Walks the rows from
// lf_data->start up to lf_data->stop in steps of
// lf_sync->num_workers * MI_BLOCK_SIZE, and within each row walks the columns
// in steps of MI_BLOCK_SIZE. For every block position it calls sync_read,
// sets up the destination planes and the filter mask, runs
// loop_filter_block_plane_hor on each of the num_planes planes (1 when
// lf_data->y_only is set, MAX_MB_PLANE otherwise), then calls sync_write.
// Always returns 1.
static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
const int sb_cols =
|
||||
mi_cols_aligned_to_sb(lf_data->cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
|
||||
int mi_row, mi_col;
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
|
||||
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
|
||||
LOOP_FILTER_MASK lfm;
|
||||
int plane;
|
||||
|
||||
// TODO(wenhao.zhang@intel.com): For better parallelization, reorder
|
||||
// the outer loop to column-based and remove the synchronizations here.
|
||||
sync_read(lf_sync, r, c);
|
||||
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
|
||||
mi_col);
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
|
||||
for (plane = 0; plane < num_planes; ++plane)
|
||||
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
|
||||
path, &lfm);
|
||||
|
||||
sync_write(lf_sync, r, c, sb_cols);
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#else // CONFIG_PARALLEL_DEBLOCKING
|
||||
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
|
||||
const int sb_cols =
|
||||
mi_cols_aligned_to_sb(lf_data->cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
|
||||
int mi_row, mi_col;
|
||||
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
|
||||
|
||||
for (mi_row = lf_data->start; mi_row < lf_data->stop;
|
||||
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
|
||||
MODE_INFO **const mi =
|
||||
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
|
||||
|
||||
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
|
||||
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
|
||||
LOOP_FILTER_MASK lfm;
|
||||
|
@ -115,40 +228,24 @@ static INLINE void thread_loop_filter_rows(
|
|||
|
||||
sync_read(lf_sync, r, c);
|
||||
|
||||
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
|
||||
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
|
||||
mi_col);
|
||||
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
|
||||
lf_data->cm->mi_stride, &lfm);
|
||||
|
||||
// TODO(JBB): Make setup_mask work for non 420.
|
||||
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
|
||||
|
||||
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
|
||||
for (plane = 1; plane < num_planes; ++plane) {
|
||||
switch (path) {
|
||||
case LF_PATH_420:
|
||||
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_444:
|
||||
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
|
||||
break;
|
||||
case LF_PATH_SLOW:
|
||||
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
|
||||
mi_row, mi_col);
|
||||
break;
|
||||
}
|
||||
for (plane = 0; plane < num_planes; ++plane) {
|
||||
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
|
||||
mi_row, mi_col, path, &lfm);
|
||||
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
|
||||
path, &lfm);
|
||||
}
|
||||
|
||||
sync_write(lf_sync, r, c, sb_cols);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Row-based multi-threaded loopfilter hook
|
||||
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
|
||||
LFWorkerData *const lf_data) {
|
||||
thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
|
||||
lf_data->start, lf_data->stop, lf_data->y_only,
|
||||
lf_sync);
|
||||
return 1;
|
||||
}
|
||||
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||
|
||||
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
||||
struct macroblockd_plane planes[MAX_MB_PLANE],
|
||||
|
@ -170,9 +267,6 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
|
||||
}
|
||||
|
||||
// Initialize cur_sb_col to -1 for all SB rows.
|
||||
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
|
||||
|
||||
// Set up loopfilter thread data.
|
||||
// The decoder is capping num_workers because it has been observed that using
|
||||
// more threads on the loopfilter than there are cores will hurt performance
|
||||
|
@ -181,6 +275,71 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
// tries to use more threads for the loopfilter, it will hurt performance
|
||||
// because of contention. If the multithreading code changes in the future
|
||||
// then the number of workers used by the loopfilter should be revisited.
|
||||
|
||||
#if CONFIG_PARALLEL_DEBLOCKING
|
||||
// Initialize cur_sb_col to -1 for all SB rows.
|
||||
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
|
||||
|
||||
// Filter all the vertical edges in the whole frame
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
AVxWorker *const worker = &workers[i];
|
||||
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
|
||||
|
||||
worker->hook = (AVxWorkerHook)loop_filter_ver_row_worker;
|
||||
worker->data1 = lf_sync;
|
||||
worker->data2 = lf_data;
|
||||
|
||||
// Loopfilter data
|
||||
av1_loop_filter_data_reset(lf_data, frame, cm, planes);
|
||||
lf_data->start = start + i * MI_BLOCK_SIZE;
|
||||
lf_data->stop = stop;
|
||||
lf_data->y_only = y_only;
|
||||
|
||||
// Start loopfiltering
|
||||
if (i == num_workers - 1) {
|
||||
winterface->execute(worker);
|
||||
} else {
|
||||
winterface->launch(worker);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait till all rows are finished
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
winterface->sync(&workers[i]);
|
||||
}
|
||||
|
||||
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
|
||||
// Filter all the horizontal edges in the whole frame
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
AVxWorker *const worker = &workers[i];
|
||||
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
|
||||
|
||||
worker->hook = (AVxWorkerHook)loop_filter_hor_row_worker;
|
||||
worker->data1 = lf_sync;
|
||||
worker->data2 = lf_data;
|
||||
|
||||
// Loopfilter data
|
||||
av1_loop_filter_data_reset(lf_data, frame, cm, planes);
|
||||
lf_data->start = start + i * MI_BLOCK_SIZE;
|
||||
lf_data->stop = stop;
|
||||
lf_data->y_only = y_only;
|
||||
|
||||
// Start loopfiltering
|
||||
if (i == num_workers - 1) {
|
||||
winterface->execute(worker);
|
||||
} else {
|
||||
winterface->launch(worker);
|
||||
}
|
||||
}
|
||||
|
||||
// Wait till all rows are finished
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
winterface->sync(&workers[i]);
|
||||
}
|
||||
#else // CONFIG_PARALLEL_DEBLOCKING
|
||||
// Initialize cur_sb_col to -1 for all SB rows.
|
||||
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
|
||||
|
||||
for (i = 0; i < num_workers; ++i) {
|
||||
AVxWorker *const worker = &workers[i];
|
||||
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
|
||||
|
@ -207,6 +366,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
for (i = 0; i < num_workers; ++i) {
|
||||
winterface->sync(&workers[i]);
|
||||
}
|
||||
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||
}
|
||||
|
||||
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
||||
|
|
|
@ -263,6 +263,7 @@ EXPERIMENT_LIST="
|
|||
supertx
|
||||
ans
|
||||
daala_ec
|
||||
parallel_deblocking
|
||||
"
|
||||
CONFIG_LIST="
|
||||
dependency_tracking
|
||||
|
|
Загрузка…
Ссылка в новой задаче