integrate parallel_deblocking with CB4x4
This change makes the parallel deblocking experiment work with CB4X4. The inner loop processes every 4x4 block. Change-Id: I86adb3d7b6d67a91ccc12aab29da9bfb8c522cf1
This commit is contained in:
Родитель
b2a01db85f
Коммит
17905edfe0
|
@ -149,10 +149,15 @@ void aom_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
|
||||||
const uint8_t *blimit, const uint8_t *limit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *thresh) {
|
const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
#if !CONFIG_PARALLEL_DEBLOCKING
|
#if !CONFIG_PARALLEL_DEBLOCKING
|
||||||
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
|
@ -179,10 +184,15 @@ void aom_lpf_horizontal_4_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
|
||||||
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
void aom_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
#if !CONFIG_PARALLEL_DEBLOCKING
|
#if !CONFIG_PARALLEL_DEBLOCKING
|
||||||
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
|
@ -229,10 +239,15 @@ static INLINE void filter8(int8_t mask, uint8_t thresh, int8_t flat,
|
||||||
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
void aom_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
|
|
||||||
|
@ -256,8 +271,13 @@ void aom_lpf_horizontal_8_dual_c(uint8_t *s, int p, const uint8_t *blimit0,
|
||||||
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
void aom_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask =
|
const int8_t mask =
|
||||||
|
@ -390,10 +410,15 @@ static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh, int count) {
|
const uint8_t *thresh, int count) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int step = 4;
|
||||||
|
#else
|
||||||
|
int step = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < step * count; ++i) {
|
||||||
const uint8_t p7 = s[-8 * p], p6 = s[-7 * p], p5 = s[-6 * p],
|
const uint8_t p7 = s[-8 * p], p6 = s[-7 * p], p5 = s[-6 * p],
|
||||||
p4 = s[-5 * p], p3 = s[-4 * p], p2 = s[-3 * p],
|
p4 = s[-5 * p], p3 = s[-4 * p], p2 = s[-3 * p],
|
||||||
p1 = s[-2 * p], p0 = s[-p];
|
p1 = s[-2 * p], p0 = s[-p];
|
||||||
|
@ -436,7 +461,11 @@ void aom_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
|
||||||
void aom_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
void aom_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
|
||||||
|
#else
|
||||||
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
@ -478,7 +507,11 @@ static void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
|
||||||
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
void aom_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh) {
|
const uint8_t *limit, const uint8_t *thresh) {
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4);
|
||||||
|
#else
|
||||||
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
|
mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
void aom_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
|
||||||
|
@ -596,10 +629,15 @@ void aom_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
|
||||||
const uint8_t *blimit, const uint8_t *limit,
|
const uint8_t *blimit, const uint8_t *limit,
|
||||||
const uint8_t *thresh, int bd) {
|
const uint8_t *thresh, int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
#if !CONFIG_PARALLEL_DEBLOCKING
|
#if !CONFIG_PARALLEL_DEBLOCKING
|
||||||
const uint16_t p3 = s[-4 * p];
|
const uint16_t p3 = s[-4 * p];
|
||||||
const uint16_t p2 = s[-3 * p];
|
const uint16_t p2 = s[-3 * p];
|
||||||
|
@ -636,10 +674,15 @@ void aom_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
#if !CONFIG_PARALLEL_DEBLOCKING
|
#if !CONFIG_PARALLEL_DEBLOCKING
|
||||||
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
|
@ -689,10 +732,15 @@ void aom_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
|
||||||
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
|
||||||
|
|
||||||
|
@ -718,8 +766,13 @@ void aom_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int count = 4;
|
||||||
|
#else
|
||||||
|
int count = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < count; ++i) {
|
||||||
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
|
||||||
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
|
||||||
const int8_t mask =
|
const int8_t mask =
|
||||||
|
@ -813,10 +866,15 @@ static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
|
||||||
const uint8_t *thresh, int count,
|
const uint8_t *thresh, int count,
|
||||||
int bd) {
|
int bd) {
|
||||||
int i;
|
int i;
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
int step = 4;
|
||||||
|
#else
|
||||||
|
int step = 8;
|
||||||
|
#endif
|
||||||
|
|
||||||
// loop filter designed to work using chars so that we can make maximum use
|
// loop filter designed to work using chars so that we can make maximum use
|
||||||
// of 8 bit simd instructions.
|
// of 8 bit simd instructions.
|
||||||
for (i = 0; i < 8 * count; ++i) {
|
for (i = 0; i < step * count; ++i) {
|
||||||
const uint16_t p3 = s[-4 * p];
|
const uint16_t p3 = s[-4 * p];
|
||||||
const uint16_t p2 = s[-3 * p];
|
const uint16_t p2 = s[-3 * p];
|
||||||
const uint16_t p1 = s[-2 * p];
|
const uint16_t p1 = s[-2 * p];
|
||||||
|
@ -852,7 +910,11 @@ void aom_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh, int bd) {
|
const uint8_t *thresh, int bd) {
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
|
||||||
|
#else
|
||||||
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
|
highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
|
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
|
||||||
|
@ -888,13 +950,21 @@ static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
|
||||||
void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
void aom_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
|
||||||
const uint8_t *limit, const uint8_t *thresh,
|
const uint8_t *limit, const uint8_t *thresh,
|
||||||
int bd) {
|
int bd) {
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 4, bd);
|
||||||
|
#else
|
||||||
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
|
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
|
void aom_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
|
||||||
const uint8_t *blimit,
|
const uint8_t *blimit,
|
||||||
const uint8_t *limit,
|
const uint8_t *limit,
|
||||||
const uint8_t *thresh, int bd) {
|
const uint8_t *thresh, int bd) {
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING && CONFIG_CB4X4
|
||||||
|
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 8, bd);
|
||||||
|
#else
|
||||||
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd);
|
highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, 16, bd);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif // CONFIG_HIGHBITDEPTH
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
|
|
@ -22,7 +22,7 @@
|
||||||
|
|
||||||
#include "av1/common/seg_common.h"
|
#include "av1/common/seg_common.h"
|
||||||
|
|
||||||
#define CONFIG_PARALLEL_DEBLOCKING_15TAPLUMAONLY 0
|
#define PARALLEL_DEBLOCKING_15TAPLUMAONLY 1
|
||||||
|
|
||||||
// 64 bit masks for left transform size. Each 1 represents a position where
|
// 64 bit masks for left transform size. Each 1 represents a position where
|
||||||
// we should apply a loop filter across the left border of an 8x8 block
|
// we should apply a loop filter across the left border of an 8x8 block
|
||||||
|
@ -1857,8 +1857,6 @@ void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
|
||||||
dst->buf = dst0;
|
dst->buf = dst0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !(CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
|
|
||||||
CONFIG_CB4X4)
|
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING
|
#if CONFIG_PARALLEL_DEBLOCKING
|
||||||
typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
|
typedef enum EDGE_DIR { VERT_EDGE = 0, HORZ_EDGE = 1, NUM_EDGE_DIRS } EDGE_DIR;
|
||||||
static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES] = {
|
static const uint32_t av1_prediction_masks[NUM_EDGE_DIRS][BLOCK_SIZES] = {
|
||||||
|
@ -2010,10 +2008,17 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
// not sure if changes are required.
|
// not sure if changes are required.
|
||||||
assert(0 && "Not yet updated");
|
assert(0 && "Not yet updated");
|
||||||
#endif // CONFIG_EXT_PARTITION
|
#endif // CONFIG_EXT_PARTITION
|
||||||
|
|
||||||
{
|
{
|
||||||
const TX_SIZE ts =
|
const TX_SIZE ts =
|
||||||
av1_get_transform_size(ppCurr[0], edgeDir, scaleHorz, scaleVert);
|
av1_get_transform_size(ppCurr[0], edgeDir, scaleHorz, scaleVert);
|
||||||
|
#if CONFIG_EXT_DELTA_Q
|
||||||
|
const uint32_t currLevel =
|
||||||
|
get_filter_level(cm, &cm->lf_info, &ppCurr[0]->mbmi);
|
||||||
|
#else
|
||||||
const uint32_t currLevel = get_filter_level(&cm->lf_info, &ppCurr[0]->mbmi);
|
const uint32_t currLevel = get_filter_level(&cm->lf_info, &ppCurr[0]->mbmi);
|
||||||
|
#endif // CONFIG_EXT_DELTA_Q
|
||||||
|
|
||||||
const int currSkipped =
|
const int currSkipped =
|
||||||
ppCurr[0]->mbmi.skip && is_inter_block(&ppCurr[0]->mbmi);
|
ppCurr[0]->mbmi.skip && is_inter_block(&ppCurr[0]->mbmi);
|
||||||
const uint32_t coord = (VERT_EDGE == edgeDir) ? (x) : (y);
|
const uint32_t coord = (VERT_EDGE == edgeDir) ? (x) : (y);
|
||||||
|
@ -2034,7 +2039,13 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
const MODE_INFO *const pPrev = *(ppCurr - modeStep);
|
const MODE_INFO *const pPrev = *(ppCurr - modeStep);
|
||||||
const TX_SIZE pvTs =
|
const TX_SIZE pvTs =
|
||||||
av1_get_transform_size(pPrev, edgeDir, scaleHorz, scaleVert);
|
av1_get_transform_size(pPrev, edgeDir, scaleHorz, scaleVert);
|
||||||
|
#if CONFIG_EXT_DELTA_Q
|
||||||
|
const uint32_t pvLvl =
|
||||||
|
get_filter_level(cm, &cm->lf_info, &pPrev->mbmi);
|
||||||
|
#else
|
||||||
const uint32_t pvLvl = get_filter_level(&cm->lf_info, &pPrev->mbmi);
|
const uint32_t pvLvl = get_filter_level(&cm->lf_info, &pPrev->mbmi);
|
||||||
|
#endif // CONFIG_EXT_DELTA_Q
|
||||||
|
|
||||||
const int pvSkip = pPrev->mbmi.skip && is_inter_block(&pPrev->mbmi);
|
const int pvSkip = pPrev->mbmi.skip && is_inter_block(&pPrev->mbmi);
|
||||||
const int32_t puEdge =
|
const int32_t puEdge =
|
||||||
(coord &
|
(coord &
|
||||||
|
@ -2046,7 +2057,7 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
// if the current and the previous blocks are skipped,
|
// if the current and the previous blocks are skipped,
|
||||||
// deblock the edge if the edge belongs to a PU's edge only.
|
// deblock the edge if the edge belongs to a PU's edge only.
|
||||||
if ((currLevel || pvLvl) && (!pvSkip || !currSkipped || puEdge)) {
|
if ((currLevel || pvLvl) && (!pvSkip || !currSkipped || puEdge)) {
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || CONFIG_PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
const TX_SIZE minTs = AOMMIN(ts, pvTs);
|
const TX_SIZE minTs = AOMMIN(ts, pvTs);
|
||||||
if (TX_4X4 >= minTs) {
|
if (TX_4X4 >= minTs) {
|
||||||
pParams->filterLength = 4;
|
pParams->filterLength = 4;
|
||||||
|
@ -2054,7 +2065,7 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
pParams->filterLength = 8;
|
pParams->filterLength = 8;
|
||||||
} else {
|
} else {
|
||||||
pParams->filterLength = 16;
|
pParams->filterLength = 16;
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
#if PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
// No wide filtering for chroma plane
|
// No wide filtering for chroma plane
|
||||||
if (scaleHorz || scaleVert) {
|
if (scaleHorz || scaleVert) {
|
||||||
pParams->filterLength = 8;
|
pParams->filterLength = 8;
|
||||||
|
@ -2064,7 +2075,7 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
#else
|
#else
|
||||||
pParams->filterLength = (TX_4X4 >= AOMMIN(ts, pvTs)) ? (4) : (8);
|
pParams->filterLength = (TX_4X4 >= AOMMIN(ts, pvTs)) ? (4) : (8);
|
||||||
|
|
||||||
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP
|
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
|
|
||||||
// update the level if the current block is skipped,
|
// update the level if the current block is skipped,
|
||||||
// but the previous one is not
|
// but the previous one is not
|
||||||
|
@ -2072,10 +2083,14 @@ static void set_lpf_parameters(AV1_DEBLOCKING_PARAMETERS *const pParams,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !CONFIG_CB4X4
|
||||||
// prepare internal edge parameters
|
// prepare internal edge parameters
|
||||||
if (currLevel && !currSkipped) {
|
if (currLevel && !currSkipped) {
|
||||||
pParams->filterLengthInternal = (TX_4X4 >= ts) ? (4) : (0);
|
pParams->filterLengthInternal = (TX_4X4 >= ts) ? (4) : (0);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// prepare common parameters
|
// prepare common parameters
|
||||||
if (pParams->filterLength || pParams->filterLengthInternal) {
|
if (pParams->filterLength || pParams->filterLengthInternal) {
|
||||||
const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
|
const loop_filter_thresh *const limits = cm->lf_info.lfthr + level;
|
||||||
|
@ -2093,15 +2108,21 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
|
||||||
const ptrdiff_t modeStride,
|
const ptrdiff_t modeStride,
|
||||||
const uint32_t cuX,
|
const uint32_t cuX,
|
||||||
const uint32_t cuY) {
|
const uint32_t cuY) {
|
||||||
|
const int col_step = MI_SIZE >> MI_SIZE_LOG2;
|
||||||
|
const int row_step = MI_SIZE >> MI_SIZE_LOG2;
|
||||||
const uint32_t scaleHorz = pPlane->subsampling_x;
|
const uint32_t scaleHorz = pPlane->subsampling_x;
|
||||||
const uint32_t scaleVert = pPlane->subsampling_y;
|
const uint32_t scaleVert = pPlane->subsampling_y;
|
||||||
const uint32_t width = pPlane->dst.width;
|
const uint32_t width = pPlane->dst.width;
|
||||||
const uint32_t height = pPlane->dst.height;
|
const uint32_t height = pPlane->dst.height;
|
||||||
uint8_t *const pDst = pPlane->dst.buf;
|
uint8_t *const pDst = pPlane->dst.buf;
|
||||||
const int dstStride = pPlane->dst.stride;
|
const int dstStride = pPlane->dst.stride;
|
||||||
for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += 1) {
|
for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += row_step) {
|
||||||
uint8_t *p = pDst + y * MI_SIZE * dstStride;
|
uint8_t *p = pDst + y * MI_SIZE * dstStride;
|
||||||
for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += 1) {
|
for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += col_step) {
|
||||||
|
// The inner loop always filters vertical edges in an MI block. If the MI
|
||||||
|
// size is 8x8, it filters the vertical edge aligned with an 8x8 block.
|
||||||
|
// If a 4x4 transform is used, it then filters the internal edge
|
||||||
|
// aligned with a 4x4 block.
|
||||||
const MODE_INFO **const pCurr =
|
const MODE_INFO **const pCurr =
|
||||||
ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz);
|
ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz);
|
||||||
AV1_DEBLOCKING_PARAMETERS params;
|
AV1_DEBLOCKING_PARAMETERS params;
|
||||||
|
@ -2112,31 +2133,59 @@ static void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
|
||||||
switch (params.filterLength) {
|
switch (params.filterLength) {
|
||||||
// apply 4-tap filtering
|
// apply 4-tap filtering
|
||||||
case 4:
|
case 4:
|
||||||
aom_lpf_vertical_4(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_vertical_4_c(CONVERT_TO_SHORTPTR(p), dstStride,
|
||||||
|
params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_vertical_4_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
// apply 8-tap filtering
|
// apply 8-tap filtering
|
||||||
case 8:
|
case 8:
|
||||||
aom_lpf_vertical_8(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_vertical_8_c(CONVERT_TO_SHORTPTR(p), dstStride,
|
||||||
|
params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_vertical_8_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || CONFIG_PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
// apply 16-tap filtering
|
// apply 16-tap filtering
|
||||||
case 16:
|
case 16:
|
||||||
aom_lpf_vertical_16(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_vertical_16_c(CONVERT_TO_SHORTPTR(p), dstStride,
|
||||||
|
params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_vertical_16_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP
|
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
// no filtering
|
// no filtering
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
// process the internal edge
|
// process the internal edge
|
||||||
if (params.filterLengthInternal) {
|
if (params.filterLengthInternal) {
|
||||||
aom_lpf_vertical_4(p + 4, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_vertical_4_c(CONVERT_TO_SHORTPTR(p + 4), dstStride,
|
||||||
|
params.mblim, params.lim, params.hev_thr,
|
||||||
|
cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_vertical_4_c(p + 4, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
}
|
}
|
||||||
// advance the destination pointer
|
// advance the destination pointer
|
||||||
p += 8;
|
p += MI_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2147,15 +2196,21 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
|
||||||
const ptrdiff_t modeStride,
|
const ptrdiff_t modeStride,
|
||||||
const uint32_t cuX,
|
const uint32_t cuX,
|
||||||
const uint32_t cuY) {
|
const uint32_t cuY) {
|
||||||
|
const int col_step = MI_SIZE >> MI_SIZE_LOG2;
|
||||||
|
const int row_step = MI_SIZE >> MI_SIZE_LOG2;
|
||||||
const uint32_t scaleHorz = pPlane->subsampling_x;
|
const uint32_t scaleHorz = pPlane->subsampling_x;
|
||||||
const uint32_t scaleVert = pPlane->subsampling_y;
|
const uint32_t scaleVert = pPlane->subsampling_y;
|
||||||
const uint32_t width = pPlane->dst.width;
|
const uint32_t width = pPlane->dst.width;
|
||||||
const uint32_t height = pPlane->dst.height;
|
const uint32_t height = pPlane->dst.height;
|
||||||
uint8_t *const pDst = pPlane->dst.buf;
|
uint8_t *const pDst = pPlane->dst.buf;
|
||||||
const int dstStride = pPlane->dst.stride;
|
const int dstStride = pPlane->dst.stride;
|
||||||
for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += 1) {
|
for (int y = 0; y < (MAX_MIB_SIZE >> scaleVert); y += row_step) {
|
||||||
uint8_t *p = pDst + y * MI_SIZE * dstStride;
|
uint8_t *p = pDst + y * MI_SIZE * dstStride;
|
||||||
for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += 1) {
|
for (int x = 0; x < (MAX_MIB_SIZE >> scaleHorz); x += col_step) {
|
||||||
|
// The inner loop always filters horizontal edges in an MI block. If the MI
|
||||||
|
// size is 8x8, it first filters the horizontal edge aligned with an 8x8
|
||||||
|
// block. If a 4x4 transform is used, it then filters the internal
|
||||||
|
// edge aligned with a 4x4 block.
|
||||||
const MODE_INFO **const pCurr =
|
const MODE_INFO **const pCurr =
|
||||||
ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz);
|
ppModeInfo + (y << scaleVert) * modeStride + (x << scaleHorz);
|
||||||
AV1_DEBLOCKING_PARAMETERS params;
|
AV1_DEBLOCKING_PARAMETERS params;
|
||||||
|
@ -2166,45 +2221,74 @@ static void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
|
||||||
switch (params.filterLength) {
|
switch (params.filterLength) {
|
||||||
// apply 4-tap filtering
|
// apply 4-tap filtering
|
||||||
case 4:
|
case 4:
|
||||||
aom_lpf_horizontal_4(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_horizontal_4_c(CONVERT_TO_SHORTPTR(p), dstStride,
|
||||||
|
params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_horizontal_4_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
// apply 8-tap filtering
|
// apply 8-tap filtering
|
||||||
case 8:
|
case 8:
|
||||||
aom_lpf_horizontal_8(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_horizontal_8_c(CONVERT_TO_SHORTPTR(p), dstStride,
|
||||||
|
params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_horizontal_8_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || CONFIG_PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
#if CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
// apply 16-tap filtering
|
// apply 16-tap filtering
|
||||||
case 16:
|
case 16:
|
||||||
aom_lpf_horizontal_edge_16(p, dstStride, params.mblim, params.lim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_horizontal_edge_16_c(
|
||||||
|
CONVERT_TO_SHORTPTR(p), dstStride, params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_horizontal_edge_16_c(p, dstStride, params.mblim, params.lim,
|
||||||
params.hev_thr);
|
params.hev_thr);
|
||||||
break;
|
break;
|
||||||
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP
|
#endif // CONFIG_PARALLEL_DEBLOCKING_15TAP || PARALLEL_DEBLOCKING_15TAPLUMAONLY
|
||||||
// no filtering
|
// no filtering
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
// process the internal edge
|
// process the internal edge
|
||||||
if (params.filterLengthInternal) {
|
if (params.filterLengthInternal) {
|
||||||
aom_lpf_horizontal_4(p + 4 * dstStride, dstStride, params.mblim,
|
#if CONFIG_HIGHBITDEPTH
|
||||||
|
if (cm->use_highbitdepth)
|
||||||
|
aom_highbd_lpf_horizontal_4_c(CONVERT_TO_SHORTPTR(p + 4 * dstStride),
|
||||||
|
dstStride, params.mblim, params.lim,
|
||||||
|
params.hev_thr, cm->bit_depth);
|
||||||
|
else
|
||||||
|
#endif // CONFIG_HIGHBITDEPTH
|
||||||
|
aom_lpf_horizontal_4_c(p + 4 * dstStride, dstStride, params.mblim,
|
||||||
params.lim, params.hev_thr);
|
params.lim, params.hev_thr);
|
||||||
}
|
}
|
||||||
// advance the destination pointer
|
// advance the destination pointer
|
||||||
p += 8;
|
p += MI_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif // CONFIG_PARALLEL_DEBLOCKING
|
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||||
#endif
|
|
||||||
|
|
||||||
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
||||||
struct macroblockd_plane planes[MAX_MB_PLANE],
|
struct macroblockd_plane planes[MAX_MB_PLANE],
|
||||||
int start, int stop, int y_only) {
|
int start, int stop, int y_only) {
|
||||||
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
|
|
||||||
CONFIG_CB4X4
|
|
||||||
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
|
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
|
||||||
int mi_row, mi_col;
|
int mi_row, mi_col;
|
||||||
|
|
||||||
|
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES || \
|
||||||
|
CONFIG_CB4X4
|
||||||
|
|
||||||
|
#if !CONFIG_PARALLEL_DEBLOCKING
|
||||||
#if CONFIG_VAR_TX
|
#if CONFIG_VAR_TX
|
||||||
for (int i = 0; i < MAX_MB_PLANE; ++i)
|
for (int i = 0; i < MAX_MB_PLANE; ++i)
|
||||||
memset(cm->top_txfm_context[i], TX_32X32, cm->mi_cols << TX_UNIT_WIDE_LOG2);
|
memset(cm->top_txfm_context[i], TX_32X32, cm->mi_cols << TX_UNIT_WIDE_LOG2);
|
||||||
|
@ -2229,27 +2313,17 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
|
#else
|
||||||
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
|
|
||||||
int mi_row, mi_col;
|
#if CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
|
||||||
#if !CONFIG_PARALLEL_DEBLOCKING
|
assert(0 && "Not yet updated. ToDo as next steps");
|
||||||
enum lf_path path;
|
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
|
||||||
LOOP_FILTER_MASK lfm;
|
|
||||||
|
|
||||||
if (y_only)
|
|
||||||
path = LF_PATH_444;
|
|
||||||
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
|
|
||||||
path = LF_PATH_420;
|
|
||||||
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
|
|
||||||
path = LF_PATH_444;
|
|
||||||
else
|
|
||||||
path = LF_PATH_SLOW;
|
|
||||||
#endif
|
|
||||||
#if CONFIG_PARALLEL_DEBLOCKING
|
|
||||||
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
||||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
||||||
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
||||||
|
// filter all vertical edges in every 64x64 super block
|
||||||
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
||||||
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
||||||
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
||||||
|
@ -2264,6 +2338,42 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
||||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
||||||
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
||||||
|
// filter all horizontal edges in every 64x64 super block
|
||||||
|
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
||||||
|
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
||||||
|
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
||||||
|
av1_filter_block_plane_horz(
|
||||||
|
cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col),
|
||||||
|
cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz,
|
||||||
|
(mi_row * MI_SIZE) >> scaleVert);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // CONFIG_PARALLEL_DEBLOCKING
|
||||||
|
|
||||||
|
#else // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
|
||||||
|
|
||||||
|
#if CONFIG_PARALLEL_DEBLOCKING
|
||||||
|
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
||||||
|
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||||
|
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
||||||
|
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
||||||
|
// filter all vertical edges in every 64x64 super block
|
||||||
|
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
||||||
|
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
||||||
|
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
||||||
|
av1_filter_block_plane_vert(
|
||||||
|
cm, planes + planeIdx, (const MODE_INFO **)(mi + mi_col),
|
||||||
|
cm->mi_stride, (mi_col * MI_SIZE) >> scaleHorz,
|
||||||
|
(mi_row * MI_SIZE) >> scaleVert);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
||||||
|
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||||
|
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
||||||
|
av1_setup_dst_planes(planes, cm->sb_size, frame_buffer, mi_row, mi_col);
|
||||||
|
// filter all horizontal edges in every 64x64 super block
|
||||||
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
for (int planeIdx = 0; planeIdx < num_planes; planeIdx += 1) {
|
||||||
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
const int32_t scaleHorz = planes[planeIdx].subsampling_x;
|
||||||
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
const int32_t scaleVert = planes[planeIdx].subsampling_y;
|
||||||
|
@ -2275,6 +2385,18 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else // CONFIG_PARALLEL_DEBLOCKING
|
#else // CONFIG_PARALLEL_DEBLOCKING
|
||||||
|
enum lf_path path;
|
||||||
|
LOOP_FILTER_MASK lfm;
|
||||||
|
|
||||||
|
if (y_only)
|
||||||
|
path = LF_PATH_444;
|
||||||
|
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
|
||||||
|
path = LF_PATH_420;
|
||||||
|
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
|
||||||
|
path = LF_PATH_444;
|
||||||
|
else
|
||||||
|
path = LF_PATH_SLOW;
|
||||||
|
|
||||||
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
|
||||||
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
|
||||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
|
||||||
|
|
|
@ -551,6 +551,11 @@ post_process_cmdline() {
|
||||||
soft_enable accounting
|
soft_enable accounting
|
||||||
soft_enable inspection
|
soft_enable inspection
|
||||||
fi
|
fi
|
||||||
|
if enabled parallel_deblocking_15tap && ! enabled parallel_deblocking; then
|
||||||
|
log_echo "parallel_deblocking_15tap dependes on parallel_deblocking, so"
|
||||||
|
log_echo "enabling parallel_deblocking"
|
||||||
|
soft_enable parallel_deblocking
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
process_targets() {
|
process_targets() {
|
||||||
|
|
Загрузка…
Ссылка в новой задаче