Further optimizations of loop restoration
Change-Id: I4c4300f3f565d8aecf65669b77aaa874bb73a3a0
This commit is contained in:
Родитель
f10cba2b39
Коммит
519dbcf19b
|
@ -27,7 +27,7 @@ static int domaintxfmrf_vtable[DOMAINTXFMRF_ITERS][DOMAINTXFMRF_PARAMS][256];
|
||||||
static const int override_y_only[RESTORE_TYPES] = { 1, 1, 1, 1, 1 };
|
static const int override_y_only[RESTORE_TYPES] = { 1, 1, 1, 1, 1 };
|
||||||
|
|
||||||
static const int domaintxfmrf_params[DOMAINTXFMRF_PARAMS] = {
|
static const int domaintxfmrf_params[DOMAINTXFMRF_PARAMS] = {
|
||||||
48, 52, 56, 60, 64, 68, 72, 76, 80, 82, 84, 86, 88,
|
32, 40, 48, 56, 64, 68, 72, 76, 80, 82, 84, 86, 88,
|
||||||
90, 92, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
|
90, 92, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105,
|
||||||
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
|
106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118,
|
||||||
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 134,
|
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 134,
|
||||||
|
@ -252,8 +252,8 @@ static void loop_wiener_filter(uint8_t *data, int width, int height, int stride,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void boxsum(int64_t *src, int width, int height, int src_stride, int r,
|
static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
|
||||||
int sqr, int64_t *dst, int dst_stride, int64_t *tmp,
|
int sqr, int32_t *dst, int dst_stride, int32_t *tmp,
|
||||||
int tmp_stride) {
|
int tmp_stride) {
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
|
@ -342,11 +342,11 @@ void decode_xq(int *xqd, int *xq) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#define APPROXIMATE_SGR 1
|
#define APPROXIMATE_SGR 1
|
||||||
void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
|
||||||
int bit_depth, int r, int eps, void *tmpbuf) {
|
int bit_depth, int r, int eps, void *tmpbuf) {
|
||||||
int64_t *A = (int64_t *)tmpbuf;
|
int32_t *A = (int32_t *)tmpbuf;
|
||||||
int64_t *B = A + RESTORATION_TILEPELS_MAX;
|
int32_t *B = A + RESTORATION_TILEPELS_MAX;
|
||||||
int64_t *T = B + RESTORATION_TILEPELS_MAX;
|
int32_t *T = B + RESTORATION_TILEPELS_MAX;
|
||||||
int8_t num[RESTORATION_TILEPELS_MAX];
|
int8_t num[RESTORATION_TILEPELS_MAX];
|
||||||
int i, j;
|
int i, j;
|
||||||
eps <<= 2 * (bit_depth - 8);
|
eps <<= 2 * (bit_depth - 8);
|
||||||
|
@ -358,10 +358,9 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
for (j = 0; j < width; ++j) {
|
for (j = 0; j < width; ++j) {
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int n = num[k];
|
const int n = num[k];
|
||||||
int64_t den;
|
// Widen the operands before multiplying: A[k] and B[k] are int32_t and n is
// int, so without the casts both products are evaluated in 32 bits and can
// overflow before the result is stored into the 64-bit p.
const int64_t p = (int64_t)A[k] * n - (int64_t)B[k] * B[k];
|
||||||
A[k] = A[k] * n - B[k] * B[k];
|
const int64_t q = p + n * n * eps;
|
||||||
den = A[k] + n * n * eps;
|
// Do the rounded division in 64 bits and narrow only the quotient. A cast
// binds tighter than '/', so casting the numerator alone would truncate
// (p << SGRPROJ_SGR_BITS) + (q >> 1) to 32 bits before dividing.
A[k] = (int32_t)(((p << SGRPROJ_SGR_BITS) + (q >> 1)) / q);
|
||||||
A[k] = ((A[k] << SGRPROJ_SGR_BITS) + (den >> 1)) / den;
|
|
||||||
B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n;
|
B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -372,11 +371,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a =
|
const int32_t a =
|
||||||
3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1];
|
3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1];
|
||||||
const int64_t b =
|
const int32_t b =
|
||||||
3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1];
|
3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -386,11 +385,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a =
|
const int32_t a =
|
||||||
3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1];
|
3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1];
|
||||||
const int64_t b =
|
const int32_t b =
|
||||||
3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1];
|
3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -400,11 +399,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a =
|
const int32_t a =
|
||||||
3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1];
|
3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1];
|
||||||
const int64_t b =
|
const int32_t b =
|
||||||
3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1];
|
3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -414,11 +413,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a =
|
const int32_t a =
|
||||||
3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1];
|
3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1];
|
||||||
const int64_t b =
|
const int32_t b =
|
||||||
3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1];
|
3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -427,11 +426,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] +
|
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] +
|
||||||
A[k + width - 1] + A[k + width + 1];
|
A[k + width - 1] + A[k + width + 1];
|
||||||
const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] +
|
const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] +
|
||||||
B[k + width - 1] + B[k + width + 1];
|
B[k + width - 1] + B[k + width + 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -440,11 +439,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] +
|
const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] +
|
||||||
A[k - width - 1] + A[k - width + 1];
|
A[k - width - 1] + A[k - width + 1];
|
||||||
const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] +
|
const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] +
|
||||||
B[k - width - 1] + B[k - width + 1];
|
B[k - width - 1] + B[k - width + 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -453,11 +452,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] +
|
const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] +
|
||||||
A[k - width + 1] + A[k + width + 1];
|
A[k - width + 1] + A[k + width + 1];
|
||||||
const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] +
|
const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] +
|
||||||
B[k - width + 1] + B[k + width + 1];
|
B[k - width + 1] + B[k + width + 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -466,11 +465,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 3;
|
const int nb = 3;
|
||||||
const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] +
|
const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] +
|
||||||
A[k - width - 1] + A[k + width - 1];
|
A[k - width - 1] + A[k + width - 1];
|
||||||
const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] +
|
const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] +
|
||||||
B[k - width - 1] + B[k + width - 1];
|
B[k - width - 1] + B[k + width - 1];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -479,17 +478,17 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int nb = 5;
|
const int nb = 5;
|
||||||
const int64_t a =
|
const int32_t a =
|
||||||
(A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 +
|
(A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 +
|
||||||
(A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] +
|
(A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] +
|
||||||
A[k + 1 + width]) *
|
A[k + 1 + width]) *
|
||||||
3;
|
3;
|
||||||
const int64_t b =
|
const int32_t b =
|
||||||
(B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 +
|
(B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 +
|
||||||
(B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] +
|
(B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] +
|
||||||
B[k + 1 + width]) *
|
B[k + 1 + width]) *
|
||||||
3;
|
3;
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
(((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -503,7 +502,7 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int n = num[k];
|
const int n = num[k];
|
||||||
const int64_t v =
|
const int32_t v =
|
||||||
(((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n;
|
(((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n;
|
||||||
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS);
|
||||||
}
|
}
|
||||||
|
@ -511,12 +510,13 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
||||||
#endif // APPROXIMATE_SGR
|
#endif // APPROXIMATE_SGR
|
||||||
}
|
}
|
||||||
|
|
||||||
static void apply_selfguided_restoration(int64_t *dat, int width, int height,
|
static void apply_selfguided_restoration(uint8_t *dat, int width, int height,
|
||||||
int stride, int bit_depth, int eps,
|
int stride, int bit_depth, int eps,
|
||||||
int *xqd, void *tmpbuf) {
|
int *xqd, uint8_t *dst, int dst_stride,
|
||||||
|
void *tmpbuf) {
|
||||||
int xq[2];
|
int xq[2];
|
||||||
int64_t *flt1 = (int64_t *)tmpbuf;
|
int32_t *flt1 = (int32_t *)tmpbuf;
|
||||||
int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
|
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
|
||||||
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
|
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i = 0; i < height; ++i) {
|
for (i = 0; i < height; ++i) {
|
||||||
|
@ -535,13 +535,14 @@ static void apply_selfguided_restoration(int64_t *dat, int width, int height,
|
||||||
for (j = 0; j < width; ++j) {
|
for (j = 0; j < width; ++j) {
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * stride + j;
|
const int l = i * stride + j;
|
||||||
const int64_t u = ((int64_t)dat[l] << SGRPROJ_RST_BITS);
|
const int m = i * dst_stride + j;
|
||||||
const int64_t f1 = (int64_t)flt1[k] - u;
|
const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
|
||||||
const int64_t f2 = (int64_t)flt2[k] - u;
|
const int32_t f1 = (int32_t)flt1[k] - u;
|
||||||
|
const int32_t f2 = (int32_t)flt2[k] - u;
|
||||||
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
|
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
|
||||||
const int16_t w =
|
const int16_t w =
|
||||||
(int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
|
(int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
|
||||||
dat[l] = w;
|
dst[m] = clip_pixel(w);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -552,10 +553,9 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
|
||||||
int dst_stride) {
|
int dst_stride) {
|
||||||
const int tile_width = rst->tile_width >> rst->subsampling_x;
|
const int tile_width = rst->tile_width >> rst->subsampling_x;
|
||||||
const int tile_height = rst->tile_height >> rst->subsampling_y;
|
const int tile_height = rst->tile_height >> rst->subsampling_y;
|
||||||
int i, j;
|
|
||||||
int h_start, h_end, v_start, v_end;
|
int h_start, h_end, v_start, v_end;
|
||||||
uint8_t *data_p, *dst_p;
|
uint8_t *data_p, *dst_p;
|
||||||
int64_t *dat = (int64_t *)rst->tmpbuf;
|
uint8_t *dat = (uint8_t *)rst->tmpbuf;
|
||||||
uint8_t *tmpbuf =
|
uint8_t *tmpbuf =
|
||||||
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
|
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
|
||||||
|
|
||||||
|
@ -568,22 +568,11 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width,
|
||||||
tile_width, tile_height, width, height, 0, 0,
|
tile_width, tile_height, width, height, 0, 0,
|
||||||
&h_start, &h_end, &v_start, &v_end);
|
&h_start, &h_end, &v_start, &v_end);
|
||||||
data_p = data + h_start + v_start * stride;
|
data_p = data + h_start + v_start * stride;
|
||||||
for (i = 0; i < (v_end - v_start); ++i) {
|
|
||||||
for (j = 0; j < (h_end - h_start); ++j) {
|
|
||||||
dat[i * (h_end - h_start) + j] = data_p[i * stride + j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start,
|
|
||||||
h_end - h_start, 8,
|
|
||||||
rst->rsi->sgrproj_info[tile_idx].ep,
|
|
||||||
rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf);
|
|
||||||
dst_p = dst + h_start + v_start * dst_stride;
|
dst_p = dst + h_start + v_start * dst_stride;
|
||||||
for (i = 0; i < (v_end - v_start); ++i) {
|
apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride,
|
||||||
for (j = 0; j < (h_end - h_start); ++j) {
|
8, rst->rsi->sgrproj_info[tile_idx].ep,
|
||||||
dst_p[i * dst_stride + j] =
|
rst->rsi->sgrproj_info[tile_idx].xqd, dst_p,
|
||||||
clip_pixel((int)dat[i * (h_end - h_start) + j]);
|
dst_stride, tmpbuf);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void loop_sgrproj_filter(uint8_t *data, int width, int height,
|
static void loop_sgrproj_filter(uint8_t *data, int width, int height,
|
||||||
|
@ -857,6 +846,44 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// High-bit-depth self-guided restoration of a width x height region.
//
// Copies the 16-bit input `dat` into two int32_t working planes, runs
// av1_selfguided_restoration() on each with a different (r, e) pair taken
// from sgr_params[eps], then blends the two results with the weights decoded
// from `xqd` and writes the clipped pixels to `dst`.
//
//   dat, stride      - input pixels and their row pitch (read only here)
//   width, height    - size of the region to process
//   bit_depth        - forwarded to the filter and used for the final clip
//   eps              - index into sgr_params[] (not a raw epsilon value)
//   xqd              - encoded projection weights, decoded via decode_xq()
//   dst, dst_stride  - output pixels and their row pitch
//   tmpbuf           - scratch memory; carved below into flt1, flt2 and the
//                      remainder (tmpbuf2) handed to the filter
static void apply_selfguided_restoration_highbd(uint16_t *dat, int width,
|
||||||
|
int height, int stride,
|
||||||
|
int bit_depth, int eps,
|
||||||
|
int *xqd, uint16_t *dst,
|
||||||
|
int dst_stride, void *tmpbuf) {
|
||||||
|
int xq[2];
|
||||||
|
// Scratch layout: [flt1 | flt2 | tmpbuf2...], with flt1 and flt2 each
// RESTORATION_TILEPELS_MAX int32_t entries long.
int32_t *flt1 = (int32_t *)tmpbuf;
|
||||||
|
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
|
||||||
|
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
|
||||||
|
int i, j;
|
||||||
|
// Pack the strided input into both working planes with row pitch `width`.
for (i = 0; i < height; ++i) {
|
||||||
|
for (j = 0; j < width; ++j) {
|
||||||
|
assert(i * width + j < RESTORATION_TILEPELS_MAX);
|
||||||
|
flt1[i * width + j] = dat[i * stride + j];
|
||||||
|
flt2[i * width + j] = dat[i * stride + j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Filter each plane in place; the stride argument is `width` because the
// planes are packed. Both calls reuse the same scratch area tmpbuf2.
av1_selfguided_restoration(flt1, width, height, width, bit_depth,
|
||||||
|
sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
|
||||||
|
av1_selfguided_restoration(flt2, width, height, width, bit_depth,
|
||||||
|
sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
|
||||||
|
decode_xq(xqd, xq);
|
||||||
|
for (i = 0; i < height; ++i) {
|
||||||
|
for (j = 0; j < width; ++j) {
|
||||||
|
// k indexes the packed planes, l the input, m the output.
const int k = i * width + j;
|
||||||
|
const int l = i * stride + j;
|
||||||
|
const int m = i * dst_stride + j;
|
||||||
|
// u is the source pixel scaled up by SGRPROJ_RST_BITS; f1 and f2 are the
// differences of the two filtered planes from it.
// NOTE(review): presumably the filter output is already scaled by
// 2^SGRPROJ_RST_BITS, since it is compared against u directly - confirm.
const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS);
|
||||||
|
const int32_t f1 = (int32_t)flt1[k] - u;
|
||||||
|
const int32_t f2 = (int32_t)flt2[k] - u;
|
||||||
|
// NOTE(review): every operand here is int/int32_t, so the sum is computed
// in 32 bits and only then widened to int64_t - confirm it cannot overflow.
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
|
||||||
|
// Drop the combined projection and restoration scaling with rounding.
const int16_t w =
|
||||||
|
(int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
|
||||||
|
dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
|
static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
|
||||||
int width, int height, int stride,
|
int width, int height, int stride,
|
||||||
RestorationInternal *rst,
|
RestorationInternal *rst,
|
||||||
|
@ -864,10 +891,9 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
|
||||||
int dst_stride) {
|
int dst_stride) {
|
||||||
const int tile_width = rst->tile_width >> rst->subsampling_x;
|
const int tile_width = rst->tile_width >> rst->subsampling_x;
|
||||||
const int tile_height = rst->tile_height >> rst->subsampling_y;
|
const int tile_height = rst->tile_height >> rst->subsampling_y;
|
||||||
int i, j;
|
|
||||||
int h_start, h_end, v_start, v_end;
|
int h_start, h_end, v_start, v_end;
|
||||||
uint16_t *data_p, *dst_p;
|
uint16_t *data_p, *dst_p;
|
||||||
int64_t *dat = (int64_t *)rst->tmpbuf;
|
uint16_t *dat = (uint16_t *)rst->tmpbuf;
|
||||||
uint8_t *tmpbuf =
|
uint8_t *tmpbuf =
|
||||||
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
|
(uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat);
|
||||||
|
|
||||||
|
@ -880,22 +906,11 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx,
|
||||||
tile_width, tile_height, width, height, 0, 0,
|
tile_width, tile_height, width, height, 0, 0,
|
||||||
&h_start, &h_end, &v_start, &v_end);
|
&h_start, &h_end, &v_start, &v_end);
|
||||||
data_p = data + h_start + v_start * stride;
|
data_p = data + h_start + v_start * stride;
|
||||||
for (i = 0; i < (v_end - v_start); ++i) {
|
|
||||||
for (j = 0; j < (h_end - h_start); ++j) {
|
|
||||||
dat[i * (h_end - h_start) + j] = data_p[i * stride + j];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start,
|
|
||||||
h_end - h_start, bit_depth,
|
|
||||||
rst->rsi->sgrproj_info[tile_idx].ep,
|
|
||||||
rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf);
|
|
||||||
dst_p = dst + h_start + v_start * dst_stride;
|
dst_p = dst + h_start + v_start * dst_stride;
|
||||||
for (i = 0; i < (v_end - v_start); ++i) {
|
apply_selfguided_restoration_highbd(
|
||||||
for (j = 0; j < (h_end - h_start); ++j) {
|
data_p, h_end - h_start, v_end - v_start, stride, bit_depth,
|
||||||
dst_p[i * dst_stride + j] =
|
rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd,
|
||||||
clip_pixel_highbd((int)dat[i * (h_end - h_start) + j], bit_depth);
|
dst_p, dst_stride, tmpbuf);
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
|
static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height,
|
||||||
|
|
|
@ -42,11 +42,10 @@ extern "C" {
|
||||||
#define DOMAINTXFMRF_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(int32_t))
|
#define DOMAINTXFMRF_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(int32_t))
|
||||||
#define DOMAINTXFMRF_BITS (DOMAINTXFMRF_PARAMS_BITS)
|
#define DOMAINTXFMRF_BITS (DOMAINTXFMRF_PARAMS_BITS)
|
||||||
|
|
||||||
// 6 highprecision 64-bit buffers needed for the filter:
|
// 6 high-precision buffers needed for the filter:
|
||||||
// 1 for the degraded frame, 2 for the restored versions and
|
// 1 for the degraded frame, 2 for the restored versions and
|
||||||
// 3 for each restoration operation
|
// 3 for each restoration operation
|
||||||
// TODO(debargha): Explore if we can use 32-bit buffers
|
#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 6 * sizeof(int32_t))
|
||||||
#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 6 * sizeof(int64_t))
|
|
||||||
#define SGRPROJ_PARAMS_BITS 3
|
#define SGRPROJ_PARAMS_BITS 3
|
||||||
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
|
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
|
||||||
|
|
||||||
|
@ -211,7 +210,7 @@ int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width,
|
||||||
int height);
|
int height);
|
||||||
void av1_free_restoration_struct(RestorationInfo *rst_info);
|
void av1_free_restoration_struct(RestorationInfo *rst_info);
|
||||||
|
|
||||||
void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride,
|
void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride,
|
||||||
int bit_depth, int r, int eps, void *tmpbuf);
|
int bit_depth, int r, int eps, void *tmpbuf);
|
||||||
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
|
void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height,
|
||||||
int stride, int param, uint8_t *dst,
|
int stride, int param, uint8_t *dst,
|
||||||
|
|
|
@ -739,7 +739,7 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) {
|
||||||
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
|
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
|
||||||
"Failed to allocate trial restored frame buffer");
|
"Failed to allocate trial restored frame buffer");
|
||||||
cpi->extra_rstbuf = (uint8_t *)aom_realloc(
|
cpi->extra_rstbuf = (uint8_t *)aom_realloc(
|
||||||
cpi->extra_rstbuf, RESTORATION_TILEPELS_MAX * sizeof(int64_t));
|
cpi->extra_rstbuf, RESTORATION_TILEPELS_MAX * sizeof(int32_t));
|
||||||
if (!cpi->extra_rstbuf)
|
if (!cpi->extra_rstbuf)
|
||||||
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
|
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
|
||||||
"Failed to allocate extra rstbuf for restoration");
|
"Failed to allocate extra rstbuf for restoration");
|
||||||
|
|
|
@ -121,10 +121,10 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src,
|
||||||
return filt_err;
|
return filt_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int64_t get_pixel_proj_error(int64_t *src, int width, int height,
|
static int64_t get_pixel_proj_error(int32_t *src, int width, int height,
|
||||||
int src_stride, int64_t *dgd,
|
int src_stride, int32_t *dgd,
|
||||||
int dgd_stride, int64_t *flt1,
|
int dgd_stride, int32_t *flt1,
|
||||||
int flt1_stride, int64_t *flt2,
|
int flt1_stride, int32_t *flt2,
|
||||||
int flt2_stride, int *xqd) {
|
int flt2_stride, int *xqd) {
|
||||||
int i, j;
|
int i, j;
|
||||||
int64_t err = 0;
|
int64_t err = 0;
|
||||||
|
@ -132,12 +132,12 @@ static int64_t get_pixel_proj_error(int64_t *src, int width, int height,
|
||||||
decode_xq(xqd, xq);
|
decode_xq(xqd, xq);
|
||||||
for (i = 0; i < height; ++i) {
|
for (i = 0; i < height; ++i) {
|
||||||
for (j = 0; j < width; ++j) {
|
for (j = 0; j < width; ++j) {
|
||||||
const int64_t s = (int64_t)src[i * src_stride + j];
|
const int32_t s = (int32_t)src[i * src_stride + j];
|
||||||
const int64_t u = (int64_t)dgd[i * dgd_stride + j];
|
const int32_t u = (int32_t)dgd[i * dgd_stride + j];
|
||||||
const int64_t f1 = (int64_t)flt1[i * flt1_stride + j] - u;
|
const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u;
|
||||||
const int64_t f2 = (int64_t)flt2[i * flt2_stride + j] - u;
|
const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u;
|
||||||
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
|
const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
|
||||||
const int64_t e =
|
const int32_t e =
|
||||||
ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
|
ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
|
||||||
ROUND_POWER_OF_TWO(s, SGRPROJ_RST_BITS);
|
ROUND_POWER_OF_TWO(s, SGRPROJ_RST_BITS);
|
||||||
err += e * e;
|
err += e * e;
|
||||||
|
@ -146,9 +146,9 @@ static int64_t get_pixel_proj_error(int64_t *src, int width, int height,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void get_proj_subspace(int64_t *src, int width, int height,
|
static void get_proj_subspace(int32_t *src, int width, int height,
|
||||||
int src_stride, int64_t *dgd, int dgd_stride,
|
int src_stride, int32_t *dgd, int dgd_stride,
|
||||||
int64_t *flt1, int flt1_stride, int64_t *flt2,
|
int32_t *flt1, int flt1_stride, int32_t *flt2,
|
||||||
int flt2_stride, int *xq) {
|
int flt2_stride, int *xq) {
|
||||||
int i, j;
|
int i, j;
|
||||||
double H[2][2] = { { 0, 0 }, { 0, 0 } };
|
double H[2][2] = { { 0, 0 }, { 0, 0 } };
|
||||||
|
@ -198,10 +198,10 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
|
||||||
int src_stride, int bit_depth,
|
int src_stride, int bit_depth,
|
||||||
int *eps, int *xqd, void *srcbuf,
|
int *eps, int *xqd, void *srcbuf,
|
||||||
void *rstbuf) {
|
void *rstbuf) {
|
||||||
int64_t *srd = (int64_t *)srcbuf;
|
int32_t *srd = (int32_t *)srcbuf;
|
||||||
int64_t *dgd = (int64_t *)rstbuf;
|
int32_t *dgd = (int32_t *)rstbuf;
|
||||||
int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
|
int32_t *flt1 = dgd + RESTORATION_TILEPELS_MAX;
|
||||||
int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
|
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
|
||||||
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
|
uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX);
|
||||||
int i, j, ep, bestep = 0;
|
int i, j, ep, bestep = 0;
|
||||||
int64_t err, besterr = -1;
|
int64_t err, besterr = -1;
|
||||||
|
@ -213,11 +213,11 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
|
||||||
uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
|
uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
|
||||||
for (i = 0; i < height; ++i) {
|
for (i = 0; i < height; ++i) {
|
||||||
for (j = 0; j < width; ++j) {
|
for (j = 0; j < width; ++j) {
|
||||||
flt1[i * width + j] = (int64_t)dat[i * dat_stride + j];
|
flt1[i * width + j] = (int32_t)dat[i * dat_stride + j];
|
||||||
flt2[i * width + j] = (int64_t)dat[i * dat_stride + j];
|
flt2[i * width + j] = (int32_t)dat[i * dat_stride + j];
|
||||||
dgd[i * width + j] = (int64_t)dat[i * dat_stride + j]
|
dgd[i * width + j] = (int32_t)dat[i * dat_stride + j]
|
||||||
<< SGRPROJ_RST_BITS;
|
<< SGRPROJ_RST_BITS;
|
||||||
srd[i * width + j] = (int64_t)src[i * src_stride + j]
|
srd[i * width + j] = (int32_t)src[i * src_stride + j]
|
||||||
<< SGRPROJ_RST_BITS;
|
<< SGRPROJ_RST_BITS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -228,10 +228,10 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
|
||||||
for (j = 0; j < width; ++j) {
|
for (j = 0; j < width; ++j) {
|
||||||
const int k = i * width + j;
|
const int k = i * width + j;
|
||||||
const int l = i * dat_stride + j;
|
const int l = i * dat_stride + j;
|
||||||
flt1[k] = (int64_t)dat[l];
|
flt1[k] = (int32_t)dat[l];
|
||||||
flt2[k] = (int64_t)dat[l];
|
flt2[k] = (int32_t)dat[l];
|
||||||
dgd[k] = (int64_t)dat[l] << SGRPROJ_RST_BITS;
|
dgd[k] = (int32_t)dat[l] << SGRPROJ_RST_BITS;
|
||||||
srd[k] = (int64_t)src[i * src_stride + j] << SGRPROJ_RST_BITS;
|
srd[k] = (int32_t)src[i * src_stride + j] << SGRPROJ_RST_BITS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче