From 519dbcf19b2210f9682093483d498b1d423eec42 Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Fri, 16 Dec 2016 03:13:02 -0800 Subject: [PATCH] Further optimizations of loop restoration Change-Id: I4c4300f3f565d8aecf65669b77aaa874bb73a3a0 --- av1/common/restoration.c | 177 +++++++++++++++++++++------------------ av1/common/restoration.h | 7 +- av1/encoder/encoder.c | 2 +- av1/encoder/pickrst.c | 48 +++++------ 4 files changed, 124 insertions(+), 110 deletions(-) diff --git a/av1/common/restoration.c b/av1/common/restoration.c index 14485c58b..6bff42c09 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c @@ -27,7 +27,7 @@ static int domaintxfmrf_vtable[DOMAINTXFMRF_ITERS][DOMAINTXFMRF_PARAMS][256]; static const int override_y_only[RESTORE_TYPES] = { 1, 1, 1, 1, 1 }; static const int domaintxfmrf_params[DOMAINTXFMRF_PARAMS] = { - 48, 52, 56, 60, 64, 68, 72, 76, 80, 82, 84, 86, 88, + 32, 40, 48, 56, 64, 68, 72, 76, 80, 82, 84, 86, 88, 90, 92, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 134, @@ -252,8 +252,8 @@ static void loop_wiener_filter(uint8_t *data, int width, int height, int stride, } } -static void boxsum(int64_t *src, int width, int height, int src_stride, int r, - int sqr, int64_t *dst, int dst_stride, int64_t *tmp, +static void boxsum(int32_t *src, int width, int height, int src_stride, int r, + int sqr, int32_t *dst, int dst_stride, int32_t *tmp, int tmp_stride) { int i, j; @@ -342,11 +342,11 @@ void decode_xq(int *xqd, int *xq) { } #define APPROXIMATE_SGR 1 -void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, +void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride, int bit_depth, int r, int eps, void *tmpbuf) { - int64_t *A = (int64_t *)tmpbuf; - int64_t *B = A + RESTORATION_TILEPELS_MAX; - int64_t *T = B + RESTORATION_TILEPELS_MAX; + int32_t *A = (int32_t *)tmpbuf; + int32_t *B = A + RESTORATION_TILEPELS_MAX; + int32_t *T = B + RESTORATION_TILEPELS_MAX; int8_t num[RESTORATION_TILEPELS_MAX]; int i, j; eps <<= 2 * (bit_depth - 8); @@ -358,10 +358,9 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, for (j = 0; j < width; ++j) { const int k = i * width + j; const int n = num[k]; - int64_t den; - A[k] = A[k] * n - B[k] * B[k]; - den = A[k] + n * n * eps; - A[k] = ((A[k] << SGRPROJ_SGR_BITS) + (den >> 1)) / den; + const int64_t p = A[k] * n - B[k] * B[k]; + const int64_t q = p + n * n * eps; + A[k] = (int32_t)((p << SGRPROJ_SGR_BITS) + (q >> 1)) / q; B[k] = ((SGRPROJ_SGR - A[k]) * B[k] + (n >> 1)) / n; } } @@ -372,11 +371,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = + const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k + width] + A[k + width + 1]; - const int64_t b = + const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k + width] + B[k + width + 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -386,11 +385,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = + const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k + width] + A[k + width - 1]; - const int64_t b = + const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k + width] + B[k + width - 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -400,11 +399,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = + const int32_t a = 3 * A[k] + 2 * A[k + 1] + 2 * A[k - width] + A[k - width + 1]; - const int64_t b = + const int32_t b = 3 * B[k] + 2 * B[k + 1] + 2 * B[k - width] + B[k - width + 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -414,11 +413,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = + const int32_t a = 3 * A[k] + 2 * A[k - 1] + 2 * A[k - width] + A[k - width - 1]; - const int64_t b = + const int32_t b = 3 * B[k] + 2 * B[k - 1] + 2 * B[k - width] + B[k - width - 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -427,11 +426,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] + + const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k + width] + A[k + width - 1] + A[k + width + 1]; - const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] + + const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k + width] + B[k + width - 1] + B[k + width + 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -440,11 +439,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] + + const int32_t a = A[k] + 2 * (A[k - 1] + A[k + 1]) + A[k - width] + A[k - width - 1] + A[k - width + 1]; - const int64_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] + + const int32_t b = B[k] + 2 * (B[k - 1] + B[k + 1]) + B[k - width] + B[k - width - 1] + B[k - width + 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -453,11 +452,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] + + const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k + 1] + A[k - width + 1] + A[k + width + 1]; - const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] + + const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k + 1] + B[k - width + 1] + B[k + width + 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -466,11 +465,11 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 3; - const int64_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] + + const int32_t a = A[k] + 2 * (A[k - width] + A[k + width]) + A[k - 1] + A[k - width - 1] + A[k + width - 1]; - const int64_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] + + const int32_t b = B[k] + 2 * (B[k - width] + B[k + width]) + B[k - 1] + B[k - width - 1] + B[k + width - 1]; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -479,17 +478,17 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int nb = 5; - const int64_t a = + const int32_t a = (A[k] + A[k - 1] + A[k + 1] + A[k - width] + A[k + width]) * 4 + (A[k - 1 - width] + A[k - 1 + width] + A[k + 1 - width] + A[k + 1 + width]) * 3; - const int64_t b = + const int32_t b = (B[k] + B[k - 1] + B[k + 1] + B[k - width] + B[k + width]) * 4 + (B[k - 1 - width] + B[k - 1 + width] + B[k + 1 - width] + B[k + 1 + width]) * 3; - const int64_t v = + const int32_t v = (((a * dgd[l] + b) << SGRPROJ_RST_BITS) + (1 << nb) / 2) >> nb; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -503,7 +502,7 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, const int k = i * width + j; const int l = i * stride + j; const int n = num[k]; - const int64_t v = + const int32_t v = (((A[k] * dgd[l] + B[k]) << SGRPROJ_RST_BITS) + (n >> 1)) / n; dgd[l] = ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS); } @@ -511,12 +510,13 @@ void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, #endif // APPROXIMATE_SGR } -static void apply_selfguided_restoration(int64_t *dat, int width, int height, +static void apply_selfguided_restoration(uint8_t *dat, int width, int height, int stride, int bit_depth, int eps, - int *xqd, void *tmpbuf) { + int *xqd, uint8_t *dst, int dst_stride, + void *tmpbuf) { int xq[2]; - int64_t *flt1 = (int64_t *)tmpbuf; - int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *flt1 = (int32_t *)tmpbuf; + int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); int i, j; for (i = 0; i < height; ++i) { @@ -535,13 +535,14 @@ static void apply_selfguided_restoration(int64_t *dat, int width, int height, for (j = 0; j < width; ++j) { const int k = i * width + j; const int l = i * stride + j; - const int64_t u = ((int64_t)dat[l] << SGRPROJ_RST_BITS); - const int64_t f1 = (int64_t)flt1[k] - u; - const int64_t f2 = (int64_t)flt2[k] - u; + const int m = i * dst_stride + j; + const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); + const int32_t f1 = (int32_t)flt1[k] - u; + const int32_t f2 = (int32_t)flt2[k] - u; const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); const int16_t w = (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); - dat[l] = w; + dst[m] = clip_pixel(w); } } } @@ -552,10 +553,9 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width, int dst_stride) { const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_height = rst->tile_height >> rst->subsampling_y; - int i, j; int h_start, h_end, v_start, v_end; uint8_t *data_p, *dst_p; - int64_t *dat = (int64_t *)rst->tmpbuf; + uint8_t *dat = (uint8_t *)rst->tmpbuf; uint8_t *tmpbuf = (uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat); @@ -568,22 +568,11 @@ static void loop_sgrproj_filter_tile(uint8_t *data, int tile_idx, int width, tile_width, tile_height, width, height, 0, 0, &h_start, &h_end, &v_start, &v_end); data_p = data + h_start + v_start * stride; - for (i = 0; i < (v_end - v_start); ++i) { - for (j = 0; j < (h_end - h_start); ++j) { - dat[i * (h_end - h_start) + j] = data_p[i * stride + j]; - } - } - apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start, - h_end - h_start, 8, - rst->rsi->sgrproj_info[tile_idx].ep, - rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf); dst_p = dst + h_start + v_start * dst_stride; - for (i = 0; i < (v_end - v_start); ++i) { - for (j = 0; j < (h_end - h_start); ++j) { - dst_p[i * dst_stride + j] = - clip_pixel((int)dat[i * (h_end - h_start) + j]); - } - } + apply_selfguided_restoration(data_p, h_end - h_start, v_end - v_start, stride, + 8, rst->rsi->sgrproj_info[tile_idx].ep, + rst->rsi->sgrproj_info[tile_idx].xqd, dst_p, + dst_stride, tmpbuf); } static void loop_sgrproj_filter(uint8_t *data, int width, int height, @@ -857,6 +846,44 @@ static void loop_wiener_filter_highbd(uint8_t *data8, int width, int height, } } +static void apply_selfguided_restoration_highbd(uint16_t *dat, int width, + int height, int stride, + int bit_depth, int eps, + int *xqd, uint16_t *dst, + int dst_stride, void *tmpbuf) { + int xq[2]; + int32_t *flt1 = (int32_t *)tmpbuf; + int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); + int i, j; + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + assert(i * width + j < RESTORATION_TILEPELS_MAX); + flt1[i * width + j] = dat[i * stride + j]; + flt2[i * width + j] = dat[i * stride + j]; + } + } + av1_selfguided_restoration(flt1, width, height, width, bit_depth, + sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2); + av1_selfguided_restoration(flt2, width, height, width, bit_depth, + sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2); + decode_xq(xqd, xq); + for (i = 0; i < height; ++i) { + for (j = 0; j < width; ++j) { + const int k = i * width + j; + const int l = i * stride + j; + const int m = i * dst_stride + j; + const int32_t u = ((int32_t)dat[l] << SGRPROJ_RST_BITS); + const int32_t f1 = (int32_t)flt1[k] - u; + const int32_t f2 = (int32_t)flt2[k] - u; + const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); + const int16_t w = + (int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS); + dst[m] = (uint16_t)clip_pixel_highbd(w, bit_depth); + } + } +} + static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, int width, int height, int stride, RestorationInternal *rst, @@ -864,10 +891,9 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, int dst_stride) { const int tile_width = rst->tile_width >> rst->subsampling_x; const int tile_height = rst->tile_height >> rst->subsampling_y; - int i, j; int h_start, h_end, v_start, v_end; uint16_t *data_p, *dst_p; - int64_t *dat = (int64_t *)rst->tmpbuf; + uint16_t *dat = (uint16_t *)rst->tmpbuf; uint8_t *tmpbuf = (uint8_t *)rst->tmpbuf + RESTORATION_TILEPELS_MAX * sizeof(*dat); @@ -880,22 +906,11 @@ static void loop_sgrproj_filter_tile_highbd(uint16_t *data, int tile_idx, tile_width, tile_height, width, height, 0, 0, &h_start, &h_end, &v_start, &v_end); data_p = data + h_start + v_start * stride; - for (i = 0; i < (v_end - v_start); ++i) { - for (j = 0; j < (h_end - h_start); ++j) { - dat[i * (h_end - h_start) + j] = data_p[i * stride + j]; - } - } - apply_selfguided_restoration(dat, h_end - h_start, v_end - v_start, - h_end - h_start, bit_depth, - rst->rsi->sgrproj_info[tile_idx].ep, - rst->rsi->sgrproj_info[tile_idx].xqd, tmpbuf); dst_p = dst + h_start + v_start * dst_stride; - for (i = 0; i < (v_end - v_start); ++i) { - for (j = 0; j < (h_end - h_start); ++j) { - dst_p[i * dst_stride + j] = - clip_pixel_highbd((int)dat[i * (h_end - h_start) + j], bit_depth); - } - } + apply_selfguided_restoration_highbd( + data_p, h_end - h_start, v_end - v_start, stride, bit_depth, + rst->rsi->sgrproj_info[tile_idx].ep, rst->rsi->sgrproj_info[tile_idx].xqd, + dst_p, dst_stride, tmpbuf); } static void loop_sgrproj_filter_highbd(uint8_t *data8, int width, int height, diff --git a/av1/common/restoration.h b/av1/common/restoration.h index 727d93e89..f0f259a02 100644 --- a/av1/common/restoration.h +++ b/av1/common/restoration.h @@ -42,11 +42,10 @@ extern "C" { #define DOMAINTXFMRF_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * sizeof(int32_t)) #define DOMAINTXFMRF_BITS (DOMAINTXFMRF_PARAMS_BITS) -// 6 highprecision 64-bit buffers needed for the filter: +// 6 highprecision buffers needed for the filter: // 1 for the degraded frame, 2 for the restored versions and // 3 for each restoration operation -// TODO(debargha): Explore if we can use 32-bit buffers -#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 6 * sizeof(int64_t)) +#define SGRPROJ_TMPBUF_SIZE (RESTORATION_TILEPELS_MAX * 6 * sizeof(int32_t)) #define SGRPROJ_PARAMS_BITS 3 #define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS) @@ -211,7 +210,7 @@ int av1_alloc_restoration_struct(RestorationInfo *rst_info, int width, int height); void av1_free_restoration_struct(RestorationInfo *rst_info); -void av1_selfguided_restoration(int64_t *dgd, int width, int height, int stride, +void av1_selfguided_restoration(int32_t *dgd, int width, int height, int stride, int bit_depth, int r, int eps, void *tmpbuf); void av1_domaintxfmrf_restoration(uint8_t *dgd, int width, int height, int stride, int param, uint8_t *dst, diff --git a/av1/encoder/encoder.c b/av1/encoder/encoder.c index fcb77a3a6..b4cc57821 100644 --- a/av1/encoder/encoder.c +++ b/av1/encoder/encoder.c @@ -739,7 +739,7 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) { aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate trial restored frame buffer"); cpi->extra_rstbuf = (uint8_t *)aom_realloc( - cpi->extra_rstbuf, RESTORATION_TILEPELS_MAX * sizeof(int64_t)); + cpi->extra_rstbuf, RESTORATION_TILEPELS_MAX * sizeof(int32_t)); if (!cpi->extra_rstbuf) aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate extra rstbuf for restoration"); diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index d904a322f..9ee58ff0a 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c @@ -121,10 +121,10 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src, return filt_err; } -static int64_t get_pixel_proj_error(int64_t *src, int width, int height, - int src_stride, int64_t *dgd, - int dgd_stride, int64_t *flt1, - int flt1_stride, int64_t *flt2, +static int64_t get_pixel_proj_error(int32_t *src, int width, int height, + int src_stride, int32_t *dgd, + int dgd_stride, int32_t *flt1, + int flt1_stride, int32_t *flt2, int flt2_stride, int *xqd) { int i, j; int64_t err = 0; @@ -132,12 +132,12 @@ static int64_t get_pixel_proj_error(int64_t *src, int width, int height, decode_xq(xqd, xq); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { - const int64_t s = (int64_t)src[i * src_stride + j]; - const int64_t u = (int64_t)dgd[i * dgd_stride + j]; - const int64_t f1 = (int64_t)flt1[i * flt1_stride + j] - u; - const int64_t f2 = (int64_t)flt2[i * flt2_stride + j] - u; + const int32_t s = (int32_t)src[i * src_stride + j]; + const int32_t u = (int32_t)dgd[i * dgd_stride + j]; + const int32_t f1 = (int32_t)flt1[i * flt1_stride + j] - u; + const int32_t f2 = (int32_t)flt2[i * flt2_stride + j] - u; const int64_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS); - const int64_t e = + const int32_t e = ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - ROUND_POWER_OF_TWO(s, SGRPROJ_RST_BITS); err += e * e; @@ -146,9 +146,9 @@ static int64_t get_pixel_proj_error(int64_t *src, int width, int height, return err; } -static void get_proj_subspace(int64_t *src, int width, int height, - int src_stride, int64_t *dgd, int dgd_stride, - int64_t *flt1, int flt1_stride, int64_t *flt2, +static void get_proj_subspace(int32_t *src, int width, int height, + int src_stride, int32_t *dgd, int dgd_stride, + int32_t *flt1, int flt1_stride, int32_t *flt2, int flt2_stride, int *xq) { int i, j; double H[2][2] = { { 0, 0 }, { 0, 0 } }; @@ -198,10 +198,10 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, int src_stride, int bit_depth, int *eps, int *xqd, void *srcbuf, void *rstbuf) { - int64_t *srd = (int64_t *)srcbuf; - int64_t *dgd = (int64_t *)rstbuf; - int64_t *flt1 = dgd + RESTORATION_TILEPELS_MAX; - int64_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; + int32_t *srd = (int32_t *)srcbuf; + int32_t *dgd = (int32_t *)rstbuf; + int32_t *flt1 = dgd + RESTORATION_TILEPELS_MAX; + int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; uint8_t *tmpbuf2 = (uint8_t *)(flt2 + RESTORATION_TILEPELS_MAX); int i, j, ep, bestep = 0; int64_t err, besterr = -1; @@ -213,11 +213,11 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, uint16_t *dat = CONVERT_TO_SHORTPTR(dat8); for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { - flt1[i * width + j] = (int64_t)dat[i * dat_stride + j]; - flt2[i * width + j] = (int64_t)dat[i * dat_stride + j]; - dgd[i * width + j] = (int64_t)dat[i * dat_stride + j] + flt1[i * width + j] = (int32_t)dat[i * dat_stride + j]; + flt2[i * width + j] = (int32_t)dat[i * dat_stride + j]; + dgd[i * width + j] = (int32_t)dat[i * dat_stride + j] << SGRPROJ_RST_BITS; - srd[i * width + j] = (int64_t)src[i * src_stride + j] + srd[i * width + j] = (int32_t)src[i * src_stride + j] << SGRPROJ_RST_BITS; } } @@ -228,10 +228,10 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, for (j = 0; j < width; ++j) { const int k = i * width + j; const int l = i * dat_stride + j; - flt1[k] = (int64_t)dat[l]; - flt2[k] = (int64_t)dat[l]; - dgd[k] = (int64_t)dat[l] << SGRPROJ_RST_BITS; - srd[k] = (int64_t)src[i * src_stride + j] << SGRPROJ_RST_BITS; + flt1[k] = (int32_t)dat[l]; + flt2[k] = (int32_t)dat[l]; + dgd[k] = (int32_t)dat[l] << SGRPROJ_RST_BITS; + srd[k] = (int32_t)src[i * src_stride + j] << SGRPROJ_RST_BITS; } } }