Reformatting the deringing code
Manually removed the "clang-format off" lines. The rest is done by clang. Change-Id: I88a2028b55a541729b4e8896cdf66b544e9898bb
This commit is contained in:
Родитель
e254241ce7
Коммит
39d92a071d
|
@ -9,8 +9,6 @@
|
|||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
|
@ -72,39 +70,37 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
|
|||
return count;
|
||||
}
|
||||
|
||||
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
|
||||
static INLINE void copy_8x8_16bit_to_8bit(uint8_t *dst, int dstride,
|
||||
int16_t *src, int sstride) {
|
||||
int i, j;
|
||||
for (i = 0; i < 8; i++)
|
||||
for (j = 0; j < 8; j++)
|
||||
dst[i * dstride + j] = src[i * sstride + j];
|
||||
for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
|
||||
}
|
||||
|
||||
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src, int sstride) {
|
||||
static INLINE void copy_4x4_16bit_to_8bit(uint8_t *dst, int dstride,
|
||||
int16_t *src, int sstride) {
|
||||
int i, j;
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
dst[i * dstride + j] = src[i * sstride + j];
|
||||
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
|
||||
}
|
||||
|
||||
/* TODO: Optimize this function for SSE. */
|
||||
void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src,
|
||||
dering_list *dlist, int dering_count, int bsize)
|
||||
{
|
||||
dering_list *dlist, int dering_count,
|
||||
int bsize) {
|
||||
int bi, bx, by;
|
||||
if (bsize == 3) {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)],
|
||||
dstride,
|
||||
copy_8x8_16bit_to_8bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
} else {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)],
|
||||
dstride,
|
||||
copy_4x4_16bit_to_8bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
}
|
||||
|
@ -112,15 +108,14 @@ void copy_dering_16bit_to_8bit(uint8_t *dst, int dstride, int16_t *src,
|
|||
|
||||
/* TODO: Optimize this function for SSE. */
|
||||
static void copy_sb8_16(AV1_COMMON *cm, int16_t *dst, int dstride,
|
||||
const uint8_t *src, int src_voffset, int src_hoffset, int sstride,
|
||||
int vsize, int hsize)
|
||||
{
|
||||
const uint8_t *src, int src_voffset, int src_hoffset,
|
||||
int sstride, int vsize, int hsize) {
|
||||
int r, c;
|
||||
(void)cm;
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
const uint16_t *base = &CONVERT_TO_SHORTPTR(src)[src_voffset * sstride
|
||||
+ src_hoffset];
|
||||
const uint16_t *base =
|
||||
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
|
||||
for (r = 0; r < vsize; r++) {
|
||||
for (c = 0; c < hsize; c++) {
|
||||
dst[r * dstride + c] = base[r * sstride + c];
|
||||
|
@ -190,8 +185,7 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
int level;
|
||||
int nhb, nvb;
|
||||
int cstart = 0;
|
||||
if (!dering_left)
|
||||
cstart = -OD_FILT_HBORDER;
|
||||
if (!dering_left) cstart = -OD_FILT_HBORDER;
|
||||
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
|
||||
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
|
||||
level = compute_level_from_index(
|
||||
|
@ -200,8 +194,8 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
->mbmi.dering_gain);
|
||||
curr_row_dering[sbc] = 0;
|
||||
if (level == 0 ||
|
||||
(dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
|
||||
sbc * MAX_MIB_SIZE, dlist)) == 0) {
|
||||
(dering_count = sb_compute_dering_list(
|
||||
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
|
||||
dering_left = 0;
|
||||
continue;
|
||||
}
|
||||
|
@ -225,8 +219,8 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
OD_DERING_VERY_LARGE to avoid filtering with the outside. */
|
||||
for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
|
||||
for (c = cend; c < (nhb << bsize[pli]) + OD_FILT_HBORDER; ++c) {
|
||||
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER]
|
||||
= OD_DERING_VERY_LARGE;
|
||||
src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
|
||||
OD_DERING_VERY_LARGE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -242,15 +236,18 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
}
|
||||
/* Copy in the pixels we need from the current superblock for
|
||||
deringing.*/
|
||||
copy_sb8_16(cm, &src[OD_FILT_VBORDER*OD_FILT_BSTRIDE + OD_FILT_HBORDER
|
||||
+ cstart], OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
|
||||
copy_sb8_16(
|
||||
cm,
|
||||
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
|
||||
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * sbr, coffset + cstart,
|
||||
xd->plane[pli].dst.stride, rend, cend - cstart);
|
||||
if (!prev_row_dering[sbc]) {
|
||||
copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
|
||||
xd->plane[pli].dst.buf,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER, coffset,
|
||||
xd->plane[pli].dst.stride, OD_FILT_VBORDER, nhb << bsize[pli]);
|
||||
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
|
||||
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
|
||||
nhb << bsize[pli]);
|
||||
} else if (sbr > 0) {
|
||||
for (r = 0; r < OD_FILT_VBORDER; r++) {
|
||||
for (c = 0; c < nhb << bsize[pli]; c++) {
|
||||
|
@ -267,11 +264,10 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
}
|
||||
}
|
||||
if (!prev_row_dering[sbc - 1]) {
|
||||
copy_sb8_16(cm, src, OD_FILT_BSTRIDE,
|
||||
xd->plane[pli].dst.buf,
|
||||
copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
|
||||
coffset - OD_FILT_HBORDER,
|
||||
xd->plane[pli].dst.stride, OD_FILT_VBORDER, OD_FILT_HBORDER);
|
||||
coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
|
||||
OD_FILT_VBORDER, OD_FILT_HBORDER);
|
||||
} else if (sbr > 0 && sbc > 0) {
|
||||
for (r = 0; r < OD_FILT_VBORDER; r++) {
|
||||
for (c = -OD_FILT_HBORDER; c < 0; c++) {
|
||||
|
@ -291,8 +287,8 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << bsize[pli])],
|
||||
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
|
||||
coffset + (nhb << bsize[pli]),
|
||||
xd->plane[pli].dst.stride, OD_FILT_VBORDER, OD_FILT_HBORDER);
|
||||
coffset + (nhb << bsize[pli]), xd->plane[pli].dst.stride,
|
||||
OD_FILT_VBORDER, OD_FILT_HBORDER);
|
||||
} else if (sbr > 0 && sbc < nhsb - 1) {
|
||||
for (r = 0; r < OD_FILT_VBORDER; r++) {
|
||||
for (c = nhb << bsize[pli];
|
||||
|
@ -323,13 +319,13 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
for (c = 0; c < OD_FILT_HBORDER; c++) {
|
||||
/* Saving pixels in case we need to dering the superblock on the
|
||||
right. */
|
||||
colbuf[pli][r][c] = src[r * OD_FILT_BSTRIDE + c
|
||||
+ (nhb << bsize[pli])];
|
||||
colbuf[pli][r][c] =
|
||||
src[r * OD_FILT_BSTRIDE + c + (nhb << bsize[pli])];
|
||||
}
|
||||
}
|
||||
copy_sb8_16(cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER, coffset,
|
||||
xd->plane[pli].dst.stride, OD_FILT_VBORDER,
|
||||
(MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER,
|
||||
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
|
||||
(nhb << bsize[pli]));
|
||||
|
||||
/* FIXME: This is a temporary hack that uses more conservative
|
||||
|
@ -339,26 +335,26 @@ void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
|
|||
else
|
||||
threshold = level << coeff_shift;
|
||||
if (threshold == 0) continue;
|
||||
od_dering(dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE
|
||||
+ OD_FILT_HBORDER],
|
||||
od_dering(
|
||||
dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
|
||||
dec[pli], dir, pli, dlist, dering_count, threshold, coeff_shift);
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
if (cm->use_highbitdepth) {
|
||||
copy_dering_16bit_to_16bit(
|
||||
(int16_t *)&CONVERT_TO_SHORTPTR(
|
||||
xd->plane[pli].dst.buf)[xd->plane[pli].dst.stride *
|
||||
xd->plane[pli]
|
||||
.dst.buf)[xd->plane[pli].dst.stride *
|
||||
(MAX_MIB_SIZE * sbr << bsize[pli]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli])],
|
||||
xd->plane[pli].dst.stride, dst, dlist,
|
||||
dering_count, 3 - dec[pli]);
|
||||
xd->plane[pli].dst.stride, dst, dlist, dering_count,
|
||||
3 - dec[pli]);
|
||||
} else {
|
||||
#endif
|
||||
copy_dering_16bit_to_8bit(
|
||||
&xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
|
||||
(MAX_MIB_SIZE * sbr << bsize[pli]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[pli])],
|
||||
xd->plane[pli].dst.stride, dst, dlist,
|
||||
dering_count, bsize[pli]);
|
||||
xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]);
|
||||
#if CONFIG_AOM_HIGHBITDEPTH
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -11,8 +11,6 @@
|
|||
#ifndef AV1_COMMON_DERING_H_
|
||||
#define AV1_COMMON_DERING_H_
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include "av1/common/od_dering.h"
|
||||
#include "av1/common/onyxc_int.h"
|
||||
#include "aom/aom_integer.h"
|
||||
|
|
|
@ -12,8 +12,6 @@
|
|||
#include "config.h"
|
||||
#endif
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include "dering.h"
|
||||
|
@ -258,39 +256,37 @@ static INLINE int od_adjust_thresh(int threshold, int32_t var) {
|
|||
return (threshold * OD_THRESH_TABLE_Q8[OD_ILOG(v1)] + 128) >> 8;
|
||||
}
|
||||
|
||||
static INLINE void copy_8x8_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
|
||||
static INLINE void copy_8x8_16bit_to_16bit(int16_t *dst, int dstride,
|
||||
int16_t *src, int sstride) {
|
||||
int i, j;
|
||||
for (i = 0; i < 8; i++)
|
||||
for (j = 0; j < 8; j++)
|
||||
dst[i * dstride + j] = src[i * sstride + j];
|
||||
for (j = 0; j < 8; j++) dst[i * dstride + j] = src[i * sstride + j];
|
||||
}
|
||||
|
||||
static INLINE void copy_4x4_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src, int sstride) {
|
||||
static INLINE void copy_4x4_16bit_to_16bit(int16_t *dst, int dstride,
|
||||
int16_t *src, int sstride) {
|
||||
int i, j;
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
dst[i * dstride + j] = src[i * sstride + j];
|
||||
for (j = 0; j < 4; j++) dst[i * dstride + j] = src[i * sstride + j];
|
||||
}
|
||||
|
||||
/* TODO: Optimize this function for SSE. */
|
||||
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
|
||||
dering_list *dlist, int dering_count, int bsize)
|
||||
{
|
||||
dering_list *dlist, int dering_count,
|
||||
int bsize) {
|
||||
int bi, bx, by;
|
||||
if (bsize == 3) {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)],
|
||||
dstride,
|
||||
copy_8x8_16bit_to_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
} else {
|
||||
for (bi = 0; bi < dering_count; bi++) {
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)],
|
||||
dstride,
|
||||
copy_4x4_16bit_to_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride,
|
||||
&src[bi << 2 * bsize], 1 << bsize);
|
||||
}
|
||||
}
|
||||
|
@ -317,8 +313,8 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
|
|||
int32_t var;
|
||||
by = dlist[bi].by;
|
||||
bx = dlist[bi].bx;
|
||||
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx], OD_FILT_BSTRIDE,
|
||||
&var, coeff_shift);
|
||||
dir[by][bx] = od_dir_find8(&in[8 * by * OD_FILT_BSTRIDE + 8 * bx],
|
||||
OD_FILT_BSTRIDE, &var, coeff_shift);
|
||||
/* Deringing orthogonal to the direction uses a tighter threshold
|
||||
because we want to be conservative. We've presumably already
|
||||
achieved some deringing, so the amount of change is expected
|
||||
|
@ -350,7 +346,7 @@ void od_dering(int16_t *y, int16_t *in, int xdec,
|
|||
if (filter2_thresh[by][bx] == 0) continue;
|
||||
(filter_dering_orthogonal[bsize - OD_LOG_BSIZE0])(
|
||||
&y[bi << 2 * bsize], 1 << bsize,
|
||||
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)], filter2_thresh[by][bx],
|
||||
dir[by][bx]);
|
||||
&in[(by * OD_FILT_BSTRIDE << bsize) + (bx << bsize)],
|
||||
filter2_thresh[by][bx], dir[by][bx]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,8 +12,6 @@
|
|||
#if !defined(_dering_H)
|
||||
#define _dering_H (1)
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include "odintrin.h"
|
||||
|
||||
#define OD_DERINGSIZES (2)
|
||||
|
@ -47,7 +45,8 @@ typedef void (*od_filter_dering_orthogonal_func)(int16_t *y, int ystride,
|
|||
const int16_t *in,
|
||||
int threshold, int dir);
|
||||
void copy_dering_16bit_to_16bit(int16_t *dst, int dstride, int16_t *src,
|
||||
dering_list *dlist, int dering_count, int bsize);
|
||||
dering_list *dlist, int dering_count,
|
||||
int bsize);
|
||||
|
||||
void od_dering(int16_t *y, int16_t *in, int xdec,
|
||||
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS], int pli,
|
||||
|
|
|
@ -9,8 +9,6 @@
|
|||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
// clang-format off
|
||||
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
|
@ -101,10 +99,9 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
int16_t tmp_dst[MAX_MIB_SIZE * MAX_MIB_SIZE * 8 * 8];
|
||||
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
|
||||
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
|
||||
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE,
|
||||
dlist);
|
||||
if (dering_count == 0)
|
||||
continue;
|
||||
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
|
||||
sbc * MAX_MIB_SIZE, dlist);
|
||||
if (dering_count == 0) continue;
|
||||
best_gi = 0;
|
||||
for (gi = 0; gi < DERING_REFINEMENT_LEVELS; gi++) {
|
||||
int cur_mse;
|
||||
|
@ -123,13 +120,16 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
}
|
||||
in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
|
||||
/* We avoid filtering the pixels for which some of the pixels to average
|
||||
are outside the frame. We could change the filter instead, but it would
|
||||
are outside the frame. We could change the filter instead, but it
|
||||
would
|
||||
add special cases for any future vectorization. */
|
||||
for (i = 0; i < OD_DERING_INBUF_SIZE; i++) inbuf[i] = OD_DERING_VERY_LARGE;
|
||||
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
|
||||
inbuf[i] = OD_DERING_VERY_LARGE;
|
||||
for (i = -OD_FILT_VBORDER * (sbr != 0);
|
||||
i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
|
||||
for (j = -OD_FILT_HBORDER * (sbc != 0);
|
||||
j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1); j++) {
|
||||
j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
|
||||
j++) {
|
||||
int16_t *x;
|
||||
x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
|
||||
(sbc * MAX_MIB_SIZE << bsize[0])];
|
||||
|
@ -138,8 +138,8 @@ int av1_dering_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
|
|||
}
|
||||
od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold,
|
||||
coeff_shift);
|
||||
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst, dlist,
|
||||
dering_count, bsize[0]);
|
||||
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
|
||||
dlist, dering_count, bsize[0]);
|
||||
cur_mse = (int)compute_dist(
|
||||
dst, MAX_MIB_SIZE << bsize[0],
|
||||
&ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
|
||||
|
|
Загрузка…
Ссылка в новой задаче