Do real chroma RDO search for CDEF

Chroma now has a list of strengths too, with the superblock signalling
shared between luma and chroma.

low-latency, cpu=4:

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
-0.0114 | -1.4626 | -1.4745 |  -0.0423 | 0.0430 | -0.0001 |    -0.7416

Change-Id: I389c77f1d80020f810e45f8502c656ad9d397c8c
This commit is contained in:
Jean-Marc Valin 2017-03-22 17:09:51 -04:00 коммит произвёл Jean-Marc Valin
Родитель b9370acd43
Коммит e9f7742437
15 изменённых файлов: 162 добавлений и 447 удалений

Просмотреть файл

@ -854,31 +854,14 @@ specialize qw/aom_lpf_horizontal_4_dual sse2 neon dspr2 msa/;
if (aom_config("CONFIG_CDEF") eq "yes") {
add_proto qw/void aom_clpf_block_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_hblock_hbd/, "const uint16_t *src, uint16_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_clpf_detect_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int bd, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi_hbd/, "const uint16_t *rec, const uint16_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int bd, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_detect_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi_hbd sse2 ssse3 sse4_1 neon/;
}
}
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
}
add_proto qw/void aom_clpf_block/, "const uint8_t *src, uint8_t *dst, int sstride, int dstride, int x0, int y0, int sizex, int sizey, unsigned int strength, unsigned int bd";
add_proto qw/void aom_clpf_detect/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum0, int *sum1, unsigned int strength, int size, unsigned int dmp";
add_proto qw/void aom_clpf_detect_multi/, "const uint8_t *rec, const uint8_t *org, int rstride, int ostride, int x0, int y0, int width, int height, int *sum, int size, unsigned int dmp";
# VS compiling for 32 bit targets does not support vector types in
# structs as arguments, which makes the v256 type of the intrinsics
# hard to support, so optimizations for this target are disabled.
if ($opts{config} !~ /libs-x86-win32-vs.*/) {
specialize qw/aom_clpf_block_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_hblock_hbd sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_block sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect sse2 ssse3 sse4_1 neon/;
specialize qw/aom_clpf_detect_multi sse2 ssse3 sse4_1 neon/;
}
}

Просмотреть файл

@ -110,13 +110,6 @@ AV1_CX_SRCS-yes += encoder/mbgraph.c
AV1_CX_SRCS-yes += encoder/mbgraph.h
ifeq ($(CONFIG_CDEF),yes)
AV1_CX_SRCS-yes += encoder/pickcdef.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.h
AV1_CX_SRCS-yes += encoder/clpf_rdo_simd.h
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/clpf_rdo_sse2.c
AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/clpf_rdo_ssse3.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/clpf_rdo_sse4.c
AV1_CX_SRCS-$(HAVE_NEON) += encoder/clpf_rdo_neon.c
endif
ifeq ($(CONFIG_PVQ),yes)
# PVQ from daala

Просмотреть файл

@ -143,8 +143,8 @@ static void copy_sb8_16(UNUSED AV1_COMMON *cm, uint16_t *dst, int dstride,
#endif
}
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_strength_u, int clpf_strength_v) {
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
MACROBLOCKD *xd) {
int r, c;
int sbr, sbc;
int nhsb, nvsb;
@ -162,11 +162,9 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int dering_left;
int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
int nplanes = 3;
int *lev;
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
lev = cm->cdef_strengths;
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
av1_setup_dst_planes(xd->plane, frame, 0, 0);
@ -193,6 +191,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
dering_left = 1;
for (sbc = 0; sbc < nhsb; sbc++) {
int level, clpf_strength;
int uv_level, uv_clpf_strength;
int nhb, nvb;
int cstart = 0;
#if 0 // TODO(stemidts/jmvalin): Handle tile borders correctly
@ -205,18 +204,34 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
level = dering_level_table
[lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
[cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
CLPF_STRENGTHS];
clpf_strength =
lev[cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
cm->cdef_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
CLPF_STRENGTHS;
clpf_strength += clpf_strength == 3;
uv_level = dering_level_table
[cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] /
CLPF_STRENGTHS];
uv_clpf_strength =
cm->cdef_uv_strengths[cm->mi_grid_visible[MAX_MIB_SIZE * sbr *
cm->mi_stride +
MAX_MIB_SIZE * sbc]
->mbmi.cdef_strength] %
CLPF_STRENGTHS;
uv_clpf_strength += uv_clpf_strength == 3;
curr_row_dering[sbc] = 0;
if ((level == 0 && clpf_strength == 0) ||
if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
uv_clpf_strength == 0) ||
(dering_count = sb_compute_dering_list(
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
dering_left = 0;
@ -232,9 +247,11 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_damping = 3 - (pli != AOM_PLANE_Y) + (cm->base_qindex >> 6);
if (pli) {
if (!chroma_dering) level = 0;
clpf_strength = pli == 1 ? clpf_strength_u : clpf_strength_v;
clpf_strength += clpf_strength == 3;
if (chroma_dering)
level = uv_level;
else
level = 0;
clpf_strength = uv_clpf_strength;
}
if (sbc == nhsb - 1)
cend = (nhb << bsize[pli]);
@ -359,12 +376,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
(nhb << bsize[pli]));
/* FIXME: This is a temporary hack that uses more conservative
deringing for chroma. */
if (pli)
threshold = (level * 5 + 4) >> 3 << coeff_shift;
else
threshold = level << coeff_shift;
threshold = level << coeff_shift;
if (threshold == 0 && clpf_strength == 0) continue;
od_dering(dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],

Просмотреть файл

@ -33,8 +33,7 @@ extern int dering_level_table[DERING_STRENGTHS];
int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col);
int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
dering_list *dlist);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd,
int clpf_strength_u, int clpf_strength_v);
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd);

Просмотреть файл

@ -404,9 +404,8 @@ typedef struct AV1Common {
#if CONFIG_CDEF
int nb_cdef_strengths;
int cdef_strengths[CDEF_MAX_STRENGTHS];
int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
int cdef_bits;
int clpf_strength_u;
int clpf_strength_v;
#endif
#if CONFIG_DELTA_Q

Просмотреть файл

@ -2672,9 +2672,8 @@ static void setup_cdef(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->nb_cdef_strengths = 1 << cm->cdef_bits;
for (i = 0; i < cm->nb_cdef_strengths; i++) {
cm->cdef_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
cm->cdef_uv_strengths[i] = aom_rb_read_literal(rb, CDEF_STRENGTH_BITS);
}
cm->clpf_strength_u = aom_rb_read_literal(rb, 2);
cm->clpf_strength_v = aom_rb_read_literal(rb, 2);
}
#endif // CONFIG_CDEF
@ -4948,8 +4947,7 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
#if CONFIG_CDEF
if (!cm->skip_loop_filter) {
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb, cm->clpf_strength_u,
cm->clpf_strength_v);
av1_cdef_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
}
#endif // CONFIG_CDEF

Просмотреть файл

@ -3496,9 +3496,8 @@ static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->cdef_bits, 2);
for (i = 0; i < cm->nb_cdef_strengths; i++) {
aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS);
aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS);
}
aom_wb_write_literal(wb, cm->clpf_strength_u, 2);
aom_wb_write_literal(wb, cm->clpf_strength_v, 2);
}
#endif

Просмотреть файл

@ -1,232 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_image.h"
#include "aom/aom_integer.h"
#include "av1/common/quant_common.h"
// Accumulate the squared error against the original for the size x size
// block whose top-left sample is (x0, y0).  *sum0 receives the error of the
// unfiltered reconstruction, *sum1 the error after adding the correction
// returned by av1_clpf_sample() for the given strength and damping.
// Neighbour coordinates are clamped to [0, width - 1] x [0, height - 1].
void aom_clpf_detect_c(const uint8_t *rec, const uint8_t *org, int rstride,
                       int ostride, int x0, int y0, int width, int height,
                       int *sum0, int *sum1, unsigned int strength, int size,
                       unsigned int dmp) {
  int row, col;
  for (row = y0; row < y0 + size; row++) {
    // Offsets of the current row and of its vertically clamped neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;
    for (col = x0; col < x0 + size; col++) {
      const int target = org[row * ostride + col];
      const int centre = rec[cur + col];
      const int above2 = rec[up2 + col];
      const int above1 = rec[up1 + col];
      const int left2 = rec[cur + AOMMAX(0, col - 2)];
      const int left1 = rec[cur + AOMMAX(0, col - 1)];
      const int right1 = rec[cur + AOMMIN(width - 1, col + 1)];
      const int right2 = rec[cur + AOMMIN(width - 1, col + 2)];
      const int below1 = rec[dn1 + col];
      const int below2 = rec[dn2 + col];
      const int delta =
          av1_clpf_sample(centre, above2, above1, left2, left1, right1,
                          right2, below1, below2, strength, dmp);
      const int err_raw = target - centre;
      const int err_flt = target - (centre + delta);
      *sum0 += err_raw * err_raw;
      *sum1 += err_flt * err_flt;
    }
  }
}
// Same measurement as aom_clpf_detect_c(), but evaluates all candidate
// strengths in one pass.  For the size x size block at (x0, y0) the squared
// error is added to:
//   sum[0] : unfiltered reconstruction
//   sum[1] : strength 1
//   sum[2] : strength 2
//   sum[3] : strength 4
void aom_clpf_detect_multi_c(const uint8_t *rec, const uint8_t *org,
                             int rstride, int ostride, int x0, int y0,
                             int width, int height, int *sum, int size,
                             unsigned int dmp) {
  int row, col;
  for (row = y0; row < y0 + size; row++) {
    // Offsets of the current row and of its vertically clamped neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;
    for (col = x0; col < x0 + size; col++) {
      const int target = org[row * ostride + col];
      const int centre = rec[cur + col];
      const int above2 = rec[up2 + col];
      const int above1 = rec[up1 + col];
      const int left2 = rec[cur + AOMMAX(0, col - 2)];
      const int left1 = rec[cur + AOMMAX(0, col - 1)];
      const int right1 = rec[cur + AOMMIN(width - 1, col + 1)];
      const int right2 = rec[cur + AOMMIN(width - 1, col + 2)];
      const int below1 = rec[dn1 + col];
      const int below2 = rec[dn2 + col];
      const int d1 = av1_clpf_sample(centre, above2, above1, left2, left1,
                                     right1, right2, below1, below2, 1, dmp);
      const int d2 = av1_clpf_sample(centre, above2, above1, left2, left1,
                                     right1, right2, below1, below2, 2, dmp);
      const int d4 = av1_clpf_sample(centre, above2, above1, left2, left1,
                                     right1, right2, below1, below2, 4, dmp);
      const int e0 = target - centre;
      const int e1 = target - (centre + d1);
      const int e2 = target - (centre + d2);
      const int e4 = target - (centre + d4);
      sum[0] += e0 * e0;
      sum[1] += e1 * e1;
      sum[2] += e2 * e2;
      sum[3] += e4 * e4;
    }
  }
}
#if CONFIG_AOM_HIGHBITDEPTH
// High bit depth counterpart of aom_clpf_detect_c(): "rec" and "org" hold
// 16-bit samples.  Every sample is shifted right by (bd - 8) before use, and
// the strength and damping passed to av1_clpf_sample() are reduced by the
// same shift, so the accumulated errors are on the shifted scale.
void aom_clpf_detect_hbd_c(const uint16_t *rec, const uint16_t *org,
                           int rstride, int ostride, int x0, int y0, int width,
                           int height, int *sum0, int *sum1,
                           unsigned int strength, int size, unsigned int bd,
                           unsigned int dmp) {
  const int shift = bd - 8;
  int row, col;
  for (row = y0; row < y0 + size; row++) {
    // Offsets of the current row and of its vertically clamped neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;
    for (col = x0; col < x0 + size; col++) {
      const int target = org[row * ostride + col] >> shift;
      const int centre = rec[cur + col] >> shift;
      const int above2 = rec[up2 + col] >> shift;
      const int above1 = rec[up1 + col] >> shift;
      const int left2 = rec[cur + AOMMAX(0, col - 2)] >> shift;
      const int left1 = rec[cur + AOMMAX(0, col - 1)] >> shift;
      const int right1 = rec[cur + AOMMIN(width - 1, col + 1)] >> shift;
      const int right2 = rec[cur + AOMMIN(width - 1, col + 2)] >> shift;
      const int below1 = rec[dn1 + col] >> shift;
      const int below2 = rec[dn2 + col] >> shift;
      const int delta = av1_clpf_sample(centre, above2, above1, left2, left1,
                                        right1, right2, below1, below2,
                                        strength >> shift, dmp - shift);
      const int err_raw = target - centre;
      const int err_flt = target - (centre + delta);
      *sum0 += err_raw * err_raw;
      *sum1 += err_flt * err_flt;
    }
  }
}
// Identical to aom_clpf_detect_multi_c() apart from "rec" and "org", which
// hold 16-bit samples here.  Samples are shifted right by (bd - 8) before
// use and the damping is reduced by the same amount; the candidate strengths
// stay 1, 2 and 4.  Squared errors are added to sum[0] (unfiltered) and
// sum[1..3] (strength 1, 2, 4).
void aom_clpf_detect_multi_hbd_c(const uint16_t *rec, const uint16_t *org,
                                 int rstride, int ostride, int x0, int y0,
                                 int width, int height, int *sum, int size,
                                 unsigned int bd, unsigned int dmp) {
  const int shift = bd - 8;
  int row, col;
  for (row = y0; row < y0 + size; row++) {
    // Offsets of the current row and of its vertically clamped neighbours.
    const int cur = row * rstride;
    const int up2 = AOMMAX(0, row - 2) * rstride;
    const int up1 = AOMMAX(0, row - 1) * rstride;
    const int dn1 = AOMMIN(height - 1, row + 1) * rstride;
    const int dn2 = AOMMIN(height - 1, row + 2) * rstride;
    for (col = x0; col < x0 + size; col++) {
      const int target = org[row * ostride + col] >> shift;
      const int centre = rec[cur + col] >> shift;
      const int above2 = rec[up2 + col] >> shift;
      const int above1 = rec[up1 + col] >> shift;
      const int left2 = rec[cur + AOMMAX(0, col - 2)] >> shift;
      const int left1 = rec[cur + AOMMAX(0, col - 1)] >> shift;
      const int right1 = rec[cur + AOMMIN(width - 1, col + 1)] >> shift;
      const int right2 = rec[cur + AOMMIN(width - 1, col + 2)] >> shift;
      const int below1 = rec[dn1 + col] >> shift;
      const int below2 = rec[dn2 + col] >> shift;
      const int d1 =
          av1_clpf_sample(centre, above2, above1, left2, left1, right1,
                          right2, below1, below2, 1, dmp - shift);
      const int d2 =
          av1_clpf_sample(centre, above2, above1, left2, left1, right1,
                          right2, below1, below2, 2, dmp - shift);
      const int d4 =
          av1_clpf_sample(centre, above2, above1, left2, left1, right1,
                          right2, below1, below2, 4, dmp - shift);
      const int e0 = target - centre;
      const int e1 = target - (centre + d1);
      const int e2 = target - (centre + d2);
      const int e4 = target - (centre + d4);
      sum[0] += e0 * e0;
      sum[1] += e1 * e1;
      sum[2] += e2 * e2;
      sum[3] += e4 * e4;
    }
  }
}
#endif
// Accumulate the square error of all filter settings over one plane.  The
// totals are ADDED to the caller's array (the caller is expected to clear it
// first):
//   res[0]   : unfiltered
//   res[1-3] : strength = 1, 2, 4
// "w" and "h" are the plane dimensions in units of block_size.  Blocks whose
// mode info is flagged as skip contribute nothing.
static void clpf_rdo(const YV12_BUFFER_CONFIG *rec,
                     const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                     unsigned int block_size, int w, int h, uint64_t res[4],
                     int plane) {
  int m, n;
  // Chroma positions have to be scaled back up to luma units before they can
  // be used to index the mode info grid.
  const int subx = plane != AOM_PLANE_Y && rec->subsampling_x;
  const int suby = plane != AOM_PLANE_Y && rec->subsampling_y;
  uint8_t *rec_buffer =
      plane != AOM_PLANE_Y
          ? (plane == AOM_PLANE_U ? rec->u_buffer : rec->v_buffer)
          : rec->y_buffer;
  uint8_t *org_buffer =
      plane != AOM_PLANE_Y
          ? (plane == AOM_PLANE_U ? org->u_buffer : org->v_buffer)
          : org->y_buffer;
  int rec_width = plane != AOM_PLANE_Y ? rec->uv_crop_width : rec->y_crop_width;
  int rec_height =
      plane != AOM_PLANE_Y ? rec->uv_crop_height : rec->y_crop_height;
  int rec_stride = plane != AOM_PLANE_Y ? rec->uv_stride : rec->y_stride;
  int org_stride = plane != AOM_PLANE_Y ? org->uv_stride : org->y_stride;
  int damping =
      cm->bit_depth - 5 - (plane != AOM_PLANE_Y) + (cm->base_qindex >> 6);

  for (m = 0; m < h; m++) {
    for (n = 0; n < w; n++) {
      // The detect functions accumulate into plain ints.  Keeping these
      // accumulators per block and folding them into the 64-bit totals after
      // every block bounds each int by a single block's squared error;
      // summing a whole plane in an int could overflow (undefined behaviour)
      // on large or badly mismatched frames.
      int sum[4] = { 0, 0, 0, 0 };
      int xpos = n * block_size;
      int ypos = m * block_size;
      if (cm->mi_grid_visible[(ypos << suby) / MI_SIZE * cm->mi_stride +
                              (xpos << subx) / MI_SIZE]
              ->mbmi.skip)
        continue;
#if CONFIG_AOM_HIGHBITDEPTH
      if (cm->use_highbitdepth) {
        aom_clpf_detect_multi_hbd(
            CONVERT_TO_SHORTPTR(rec_buffer), CONVERT_TO_SHORTPTR(org_buffer),
            rec_stride, org_stride, xpos, ypos, rec_width, rec_height, sum,
            block_size, cm->bit_depth, damping);
      } else {
        aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
                              xpos, ypos, rec_width, rec_height, sum,
                              block_size, damping);
      }
#else
      aom_clpf_detect_multi(rec_buffer, org_buffer, rec_stride, org_stride,
                            xpos, ypos, rec_width, rec_height, sum,
                            block_size, damping);
#endif
      res[0] += sum[0];
      res[1] += sum[1];
      res[2] += sum[2];
      res[3] += sum[3];
    }
  }
}
// Choose the CLPF strength for one plane.  The squared error of every
// candidate is measured with clpf_rdo() on MI_SIZE blocks, a small bias
// favouring the more conservative candidates is applied, and the strength
// with the lowest biased error (0, 1, 2 or 4) is written to *best_strength.
void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
                         const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                         int *best_strength, int plane) {
  const int is_chroma = plane != AOM_PLANE_Y;
  int plane_w = is_chroma ? rec->uv_crop_width : rec->y_crop_width;
  int plane_h = is_chroma ? rec->uv_crop_height : rec->y_crop_height;
  const int blk = MI_SIZE;
  const int blk_log2 = get_msb(blk);
  uint64_t err[4] = { 0, 0, 0, 0 };
  uint64_t winner = (uint64_t)1 << 63;
  int k, tag;

  clpf_rdo(rec, org, cm, blk, plane_w >> blk_log2, plane_h >> blk_log2, err,
           plane);

  for (k = 0; k < 4; k++) {
    // Discount the error by a fraction that shrinks as the candidate index
    // grows, so lower candidates win near-ties.
    uint64_t cost = err[k] - (err[k] >> (7 + k));
    // Store the candidate index in the two low bits so the minimum search
    // also yields the winning index.
    cost = (cost << 2) + k;
    if (cost < winner) winner = cost;
  }

  // Map the winning index 0..3 to strength 0, 1, 2, 4.
  tag = (int)(winner & 3);
  *best_strength = tag ? 1 << (tag - 1) : 0;
}

Просмотреть файл

@ -1,21 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_CLPF_H_
#define AV1_ENCODER_CLPF_H_
#include "av1/common/reconinter.h"
void av1_clpf_test_plane(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int plane);
#endif

Просмотреть файл

@ -1,14 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_neon
#include "./clpf_rdo_simd.h"

Просмотреть файл

@ -1,14 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse2
#include "./clpf_rdo_simd.h"

Просмотреть файл

@ -1,14 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_sse4_1
#include "./clpf_rdo_simd.h"

Просмотреть файл

@ -1,14 +0,0 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "aom_dsp/aom_simd.h"
#define SIMD_FUNC(name) name##_ssse3
#include "./clpf_rdo_simd.h"

Просмотреть файл

@ -19,7 +19,6 @@
#if CONFIG_CDEF
#include "av1/common/cdef.h"
#include "av1/common/clpf.h"
#include "av1/encoder/clpf_rdo.h"
#endif // CONFIG_CDEF
#include "av1/common/filter.h"
#include "av1/common/idct.h"
@ -3522,7 +3521,6 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
}
#if CONFIG_CDEF
if (is_lossless_requested(&cpi->oxcf)) {
cm->clpf_strength_u = cm->clpf_strength_v = 0;
cm->cdef_bits = 0;
cm->cdef_strengths[0] = 0;
cm->nb_cdef_strengths = 1;
@ -3531,12 +3529,7 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
av1_cdef_search(cm->frame_to_show, cpi->Source, cm, xd);
// Apply the filter
av1_cdef_frame(cm->frame_to_show, cm, xd, cm->clpf_strength_u,
cm->clpf_strength_v);
// Pack the clpf chroma strengths into two bits each
cm->clpf_strength_u -= cm->clpf_strength_u == 4;
cm->clpf_strength_v -= cm->clpf_strength_v == 4;
av1_cdef_frame(cm->frame_to_show, cm, xd);
}
#endif
#if CONFIG_LOOP_RESTORATION

Просмотреть файл

@ -17,7 +17,6 @@
#include "av1/common/cdef.h"
#include "av1/common/onyxc_int.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/clpf_rdo.h"
#include "av1/encoder/encoder.h"
#define TOTAL_STRENGTHS (DERING_STRENGTHS * CLPF_STRENGTHS)
@ -79,12 +78,12 @@ static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
}
static double compute_dist(uint16_t *x, int xstride, uint16_t *y, int ystride,
int nhb, int nvb, int coeff_shift) {
int nhb, int nvb, int coeff_shift, int bsize) {
int i, j;
double sum;
sum = 0;
for (i = 0; i < nvb << 3; i++) {
for (j = 0; j < nhb << 3; j++) {
for (i = 0; i < nvb << bsize; i++) {
for (j = 0; j < nhb << bsize; j++) {
double tmp;
tmp = x[i * xstride + j] - y[i * ystride + j];
sum += tmp * tmp;
@ -97,11 +96,11 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
AV1_COMMON *cm, MACROBLOCKD *xd) {
int r, c;
int sbr, sbc;
uint16_t *src;
uint16_t *ref_coeff;
uint16_t *src[3];
uint16_t *ref_coeff[3];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride;
int stride[3];
int bsize[3];
int dec[3];
int pli;
@ -114,8 +113,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse)[DERING_STRENGTHS * CLPF_STRENGTHS] =
aom_malloc(sizeof(*mse) * nvsb * nhsb);
int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse[3])[TOTAL_STRENGTHS];
int clpf_damping = 3 + (cm->base_qindex >> 6);
int i;
int best_lev[CDEF_MAX_STRENGTHS];
@ -123,35 +122,56 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nb_strength_bits;
int quantizer;
double lambda;
int nplanes = 3;
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
quantizer =
av1_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (cm->bit_depth - 8);
lambda = .12 * quantizer * quantizer / 256.;
src = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
ref_coeff =
aom_memalign(32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
av1_setup_dst_planes(xd->plane, frame, 0, 0);
for (pli = 0; pli < 3; pli++) {
for (pli = 0; pli < nplanes; pli++) {
uint8_t *ref_buffer;
int ref_stride;
switch (pli) {
case 0:
ref_buffer = ref->y_buffer;
ref_stride = ref->y_stride;
break;
case 1:
ref_buffer = ref->u_buffer;
ref_stride = ref->uv_stride;
break;
case 2:
ref_buffer = ref->v_buffer;
ref_stride = ref->uv_stride;
break;
}
mse[pli] = aom_malloc(sizeof(**mse) * nvsb * nhsb);
src[pli] = aom_memalign(32, sizeof(*src) * cm->mi_rows * cm->mi_cols * 64);
ref_coeff[pli] =
aom_memalign(32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * 64);
dec[pli] = xd->plane[pli].subsampling_x;
bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
}
stride = cm->mi_cols << bsize[0];
for (r = 0; r < cm->mi_rows << bsize[0]; ++r) {
for (c = 0; c < cm->mi_cols << bsize[0]; ++c) {
stride[pli] = cm->mi_cols << 3;
for (r = 0; r < cm->mi_rows << bsize[pli]; ++r) {
for (c = 0; c < cm->mi_cols << bsize[pli]; ++c) {
#if CONFIG_AOM_HIGHBITDEPTH
if (cm->use_highbitdepth) {
src[r * stride + c] = CONVERT_TO_SHORTPTR(
xd->plane[0].dst.buf)[r * xd->plane[0].dst.stride + c];
ref_coeff[r * stride + c] =
CONVERT_TO_SHORTPTR(ref->y_buffer)[r * ref->y_stride + c];
} else {
if (cm->use_highbitdepth) {
src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR(
xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
ref_coeff[pli][r * stride[pli] + c] =
CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c];
} else {
#endif
src[r * stride + c] =
xd->plane[0].dst.buf[r * xd->plane[0].dst.stride + c];
ref_coeff[r * stride + c] = ref->y_buffer[r * ref->y_stride + c];
src[pli][r * stride[pli] + c] =
xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c];
#if CONFIG_AOM_HIGHBITDEPTH
}
#endif
}
#endif
}
}
sb_count = 0;
@ -175,44 +195,49 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int j;
level = dering_level_table[gi / CLPF_STRENGTHS];
threshold = level << coeff_shift;
for (r = 0; r < nvb << bsize[0]; r++) {
for (c = 0; c < nhb << bsize[0]; c++) {
dst[(r * MAX_MIB_SIZE << bsize[0]) + c] =
src[((sbr * MAX_MIB_SIZE << bsize[0]) + r) * stride +
(sbc * MAX_MIB_SIZE << bsize[0]) + c];
for (pli = 0; pli < nplanes; pli++) {
if (pli > 0 && !chroma_dering) threshold = 0;
for (r = 0; r < nvb << bsize[pli]; r++) {
for (c = 0; c < nhb << bsize[pli]; c++) {
dst[(r * MAX_MIB_SIZE << bsize[pli]) + c] =
src[pli]
[((sbr * MAX_MIB_SIZE << bsize[pli]) + r) * stride[pli] +
(sbc * MAX_MIB_SIZE << bsize[pli]) + c];
}
}
}
in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
/* We avoid filtering the pixels for which some of the pixels to average
are outside the frame. We could change the filter instead, but it
would
add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_VBORDER * (sbr != 0);
i < (nvb << bsize[0]) + OD_FILT_VBORDER * (sbr != nvsb - 1); i++) {
for (j = -OD_FILT_HBORDER * (sbc != 0);
j < (nhb << bsize[0]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
j++) {
uint16_t *x;
x = &src[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
(sbc * MAX_MIB_SIZE << bsize[0])];
in[i * OD_FILT_BSTRIDE + j] = x[i * stride + j];
in = inbuf + OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER;
/* We avoid filtering the pixels for which some of the pixels to
average
are outside the frame. We could change the filter instead, but it
would add special cases for any future vectorization. */
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
for (i = -OD_FILT_VBORDER * (sbr != 0);
i < (nvb << bsize[pli]) + OD_FILT_VBORDER * (sbr != nvsb - 1);
i++) {
for (j = -OD_FILT_HBORDER * (sbc != 0);
j < (nhb << bsize[pli]) + OD_FILT_HBORDER * (sbc != nhsb - 1);
j++) {
uint16_t *x;
x = &src[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])];
in[i * OD_FILT_BSTRIDE + j] = x[i * stride[pli] + j];
}
}
clpf_strength = gi % CLPF_STRENGTHS;
od_dering(tmp_dst, in, dec[pli], dir, pli, dlist, dering_count,
threshold, clpf_strength + (clpf_strength == 3),
clpf_damping, coeff_shift);
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[pli], tmp_dst,
dlist, dering_count, bsize[pli]);
mse[pli][sb_count][gi] = (int)compute_dist(
dst, MAX_MIB_SIZE << bsize[pli],
&ref_coeff[pli][(sbr * stride[pli] * MAX_MIB_SIZE << bsize[pli]) +
(sbc * MAX_MIB_SIZE << bsize[pli])],
stride[pli], nhb, nvb, coeff_shift, bsize[pli]);
sb_index[sb_count] =
MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
}
clpf_strength = gi % CLPF_STRENGTHS;
od_dering(tmp_dst, in, 0, dir, 0, dlist, dering_count, threshold,
clpf_strength + (clpf_strength == 3), clpf_damping,
coeff_shift);
copy_dering_16bit_to_16bit(dst, MAX_MIB_SIZE << bsize[0], tmp_dst,
dlist, dering_count, bsize[0]);
mse[sb_count][gi] = (int)compute_dist(
dst, MAX_MIB_SIZE << bsize[0],
&ref_coeff[(sbr * stride * MAX_MIB_SIZE << bsize[0]) +
(sbc * MAX_MIB_SIZE << bsize[0])],
stride, nhb, nvb, coeff_shift);
sb_index[sb_count] =
MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
}
sb_count++;
}
@ -222,7 +247,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
/* Search for different number of signalling bits. */
for (i = 0; i <= 3; i++) {
nb_strengths = 1 << i;
tot_mse = joint_strength_search(best_lev, nb_strengths, mse, sb_count);
tot_mse = joint_strength_search(best_lev, nb_strengths, mse[0], sb_count);
/* Count superblock signalling cost. */
tot_mse += (uint64_t)(sb_count * lambda * i);
/* Count header signalling cost. */
@ -243,21 +268,44 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
uint64_t best_mse = (uint64_t)1 << 63;
best_gi = 0;
for (gi = 0; gi < cm->nb_cdef_strengths; gi++) {
if (mse[i][best_lev[gi]] < best_mse) {
if (mse[0][i][best_lev[gi]] < best_mse) {
best_gi = gi;
best_mse = mse[i][best_lev[gi]];
best_mse = mse[0][i][best_lev[gi]];
}
}
selected_strength[i] = best_gi;
cm->mi_grid_visible[sb_index[i]]->mbmi.cdef_strength = best_gi;
}
aom_free(src);
aom_free(ref_coeff);
aom_free(mse);
int str;
/* For each strength option we picked in luma, find the optimal chroma
strength. */
if (nplanes >= 3) {
for (str = 0; str < cm->nb_cdef_strengths; str++) {
int gi;
int best_gi = 0;
best_tot_mse = (uint64_t)1 << 63;
for (gi = 0; gi < TOTAL_STRENGTHS; gi++) {
tot_mse = 0;
for (i = 0; i < sb_count; i++) {
if (selected_strength[i] == str) {
tot_mse += mse[1][i][gi] + mse[2][i][gi];
}
}
if (tot_mse < best_tot_mse) {
best_gi = gi;
best_tot_mse = tot_mse;
}
}
cm->cdef_uv_strengths[str] = best_gi;
}
} else {
for (str = 0; str < nb_strengths; str++) selected_strength[str] = 0;
}
for (pli = 0; pli < nplanes; pli++) {
aom_free(src[pli]);
aom_free(ref_coeff[pli]);
aom_free(mse[pli]);
}
aom_free(sb_index);
av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_u,
AOM_PLANE_U);
av1_clpf_test_plane(cm->frame_to_show, ref, cm, &cm->clpf_strength_v,
AOM_PLANE_V);
aom_free(selected_strength);
}