New CLPF: New kernel and RDO for strength and block size

Change-Id: I61eb08862a101df74a6b65ece459833401e81117
This commit is contained in:
Steinar Midtskogen 2016-05-06 13:48:20 +02:00 коммит произвёл Yaowu Xu
Родитель 9c1e2f92e5
Коммит 7560123c06
10 изменённых файлов: 467 добавлений и 151 удалений

Просмотреть файл

@ -65,8 +65,10 @@ AV1_COMMON_SRCS-yes += common/scan.h
# TODO(angiebird) the forward transform belongs under encoder/
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.h
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.c
ifeq ($(CONFIG_CLPF),yes)
AV1_COMMON_SRCS-yes += common/clpf.c
AV1_COMMON_SRCS-yes += common/clpf.h
endif
ifeq ($(CONFIG_DERING),yes)
AV1_COMMON_SRCS-yes += common/od_dering.c
AV1_COMMON_SRCS-yes += common/od_dering.h

Просмотреть файл

@ -82,6 +82,10 @@ AV1_CX_SRCS-yes += encoder/temporal_filter.h
AV1_CX_SRCS-yes += encoder/mbgraph.c
AV1_CX_SRCS-yes += encoder/mbgraph.h
AV1_CX_SRCS-$(CONFIG_DERING) += encoder/pickdering.c
ifeq ($(CONFIG_CLPF),yes)
AV1_CX_SRCS-yes += encoder/clpf_rdo.c
AV1_CX_SRCS-yes += encoder/clpf_rdo.h
endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c

Просмотреть файл

@ -9,96 +9,119 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
#include "aom_dsp/aom_dsp_common.h"
// Apply the filter on a single block
static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
int dstride, int has_top, int has_left, int has_bottom,
int has_right, int width, int height) {
// Returns the number of bits used to code the count of CLPF filter blocks.
// Each frame dimension (mi units scaled by MI_BLOCK_SIZE) is rounded up to a
// multiple of 1 << (clpf_size + 4); the padded area divided by the area of
// one such square gives the block count, and the result is the position of
// that count's most significant bit plus one.
int av1_clpf_maxbits(const AV1_COMMON *cm) {
  const int fb_log2 = cm->clpf_size + 4;
  const int padded_w = ALIGN_POWER_OF_TWO(cm->mi_cols * MI_BLOCK_SIZE, fb_log2);
  const int padded_h = ALIGN_POWER_OF_TWO(cm->mi_rows * MI_BLOCK_SIZE, fb_log2);
  const int num_fb = (padded_w * padded_h) >> (2 * fb_log2);
  return get_msb(num_fb) + 1;
}
// CLPF kernel for one sample. X is the sample being filtered and A..F are its
// six neighbours; b is the clamp limit. Each neighbour contributes its
// difference from X, clamped to [-b, b], with weights 4 (A, F), 3 (C, D) and
// 1 (B, E). Returns the rounded weighted sum divided by 16, i.e. the
// correction to add to X.
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
  const int dA = clamp(A - X, -b, b);
  const int dB = clamp(B - X, -b, b);
  const int dC = clamp(C - X, -b, b);
  const int dD = clamp(D - X, -b, b);
  const int dE = clamp(E - X, -b, b);
  const int dF = clamp(F - X, -b, b);
  const int delta = 4 * (dA + dF) + 3 * (dC + dD) + (dB + dE);
  // Round to nearest; the offset is one smaller for negative sums.
  const int rounding = delta < 0 ? 7 : 8;
  return (rounding + delta) >> 4;
}
static void clpf_block(const uint8_t *src, uint8_t *dst, int stride, int x0,
int y0, int sizex, int sizey, int width, int height,
unsigned int strength) {
int x, y;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int X = src[(y + 0) * sstride + x + 0];
int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
((A < X) + (B < X) + (C < X) + (D < X) > 2);
dst[y * dstride + x] = X + delta;
for (y = y0; y < y0 + sizey; y++) {
for (x = x0; x < x0 + sizex; x++) {
int X = src[y * stride + x];
int A = src[AOMMAX(0, y - 1) * stride + x];
int B = src[y * stride + AOMMAX(0, x - 2)];
int C = src[y * stride + AOMMAX(0, x - 1)];
int D = src[y * stride + AOMMIN(width - 1, x + 1)];
int E = src[y * stride + AOMMIN(width - 1, x + 2)];
int F = src[AOMMIN(height - 1, y + 1) * stride + x];
int delta;
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
dst[y * stride + x] = X + delta;
}
}
}
#define BS MI_SIZE *MI_BLOCK_SIZE
// Return number of filtered blocks
int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *)) {
/* Constrained low-pass filter (CLPF) */
int c, k, l, m, n;
int width = rec->y_crop_width;
int height = rec->y_crop_height;
int xpos, ypos;
int stride_y = rec->y_stride;
int stride_c = rec->uv_stride;
const int bs = MI_BLOCK_SIZE;
int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
int block_index = 0;
// Iterate over blocks within a superblock
static void av1_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
const AV1_COMMON *cm, MACROBLOCKD *xd,
MODE_INFO *const *mi_8x8, int xpos, int ypos) {
// Temporary buffer (to allow SIMD parallelism)
uint8_t buf_unaligned[BS * BS + 15];
uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
int x, y, p;
for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
const MB_MODE_INFO *mbmi =
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
// Do not filter if there is no residual
if (!mbmi->skip) {
// Do not filter frame edges
int has_top = ypos + y > 0;
int has_left = xpos + x > 0;
int has_bottom = ypos + y < cm->mi_rows - 1;
int has_right = xpos + x < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
// Do not filter superblock edges
has_top &= !!y;
has_left &= !!x;
has_bottom &= y != MI_BLOCK_SIZE - 1;
has_right &= x != MI_BLOCK_SIZE - 1;
#endif
av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
clpf_block(
xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
? buf + y * MI_SIZE * BS + x * MI_SIZE
: xd->plane[p].dst.buf,
xd->plane[p].dst.stride,
CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
has_top, has_left, has_bottom, has_right,
MI_SIZE >> xd->plane[p].subsampling_x,
MI_SIZE >> xd->plane[p].subsampling_y);
// Iterate over all filter blocks
for (k = 0; k < num_fb_ver; k++) {
for (l = 0; l < num_fb_hor; l++) {
int h, w;
int allskip = 1;
for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
for (n = 0; allskip && n < (1 << fb_size_log2) / bs; n++) {
xpos = (l << fb_size_log2) + n * bs;
ypos = (k << fb_size_log2) + m * bs;
if (xpos < width && ypos < height) {
allskip &=
cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
->mbmi.skip;
}
}
}
#if CLPF_ALLOW_PIXEL_PARALLELISM
for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
const MB_MODE_INFO *mbmi =
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
if (!mbmi->skip) {
int i = 0;
for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
MI_SIZE >> xd->plane[p].subsampling_x);
}
}
}
#endif
}
}
// Iterate over the superblocks of an entire frame
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
MACROBLOCKD *xd) {
int x, y;
for (y = 0; y < cm->mi_rows; y += MI_BLOCK_SIZE)
for (x = 0; x < cm->mi_cols; x += MI_BLOCK_SIZE)
av1_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
// Calculate the actual filter block size near frame edges
h = AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
w = AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
h += !h << fb_size_log2;
w += !w << fb_size_log2;
if (!allskip && // Do not filter the block if all is skip encoded
(!enable_fb_flag ||
decision(k, l, rec, org, cm, bs, w / bs, h / bs, strength,
fb_size_log2, blocks + block_index))) {
// Iterate over all smaller blocks inside the filter block
for (m = 0; m < (h + bs - 1) / bs; m++) {
for (n = 0; n < (w + bs - 1) / bs; n++) {
xpos = (l << fb_size_log2) + n * bs;
ypos = (k << fb_size_log2) + m * bs;
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
->mbmi.skip) {
// Not skip block, apply the filter
clpf_block(rec->y_buffer, dst->y_buffer, stride_y, xpos, ypos, bs,
bs, width, height, strength);
} else { // Skip block, copy instead
for (c = 0; c < bs; c++)
*(uint64_t *)(dst->y_buffer + (ypos + c) * stride_y + xpos) =
*(uint64_t *)(rec->y_buffer + (ypos + c) * stride_y + xpos);
}
}
}
} else { // Entire filter block is skip, copy
for (m = 0; m < h; m++)
memcpy(dst->y_buffer + ((k << fb_size_log2) + m) * stride_y +
(l << fb_size_log2),
rec->y_buffer + ((k << fb_size_log2) + m) * stride_y +
(l << fb_size_log2),
w);
}
block_index += !allskip; // Count number of blocks filtered
}
}
return block_index;
}

Просмотреть файл

@ -13,15 +13,17 @@
#include "av1/common/reconinter.h"
// Configuration
#define CLPF_ALLOW_PIXEL_PARALLELISM \
1 // 1 = SIMD friendly (adds a buffer requirement)
#define CLPF_ALLOW_BLOCK_PARALLELISM \
0 // 1 = MT friendly (degrades quality slightly)
#define CLPF_FILTER_ALL_PLANES \
0 // 1 = filter both luma and chroma, 0 = filter only luma
#define MAX_FB_SIZE 128
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
MACROBLOCKD *xd);
int av1_clpf_maxbits(const AV1_COMMON *cm);
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int enable_fb_flag, unsigned int strength,
unsigned int fb_size_log2, uint8_t *blocks,
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
const YV12_BUFFER_CONFIG *,
const AV1_COMMON *cm, int, int, int,
unsigned int, unsigned int, uint8_t *));
#endif

Просмотреть файл

@ -147,7 +147,10 @@ typedef struct AV1Common {
#endif
#if CONFIG_CLPF
int clpf;
int clpf_numblocks;
int clpf_size;
int clpf_strength;
uint8_t *clpf_blocks;
#endif
YV12_BUFFER_CONFIG *frame_to_show;

Просмотреть файл

@ -817,7 +817,26 @@ static void setup_loopfilter(struct loopfilter *lf,
#if CONFIG_CLPF
// Read the CLPF parameters from the bit buffer `rb` into `cm`.
// Syntax as written here: a 1-bit `clpf`, a 2-bit `clpf_strength`; if the
// strength is non-zero, a 2-bit `clpf_size`; if the size is also non-zero, a
// block count of av1_clpf_maxbits(cm) bits followed by one bit per block.
// `cm->clpf_blocks` is cleared first and is only allocated (clpf_numblocks
// bytes, one byte per signalled bit) in that innermost case.
// NOTE(review): this listing looks like a rendered diff without +/- markers;
// the `cm->clpf` read may be a line the change removes - confirm against the
// real file and keep the order in sync with encode_clpf().
// NOTE(review): clpf_numblocks is taken from the bitstream as-is. Confirm it
// cannot be smaller than the number of block flags av1_clpf_frame() will
// index through `blocks + block_index`, otherwise clpf_bit() reads past the
// allocation.
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
cm->clpf = aom_rb_read_literal(rb, 1);
// No per-block flags unless a block size is signalled below.
cm->clpf_blocks = 0;
cm->clpf_strength = aom_rb_read_literal(rb, 2);
if (cm->clpf_strength) {
cm->clpf_size = aom_rb_read_literal(rb, 2);
if (cm->clpf_size) {
int i;
cm->clpf_numblocks = aom_rb_read_literal(rb, av1_clpf_maxbits(cm));
CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(cm->clpf_numblocks));
// One flag bit per signalled filter block.
for (i = 0; i < cm->clpf_numblocks; i++) {
cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1);
}
}
}
}
// Decision callback with the signature av1_clpf_frame() expects: it ignores
// the block position and buffers and simply returns the flag that `bit`
// points at.
static int clpf_bit(int k, int l, const YV12_BUFFER_CONFIG *rec,
                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                    int block_size, int w, int h, unsigned int strength,
                    unsigned int fb_size_log2, uint8_t *bit) {
  // Only the flag is consulted; the rest exists to match the callback type.
  (void)k;
  (void)l;
  (void)rec;
  (void)org;
  (void)cm;
  (void)block_size;
  (void)w;
  (void)h;
  (void)strength;
  (void)fb_size_log2;
  return bit[0];
}
#endif
@ -2240,8 +2259,22 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
}
#if CONFIG_CLPF
if (cm->clpf && !cm->skip_loop_filter)
av1_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
if (cm->clpf_strength && !cm->skip_loop_filter) {
YV12_BUFFER_CONFIG dst; // Buffer for the result
dst = pbi->cur_buf->buf;
CHECK_MEM_ERROR(cm, dst.y_buffer, aom_malloc(dst.y_stride * dst.y_height));
av1_clpf_frame(&dst, &pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
cm->clpf_strength + (cm->clpf_strength == 3),
4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
// Copy result
memcpy(pbi->cur_buf->buf.y_buffer, dst.y_buffer,
dst.y_height * dst.y_stride);
aom_free(dst.y_buffer);
}
if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
#endif
#if CONFIG_DERING
if (cm->dering_level && !cm->skip_loop_filter) {

Просмотреть файл

@ -1091,7 +1091,22 @@ static void encode_loopfilter(struct loopfilter *lf,
#if CONFIG_CLPF
// Write the CLPF parameters of `cm` to the bit buffer `wb`.
// Syntax as written here: a 1-bit `clpf`, a 2-bit `clpf_strength`; if the
// strength is non-zero, a 2-bit `clpf_size`; if the size is also non-zero,
// `clpf_numblocks` in av1_clpf_maxbits(cm) bits followed by one bit for each
// entry of `clpf_blocks`.
// NOTE(review): this listing looks like a rendered diff without +/- markers;
// the `cm->clpf` write may be a line the change removes - confirm against the
// real file and keep the order in sync with setup_clpf().
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
aom_wb_write_literal(wb, cm->clpf, 1);
aom_wb_write_literal(wb, cm->clpf_strength, 2);
if (cm->clpf_strength) {
aom_wb_write_literal(wb, cm->clpf_size, 2);
if (cm->clpf_size) {
int i;
// TODO(stemidts): The number of bits to transmit could be
// implicitly deduced if transmitted after the filter block or
// after the frame (when it's known whether the block is all
// skip and implicitly unfiltered). And the bits do not have
// 50% probability, so a more efficient coding is possible.
aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
// One flag bit per counted filter block.
for (i = 0; i < cm->clpf_numblocks; i++) {
aom_wb_write_literal(wb, cm->clpf_blocks[i], 1);
}
}
}
}
#endif

221
av1/encoder/clpf_rdo.c Normal file
Просмотреть файл

@ -0,0 +1,221 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "av1/common/clpf.h"
#include "aom/aom_integer.h"
#include "av1/common/quant_common.h"
// Accumulate squared errors against the source for one 8x8 block whose
// top-left corner is (x0, y0): *sum0 receives the error of the reconstruction
// as it is, *sum1 the error after applying the CLPF kernel at `strength`.
// Neighbour coordinates are clamped to [0, width - 1] x [0, height - 1].
// `so` is the stride of `org`, `stride` the stride of `rec`.
static void detect_clpf(const uint8_t *rec, const uint8_t *org, int x0, int y0,
                        int width, int height, int so, int stride, int *sum0,
                        int *sum1, unsigned int strength) {
  const int x_end = x0 + 8;
  const int y_end = y0 + 8;
  int row, col;
  for (row = y0; row < y_end; row++) {
    const uint8_t *cur = rec + row * stride;
    const uint8_t *above = rec + AOMMAX(0, row - 1) * stride;
    const uint8_t *below = rec + AOMMIN(height - 1, row + 1) * stride;
    const uint8_t *src = org + row * so;
    for (col = x0; col < x_end; col++) {
      const int X = cur[col];
      const int filtered =
          X + av1_clpf_sample(X, above[col], cur[AOMMAX(0, col - 2)],
                              cur[AOMMAX(0, col - 1)],
                              cur[AOMMIN(width - 1, col + 1)],
                              cur[AOMMIN(width - 1, col + 2)], below[col],
                              strength);
      const int err_plain = src[col] - X;
      const int err_filtered = src[col] - filtered;
      *sum0 += err_plain * err_plain;
      *sum1 += err_filtered * err_filtered;
    }
  }
}
// Accumulate squared errors against the source for one 8x8 block whose
// top-left corner is (x0, y0), for every candidate setting at once:
// sum[0] is the unfiltered error and sum[1], sum[2], sum[3] are the errors
// after filtering with strength 1, 2 and 4 respectively.
// Neighbour coordinates are clamped to [0, width - 1] x [0, height - 1].
// `so` is the stride of `org`, `stride` the stride of `rec`.
static void detect_multi_clpf(const uint8_t *rec, const uint8_t *org, int x0,
                              int y0, int width, int height, int so, int stride,
                              int *sum) {
  static const int strengths[3] = { 1, 2, 4 };
  int row, col, s;
  for (row = y0; row < y0 + 8; row++) {
    const uint8_t *cur = rec + row * stride;
    const uint8_t *above = rec + AOMMAX(0, row - 1) * stride;
    const uint8_t *below = rec + AOMMIN(height - 1, row + 1) * stride;
    const uint8_t *src = org + row * so;
    for (col = x0; col < x0 + 8; col++) {
      const int O = src[col];
      const int X = cur[col];
      const int A = above[col];
      const int B = cur[AOMMAX(0, col - 2)];
      const int C = cur[AOMMAX(0, col - 1)];
      const int D = cur[AOMMIN(width - 1, col + 1)];
      const int E = cur[AOMMIN(width - 1, col + 2)];
      const int F = below[col];
      sum[0] += (O - X) * (O - X);
      for (s = 0; s < 3; s++) {
        const int Y = X + av1_clpf_sample(X, A, B, C, D, E, F, strengths[s]);
        sum[s + 1] += (O - Y) * (O - Y);
      }
    }
  }
}
// Decide whether filtering helps for the filter block at row k, column l
// (in units of 1 << fb_size_log2). The block spans w x h sub-blocks of
// block_size; for every sub-block that is not skip coded the squared error
// against `org` is accumulated with and without the filter at `strength`.
// Stores 1 in *res when the filtered error is strictly lower, 0 otherwise,
// and returns the same value.
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
                      const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                      int block_size, int w, int h, unsigned int strength,
                      unsigned int fb_size_log2, uint8_t *res) {
  const int bs = MI_BLOCK_SIZE;
  const int x_base = l << fb_size_log2;
  const int y_base = k << fb_size_log2;
  int err_plain = 0;
  int err_filtered = 0;
  int row, col;
  for (row = 0; row < h; row++) {
    const int ypos = y_base + row * block_size;
    for (col = 0; col < w; col++) {
      const int xpos = x_base + col * block_size;
      // Skip coded blocks are never filtered, so they do not vote.
      if (cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]->mbmi.skip)
        continue;
      detect_clpf(rec->y_buffer, org->y_buffer, xpos, ypos, rec->y_crop_width,
                  rec->y_crop_height, org->y_stride, rec->y_stride, &err_plain,
                  &err_filtered, strength);
    }
  }
  *res = err_filtered < err_plain;
  return *res;
}
// Calculate the square error of all filter settings. Result:
// res[0][0] : unfiltered
// res[0][1-3] : strength=1,2,4, no signals
// res[1][0] : (bit count, fb size = 128)
// res[1][1-3] : strength=1,2,4, fb size = 128
// res[2][0] : (bit count, fb size = 64)
// res[2][1-3] : strength=1,2,4, fb size = 64
// res[3][0] : (bit count, fb size = 32)
// res[3][1-3] : strength=1,2,4, fb size = 32
//
// (x, y) is the top-left corner of the region and w x h its size in units of
// block_size. Regions larger than the smallest size are split into up to
// four quadrants and handled recursively; the smallest regions accumulate
// the per-block errors of their non-skip blocks into every row of res.
// Returns non-zero when at least one block in the region was not skip coded.
//
// The totals in res run over the whole frame, so everything saved from or
// merged back into res is kept in int64_t; narrowing them to int would
// truncate on large or high-error frames.
static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                    unsigned int block_size, unsigned int fb_size_log2, int w,
                    int h, int64_t res[4][4]) {
  int i, m, n, filtered = 0;
  int sum[4];
  int bslog = get_msb(block_size);
  sum[0] = sum[1] = sum[2] = sum[3] = 0;
  if (fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
    int w1, h1, w2, h2, level;
    int64_t sum1, sum2, sum3, oldfiltered;
    fb_size_log2--;
    // Sizes of the left/top (w1, h1) and right/bottom (w2, h2) quadrants.
    w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
    h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
    w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
    h2 = AOMMIN(h - (1 << (fb_size_log2 - bslog)), h >> 1);
    // Row of res that belongs to the filter block size being split here.
    level = get_msb(MAX_FB_SIZE) - fb_size_log2;
    // Save the running totals and clear column 0 so that, after the
    // recursion, it holds only this region's unfiltered error.
    sum1 = res[level][1];
    sum2 = res[level][2];
    sum3 = res[level][3];
    oldfiltered = res[level][0];
    res[level][0] = 0;
    filtered =
        clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, res);
    if (1 << (fb_size_log2 - bslog) < w)
      filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
                           fb_size_log2, w2, h1, res);
    if (1 << (fb_size_log2 - bslog) < h) {
      filtered |= clpf_rdo(y + (1 << fb_size_log2), x, rec, org, cm, block_size,
                           fb_size_log2, w1, h2, res);
      filtered |= clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2),
                           rec, org, cm, block_size, fb_size_log2, w2, h2, res);
    }
    // Per strength, keep the better of leaving this region unfiltered
    // (saved total + unfiltered error) and filtering it (accumulated total).
    res[level][1] = AOMMIN(sum1 + res[level][0], res[level][1]);
    res[level][2] = AOMMIN(sum2 + res[level][0], res[level][2]);
    res[level][3] = AOMMIN(sum3 + res[level][0], res[level][3]);
    res[level][0] = oldfiltered + filtered;  // Number of signal bits
    return filtered;
  }
  for (m = 0; m < h; m++) {
    for (n = 0; n < w; n++) {
      int xpos = x + n * block_size;
      int ypos = y + m * block_size;
      if (!cm->mi_grid_visible[ypos / MI_BLOCK_SIZE * cm->mi_stride +
                               xpos / MI_BLOCK_SIZE]
               ->mbmi.skip) {
        detect_multi_clpf(rec->y_buffer, org->y_buffer, xpos, ypos,
                          rec->y_crop_width, rec->y_crop_height, org->y_stride,
                          rec->y_stride, sum);
        filtered = 1;
      }
    }
  }
  // Every row of res receives this region's errors; the split levels above
  // sort out what column 0 finally means for their row.
  for (i = 0; i < 4; i++) {
    res[i][0] += sum[0];
    res[i][1] += sum[1];
    res[i][2] += sum[2];
    res[i][3] += sum[3];
  }
  return filtered;
}
// Search all CLPF settings for a frame and report the cheapest one.
// rec is the reconstructed frame, org the source it is compared against.
// On return *best_strength is 0 (do not filter), 1, 2 or 4, and *best_bs is
// 0 (no per-block signalling) or the log2 of the chosen filter block size
// (7, 6 or 5).
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
                         const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                         int *best_strength, int *best_bs) {
  static const double lambda_square[] = {
    // exp((i - 15.4244) / 8.4010)
    0.159451, 0.179607, 0.202310, 0.227884, 0.256690, 0.289138, 0.325687,
    0.366856, 0.413230, 0.465465, 0.524303, 0.590579, 0.665233, 0.749323,
    0.844044, 0.950737, 1.070917, 1.206289, 1.358774, 1.530533, 1.724004,
    1.941931, 2.187406, 2.463911, 2.775368, 3.126195, 3.521370, 3.966498,
    4.467893, 5.032669, 5.668837, 6.385421, 7.192586, 8.101784, 9.125911,
    10.27949, 11.57890, 13.04256, 14.69124, 16.54832, 18.64016, 20.99641,
    23.65052, 26.64013, 30.00764, 33.80084, 38.07352, 42.88630, 48.30746,
    54.41389, 61.29221, 69.04002, 77.76720, 87.59756, 98.67056, 111.1432,
    125.1926, 141.0179, 158.8436, 178.9227, 201.5399, 227.0160, 255.7126,
    288.0366
  };
  int i, j, k, l;
  int64_t best, sums[4][4];
  int width = rec->y_crop_width, height = rec->y_crop_height;
  const int bs = MI_BLOCK_SIZE;
  int fb_size_log2 = get_msb(MAX_FB_SIZE);
  int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
  int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
  memset(sums, 0, sizeof(sums));
  for (k = 0; k < num_fb_ver; k++) {
    for (l = 0; l < num_fb_hor; l++) {
      // Calculate the block size after frame border clipping
      int h =
          AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
      int w =
          AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
      h += !h << fb_size_log2;
      w += !w << fb_size_log2;
      clpf_rdo(k << fb_size_log2, l << fb_size_log2, rec, org, cm, bs,
               fb_size_log2, w / bs, h / bs, sums);
    }
  }
  for (j = 0; j < 4; j++) {
    // Estimate the bit costs and adjust the square errors
    double lambda =
        lambda_square[av1_get_qindex(&cm->seg, 0, cm->base_qindex) >> 2];
    // sums[j][0] is a bit count only for j > 0. For j == 0 it is the frame's
    // unfiltered square error: no cost is ever applied to that row, and
    // scaling it by lambda could exceed the range of int, so the conversion
    // is not attempted there.
    const int cost =
        j ? (int)((1.2 * lambda * (sums[j][0] + 2 + 2 * (j > 0)) + 0.5)) : 0;
    // Tag each candidate with its index in the low four bits so that the
    // winning setting can be read back from the minimum.
    for (i = 0; i < 4; i++)
      sums[j][i] = ((sums[j][i] + (i && j) * cost) << 4) + j * 4 + i;
  }
  best = (int64_t)1 << 62;
  // Column 0 of rows 1-3 held a bit count, not an error, so it is no
  // candidate.
  for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
      if ((!i || j) && sums[i][j] < best) best = sums[i][j];
  best &= 15;
  *best_bs = (best > 3) * (5 + (best < 12) + (best < 8));
  *best_strength = best ? 1 << ((best - 1) & 3) : 0;
}

26
av1/encoder/clpf_rdo.h Normal file
Просмотреть файл

@ -0,0 +1,26 @@
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#ifndef AV1_ENCODER_CLPF_H_
#define AV1_ENCODER_CLPF_H_
#include "av1/common/reconinter.h"
// Encoder-side decision for the filter block at row k, column l (in units of
// 1 << fb_size_log2), spanning w x h sub-blocks of block_size. Compares the
// squared error against org with and without the filter at `strength` over
// the sub-blocks that are not skip coded. Stores 1 in *res when filtering
// gives the strictly lower error, 0 otherwise, and returns the same value.
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int block_size, int w, int h, unsigned int strength,
unsigned int fb_size_log2, uint8_t *res);
// Evaluate every strength / filter block size combination for a frame.
// Writes 0, 1, 2 or 4 to *best_strength (0 = do not filter) and 0, 5, 6 or 7
// to *best_bs (0 = no per-block signalling, otherwise log2 of the filter
// block size).
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
int *best_strength, int *best_bs);
#endif

Просмотреть файл

@ -18,6 +18,7 @@
#include "av1/common/alloccommon.h"
#if CONFIG_CLPF
#include "av1/common/clpf.h"
#include "av1/encoder/clpf_rdo.h"
#endif
#if CONFIG_DERING
#include "av1/common/dering.h"
@ -2478,6 +2479,47 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
}
#if CONFIG_CLPF
cm->clpf_strength = 0;
cm->clpf_size = 2;
CHECK_MEM_ERROR(
cm, cm->clpf_blocks,
aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) *
((cm->frame_to_show->y_crop_height + 31) & ~31) >>
10));
if (!is_lossless_requested(&cpi->oxcf)) {
// Test CLPF
int i, hq = 1;
// TODO(yaowu): investigate per-segment CLPF decision and
// an optimal threshold, use 80 for now.
for (i = 0; i < MAX_SEGMENTS; i++)
hq &= av1_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
// Don't try filter if the entire image is nearly losslessly encoded
if (!hq) {
// Find the best strength and block size for the entire frame
int fb_size_log2, strength;
av1_clpf_test_frame(&cpi->last_frame_uf, cpi->Source, cm, &strength,
&fb_size_log2);
if (!fb_size_log2) fb_size_log2 = get_msb(MAX_FB_SIZE);
if (!strength) { // Better to disable for the whole frame?
cm->clpf_strength = 0;
} else {
// Apply the filter using the chosen strength
cm->clpf_strength = strength - (strength == 4);
cm->clpf_size =
fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
cm->clpf_numblocks =
av1_clpf_frame(cm->frame_to_show, &cpi->last_frame_uf, cpi->Source,
cm, !!cm->clpf_size, strength, 4 + cm->clpf_size,
cm->clpf_blocks, av1_clpf_decision);
}
}
}
#endif
#if CONFIG_DERING
if (is_lossless_requested(&cpi->oxcf)) {
cm->dering_level = 0;
@ -2488,65 +2530,6 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
}
#endif // CONFIG_DERING
#if CONFIG_CLPF
cm->clpf = 0;
if (!is_lossless_requested(&cpi->oxcf)) {
// Test CLPF
int i, hq = 1;
uint64_t before, after;
// TODO(yaowu): investigate per-segment CLPF decision and
// an optimal threshold, use 80 for now.
for (i = 0; i < MAX_SEGMENTS; i++)
hq &= av1_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
if (!hq) { // Don't try filter if the entire image is nearly losslessly
// encoded
#if CLPF_FILTER_ALL_PLANES
aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
before =
get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
av1_clpf_frame(cm->frame_to_show, cm, xd);
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
#else
aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
before = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
av1_clpf_frame(cm->frame_to_show, cm, xd);
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
#endif
if (before < after) {
// No improvement, restore original
#if CLPF_FILTER_ALL_PLANES
aom_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
#else
aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
#endif
} else {
cm->clpf = 1;
}
}
}
#endif
aom_extend_frame_inner_borders(cm->frame_to_show);
}
@ -3649,6 +3632,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
}
#endif // CONFIG_EXT_REFS
#if CONFIG_CLPF
aom_free(cm->clpf_blocks);
#endif
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {