New CLPF: New kernel and RDO for strength and block size
Change-Id: I61eb08862a101df74a6b65ece459833401e81117
This commit is contained in:
Родитель
9c1e2f92e5
Коммит
7560123c06
|
@ -65,8 +65,10 @@ AV1_COMMON_SRCS-yes += common/scan.h
|
|||
# TODO(angiebird) the forward transform belongs under encoder/
|
||||
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.h
|
||||
AV1_COMMON_SRCS-$(CONFIG_AV1_ENCODER) += common/av1_fwd_txfm.c
|
||||
ifeq ($(CONFIG_CLPF),yes)
|
||||
AV1_COMMON_SRCS-yes += common/clpf.c
|
||||
AV1_COMMON_SRCS-yes += common/clpf.h
|
||||
endif
|
||||
ifeq ($(CONFIG_DERING),yes)
|
||||
AV1_COMMON_SRCS-yes += common/od_dering.c
|
||||
AV1_COMMON_SRCS-yes += common/od_dering.h
|
||||
|
|
|
@ -82,6 +82,10 @@ AV1_CX_SRCS-yes += encoder/temporal_filter.h
|
|||
AV1_CX_SRCS-yes += encoder/mbgraph.c
|
||||
AV1_CX_SRCS-yes += encoder/mbgraph.h
|
||||
AV1_CX_SRCS-$(CONFIG_DERING) += encoder/pickdering.c
|
||||
ifeq ($(CONFIG_CLPF),yes)
|
||||
AV1_CX_SRCS-yes += encoder/clpf_rdo.c
|
||||
AV1_CX_SRCS-yes += encoder/clpf_rdo.h
|
||||
endif
|
||||
|
||||
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.c
|
||||
|
|
|
@ -9,96 +9,119 @@
|
|||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
#include "av1/common/clpf.h"
|
||||
#include "aom_dsp/aom_dsp_common.h"
|
||||
|
||||
// Apply the filter on a single block
|
||||
static void clpf_block(const uint8_t *src, uint8_t *dst, int sstride,
|
||||
int dstride, int has_top, int has_left, int has_bottom,
|
||||
int has_right, int width, int height) {
|
||||
int av1_clpf_maxbits(const AV1_COMMON *cm) {
|
||||
return get_msb(ALIGN_POWER_OF_TWO(cm->mi_cols * MI_BLOCK_SIZE,
|
||||
cm->clpf_size + 4) *
|
||||
ALIGN_POWER_OF_TWO(cm->mi_rows * MI_BLOCK_SIZE,
|
||||
cm->clpf_size + 4) >>
|
||||
(cm->clpf_size * 2 + 8)) +
|
||||
1;
|
||||
}
|
||||
|
||||
// Computes the CLPF correction for one sample.
//
// X is the sample being filtered and A..F are its six neighbours (callers in
// this file pass: A = above, B/C = two and one to the left, D/E = one and two
// to the right, F = below). Each neighbour difference is clamped to [-b, b]
// and weighted 4, 1, 3, 3, 1, 4 respectively (weights sum to 16).
//
// Returns the weighted sum divided by 16 with rounding; the rounding offset
// is 8 for non-negative sums and 7 for negative ones. The caller adds the
// result to X.
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b) {
  const int dA = clamp(A - X, -b, b);
  const int dB = clamp(B - X, -b, b);
  const int dC = clamp(C - X, -b, b);
  const int dD = clamp(D - X, -b, b);
  const int dE = clamp(E - X, -b, b);
  const int dF = clamp(F - X, -b, b);
  const int delta = 4 * dA + dB + 3 * dC + 3 * dD + dE + 4 * dF;
  const int rounding = (delta < 0) ? 7 : 8;

  return (delta + rounding) >> 4;
}
|
||||
|
||||
static void clpf_block(const uint8_t *src, uint8_t *dst, int stride, int x0,
|
||||
int y0, int sizex, int sizey, int width, int height,
|
||||
unsigned int strength) {
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < height; y++) {
|
||||
for (x = 0; x < width; x++) {
|
||||
int X = src[(y + 0) * sstride + x + 0];
|
||||
int A = has_top ? src[(y - 1) * sstride + x + 0] : X;
|
||||
int B = has_left ? src[(y + 0) * sstride + x - 1] : X;
|
||||
int C = has_right ? src[(y + 0) * sstride + x + 1] : X;
|
||||
int D = has_bottom ? src[(y + 1) * sstride + x + 0] : X;
|
||||
int delta = ((A > X) + (B > X) + (C > X) + (D > X) > 2) -
|
||||
((A < X) + (B < X) + (C < X) + (D < X) > 2);
|
||||
dst[y * dstride + x] = X + delta;
|
||||
for (y = y0; y < y0 + sizey; y++) {
|
||||
for (x = x0; x < x0 + sizex; x++) {
|
||||
int X = src[y * stride + x];
|
||||
int A = src[AOMMAX(0, y - 1) * stride + x];
|
||||
int B = src[y * stride + AOMMAX(0, x - 2)];
|
||||
int C = src[y * stride + AOMMAX(0, x - 1)];
|
||||
int D = src[y * stride + AOMMIN(width - 1, x + 1)];
|
||||
int E = src[y * stride + AOMMIN(width - 1, x + 2)];
|
||||
int F = src[AOMMIN(height - 1, y + 1) * stride + x];
|
||||
int delta;
|
||||
delta = av1_clpf_sample(X, A, B, C, D, E, F, strength);
|
||||
dst[y * stride + x] = X + delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define BS MI_SIZE *MI_BLOCK_SIZE
|
||||
// Return number of filtered blocks
|
||||
int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int enable_fb_flag, unsigned int strength,
|
||||
unsigned int fb_size_log2, uint8_t *blocks,
|
||||
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
|
||||
const YV12_BUFFER_CONFIG *,
|
||||
const AV1_COMMON *cm, int, int, int,
|
||||
unsigned int, unsigned int, uint8_t *)) {
|
||||
/* Constrained low-pass filter (CLPF) */
|
||||
int c, k, l, m, n;
|
||||
int width = rec->y_crop_width;
|
||||
int height = rec->y_crop_height;
|
||||
int xpos, ypos;
|
||||
int stride_y = rec->y_stride;
|
||||
int stride_c = rec->uv_stride;
|
||||
const int bs = MI_BLOCK_SIZE;
|
||||
int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
|
||||
int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
|
||||
int block_index = 0;
|
||||
|
||||
// Iterate over blocks within a superblock
|
||||
static void av1_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
|
||||
const AV1_COMMON *cm, MACROBLOCKD *xd,
|
||||
MODE_INFO *const *mi_8x8, int xpos, int ypos) {
|
||||
// Temporary buffer (to allow SIMD parallelism)
|
||||
uint8_t buf_unaligned[BS * BS + 15];
|
||||
uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
|
||||
int x, y, p;
|
||||
|
||||
for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
|
||||
for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
|
||||
for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
|
||||
const MB_MODE_INFO *mbmi =
|
||||
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
|
||||
|
||||
// Do not filter if there is no residual
|
||||
if (!mbmi->skip) {
|
||||
// Do not filter frame edges
|
||||
int has_top = ypos + y > 0;
|
||||
int has_left = xpos + x > 0;
|
||||
int has_bottom = ypos + y < cm->mi_rows - 1;
|
||||
int has_right = xpos + x < cm->mi_cols - 1;
|
||||
#if CLPF_ALLOW_BLOCK_PARALLELISM
|
||||
// Do not filter superblock edges
|
||||
has_top &= !!y;
|
||||
has_left &= !!x;
|
||||
has_bottom &= y != MI_BLOCK_SIZE - 1;
|
||||
has_right &= x != MI_BLOCK_SIZE - 1;
|
||||
#endif
|
||||
av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
|
||||
clpf_block(
|
||||
xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
|
||||
? buf + y * MI_SIZE * BS + x * MI_SIZE
|
||||
: xd->plane[p].dst.buf,
|
||||
xd->plane[p].dst.stride,
|
||||
CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
|
||||
has_top, has_left, has_bottom, has_right,
|
||||
MI_SIZE >> xd->plane[p].subsampling_x,
|
||||
MI_SIZE >> xd->plane[p].subsampling_y);
|
||||
// Iterate over all filter blocks
|
||||
for (k = 0; k < num_fb_ver; k++) {
|
||||
for (l = 0; l < num_fb_hor; l++) {
|
||||
int h, w;
|
||||
int allskip = 1;
|
||||
for (m = 0; allskip && m < (1 << fb_size_log2) / bs; m++) {
|
||||
for (n = 0; allskip && n < (1 << fb_size_log2) / bs; n++) {
|
||||
xpos = (l << fb_size_log2) + n * bs;
|
||||
ypos = (k << fb_size_log2) + m * bs;
|
||||
if (xpos < width && ypos < height) {
|
||||
allskip &=
|
||||
cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
|
||||
->mbmi.skip;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#if CLPF_ALLOW_PIXEL_PARALLELISM
|
||||
for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
|
||||
for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
|
||||
const MB_MODE_INFO *mbmi =
|
||||
&mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
|
||||
av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
|
||||
if (!mbmi->skip) {
|
||||
int i = 0;
|
||||
for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
|
||||
memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
|
||||
buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
|
||||
MI_SIZE >> xd->plane[p].subsampling_x);
|
||||
|
||||
// Calculate the actual filter block size near frame edges
|
||||
h = AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
|
||||
w = AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
|
||||
h += !h << fb_size_log2;
|
||||
w += !w << fb_size_log2;
|
||||
if (!allskip && // Do not filter the block if all is skip encoded
|
||||
(!enable_fb_flag ||
|
||||
decision(k, l, rec, org, cm, bs, w / bs, h / bs, strength,
|
||||
fb_size_log2, blocks + block_index))) {
|
||||
// Iterate over all smaller blocks inside the filter block
|
||||
for (m = 0; m < (h + bs - 1) / bs; m++) {
|
||||
for (n = 0; n < (w + bs - 1) / bs; n++) {
|
||||
xpos = (l << fb_size_log2) + n * bs;
|
||||
ypos = (k << fb_size_log2) + m * bs;
|
||||
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
|
||||
->mbmi.skip) {
|
||||
// Not skip block, apply the filter
|
||||
clpf_block(rec->y_buffer, dst->y_buffer, stride_y, xpos, ypos, bs,
|
||||
bs, width, height, strength);
|
||||
} else { // Skip block, copy instead
|
||||
for (c = 0; c < bs; c++)
|
||||
*(uint64_t *)(dst->y_buffer + (ypos + c) * stride_y + xpos) =
|
||||
*(uint64_t *)(rec->y_buffer + (ypos + c) * stride_y + xpos);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else { // Entire filter block is skip, copy
|
||||
for (m = 0; m < h; m++)
|
||||
memcpy(dst->y_buffer + ((k << fb_size_log2) + m) * stride_y +
|
||||
(l << fb_size_log2),
|
||||
rec->y_buffer + ((k << fb_size_log2) + m) * stride_y +
|
||||
(l << fb_size_log2),
|
||||
w);
|
||||
}
|
||||
block_index += !allskip; // Count number of blocks filtered
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Iterate over the superblocks of an entire frame
|
||||
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd) {
|
||||
int x, y;
|
||||
|
||||
for (y = 0; y < cm->mi_rows; y += MI_BLOCK_SIZE)
|
||||
for (x = 0; x < cm->mi_cols; x += MI_BLOCK_SIZE)
|
||||
av1_clpf_sb(frame, cm, xd, cm->mi_grid_visible, x, y);
|
||||
return block_index;
|
||||
}
|
||||
|
|
|
@ -13,15 +13,17 @@
|
|||
|
||||
#include "av1/common/reconinter.h"
|
||||
|
||||
// Configuration
|
||||
#define CLPF_ALLOW_PIXEL_PARALLELISM \
|
||||
1 // 1 = SIMD friendly (adds a buffer requirement)
|
||||
#define CLPF_ALLOW_BLOCK_PARALLELISM \
|
||||
0 // 1 = MT friendly (degrades quality slightly)
|
||||
#define CLPF_FILTER_ALL_PLANES \
|
||||
0 // 1 = filter both luma and chroma, 0 = filter only luma
|
||||
#define MAX_FB_SIZE 128
|
||||
|
||||
void av1_clpf_frame(const YV12_BUFFER_CONFIG *frame, const AV1_COMMON *cm,
|
||||
MACROBLOCKD *xd);
|
||||
int av1_clpf_maxbits(const AV1_COMMON *cm);
|
||||
int av1_clpf_sample(int X, int A, int B, int C, int D, int E, int F, int b);
|
||||
int av1_clpf_frame(const YV12_BUFFER_CONFIG *dst, const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int enable_fb_flag, unsigned int strength,
|
||||
unsigned int fb_size_log2, uint8_t *blocks,
|
||||
int (*decision)(int, int, const YV12_BUFFER_CONFIG *,
|
||||
const YV12_BUFFER_CONFIG *,
|
||||
const AV1_COMMON *cm, int, int, int,
|
||||
unsigned int, unsigned int, uint8_t *));
|
||||
|
||||
#endif
|
||||
|
|
|
@ -147,7 +147,10 @@ typedef struct AV1Common {
|
|||
#endif
|
||||
|
||||
#if CONFIG_CLPF
|
||||
int clpf;
|
||||
int clpf_numblocks;
|
||||
int clpf_size;
|
||||
int clpf_strength;
|
||||
uint8_t *clpf_blocks;
|
||||
#endif
|
||||
|
||||
YV12_BUFFER_CONFIG *frame_to_show;
|
||||
|
|
|
@ -817,7 +817,26 @@ static void setup_loopfilter(struct loopfilter *lf,
|
|||
|
||||
#if CONFIG_CLPF
|
||||
// Reads the CLPF parameters from the frame header into cm.
//
// Layout read from rb: 1 bit cm->clpf, 2 bits cm->clpf_strength; if the
// strength is non-zero, 2 bits cm->clpf_size; if the size is non-zero,
// av1_clpf_maxbits(cm) bits of cm->clpf_numblocks followed by one flag bit
// per signalled block. cm->clpf_blocks is left NULL when no flags are coded;
// otherwise it is allocated here and must be released by the caller.
static void setup_clpf(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) {
  cm->clpf = aom_rb_read_literal(rb, 1);
  cm->clpf_blocks = NULL;
  cm->clpf_strength = aom_rb_read_literal(rb, 2);
  if (!cm->clpf_strength) return;

  cm->clpf_size = aom_rb_read_literal(rb, 2);
  if (cm->clpf_size) {
    const int maxbits = av1_clpf_maxbits(cm);
    // clpf_numblocks comes from the bitstream and may be smaller than the
    // number of flags the filter later consumes (it indexes clpf_blocks by the
    // running count of non-skip filter blocks). That count is below
    // 1 << maxbits, so allocate that many flags and zero the ones that were
    // not signalled instead of letting the filter read past the allocation.
    const size_t capacity = (size_t)1 << maxbits;
    size_t i;

    cm->clpf_numblocks = aom_rb_read_literal(rb, maxbits);
    CHECK_MEM_ERROR(cm, cm->clpf_blocks, aom_malloc(capacity));
    for (i = 0; i < (size_t)cm->clpf_numblocks; i++) {
      cm->clpf_blocks[i] = aom_rb_read_literal(rb, 1);
    }
    // Unsignalled blocks default to "do not filter".
    for (; i < capacity; i++) {
      cm->clpf_blocks[i] = 0;
    }
  }
}
|
||||
|
||||
// Decision callback handed to av1_clpf_frame() by the decoder: the choice for
// a filter block is simply the flag that was read from the bitstream. All
// other arguments exist only to match the callback signature and are ignored.
static int clpf_bit(int k, int l, const YV12_BUFFER_CONFIG *rec,
                    const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
                    int block_size, int w, int h, unsigned int strength,
                    unsigned int fb_size_log2, uint8_t *bit) {
  const int signalled_flag = bit[0];

  (void)k;
  (void)l;
  (void)rec;
  (void)org;
  (void)cm;
  (void)block_size;
  (void)w;
  (void)h;
  (void)strength;
  (void)fb_size_log2;
  return signalled_flag;
}
|
||||
#endif
|
||||
|
||||
|
@ -2240,8 +2259,22 @@ void av1_decode_frame(AV1Decoder *pbi, const uint8_t *data,
|
|||
}
|
||||
|
||||
#if CONFIG_CLPF
|
||||
if (cm->clpf && !cm->skip_loop_filter)
|
||||
av1_clpf_frame(&pbi->cur_buf->buf, cm, &pbi->mb);
|
||||
if (cm->clpf_strength && !cm->skip_loop_filter) {
|
||||
YV12_BUFFER_CONFIG dst; // Buffer for the result
|
||||
|
||||
dst = pbi->cur_buf->buf;
|
||||
CHECK_MEM_ERROR(cm, dst.y_buffer, aom_malloc(dst.y_stride * dst.y_height));
|
||||
|
||||
av1_clpf_frame(&dst, &pbi->cur_buf->buf, 0, cm, !!cm->clpf_size,
|
||||
cm->clpf_strength + (cm->clpf_strength == 3),
|
||||
4 + cm->clpf_size, cm->clpf_blocks, clpf_bit);
|
||||
|
||||
// Copy result
|
||||
memcpy(pbi->cur_buf->buf.y_buffer, dst.y_buffer,
|
||||
dst.y_height * dst.y_stride);
|
||||
aom_free(dst.y_buffer);
|
||||
}
|
||||
if (cm->clpf_blocks) aom_free(cm->clpf_blocks);
|
||||
#endif
|
||||
#if CONFIG_DERING
|
||||
if (cm->dering_level && !cm->skip_loop_filter) {
|
||||
|
|
|
@ -1091,7 +1091,22 @@ static void encode_loopfilter(struct loopfilter *lf,
|
|||
|
||||
#if CONFIG_CLPF
|
||||
static void encode_clpf(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
|
||||
aom_wb_write_literal(wb, cm->clpf, 1);
|
||||
aom_wb_write_literal(wb, cm->clpf_strength, 2);
|
||||
if (cm->clpf_strength) {
|
||||
aom_wb_write_literal(wb, cm->clpf_size, 2);
|
||||
if (cm->clpf_size) {
|
||||
int i;
|
||||
// TODO(stemidts): The number of bits to transmit could be
|
||||
// implicitly deduced if transmitted after the filter block or
|
||||
// after the frame (when it's known whether the block is all
|
||||
// skip and implicitly unfiltered). And the bits do not have
|
||||
// 50% probability, so a more efficient coding is possible.
|
||||
aom_wb_write_literal(wb, cm->clpf_numblocks, av1_clpf_maxbits(cm));
|
||||
for (i = 0; i < cm->clpf_numblocks; i++) {
|
||||
aom_wb_write_literal(wb, cm->clpf_blocks[i], 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#include "av1/common/clpf.h"
|
||||
#include "aom/aom_integer.h"
|
||||
#include "av1/common/quant_common.h"
|
||||
|
||||
// Accumulate the squared error of one 8x8 block against the original, both
// unfiltered and filtered.
//
// rec/stride : reconstructed plane and its stride
// org/so     : original plane and its stride
// x0, y0     : top-left sample of the block
// width, height : plane dimensions; neighbour positions are clamped to
//                 [0, width - 1] x [0, height - 1]
// sum0       : incremented by the squared error of the unfiltered samples
// sum1       : incremented by the squared error after adding the
//              av1_clpf_sample() correction at the given strength
static void detect_clpf(const uint8_t *rec, const uint8_t *org, int x0, int y0,
                        int width, int height, int so, int stride, int *sum0,
                        int *sum1, unsigned int strength) {
  const int last_col = width - 1;
  const int last_row = height - 1;
  int row, col;

  for (row = y0; row < y0 + 8; row++) {
    const uint8_t *const above = rec + AOMMAX(0, row - 1) * stride;
    const uint8_t *const cur = rec + row * stride;
    const uint8_t *const below = rec + AOMMIN(last_row, row + 1) * stride;
    const uint8_t *const src = org + row * so;

    for (col = x0; col < x0 + 8; col++) {
      const int O = src[col];
      const int X = cur[col];
      const int A = above[col];
      const int B = cur[AOMMAX(0, col - 2)];
      const int C = cur[AOMMAX(0, col - 1)];
      const int D = cur[AOMMIN(last_col, col + 1)];
      const int E = cur[AOMMIN(last_col, col + 2)];
      const int F = below[col];
      const int Y = X + av1_clpf_sample(X, A, B, C, D, E, F, strength);
      const int err_unfiltered = O - X;
      const int err_filtered = O - Y;

      *sum0 += err_unfiltered * err_unfiltered;
      *sum1 += err_filtered * err_filtered;
    }
  }
}
|
||||
|
||||
// Accumulate the squared error of one 8x8 block against the original for the
// unfiltered samples and for each of the three filter strengths.
//
// rec/stride, org/so, x0, y0, width, height: as for detect_clpf(); neighbour
// positions are clamped to the plane.
// sum[0] is incremented by the unfiltered squared error, sum[1..3] by the
// squared error after filtering with strength 1, 2 and 4 respectively.
static void detect_multi_clpf(const uint8_t *rec, const uint8_t *org, int x0,
                              int y0, int width, int height, int so, int stride,
                              int *sum) {
  static const int strengths[3] = { 1, 2, 4 };
  const int last_col = width - 1;
  const int last_row = height - 1;
  int row, col, s;

  for (row = y0; row < y0 + 8; row++) {
    const uint8_t *const above = rec + AOMMAX(0, row - 1) * stride;
    const uint8_t *const cur = rec + row * stride;
    const uint8_t *const below = rec + AOMMIN(last_row, row + 1) * stride;
    const uint8_t *const src = org + row * so;

    for (col = x0; col < x0 + 8; col++) {
      const int O = src[col];
      const int X = cur[col];
      const int A = above[col];
      const int B = cur[AOMMAX(0, col - 2)];
      const int C = cur[AOMMAX(0, col - 1)];
      const int D = cur[AOMMIN(last_col, col + 1)];
      const int E = cur[AOMMIN(last_col, col + 2)];
      const int F = below[col];

      sum[0] += (O - X) * (O - X);
      for (s = 0; s < 3; s++) {
        const int filtered =
            X + av1_clpf_sample(X, A, B, C, D, E, F, strengths[s]);
        sum[s + 1] += (O - filtered) * (O - filtered);
      }
    }
  }
}
|
||||
|
||||
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int block_size, int w, int h, unsigned int strength,
|
||||
unsigned int fb_size_log2, uint8_t *res) {
|
||||
int m, n, sum0 = 0, sum1 = 0;
|
||||
for (m = 0; m < h; m++) {
|
||||
for (n = 0; n < w; n++) {
|
||||
int xpos = (l << fb_size_log2) + n * block_size;
|
||||
int ypos = (k << fb_size_log2) + m * block_size;
|
||||
const int bs = MI_BLOCK_SIZE;
|
||||
if (!cm->mi_grid_visible[ypos / bs * cm->mi_stride + xpos / bs]
|
||||
->mbmi.skip)
|
||||
detect_clpf(rec->y_buffer, org->y_buffer, xpos, ypos, rec->y_crop_width,
|
||||
rec->y_crop_height, org->y_stride, rec->y_stride, &sum0,
|
||||
&sum1, strength);
|
||||
}
|
||||
}
|
||||
*res = sum1 < sum0;
|
||||
return *res;
|
||||
}
|
||||
|
||||
// Calculate the square error of all filter settings. Result:
|
||||
// res[0][0] : unfiltered
|
||||
// res[0][1-3] : strength=1,2,4, no signals
|
||||
// res[1][0] : (bit count, fb size = 128)
|
||||
// res[1][1-3] : strength=1,2,4, fb size = 128
|
||||
// res[2][0] : (bit count, fb size = 64)
|
||||
// res[2][1-3] : strength=1,2,4, fb size = 64
|
||||
// res[3][0] : (bit count, fb size = 32)
|
||||
// res[3][1-3] : strength=1,2,4, fb size = 32
|
||||
static int clpf_rdo(int y, int x, const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
unsigned int block_size, unsigned int fb_size_log2, int w,
|
||||
int h, int64_t res[4][4]) {
|
||||
int i, m, n, filtered = 0;
|
||||
int sum[4];
|
||||
int bslog = get_msb(block_size);
|
||||
sum[0] = sum[1] = sum[2] = sum[3] = 0;
|
||||
if (fb_size_log2 > (unsigned int)get_msb(MAX_FB_SIZE) - 3) {
|
||||
int w1, h1, w2, h2, i, sum1, sum2, sum3, oldfiltered;
|
||||
|
||||
fb_size_log2--;
|
||||
w1 = AOMMIN(1 << (fb_size_log2 - bslog), w);
|
||||
h1 = AOMMIN(1 << (fb_size_log2 - bslog), h);
|
||||
w2 = AOMMIN(w - (1 << (fb_size_log2 - bslog)), w >> 1);
|
||||
h2 = AOMMIN(h - (1 << (fb_size_log2 - bslog)), h >> 1);
|
||||
i = get_msb(MAX_FB_SIZE) - fb_size_log2;
|
||||
sum1 = res[i][1];
|
||||
sum2 = res[i][2];
|
||||
sum3 = res[i][3];
|
||||
oldfiltered = res[i][0];
|
||||
res[i][0] = 0;
|
||||
|
||||
filtered =
|
||||
clpf_rdo(y, x, rec, org, cm, block_size, fb_size_log2, w1, h1, res);
|
||||
if (1 << (fb_size_log2 - bslog) < w)
|
||||
filtered |= clpf_rdo(y, x + (1 << fb_size_log2), rec, org, cm, block_size,
|
||||
fb_size_log2, w2, h1, res);
|
||||
if (1 << (fb_size_log2 - bslog) < h) {
|
||||
filtered |= clpf_rdo(y + (1 << fb_size_log2), x, rec, org, cm, block_size,
|
||||
fb_size_log2, w1, h2, res);
|
||||
filtered |= clpf_rdo(y + (1 << fb_size_log2), x + (1 << fb_size_log2),
|
||||
rec, org, cm, block_size, fb_size_log2, w2, h2, res);
|
||||
}
|
||||
|
||||
res[i][1] = AOMMIN(sum1 + res[i][0], res[i][1]);
|
||||
res[i][2] = AOMMIN(sum2 + res[i][0], res[i][2]);
|
||||
res[i][3] = AOMMIN(sum3 + res[i][0], res[i][3]);
|
||||
res[i][0] = oldfiltered + filtered; // Number of signal bits
|
||||
return filtered;
|
||||
}
|
||||
|
||||
for (m = 0; m < h; m++) {
|
||||
for (n = 0; n < w; n++) {
|
||||
int xpos = x + n * block_size;
|
||||
int ypos = y + m * block_size;
|
||||
if (!cm->mi_grid_visible[ypos / MI_BLOCK_SIZE * cm->mi_stride +
|
||||
xpos / MI_BLOCK_SIZE]
|
||||
->mbmi.skip) {
|
||||
detect_multi_clpf(rec->y_buffer, org->y_buffer, xpos, ypos,
|
||||
rec->y_crop_width, rec->y_crop_height, org->y_stride,
|
||||
rec->y_stride, sum);
|
||||
filtered = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
res[i][0] += sum[0];
|
||||
res[i][1] += sum[1];
|
||||
res[i][2] += sum[2];
|
||||
res[i][3] += sum[3];
|
||||
}
|
||||
return filtered;
|
||||
}
|
||||
|
||||
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int *best_strength, int *best_bs) {
|
||||
int i, j, k, l;
|
||||
int64_t best, sums[4][4];
|
||||
int width = rec->y_crop_width, height = rec->y_crop_height;
|
||||
const int bs = MI_BLOCK_SIZE;
|
||||
int fb_size_log2 = get_msb(MAX_FB_SIZE);
|
||||
int num_fb_ver = (height + (1 << fb_size_log2) - bs) >> fb_size_log2;
|
||||
int num_fb_hor = (width + (1 << fb_size_log2) - bs) >> fb_size_log2;
|
||||
|
||||
memset(sums, 0, sizeof(sums));
|
||||
|
||||
for (k = 0; k < num_fb_ver; k++) {
|
||||
for (l = 0; l < num_fb_hor; l++) {
|
||||
// Calculate the block size after frame border clipping
|
||||
int h =
|
||||
AOMMIN(height, (k + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
|
||||
int w =
|
||||
AOMMIN(width, (l + 1) << fb_size_log2) & ((1 << fb_size_log2) - 1);
|
||||
h += !h << fb_size_log2;
|
||||
w += !w << fb_size_log2;
|
||||
clpf_rdo(k << fb_size_log2, l << fb_size_log2, rec, org, cm, bs,
|
||||
fb_size_log2, w / bs, h / bs, sums);
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
static const double lambda_square[] = {
|
||||
// exp((i - 15.4244) / 8.4010)
|
||||
0.159451, 0.179607, 0.202310, 0.227884, 0.256690, 0.289138, 0.325687,
|
||||
0.366856, 0.413230, 0.465465, 0.524303, 0.590579, 0.665233, 0.749323,
|
||||
0.844044, 0.950737, 1.070917, 1.206289, 1.358774, 1.530533, 1.724004,
|
||||
1.941931, 2.187406, 2.463911, 2.775368, 3.126195, 3.521370, 3.966498,
|
||||
4.467893, 5.032669, 5.668837, 6.385421, 7.192586, 8.101784, 9.125911,
|
||||
10.27949, 11.57890, 13.04256, 14.69124, 16.54832, 18.64016, 20.99641,
|
||||
23.65052, 26.64013, 30.00764, 33.80084, 38.07352, 42.88630, 48.30746,
|
||||
54.41389, 61.29221, 69.04002, 77.76720, 87.59756, 98.67056, 111.1432,
|
||||
125.1926, 141.0179, 158.8436, 178.9227, 201.5399, 227.0160, 255.7126,
|
||||
288.0366
|
||||
};
|
||||
|
||||
// Estimate the bit costs and adjust the square errors
|
||||
double lambda =
|
||||
lambda_square[av1_get_qindex(&cm->seg, 0, cm->base_qindex) >> 2];
|
||||
int i, cost = (int)((1.2 * lambda * (sums[j][0] + 2 + 2 * (j > 0)) + 0.5));
|
||||
for (i = 0; i < 4; i++)
|
||||
sums[j][i] = ((sums[j][i] + (i && j) * cost) << 4) + j * 4 + i;
|
||||
}
|
||||
|
||||
best = (int64_t)1 << 62;
|
||||
for (i = 0; i < 4; i++)
|
||||
for (j = 0; j < 4; j++)
|
||||
if ((!i || j) && sums[i][j] < best) best = sums[i][j];
|
||||
best &= 15;
|
||||
*best_bs = (best > 3) * (5 + (best < 12) + (best < 8));
|
||||
*best_strength = best ? 1 << ((best - 1) & 3) : 0;
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
|
||||
*
|
||||
* This source code is subject to the terms of the BSD 2 Clause License and
|
||||
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
|
||||
* was not distributed with this source code in the LICENSE file, you can
|
||||
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
|
||||
* Media Patent License 1.0 was not distributed with this source code in the
|
||||
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
|
||||
*/
|
||||
|
||||
#ifndef AV1_ENCODER_CLPF_H_
|
||||
#define AV1_ENCODER_CLPF_H_
|
||||
|
||||
#include "av1/common/reconinter.h"
|
||||
|
||||
int av1_clpf_decision(int k, int l, const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int block_size, int w, int h, unsigned int strength,
|
||||
unsigned int fb_size_log2, uint8_t *res);
|
||||
|
||||
void av1_clpf_test_frame(const YV12_BUFFER_CONFIG *rec,
|
||||
const YV12_BUFFER_CONFIG *org, const AV1_COMMON *cm,
|
||||
int *best_strength, int *best_bs);
|
||||
|
||||
#endif
|
|
@ -18,6 +18,7 @@
|
|||
#include "av1/common/alloccommon.h"
|
||||
#if CONFIG_CLPF
|
||||
#include "av1/common/clpf.h"
|
||||
#include "av1/encoder/clpf_rdo.h"
|
||||
#endif
|
||||
#if CONFIG_DERING
|
||||
#include "av1/common/dering.h"
|
||||
|
@ -2478,6 +2479,47 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
|
|||
av1_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0);
|
||||
}
|
||||
|
||||
#if CONFIG_CLPF
|
||||
cm->clpf_strength = 0;
|
||||
cm->clpf_size = 2;
|
||||
CHECK_MEM_ERROR(
|
||||
cm, cm->clpf_blocks,
|
||||
aom_malloc(((cm->frame_to_show->y_crop_width + 31) & ~31) *
|
||||
((cm->frame_to_show->y_crop_height + 31) & ~31) >>
|
||||
10));
|
||||
if (!is_lossless_requested(&cpi->oxcf)) {
|
||||
// Test CLPF
|
||||
int i, hq = 1;
|
||||
// TODO(yaowu): investigate per-segment CLPF decision and
|
||||
// an optimal threshold, use 80 for now.
|
||||
for (i = 0; i < MAX_SEGMENTS; i++)
|
||||
hq &= av1_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
|
||||
|
||||
// Don't try filter if the entire image is nearly losslessly encoded
|
||||
if (!hq) {
|
||||
// Find the best strength and block size for the entire frame
|
||||
int fb_size_log2, strength;
|
||||
av1_clpf_test_frame(&cpi->last_frame_uf, cpi->Source, cm, &strength,
|
||||
&fb_size_log2);
|
||||
|
||||
if (!fb_size_log2) fb_size_log2 = get_msb(MAX_FB_SIZE);
|
||||
|
||||
if (!strength) { // Better to disable for the whole frame?
|
||||
cm->clpf_strength = 0;
|
||||
} else {
|
||||
// Apply the filter using the chosen strength
|
||||
cm->clpf_strength = strength - (strength == 4);
|
||||
cm->clpf_size =
|
||||
fb_size_log2 ? fb_size_log2 - get_msb(MAX_FB_SIZE) + 3 : 0;
|
||||
aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
cm->clpf_numblocks =
|
||||
av1_clpf_frame(cm->frame_to_show, &cpi->last_frame_uf, cpi->Source,
|
||||
cm, !!cm->clpf_size, strength, 4 + cm->clpf_size,
|
||||
cm->clpf_blocks, av1_clpf_decision);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if CONFIG_DERING
|
||||
if (is_lossless_requested(&cpi->oxcf)) {
|
||||
cm->dering_level = 0;
|
||||
|
@ -2488,65 +2530,6 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
|
|||
}
|
||||
#endif // CONFIG_DERING
|
||||
|
||||
#if CONFIG_CLPF
|
||||
cm->clpf = 0;
|
||||
if (!is_lossless_requested(&cpi->oxcf)) {
|
||||
// Test CLPF
|
||||
int i, hq = 1;
|
||||
uint64_t before, after;
|
||||
// TODO(yaowu): investigate per-segment CLPF decision and
|
||||
// an optimal threshold, use 80 for now.
|
||||
for (i = 0; i < MAX_SEGMENTS; i++)
|
||||
hq &= av1_get_qindex(&cm->seg, i, cm->base_qindex) < 80;
|
||||
|
||||
if (!hq) { // Don't try filter if the entire image is nearly losslessly
|
||||
// encoded
|
||||
#if CLPF_FILTER_ALL_PLANES
|
||||
aom_yv12_copy_frame(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
before =
|
||||
get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
|
||||
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
|
||||
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
|
||||
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
|
||||
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
|
||||
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
|
||||
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
|
||||
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
|
||||
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
|
||||
av1_clpf_frame(cm->frame_to_show, cm, xd);
|
||||
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
|
||||
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
|
||||
cpi->Source->y_crop_width, cpi->Source->y_crop_height) +
|
||||
get_sse(cpi->Source->u_buffer, cpi->Source->uv_stride,
|
||||
cm->frame_to_show->u_buffer, cm->frame_to_show->uv_stride,
|
||||
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height) +
|
||||
get_sse(cpi->Source->v_buffer, cpi->Source->uv_stride,
|
||||
cm->frame_to_show->v_buffer, cm->frame_to_show->uv_stride,
|
||||
cpi->Source->uv_crop_width, cpi->Source->uv_crop_height);
|
||||
#else
|
||||
aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
|
||||
before = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
|
||||
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
|
||||
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
|
||||
av1_clpf_frame(cm->frame_to_show, cm, xd);
|
||||
after = get_sse(cpi->Source->y_buffer, cpi->Source->y_stride,
|
||||
cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride,
|
||||
cpi->Source->y_crop_width, cpi->Source->y_crop_height);
|
||||
#endif
|
||||
if (before < after) {
|
||||
// No improvement, restore original
|
||||
#if CLPF_FILTER_ALL_PLANES
|
||||
aom_yv12_copy_frame(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
#else
|
||||
aom_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);
|
||||
#endif
|
||||
} else {
|
||||
cm->clpf = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
aom_extend_frame_inner_borders(cm->frame_to_show);
|
||||
}
|
||||
|
||||
|
@ -3649,6 +3632,10 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
|
|||
}
|
||||
#endif // CONFIG_EXT_REFS
|
||||
|
||||
#if CONFIG_CLPF
|
||||
aom_free(cm->clpf_blocks);
|
||||
#endif
|
||||
|
||||
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
|
||||
|
||||
if (frame_is_intra_only(cm) == 0) {
|
||||
|
|
Загрузка…
Ссылка в новой задаче