Remove vp9-postproc from configure
Change-Id: I601464f0b74183daa80730856dfbf33ddfce2cfe
Parent: 3246fc04fb
Commit: b89861a463
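A minimal usage sketch of what this change means for configure invocations. The switch names below are assumptions derived from the toggle_* entries in the diff that follows (configure conventionally maps a toggle_foo entry to --enable-foo/--disable-foo); this is illustrative only, not taken from the commit itself.

    # Before this commit, VP9-specific postprocessing had its own switch
    # alongside the generic one:
    ./configure --enable-postproc --enable-vp9-postproc

    # After this commit, only the generic postprocessing switch remains:
    ./configure --enable-postproc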
@@ -41,7 +41,6 @@ Advanced options:
     ${toggle_vp10}                  VP10 codec support
     ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
     ${toggle_postproc}              postprocessing
-    ${toggle_vp9_postproc}          vp9 specific postprocessing
     ${toggle_multithread}           multithreaded encoding and decoding
     ${toggle_spatial_resampling}    spatial sampling (scaling) support
     ${toggle_realtime_only}         enable this option while building for real-time encoding
@@ -283,7 +282,6 @@ CONFIG_LIST="
     dc_recon
     runtime_cpu_detect
     postproc
-    vp9_postproc
     multithread
     internal_stats
     ${CODECS}
@@ -346,7 +344,6 @@ CMDLINE_SELECT="
     dequant_tokens
     dc_recon
     postproc
-    vp9_postproc
     multithread
     internal_stats
     ${CODECS}
@@ -442,7 +439,7 @@ process_targets() {
   done
   enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
   enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
-  ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
+  ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
   ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
   ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
   DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
@@ -626,10 +623,6 @@ process_toolchain() {
     enable_feature dc_recon
   fi

-  if enabled internal_stats; then
-    enable_feature vp9_postproc
-  fi
-
   # Enable the postbuild target if building for visual studio.
   case "$tgt_cc" in
     vs*) enable_feature msvs
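For context on the two hunks above, the enabled and enable_feature helpers they call behave roughly as sketched below. This is a simplified assumption of their behavior (the real definitions live elsewhere in the build scripts and also maintain the CONFIG_* output lists), shown only to make the DIST_DIR and process_toolchain changes easier to read.

    # Simplified sketch (assumed behavior) of the helpers used above.
    enable_feature() {            # turn a feature on, e.g. enable_feature postproc
      for feature in "$@"; do
        eval "${feature}=yes"
      done
    }
    enabled() {                   # test whether a feature is on, e.g. enabled postproc
      eval test "x\$$1" = "xyes"
    }

    # With vp9_postproc removed, the -nopost suffix now depends on postproc alone:
    ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"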
@@ -81,15 +81,6 @@ void vp10_free_ref_frame_buffers(BufferPool *pool) {
   }
 }

-void vp10_free_postproc_buffers(VP10_COMMON *cm) {
-#if CONFIG_VP9_POSTPROC
-  vpx_free_frame_buffer(&cm->post_proc_buffer);
-  vpx_free_frame_buffer(&cm->post_proc_buffer_int);
-#else
-  (void)cm;
-#endif
-}
-
 void vp10_free_context_buffers(VP10_COMMON *cm) {
   cm->free_mi(cm);
   free_seg_map(cm);
@@ -1,394 +0,0 @@
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp10_rtcd.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
|
||||
#include "vp10/common/onyxc_int.h"
|
||||
#include "vp10/common/postproc.h"
|
||||
|
||||
// TODO(jackychen): Replace this function with SSE2 code. There is
|
||||
// one SSE2 implementation in vp8, so will consider how to share it
|
||||
// between vp8 and vp9.
|
||||
static void filter_by_weight(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
int block_size, int src_weight) {
|
||||
const int dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
const int rounding_bit = 1 << (MFQE_PRECISION - 1);
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < block_size; r++) {
|
||||
for (c = 0; c < block_size; c++) {
|
||||
dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit)
|
||||
>> MFQE_PRECISION;
|
||||
}
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp10_filter_by_weight8x8_c(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride, int src_weight) {
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
|
||||
}
|
||||
|
||||
void vp10_filter_by_weight16x16_c(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
int src_weight) {
|
||||
filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
|
||||
}
|
||||
|
||||
static void filter_by_weight32x32(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride, int weight) {
|
||||
vp10_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight);
|
||||
vp10_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride,
|
||||
weight);
|
||||
vp10_filter_by_weight16x16(src + src_stride * 16, src_stride,
|
||||
dst + dst_stride * 16, dst_stride, weight);
|
||||
vp10_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride,
|
||||
dst + dst_stride * 16 + 16, dst_stride, weight);
|
||||
}
|
||||
|
||||
static void filter_by_weight64x64(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride, int weight) {
|
||||
filter_by_weight32x32(src, src_stride, dst, dst_stride, weight);
|
||||
filter_by_weight32x32(src + 32, src_stride, dst + 32,
|
||||
dst_stride, weight);
|
||||
filter_by_weight32x32(src + src_stride * 32, src_stride,
|
||||
dst + dst_stride * 32, dst_stride, weight);
|
||||
filter_by_weight32x32(src + src_stride * 32 + 32, src_stride,
|
||||
dst + dst_stride * 32 + 32, dst_stride, weight);
|
||||
}
|
||||
|
||||
static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd,
|
||||
int yd_stride, const uint8_t *u, const uint8_t *v,
|
||||
int uv_stride, uint8_t *ud, uint8_t *vd,
|
||||
int uvd_stride, BLOCK_SIZE block_size,
|
||||
int weight) {
|
||||
if (block_size == BLOCK_16X16) {
|
||||
vp10_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight);
|
||||
vp10_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight);
|
||||
vp10_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight);
|
||||
} else if (block_size == BLOCK_32X32) {
|
||||
filter_by_weight32x32(y, y_stride, yd, yd_stride, weight);
|
||||
vp10_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight);
|
||||
vp10_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight);
|
||||
} else if (block_size == BLOCK_64X64) {
|
||||
filter_by_weight64x64(y, y_stride, yd, yd_stride, weight);
|
||||
filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight);
|
||||
filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(jackychen): Determine whether replace it with assembly code.
|
||||
static void copy_mem8x8(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
int r;
|
||||
for (r = 0; r < 8; r++) {
|
||||
memcpy(dst, src, 8);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_mem16x16(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
int r;
|
||||
for (r = 0; r < 16; r++) {
|
||||
memcpy(dst, src, 16);
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
static void copy_mem32x32(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
copy_mem16x16(src, src_stride, dst, dst_stride);
|
||||
copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride);
|
||||
copy_mem16x16(src + src_stride * 16, src_stride,
|
||||
dst + dst_stride * 16, dst_stride);
|
||||
copy_mem16x16(src + src_stride * 16 + 16, src_stride,
|
||||
dst + dst_stride * 16 + 16, dst_stride);
|
||||
}
|
||||
|
||||
void copy_mem64x64(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride) {
|
||||
copy_mem32x32(src, src_stride, dst, dst_stride);
|
||||
copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride);
|
||||
copy_mem32x32(src + src_stride * 32, src_stride,
|
||||
dst + src_stride * 32, dst_stride);
|
||||
copy_mem32x32(src + src_stride * 32 + 32, src_stride,
|
||||
dst + src_stride * 32 + 32, dst_stride);
|
||||
}
|
||||
|
||||
static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v,
|
||||
int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud,
|
||||
uint8_t *vd, int yd_stride, int uvd_stride,
|
||||
BLOCK_SIZE bs) {
|
||||
if (bs == BLOCK_16X16) {
|
||||
copy_mem16x16(y, y_stride, yd, yd_stride);
|
||||
copy_mem8x8(u, uv_stride, ud, uvd_stride);
|
||||
copy_mem8x8(v, uv_stride, vd, uvd_stride);
|
||||
} else if (bs == BLOCK_32X32) {
|
||||
copy_mem32x32(y, y_stride, yd, yd_stride);
|
||||
copy_mem16x16(u, uv_stride, ud, uvd_stride);
|
||||
copy_mem16x16(v, uv_stride, vd, uvd_stride);
|
||||
} else {
|
||||
copy_mem64x64(y, y_stride, yd, yd_stride);
|
||||
copy_mem32x32(u, uv_stride, ud, uvd_stride);
|
||||
copy_mem32x32(v, uv_stride, vd, uvd_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) {
|
||||
const int adj = qdiff >> MFQE_PRECISION;
|
||||
if (bs == BLOCK_16X16) {
|
||||
*sad_thr = 7 + adj;
|
||||
} else if (bs == BLOCK_32X32) {
|
||||
*sad_thr = 6 + adj;
|
||||
} else { // BLOCK_64X64
|
||||
*sad_thr = 5 + adj;
|
||||
}
|
||||
*vdiff_thr = 125 + qdiff;
|
||||
}
|
||||
|
||||
static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u,
|
||||
const uint8_t *v, int y_stride, int uv_stride,
|
||||
uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride,
|
||||
int uvd_stride, int qdiff) {
|
||||
int sad, sad_thr, vdiff, vdiff_thr;
|
||||
uint32_t sse;
|
||||
|
||||
get_thr(bs, qdiff, &sad_thr, &vdiff_thr);
|
||||
|
||||
if (bs == BLOCK_16X16) {
|
||||
vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8;
|
||||
sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
|
||||
} else if (bs == BLOCK_32X32) {
|
||||
vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10;
|
||||
sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10;
|
||||
} else /* if (bs == BLOCK_64X64) */ {
|
||||
vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12;
|
||||
sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12;
|
||||
}
|
||||
|
||||
// vdiff > sad * 3 means vdiff should not be too small, otherwise,
|
||||
// it might be a lighting change in smooth area. When there is a
|
||||
// lighting change in smooth area, it is dangerous to do MFQE.
|
||||
if (sad > 1 && vdiff > sad * 3) {
|
||||
const int weight = 1 << MFQE_PRECISION;
|
||||
int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr);
|
||||
// When ifactor equals weight, no MFQE is done.
|
||||
if (ifactor > weight) {
|
||||
ifactor = weight;
|
||||
}
|
||||
apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd,
|
||||
uvd_stride, bs, ifactor);
|
||||
} else {
|
||||
// Copy the block from current frame (i.e., no mfqe is done).
|
||||
copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
|
||||
yd_stride, uvd_stride, bs);
|
||||
}
|
||||
}
|
||||
|
||||
static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) {
|
||||
// Check the motion in current block(for inter frame),
|
||||
// or check the motion in the correlated block in last frame (for keyframe).
|
||||
const int mv_len_square = mi->mbmi.mv[0].as_mv.row *
|
||||
mi->mbmi.mv[0].as_mv.row +
|
||||
mi->mbmi.mv[0].as_mv.col *
|
||||
mi->mbmi.mv[0].as_mv.col;
|
||||
const int mv_threshold = 100;
|
||||
return mi->mbmi.mode >= NEARESTMV && // Not an intra block
|
||||
cur_bs >= BLOCK_16X16 &&
|
||||
mv_len_square <= mv_threshold;
|
||||
}
|
||||
|
||||
// Process each partiton in a super block, recursively.
|
||||
static void mfqe_partition(VP10_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs,
|
||||
const uint8_t *y, const uint8_t *u,
|
||||
const uint8_t *v, int y_stride, int uv_stride,
|
||||
uint8_t *yd, uint8_t *ud, uint8_t *vd,
|
||||
int yd_stride, int uvd_stride) {
|
||||
int mi_offset, y_offset, uv_offset;
|
||||
const BLOCK_SIZE cur_bs = mi->mbmi.sb_type;
|
||||
const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex;
|
||||
const int bsl = b_width_log2_lookup[bs];
|
||||
PARTITION_TYPE partition = partition_lookup[bsl][cur_bs];
|
||||
const BLOCK_SIZE subsize = get_subsize(bs, partition);
|
||||
|
||||
if (cur_bs < BLOCK_8X8) {
|
||||
// If there are blocks smaller than 8x8, it must be on the boundary.
|
||||
return;
|
||||
}
|
||||
// No MFQE on blocks smaller than 16x16
|
||||
if (bs == BLOCK_16X16) {
|
||||
partition = PARTITION_NONE;
|
||||
}
|
||||
if (bs == BLOCK_64X64) {
|
||||
mi_offset = 4;
|
||||
y_offset = 32;
|
||||
uv_offset = 16;
|
||||
} else {
|
||||
mi_offset = 2;
|
||||
y_offset = 16;
|
||||
uv_offset = 8;
|
||||
}
|
||||
switch (partition) {
|
||||
BLOCK_SIZE mfqe_bs, bs_tmp;
|
||||
case PARTITION_HORZ:
|
||||
if (bs == BLOCK_64X64) {
|
||||
mfqe_bs = BLOCK_64X32;
|
||||
bs_tmp = BLOCK_32X32;
|
||||
} else {
|
||||
mfqe_bs = BLOCK_32X16;
|
||||
bs_tmp = BLOCK_16X16;
|
||||
}
|
||||
if (mfqe_decision(mi, mfqe_bs)) {
|
||||
// Do mfqe on the first square partition.
|
||||
mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
|
||||
yd, ud, vd, yd_stride, uvd_stride, qdiff);
|
||||
// Do mfqe on the second square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
|
||||
y_stride, uv_stride, yd + y_offset, ud + uv_offset,
|
||||
vd + uv_offset, yd_stride, uvd_stride, qdiff);
|
||||
}
|
||||
if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) {
|
||||
// Do mfqe on the first square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
|
||||
v + uv_offset * uv_stride, y_stride, uv_stride,
|
||||
yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
|
||||
vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
|
||||
// Do mfqe on the second square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
|
||||
u + uv_offset * uv_stride + uv_offset,
|
||||
v + uv_offset * uv_stride + uv_offset, y_stride,
|
||||
uv_stride, yd + y_offset * yd_stride + y_offset,
|
||||
ud + uv_offset * uvd_stride + uv_offset,
|
||||
vd + uv_offset * uvd_stride + uv_offset,
|
||||
yd_stride, uvd_stride, qdiff);
|
||||
}
|
||||
break;
|
||||
case PARTITION_VERT:
|
||||
if (bs == BLOCK_64X64) {
|
||||
mfqe_bs = BLOCK_32X64;
|
||||
bs_tmp = BLOCK_32X32;
|
||||
} else {
|
||||
mfqe_bs = BLOCK_16X32;
|
||||
bs_tmp = BLOCK_16X16;
|
||||
}
|
||||
if (mfqe_decision(mi, mfqe_bs)) {
|
||||
// Do mfqe on the first square partition.
|
||||
mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride,
|
||||
yd, ud, vd, yd_stride, uvd_stride, qdiff);
|
||||
// Do mfqe on the second square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride,
|
||||
v + uv_offset * uv_stride, y_stride, uv_stride,
|
||||
yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
|
||||
vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff);
|
||||
}
|
||||
if (mfqe_decision(mi + mi_offset, mfqe_bs)) {
|
||||
// Do mfqe on the first square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset,
|
||||
y_stride, uv_stride, yd + y_offset, ud + uv_offset,
|
||||
vd + uv_offset, yd_stride, uvd_stride, qdiff);
|
||||
// Do mfqe on the second square partition.
|
||||
mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset,
|
||||
u + uv_offset * uv_stride + uv_offset,
|
||||
v + uv_offset * uv_stride + uv_offset, y_stride,
|
||||
uv_stride, yd + y_offset * yd_stride + y_offset,
|
||||
ud + uv_offset * uvd_stride + uv_offset,
|
||||
vd + uv_offset * uvd_stride + uv_offset,
|
||||
yd_stride, uvd_stride, qdiff);
|
||||
}
|
||||
break;
|
||||
case PARTITION_NONE:
|
||||
if (mfqe_decision(mi, cur_bs)) {
|
||||
// Do mfqe on this partition.
|
||||
mfqe_block(cur_bs, y, u, v, y_stride, uv_stride,
|
||||
yd, ud, vd, yd_stride, uvd_stride, qdiff);
|
||||
} else {
|
||||
// Copy the block from current frame(i.e., no mfqe is done).
|
||||
copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd,
|
||||
yd_stride, uvd_stride, bs);
|
||||
}
|
||||
break;
|
||||
case PARTITION_SPLIT:
|
||||
// Recursion on four square partitions, e.g. if bs is 64X64,
|
||||
// then look into four 32X32 blocks in it.
|
||||
mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd,
|
||||
yd_stride, uvd_stride);
|
||||
mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset,
|
||||
v + uv_offset, y_stride, uv_stride, yd + y_offset,
|
||||
ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride);
|
||||
mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize,
|
||||
y + y_offset * y_stride, u + uv_offset * uv_stride,
|
||||
v + uv_offset * uv_stride, y_stride, uv_stride,
|
||||
yd + y_offset * yd_stride, ud + uv_offset * uvd_stride,
|
||||
vd + uv_offset * uvd_stride, yd_stride, uvd_stride);
|
||||
mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset,
|
||||
subsize, y + y_offset * y_stride + y_offset,
|
||||
u + uv_offset * uv_stride + uv_offset,
|
||||
v + uv_offset * uv_stride + uv_offset, y_stride,
|
||||
uv_stride, yd + y_offset * yd_stride + y_offset,
|
||||
ud + uv_offset * uvd_stride + uv_offset,
|
||||
vd + uv_offset * uvd_stride + uv_offset,
|
||||
yd_stride, uvd_stride);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
void vp10_mfqe(VP10_COMMON *cm) {
|
||||
int mi_row, mi_col;
|
||||
// Current decoded frame.
|
||||
const YV12_BUFFER_CONFIG *show = cm->frame_to_show;
|
||||
// Last decoded frame and will store the MFQE result.
|
||||
YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;
|
||||
// Loop through each super block.
|
||||
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
|
||||
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
|
||||
MODE_INFO *mi;
|
||||
MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col);
|
||||
// Motion Info in last frame.
|
||||
MODE_INFO *mi_prev = cm->postproc_state.prev_mi +
|
||||
(mi_row * cm->mi_stride + mi_col);
|
||||
const uint32_t y_stride = show->y_stride;
|
||||
const uint32_t uv_stride = show->uv_stride;
|
||||
const uint32_t yd_stride = dest->y_stride;
|
||||
const uint32_t uvd_stride = dest->uv_stride;
|
||||
const uint32_t row_offset_y = mi_row << 3;
|
||||
const uint32_t row_offset_uv = mi_row << 2;
|
||||
const uint32_t col_offset_y = mi_col << 3;
|
||||
const uint32_t col_offset_uv = mi_col << 2;
|
||||
const uint8_t *y = show->y_buffer + row_offset_y * y_stride +
|
||||
col_offset_y;
|
||||
const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride +
|
||||
col_offset_uv;
|
||||
const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride +
|
||||
col_offset_uv;
|
||||
uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y;
|
||||
uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride +
|
||||
col_offset_uv;
|
||||
uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride +
|
||||
col_offset_uv;
|
||||
if (frame_is_intra_only(cm)) {
|
||||
mi = mi_prev;
|
||||
} else {
|
||||
mi = mi_local;
|
||||
}
|
||||
mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud,
|
||||
vd, yd_stride, uvd_stride);
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,31 +0,0 @@
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP10_COMMON_MFQE_H_
|
||||
#define VP10_COMMON_MFQE_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Multiframe Quality Enhancement.
|
||||
// The aim for MFQE is to replace pixel blocks in the current frame with
|
||||
// the correlated pixel blocks (with higher quality) in the last frame.
|
||||
// The replacement can only be taken in stationary blocks by checking
|
||||
// the motion of the blocks and other conditions such as the SAD of
|
||||
// the current block and correlated block, the variance of the block
|
||||
// difference, etc.
|
||||
void vp10_mfqe(struct VP10Common *cm);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP10_COMMON_MFQE_H_
@@ -1,137 +0,0 @@
/*
|
||||
* Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vp10_rtcd.h"
|
||||
#include "vp10/common/onyxc_int.h"
|
||||
#include "vpx_dsp/mips/macros_msa.h"
|
||||
|
||||
static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride,
|
||||
uint8_t *dst_ptr, int32_t dst_stride,
|
||||
int32_t src_weight) {
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
uint64_t src0_d, src1_d, dst0_d, dst1_d;
|
||||
v16i8 src0 = { 0 };
|
||||
v16i8 src1 = { 0 };
|
||||
v16i8 dst0 = { 0 };
|
||||
v16i8 dst1 = { 0 };
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 2; row--;) {
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2(dst_ptr, dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src0);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst0);
|
||||
|
||||
LD2(src_ptr, src_stride, src0_d, src1_d);
|
||||
src_ptr += (2 * src_stride);
|
||||
LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d);
|
||||
INSERT_D2_SB(src0_d, src1_d, src1);
|
||||
INSERT_D2_SB(dst0_d, dst1_d, dst1);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst0, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r);
|
||||
ST8x2_UB(dst1, dst_ptr, dst_stride);
|
||||
dst_ptr += (2 * dst_stride);
|
||||
}
|
||||
}
|
||||
|
||||
static void filter_by_weight16x16_msa(const uint8_t *src_ptr,
|
||||
int32_t src_stride,
|
||||
uint8_t *dst_ptr,
|
||||
int32_t dst_stride,
|
||||
int32_t src_weight) {
|
||||
int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight;
|
||||
int32_t row;
|
||||
v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3;
|
||||
v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l;
|
||||
|
||||
src_wt = __msa_fill_h(src_weight);
|
||||
dst_wt = __msa_fill_h(dst_weight);
|
||||
|
||||
for (row = 4; row--;) {
|
||||
LD_SB4(src_ptr, src_stride, src0, src1, src2, src3);
|
||||
src_ptr += (4 * src_stride);
|
||||
LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3);
|
||||
|
||||
UNPCK_UB_SH(src0, src_r, src_l);
|
||||
UNPCK_UB_SH(dst0, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src1, src_r, src_l);
|
||||
UNPCK_UB_SH(dst1, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src2, src_r, src_l);
|
||||
UNPCK_UB_SH(dst2, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
|
||||
UNPCK_UB_SH(src3, src_r, src_l);
|
||||
UNPCK_UB_SH(dst3, dst_r, dst_l);
|
||||
res_h_r = (src_r * src_wt);
|
||||
res_h_r += (dst_r * dst_wt);
|
||||
res_h_l = (src_l * src_wt);
|
||||
res_h_l += (dst_l * dst_wt);
|
||||
SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION);
|
||||
PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr);
|
||||
dst_ptr += dst_stride;
|
||||
}
|
||||
}
|
||||
|
||||
void vp10_filter_by_weight8x8_msa(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
int src_weight) {
|
||||
filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
|
||||
}
|
||||
|
||||
void vp10_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
|
||||
uint8_t *dst, int dst_stride,
|
||||
int src_weight) {
|
||||
filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
|
||||
}
@@ -24,10 +24,6 @@
 #include "vp10/common/quant_common.h"
 #include "vp10/common/tile_common.h"

-#if CONFIG_VP9_POSTPROC
-#include "vp10/common/postproc.h"
-#endif
-
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -167,11 +163,6 @@ typedef struct VP10Common {

   int new_fb_idx;

-#if CONFIG_VP9_POSTPROC
-  YV12_BUFFER_CONFIG post_proc_buffer;
-  YV12_BUFFER_CONFIG post_proc_buffer_int;
-#endif
-
   FRAME_TYPE last_frame_type;  /* last frame's frame type for motion search.*/
   FRAME_TYPE frame_type;

@@ -275,10 +266,6 @@ typedef struct VP10Common {
   vpx_bit_depth_t bit_depth;
   vpx_bit_depth_t dequant_bit_depth;  // bit_depth of current dequantizer

-#if CONFIG_VP9_POSTPROC
-  struct postproc_state postproc_state;
-#endif
-
   int error_resilient_mode;

   int log2_tile_cols, log2_tile_rows;
@@ -1,746 +0,0 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_scale_rtcd.h"
|
||||
#include "./vp10_rtcd.h"
|
||||
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_ports/system_state.h"
|
||||
#include "vpx_scale/vpx_scale.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
|
||||
#include "vp10/common/onyxc_int.h"
|
||||
#include "vp10/common/postproc.h"
|
||||
#include "vp10/common/textblit.h"
|
||||
|
||||
#if CONFIG_VP9_POSTPROC
|
||||
static const short kernel5[] = {
|
||||
1, 1, 4, 1, 1
|
||||
};
|
||||
|
||||
const short vp10_rv[] = {
|
||||
8, 5, 2, 2, 8, 12, 4, 9, 8, 3,
|
||||
0, 3, 9, 0, 0, 0, 8, 3, 14, 4,
|
||||
10, 1, 11, 14, 1, 14, 9, 6, 12, 11,
|
||||
8, 6, 10, 0, 0, 8, 9, 0, 3, 14,
|
||||
8, 11, 13, 4, 2, 9, 0, 3, 9, 6,
|
||||
1, 2, 3, 14, 13, 1, 8, 2, 9, 7,
|
||||
3, 3, 1, 13, 13, 6, 6, 5, 2, 7,
|
||||
11, 9, 11, 8, 7, 3, 2, 0, 13, 13,
|
||||
14, 4, 12, 5, 12, 10, 8, 10, 13, 10,
|
||||
4, 14, 4, 10, 0, 8, 11, 1, 13, 7,
|
||||
7, 14, 6, 14, 13, 2, 13, 5, 4, 4,
|
||||
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
|
||||
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
|
||||
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
|
||||
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
|
||||
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
|
||||
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
|
||||
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
|
||||
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
|
||||
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
|
||||
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
|
||||
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
|
||||
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
|
||||
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
|
||||
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
|
||||
0, 10, 0, 5, 13, 2, 12, 7, 11, 13,
|
||||
8, 0, 4, 10, 7, 2, 7, 2, 2, 5,
|
||||
3, 4, 7, 3, 3, 14, 14, 5, 9, 13,
|
||||
3, 14, 3, 6, 3, 0, 11, 8, 13, 1,
|
||||
13, 1, 12, 0, 10, 9, 7, 6, 2, 8,
|
||||
5, 2, 13, 7, 1, 13, 14, 7, 6, 7,
|
||||
9, 6, 10, 11, 7, 8, 7, 5, 14, 8,
|
||||
4, 4, 0, 8, 7, 10, 0, 8, 14, 11,
|
||||
3, 12, 5, 7, 14, 3, 14, 5, 2, 6,
|
||||
11, 12, 12, 8, 0, 11, 13, 1, 2, 0,
|
||||
5, 10, 14, 7, 8, 0, 4, 11, 0, 8,
|
||||
0, 3, 10, 5, 8, 0, 11, 6, 7, 8,
|
||||
10, 7, 13, 9, 2, 5, 1, 5, 10, 2,
|
||||
4, 3, 5, 6, 10, 8, 9, 4, 11, 14,
|
||||
3, 8, 3, 7, 8, 5, 11, 4, 12, 3,
|
||||
11, 9, 14, 8, 14, 13, 4, 3, 1, 2,
|
||||
14, 6, 5, 4, 4, 11, 4, 6, 2, 1,
|
||||
5, 8, 8, 12, 13, 5, 14, 10, 12, 13,
|
||||
0, 9, 5, 5, 11, 10, 13, 9, 10, 13,
|
||||
};
|
||||
|
||||
static const uint8_t q_diff_thresh = 20;
|
||||
static const uint8_t last_q_thresh = 170;
|
||||
|
||||
void vp10_post_proc_down_and_across_c(const uint8_t *src_ptr,
|
||||
uint8_t *dst_ptr,
|
||||
int src_pixels_per_line,
|
||||
int dst_pixels_per_line,
|
||||
int rows,
|
||||
int cols,
|
||||
int flimit) {
|
||||
uint8_t const *p_src;
|
||||
uint8_t *p_dst;
|
||||
int row, col, i, v, kernel;
|
||||
int pitch = src_pixels_per_line;
|
||||
uint8_t d[8];
|
||||
(void)dst_pixels_per_line;
|
||||
|
||||
for (row = 0; row < rows; row++) {
|
||||
/* post_proc_down for one row */
|
||||
p_src = src_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
for (col = 0; col < cols; col++) {
|
||||
kernel = 4;
|
||||
v = p_src[col];
|
||||
|
||||
for (i = -2; i <= 2; i++) {
|
||||
if (abs(v - p_src[col + i * pitch]) > flimit)
|
||||
goto down_skip_convolve;
|
||||
|
||||
kernel += kernel5[2 + i] * p_src[col + i * pitch];
|
||||
}
|
||||
|
||||
v = (kernel >> 3);
|
||||
down_skip_convolve:
|
||||
p_dst[col] = v;
|
||||
}
|
||||
|
||||
/* now post_proc_across */
|
||||
p_src = dst_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
d[i] = p_src[i];
|
||||
|
||||
for (col = 0; col < cols; col++) {
|
||||
kernel = 4;
|
||||
v = p_src[col];
|
||||
|
||||
d[col & 7] = v;
|
||||
|
||||
for (i = -2; i <= 2; i++) {
|
||||
if (abs(v - p_src[col + i]) > flimit)
|
||||
goto across_skip_convolve;
|
||||
|
||||
kernel += kernel5[2 + i] * p_src[col + i];
|
||||
}
|
||||
|
||||
d[col & 7] = (kernel >> 3);
|
||||
across_skip_convolve:
|
||||
|
||||
if (col >= 2)
|
||||
p_dst[col - 2] = d[(col - 2) & 7];
|
||||
}
|
||||
|
||||
/* handle the last two pixels */
|
||||
p_dst[col - 2] = d[(col - 2) & 7];
|
||||
p_dst[col - 1] = d[(col - 1) & 7];
|
||||
|
||||
|
||||
/* next row */
|
||||
src_ptr += pitch;
|
||||
dst_ptr += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
void vp10_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr,
|
||||
uint16_t *dst_ptr,
|
||||
int src_pixels_per_line,
|
||||
int dst_pixels_per_line,
|
||||
int rows,
|
||||
int cols,
|
||||
int flimit) {
|
||||
uint16_t const *p_src;
|
||||
uint16_t *p_dst;
|
||||
int row, col, i, v, kernel;
|
||||
int pitch = src_pixels_per_line;
|
||||
uint16_t d[8];
|
||||
|
||||
for (row = 0; row < rows; row++) {
|
||||
// post_proc_down for one row.
|
||||
p_src = src_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
for (col = 0; col < cols; col++) {
|
||||
kernel = 4;
|
||||
v = p_src[col];
|
||||
|
||||
for (i = -2; i <= 2; i++) {
|
||||
if (abs(v - p_src[col + i * pitch]) > flimit)
|
||||
goto down_skip_convolve;
|
||||
|
||||
kernel += kernel5[2 + i] * p_src[col + i * pitch];
|
||||
}
|
||||
|
||||
v = (kernel >> 3);
|
||||
|
||||
down_skip_convolve:
|
||||
p_dst[col] = v;
|
||||
}
|
||||
|
||||
/* now post_proc_across */
|
||||
p_src = dst_ptr;
|
||||
p_dst = dst_ptr;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
d[i] = p_src[i];
|
||||
|
||||
for (col = 0; col < cols; col++) {
|
||||
kernel = 4;
|
||||
v = p_src[col];
|
||||
|
||||
d[col & 7] = v;
|
||||
|
||||
for (i = -2; i <= 2; i++) {
|
||||
if (abs(v - p_src[col + i]) > flimit)
|
||||
goto across_skip_convolve;
|
||||
|
||||
kernel += kernel5[2 + i] * p_src[col + i];
|
||||
}
|
||||
|
||||
d[col & 7] = (kernel >> 3);
|
||||
|
||||
across_skip_convolve:
|
||||
if (col >= 2)
|
||||
p_dst[col - 2] = d[(col - 2) & 7];
|
||||
}
|
||||
|
||||
/* handle the last two pixels */
|
||||
p_dst[col - 2] = d[(col - 2) & 7];
|
||||
p_dst[col - 1] = d[(col - 1) & 7];
|
||||
|
||||
|
||||
/* next row */
|
||||
src_ptr += pitch;
|
||||
dst_ptr += dst_pixels_per_line;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
|
||||
static int q2mbl(int x) {
|
||||
if (x < 20) x = 20;
|
||||
|
||||
x = 50 + (x - 50) * 10 / 8;
|
||||
return x * x / 3;
|
||||
}
|
||||
|
||||
void vp10_mbpost_proc_across_ip_c(uint8_t *src, int pitch,
|
||||
int rows, int cols, int flimit) {
|
||||
int r, c, i;
|
||||
uint8_t *s = src;
|
||||
uint8_t d[16];
|
||||
|
||||
for (r = 0; r < rows; r++) {
|
||||
int sumsq = 0;
|
||||
int sum = 0;
|
||||
|
||||
for (i = -8; i <= 6; i++) {
|
||||
sumsq += s[i] * s[i];
|
||||
sum += s[i];
|
||||
d[i + 8] = 0;
|
||||
}
|
||||
|
||||
for (c = 0; c < cols + 8; c++) {
|
||||
int x = s[c + 7] - s[c - 8];
|
||||
int y = s[c + 7] + s[c - 8];
|
||||
|
||||
sum += x;
|
||||
sumsq += x * y;
|
||||
|
||||
d[c & 15] = s[c];
|
||||
|
||||
if (sumsq * 15 - sum * sum < flimit) {
|
||||
d[c & 15] = (8 + sum + s[c]) >> 4;
|
||||
}
|
||||
|
||||
s[c - 8] = d[(c - 8) & 15];
|
||||
}
|
||||
s += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
void vp10_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch,
|
||||
int rows, int cols, int flimit) {
|
||||
int r, c, i;
|
||||
|
||||
uint16_t *s = src;
|
||||
uint16_t d[16];
|
||||
|
||||
|
||||
for (r = 0; r < rows; r++) {
|
||||
int sumsq = 0;
|
||||
int sum = 0;
|
||||
|
||||
for (i = -8; i <= 6; i++) {
|
||||
sumsq += s[i] * s[i];
|
||||
sum += s[i];
|
||||
d[i + 8] = 0;
|
||||
}
|
||||
|
||||
for (c = 0; c < cols + 8; c++) {
|
||||
int x = s[c + 7] - s[c - 8];
|
||||
int y = s[c + 7] + s[c - 8];
|
||||
|
||||
sum += x;
|
||||
sumsq += x * y;
|
||||
|
||||
d[c & 15] = s[c];
|
||||
|
||||
if (sumsq * 15 - sum * sum < flimit) {
|
||||
d[c & 15] = (8 + sum + s[c]) >> 4;
|
||||
}
|
||||
|
||||
s[c - 8] = d[(c - 8) & 15];
|
||||
}
|
||||
|
||||
s += pitch;
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
|
||||
void vp10_mbpost_proc_down_c(uint8_t *dst, int pitch,
|
||||
int rows, int cols, int flimit) {
|
||||
int r, c, i;
|
||||
const short *rv3 = &vp10_rv[63 & rand()]; // NOLINT
|
||||
|
||||
for (c = 0; c < cols; c++) {
|
||||
uint8_t *s = &dst[c];
|
||||
int sumsq = 0;
|
||||
int sum = 0;
|
||||
uint8_t d[16];
|
||||
const short *rv2 = rv3 + ((c * 17) & 127);
|
||||
|
||||
for (i = -8; i <= 6; i++) {
|
||||
sumsq += s[i * pitch] * s[i * pitch];
|
||||
sum += s[i * pitch];
|
||||
}
|
||||
|
||||
for (r = 0; r < rows + 8; r++) {
|
||||
sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
|
||||
sum += s[7 * pitch] - s[-8 * pitch];
|
||||
d[r & 15] = s[0];
|
||||
|
||||
if (sumsq * 15 - sum * sum < flimit) {
|
||||
d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
|
||||
}
|
||||
|
||||
s[-8 * pitch] = d[(r - 8) & 15];
|
||||
s += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
void vp10_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch,
|
||||
int rows, int cols, int flimit) {
|
||||
int r, c, i;
|
||||
const int16_t *rv3 = &vp10_rv[63 & rand()]; // NOLINT
|
||||
|
||||
for (c = 0; c < cols; c++) {
|
||||
uint16_t *s = &dst[c];
|
||||
int sumsq = 0;
|
||||
int sum = 0;
|
||||
uint16_t d[16];
|
||||
const int16_t *rv2 = rv3 + ((c * 17) & 127);
|
||||
|
||||
for (i = -8; i <= 6; i++) {
|
||||
sumsq += s[i * pitch] * s[i * pitch];
|
||||
sum += s[i * pitch];
|
||||
}
|
||||
|
||||
for (r = 0; r < rows + 8; r++) {
|
||||
sumsq += s[7 * pitch] * s[ 7 * pitch] - s[-8 * pitch] * s[-8 * pitch];
|
||||
sum += s[7 * pitch] - s[-8 * pitch];
|
||||
d[r & 15] = s[0];
|
||||
|
||||
if (sumsq * 15 - sum * sum < flimit) {
|
||||
d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4;
|
||||
}
|
||||
|
||||
s[-8 * pitch] = d[(r - 8) & 15];
|
||||
s += pitch;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
|
||||
static void deblock_and_de_macro_block(YV12_BUFFER_CONFIG *source,
|
||||
YV12_BUFFER_CONFIG *post,
|
||||
int q,
|
||||
int low_var_thresh,
|
||||
int flag) {
|
||||
double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065;
|
||||
int ppl = (int)(level + .5);
|
||||
(void) low_var_thresh;
|
||||
(void) flag;
|
||||
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->y_buffer),
|
||||
CONVERT_TO_SHORTPTR(post->y_buffer),
|
||||
source->y_stride, post->y_stride,
|
||||
source->y_height, source->y_width,
|
||||
ppl);
|
||||
|
||||
vp10_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer),
|
||||
post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer),
|
||||
post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->u_buffer),
|
||||
CONVERT_TO_SHORTPTR(post->u_buffer),
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width,
|
||||
ppl);
|
||||
vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(source->v_buffer),
|
||||
CONVERT_TO_SHORTPTR(post->v_buffer),
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width,
|
||||
ppl);
|
||||
} else {
|
||||
vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
|
||||
source->y_stride, post->y_stride,
|
||||
source->y_height, source->y_width, ppl);
|
||||
|
||||
vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width, ppl);
|
||||
vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width, ppl);
|
||||
}
|
||||
#else
|
||||
vp10_post_proc_down_and_across(source->y_buffer, post->y_buffer,
|
||||
source->y_stride, post->y_stride,
|
||||
source->y_height, source->y_width, ppl);
|
||||
|
||||
vp10_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height,
|
||||
post->y_width, q2mbl(q));
|
||||
|
||||
vp10_post_proc_down_and_across(source->u_buffer, post->u_buffer,
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width, ppl);
|
||||
vp10_post_proc_down_and_across(source->v_buffer, post->v_buffer,
|
||||
source->uv_stride, post->uv_stride,
|
||||
source->uv_height, source->uv_width, ppl);
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
}
|
||||
|
||||
void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
|
||||
int q) {
|
||||
const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
|
||||
+ 0.0065 + 0.5);
|
||||
int i;
|
||||
|
||||
const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
|
||||
const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
|
||||
const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
|
||||
const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
|
||||
|
||||
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
|
||||
const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
|
||||
|
||||
for (i = 0; i < MAX_MB_PLANE; ++i) {
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
|
||||
(dst->flags & YV12_FLAG_HIGHBITDEPTH));
|
||||
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
vp10_highbd_post_proc_down_and_across(CONVERT_TO_SHORTPTR(srcs[i]),
|
||||
CONVERT_TO_SHORTPTR(dsts[i]),
|
||||
src_strides[i], dst_strides[i],
|
||||
src_heights[i], src_widths[i], ppl);
|
||||
} else {
|
||||
vp10_post_proc_down_and_across(srcs[i], dsts[i],
|
||||
src_strides[i], dst_strides[i],
|
||||
src_heights[i], src_widths[i], ppl);
|
||||
}
|
||||
#else
|
||||
vp10_post_proc_down_and_across(srcs[i], dsts[i],
|
||||
src_strides[i], dst_strides[i],
|
||||
src_heights[i], src_widths[i], ppl);
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
}
|
||||
}
|
||||
|
||||
void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
|
||||
int q) {
|
||||
const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q
|
||||
+ 0.0065 + 0.5);
|
||||
int i;
|
||||
|
||||
const uint8_t *const srcs[3] = {src->y_buffer, src->u_buffer, src->v_buffer};
|
||||
const int src_strides[3] = {src->y_stride, src->uv_stride, src->uv_stride};
|
||||
const int src_widths[3] = {src->y_width, src->uv_width, src->uv_width};
|
||||
const int src_heights[3] = {src->y_height, src->uv_height, src->uv_height};
|
||||
|
||||
uint8_t *const dsts[3] = {dst->y_buffer, dst->u_buffer, dst->v_buffer};
|
||||
const int dst_strides[3] = {dst->y_stride, dst->uv_stride, dst->uv_stride};
|
||||
|
||||
for (i = 0; i < MAX_MB_PLANE; ++i) {
|
||||
const int src_stride = src_strides[i];
|
||||
const int src_width = src_widths[i] - 4;
|
||||
const int src_height = src_heights[i] - 4;
|
||||
const int dst_stride = dst_strides[i];
|
||||
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
assert((src->flags & YV12_FLAG_HIGHBITDEPTH) ==
|
||||
(dst->flags & YV12_FLAG_HIGHBITDEPTH));
|
||||
if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
|
||||
const uint16_t *const src_plane = CONVERT_TO_SHORTPTR(
|
||||
srcs[i] + 2 * src_stride + 2);
|
||||
uint16_t *const dst_plane = CONVERT_TO_SHORTPTR(
|
||||
dsts[i] + 2 * dst_stride + 2);
|
||||
vp10_highbd_post_proc_down_and_across(src_plane, dst_plane, src_stride,
|
||||
dst_stride, src_height, src_width,
|
||||
ppl);
|
||||
} else {
|
||||
const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
|
||||
uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
|
||||
|
||||
vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride,
|
||||
dst_stride, src_height, src_width, ppl);
|
||||
}
|
||||
#else
|
||||
const uint8_t *const src_plane = srcs[i] + 2 * src_stride + 2;
|
||||
uint8_t *const dst_plane = dsts[i] + 2 * dst_stride + 2;
|
||||
vp10_post_proc_down_and_across(src_plane, dst_plane, src_stride, dst_stride,
|
||||
src_height, src_width, ppl);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static double gaussian(double sigma, double mu, double x) {
|
||||
return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
|
||||
(exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
|
||||
}
|
||||
|
||||
static void fillrd(struct postproc_state *state, int q, int a) {
|
||||
char char_dist[300];
|
||||
|
||||
double sigma;
|
||||
int ai = a, qi = q, i;
|
||||
|
||||
vpx_clear_system_state();
|
||||
|
||||
sigma = ai + .5 + .6 * (63 - qi) / 63.0;
|
||||
|
||||
/* set up a lookup table of 256 entries that matches
|
||||
* a gaussian distribution with sigma determined by q.
|
||||
*/
|
||||
{
|
||||
int next, j;
|
||||
|
||||
next = 0;
|
||||
|
||||
for (i = -32; i < 32; i++) {
|
||||
int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i));
|
||||
|
||||
if (a_i) {
|
||||
for (j = 0; j < a_i; j++) {
|
||||
char_dist[next + j] = (char) i;
|
||||
}
|
||||
|
||||
next = next + j;
|
||||
}
|
||||
}
|
||||
|
||||
for (; next < 256; next++)
|
||||
char_dist[next] = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < 3072; i++) {
|
||||
state->noise[i] = char_dist[rand() & 0xff]; // NOLINT
|
||||
}
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
state->blackclamp[i] = -char_dist[0];
|
||||
state->whiteclamp[i] = -char_dist[0];
|
||||
state->bothclamp[i] = -2 * char_dist[0];
|
||||
}
|
||||
|
||||
state->last_q = q;
|
||||
state->last_noise = a;
|
||||
}
|
||||
|
||||
void vp10_plane_add_noise_c(uint8_t *start, char *noise,
|
||||
char blackclamp[16],
|
||||
char whiteclamp[16],
|
||||
char bothclamp[16],
|
||||
unsigned int width, unsigned int height, int pitch) {
|
||||
unsigned int i, j;
|
||||
|
||||
// TODO(jbb): why does simd code use both but c doesn't, normalize and
|
||||
// fix..
|
||||
(void) bothclamp;
|
||||
for (i = 0; i < height; i++) {
|
||||
uint8_t *pos = start + i * pitch;
|
||||
char *ref = (char *)(noise + (rand() & 0xff)); // NOLINT
|
||||
|
||||
for (j = 0; j < width; j++) {
|
||||
if (pos[j] < blackclamp[0])
|
||||
pos[j] = blackclamp[0];
|
||||
|
||||
if (pos[j] > 255 + whiteclamp[0])
|
||||
pos[j] = 255 + whiteclamp[0];
|
||||
|
||||
pos[j] += ref[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void swap_mi_and_prev_mi(VP10_COMMON *cm) {
|
||||
// Current mip will be the prev_mip for the next frame.
|
||||
MODE_INFO *temp = cm->postproc_state.prev_mip;
|
||||
cm->postproc_state.prev_mip = cm->mip;
|
||||
cm->mip = temp;
|
||||
|
||||
// Update the upper left visible macroblock ptrs.
|
||||
cm->mi = cm->mip + cm->mi_stride + 1;
|
||||
cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1;
|
||||
}
|
||||
|
||||
int vp10_post_proc_frame(struct VP10Common *cm,
|
||||
YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *ppflags) {
|
||||
const int q = VPXMIN(105, cm->lf.filter_level * 2);
|
||||
const int flags = ppflags->post_proc_flag;
|
||||
YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
|
||||
struct postproc_state *const ppstate = &cm->postproc_state;
|
||||
|
||||
if (!cm->frame_to_show)
|
||||
return -1;
|
||||
|
||||
if (!flags) {
|
||||
*dest = *cm->frame_to_show;
|
||||
return 0;
|
||||
}
|
||||
|
||||
vpx_clear_system_state();
|
||||
|
||||
// Alloc memory for prev_mip in the first frame.
|
||||
if (cm->current_video_frame == 1) {
|
||||
cm->postproc_state.last_base_qindex = cm->base_qindex;
|
||||
cm->postproc_state.last_frame_valid = 1;
|
||||
ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip));
|
||||
if (!ppstate->prev_mip) {
|
||||
return 1;
|
||||
}
|
||||
ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1;
|
||||
memset(ppstate->prev_mip, 0,
|
||||
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip));
|
||||
}
|
||||
|
||||
// Allocate post_proc_buffer_int if needed.
|
||||
if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) {
|
||||
if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
|
||||
const int width = ALIGN_POWER_OF_TWO(cm->width, 4);
|
||||
const int height = ALIGN_POWER_OF_TWO(cm->height, 4);
|
||||
|
||||
if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
#endif // CONFIG_VPX_HIGHBITDEPTH
|
||||
VPX_ENC_BORDER_IN_PIXELS,
|
||||
cm->byte_alignment) < 0) {
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate MFQE framebuffer");
|
||||
}
|
||||
|
||||
// Ensure that postproc is set to all 0s so that post proc
|
||||
// doesn't pull random data in from edge.
|
||||
memset(cm->post_proc_buffer_int.buffer_alloc, 128,
|
||||
cm->post_proc_buffer.frame_size);
|
||||
}
|
||||
}
|
||||
|
||||
if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height,
|
||||
cm->subsampling_x, cm->subsampling_y,
|
||||
#if CONFIG_VPX_HIGHBITDEPTH
|
||||
cm->use_highbitdepth,
|
||||
#endif
|
||||
VPX_DEC_BORDER_IN_PIXELS, cm->byte_alignment,
|
||||
NULL, NULL, NULL) < 0)
|
||||
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
|
||||
"Failed to allocate post-processing buffer");
|
||||
|
||||
if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 &&
|
||||
cm->postproc_state.last_frame_valid && cm->bit_depth == 8 &&
|
||||
cm->postproc_state.last_base_qindex <= last_q_thresh &&
|
||||
cm->base_qindex - cm->postproc_state.last_base_qindex >= q_diff_thresh) {
|
||||
vp10_mfqe(cm);
|
||||
// TODO(jackychen): Consider whether enable deblocking by default
|
||||
// if mfqe is enabled. Need to take both the quality and the speed
|
||||
// into consideration.
|
||||
if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) {
|
||||
vp8_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int);
|
||||
}
|
||||
if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) {
|
||||
deblock_and_de_macro_block(&cm->post_proc_buffer_int, ppbuf,
|
||||
q + (ppflags->deblocking_level - 5) * 10,
|
||||
1, 0);
|
||||
} else if (flags & VP9D_DEBLOCK) {
|
||||
vp10_deblock(&cm->post_proc_buffer_int, ppbuf, q);
|
||||
} else {
|
||||
vp8_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf);
|
||||
}
|
||||
} else if (flags & VP9D_DEMACROBLOCK) {
|
||||
deblock_and_de_macro_block(cm->frame_to_show, ppbuf,
|
||||
q + (ppflags->deblocking_level - 5) * 10, 1, 0);
|
||||
} else if (flags & VP9D_DEBLOCK) {
|
||||
vp10_deblock(cm->frame_to_show, ppbuf, q);
|
||||
} else {
|
||||
vp8_yv12_copy_frame(cm->frame_to_show, ppbuf);
|
||||
}
|
||||
|
||||
cm->postproc_state.last_base_qindex = cm->base_qindex;
|
||||
cm->postproc_state.last_frame_valid = 1;
|
||||
|
||||
if (flags & VP9D_ADDNOISE) {
|
||||
const int noise_level = ppflags->noise_level;
|
||||
if (ppstate->last_q != q ||
|
||||
ppstate->last_noise != noise_level) {
|
||||
fillrd(ppstate, 63 - q, noise_level);
|
||||
}
|
||||
|
||||
vp10_plane_add_noise(ppbuf->y_buffer, ppstate->noise, ppstate->blackclamp,
|
||||
ppstate->whiteclamp, ppstate->bothclamp,
|
||||
ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride);
|
||||
}
|
||||
|
||||
*dest = *ppbuf;
|
||||
|
||||
/* handle problem with extending borders */
|
||||
dest->y_width = cm->width;
|
||||
dest->y_height = cm->height;
|
||||
dest->uv_width = dest->y_width >> cm->subsampling_x;
|
||||
dest->uv_height = dest->y_height >> cm->subsampling_y;
|
||||
|
||||
swap_mi_and_prev_mi(cm);
|
||||
return 0;
|
||||
}
|
||||
#endif // CONFIG_VP9_POSTPROC
@@ -1,53 +0,0 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef VP10_COMMON_POSTPROC_H_
|
||||
#define VP10_COMMON_POSTPROC_H_
|
||||
|
||||
#include "vpx_ports/mem.h"
|
||||
#include "vpx_scale/yv12config.h"
|
||||
#include "vp10/common/blockd.h"
|
||||
#include "vp10/common/mfqe.h"
|
||||
#include "vp10/common/ppflags.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct postproc_state {
|
||||
int last_q;
|
||||
int last_noise;
|
||||
char noise[3072];
|
||||
int last_base_qindex;
|
||||
int last_frame_valid;
|
||||
MODE_INFO *prev_mip;
|
||||
MODE_INFO *prev_mi;
|
||||
DECLARE_ALIGNED(16, char, blackclamp[16]);
|
||||
DECLARE_ALIGNED(16, char, whiteclamp[16]);
|
||||
DECLARE_ALIGNED(16, char, bothclamp[16]);
|
||||
};
|
||||
|
||||
struct VP10Common;
|
||||
|
||||
#define MFQE_PRECISION 4
|
||||
|
||||
int vp10_post_proc_frame(struct VP10Common *cm,
|
||||
YV12_BUFFER_CONFIG *dest, vp10_ppflags_t *flags);
|
||||
|
||||
void vp10_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
|
||||
|
||||
void vp10_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP10_COMMON_POSTPROC_H_
@@ -1,43 +0,0 @@
/*
|
||||
* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef VP10_COMMON_PPFLAGS_H_
|
||||
#define VP10_COMMON_PPFLAGS_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
VP9D_NOFILTERING = 0,
|
||||
VP9D_DEBLOCK = 1 << 0,
|
||||
VP9D_DEMACROBLOCK = 1 << 1,
|
||||
VP9D_ADDNOISE = 1 << 2,
|
||||
VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
|
||||
VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
|
||||
VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
|
||||
VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,
|
||||
VP9D_DEBUG_DRAW_MV = 1 << 7,
|
||||
VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,
|
||||
VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
|
||||
VP9D_MFQE = 1 << 10
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int post_proc_flag;
|
||||
int deblocking_level;
|
||||
int noise_level;
|
||||
} vp10_ppflags_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // VP10_COMMON_PPFLAGS_H_
@@ -54,33 +54,6 @@ if ($opts{arch} eq "x86_64") {
   $avx2_x86_64 = 'avx2';
 }

-#
-# post proc
-#
-if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
-add_proto qw/void vp10_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_down sse2/;
-$vp10_mbpost_proc_down_sse2=vp10_mbpost_proc_down_xmm;
-
-add_proto qw/void vp10_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
-specialize qw/vp10_mbpost_proc_across_ip sse2/;
-$vp10_mbpost_proc_across_ip_sse2=vp10_mbpost_proc_across_ip_xmm;
-
-add_proto qw/void vp10_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
-specialize qw/vp10_post_proc_down_and_across sse2/;
-$vp10_post_proc_down_and_across_sse2=vp10_post_proc_down_and_across_xmm;
-
-add_proto qw/void vp10_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
-specialize qw/vp10_plane_add_noise sse2/;
-$vp10_plane_add_noise_sse2=vp10_plane_add_noise_wmt;
-
-add_proto qw/void vp10_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight16x16 sse2 msa/;
-
-add_proto qw/void vp10_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
-specialize qw/vp10_filter_by_weight8x8 sse2 msa/;
-}
-
 #
 # dct
 #
@@ -1,287 +0,0 @@
;
|
||||
; Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
; This file is a duplicate of mfqe_sse2.asm in VP8.
|
||||
; TODO(jackychen): Find a way to fix the duplicate.
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp10_filter_by_weight16x16_sse2
|
||||
;(
|
||||
; unsigned char *src,
|
||||
; int src_stride,
|
||||
; unsigned char *dst,
|
||||
; int dst_stride,
|
||||
; int src_weight
|
||||
;)
|
||||
global sym(vp10_filter_by_weight16x16_sse2) PRIVATE
|
||||
sym(vp10_filter_by_weight16x16_sse2):
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
SHADOW_ARGS_TO_STACK 5
|
||||
SAVE_XMM 6
|
||||
GET_GOT rbx
|
||||
push rsi
|
||||
push rdi
|
||||
; end prolog
|
||||
|
||||
movd xmm0, arg(4) ; src_weight
|
||||
pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
|
||||
punpcklqdq xmm0, xmm0 ; replicate to all hi words
|
||||
|
||||
movdqa xmm1, [GLOBAL(tMFQE)]
|
||||
psubw xmm1, xmm0 ; dst_weight
|
||||
|
||||
mov rax, arg(0) ; src
|
||||
mov rsi, arg(1) ; src_stride
|
||||
mov rdx, arg(2) ; dst
|
||||
mov rdi, arg(3) ; dst_stride
|
||||
|
||||
mov rcx, 16 ; loop count
|
||||
pxor xmm6, xmm6
|
||||
|
||||
.combine
|
||||
movdqa xmm2, [rax]
|
||||
movdqa xmm4, [rdx]
|
||||
add rax, rsi
|
||||
|
||||
; src * src_weight
|
||||
movdqa xmm3, xmm2
|
||||
punpcklbw xmm2, xmm6
|
||||
punpckhbw xmm3, xmm6
|
||||
pmullw xmm2, xmm0
|
||||
pmullw xmm3, xmm0
|
||||
|
||||
; dst * dst_weight
|
||||
movdqa xmm5, xmm4
|
||||
punpcklbw xmm4, xmm6
|
||||
punpckhbw xmm5, xmm6
|
||||
pmullw xmm4, xmm1
|
||||
pmullw xmm5, xmm1
|
||||
|
||||
; sum, round and shift
|
||||
paddw xmm2, xmm4
|
||||
paddw xmm3, xmm5
|
||||
paddw xmm2, [GLOBAL(tMFQE_round)]
|
||||
paddw xmm3, [GLOBAL(tMFQE_round)]
|
||||
psrlw xmm2, 4
|
||||
psrlw xmm3, 4
|
||||
|
||||
packuswb xmm2, xmm3
|
||||
movdqa [rdx], xmm2
|
||||
add rdx, rdi
|
||||
|
||||
dec rcx
|
||||
jnz .combine
|
||||
|
||||
; begin epilog
|
||||
pop rdi
|
||||
pop rsi
|
||||
RESTORE_GOT
|
||||
RESTORE_XMM
|
||||
UNSHADOW_ARGS
|
||||
pop rbp
|
||||
|
||||
ret
|
||||
|
||||
;void vp10_filter_by_weight8x8_sse2
;(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride,
;    int src_weight
;)
global sym(vp10_filter_by_weight8x8_sse2) PRIVATE
sym(vp10_filter_by_weight8x8_sse2):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

    movd xmm0, arg(4) ; src_weight
    pshuflw xmm0, xmm0, 0x0 ; replicate to all low words
    punpcklqdq xmm0, xmm0 ; replicate to all hi words

    movdqa xmm1, [GLOBAL(tMFQE)]
    psubw xmm1, xmm0 ; dst_weight

    mov rax, arg(0) ; src
    mov rsi, arg(1) ; src_stride
    mov rdx, arg(2) ; dst
    mov rdi, arg(3) ; dst_stride

    mov rcx, 8 ; loop count
    pxor xmm4, xmm4

.combine
    movq xmm2, [rax]
    movq xmm3, [rdx]
    add rax, rsi

    ; src * src_weight
    punpcklbw xmm2, xmm4
    pmullw xmm2, xmm0

    ; dst * dst_weight
    punpcklbw xmm3, xmm4
    pmullw xmm3, xmm1

    ; sum, round and shift
    paddw xmm2, xmm3
    paddw xmm2, [GLOBAL(tMFQE_round)]
    psrlw xmm2, 4

    packuswb xmm2, xmm4
    movq [rdx], xmm2
    add rdx, rdi

    dec rcx
    jnz .combine

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop rbp

    ret
;void vp10_variance_and_sad_16x16_sse2 | arg
;(
;    unsigned char *src1, 0
;    int stride1, 1
;    unsigned char *src2, 2
;    int stride2, 3
;    unsigned int *variance, 4
;    unsigned int *sad, 5
;)
global sym(vp10_variance_and_sad_16x16_sse2) PRIVATE
sym(vp10_variance_and_sad_16x16_sse2):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 6
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

    mov rax, arg(0) ; src1
    mov rcx, arg(1) ; stride1
    mov rdx, arg(2) ; src2
    mov rdi, arg(3) ; stride2

    mov rsi, 16 ; block height

    ; Prep accumulator registers
    pxor xmm3, xmm3 ; SAD
    pxor xmm4, xmm4 ; sum of src2
    pxor xmm5, xmm5 ; sum of src2^2

    ; Because we're working with the actual output frames
    ; we can't depend on any kind of data alignment.
.accumulate
    movdqa xmm0, [rax] ; src1
    movdqa xmm1, [rdx] ; src2
    add rax, rcx ; src1 + stride1
    add rdx, rdi ; src2 + stride2

    ; SAD(src1, src2)
    psadbw xmm0, xmm1
    paddusw xmm3, xmm0

    ; SUM(src2)
    pxor xmm2, xmm2
    psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0
    paddusw xmm4, xmm2

    ; pmaddubsw would be ideal if it took two unsigned values. instead,
    ; it expects a signed and an unsigned value. so instead we zero extend
    ; and operate on words.
    pxor xmm2, xmm2
    movdqa xmm0, xmm1
    punpcklbw xmm0, xmm2
    punpckhbw xmm1, xmm2
    pmaddwd xmm0, xmm0
    pmaddwd xmm1, xmm1
    paddd xmm5, xmm0
    paddd xmm5, xmm1

    sub rsi, 1
    jnz .accumulate

    ; phaddd only operates on adjacent double words.
    ; Finalize SAD and store
    movdqa xmm0, xmm3
    psrldq xmm0, 8
    paddusw xmm0, xmm3
    paddd xmm0, [GLOBAL(t128)]
    psrld xmm0, 8

    mov rax, arg(5)
    movd [rax], xmm0

    ; Accumulate sum of src2
    movdqa xmm0, xmm4
    psrldq xmm0, 8
    paddusw xmm0, xmm4
    ; Square src2. Ignore high value
    pmuludq xmm0, xmm0
    psrld xmm0, 8

    ; phaddw could be used to sum adjacent values but we want
    ; all the values summed. promote to doubles, accumulate,
    ; shift and sum
    pxor xmm2, xmm2
    movdqa xmm1, xmm5
    punpckldq xmm1, xmm2
    punpckhdq xmm5, xmm2
    paddd xmm1, xmm5
    movdqa xmm2, xmm1
    psrldq xmm1, 8
    paddd xmm1, xmm2

    psubd xmm1, xmm0

    ; (variance + 128) >> 8
    paddd xmm1, [GLOBAL(t128)]
    psrld xmm1, 8
    mov rax, arg(4)

    movd [rax], xmm1


    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop rbp
    ret

SECTION_RODATA
align 16
t128:
%ifndef __NASM_VER__
    ddq 128
%elif CONFIG_BIG_ENDIAN
    dq 0, 128
%else
    dq 128, 0
%endif
align 16
tMFQE: ; 1 << MFQE_PRECISION
    times 8 dw 0x10
align 16
tMFQE_round: ; 1 << (MFQE_PRECISION - 1)
    times 8 dw 0x08
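As a reading aid: the 16x16 routine above accumulates the SAD between the two planes, the sum of src2 and the sum of squared src2 samples, then rounds and divides both outputs by 256 (the block's pixel count). A minimal scalar sketch of the same arithmetic, reconstructed from the assembly rather than taken from the tree:

```c
#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of vp10_variance_and_sad_16x16: per-pixel averages of the
   SAD and of the block variance, each rounded and divided by 256. */
static void variance_and_sad_16x16_sketch(const uint8_t *src1, int stride1,
                                          const uint8_t *src2, int stride2,
                                          unsigned int *variance,
                                          unsigned int *sad) {
  unsigned int sad_total = 0, sum2 = 0, sse2 = 0;
  for (int r = 0; r < 16; ++r) {
    for (int c = 0; c < 16; ++c) {
      const int a = src1[r * stride1 + c];
      const int b = src2[r * stride2 + c];
      sad_total += abs(a - b);
      sum2 += b;
      sse2 += b * b;
    }
  }
  *sad = (sad_total + 128) >> 8;                         /* (SAD + 128) / 256 */
  *variance = (sse2 - ((sum2 * sum2) >> 8) + 128) >> 8;  /* (SSE - sum^2/256 + 128) / 256 */
}
```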
@@ -1,694 +0,0 @@
;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;


%include "vpx_ports/x86_abi_support.asm"

;void vp10_post_proc_down_and_across_xmm
;(
;    unsigned char *src_ptr,
;    unsigned char *dst_ptr,
;    int src_pixels_per_line,
;    int dst_pixels_per_line,
;    int rows,
;    int cols,
;    int flimit
;)
global sym(vp10_post_proc_down_and_across_xmm) PRIVATE
sym(vp10_post_proc_down_and_across_xmm):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    SAVE_XMM 7
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    ALIGN_STACK 16, rax
    ; move the global rd onto the stack, since we don't have enough registers
    ; to do PIC addressing
    movdqa xmm0, [GLOBAL(rd42)]
    sub rsp, 16
    movdqa [rsp], xmm0
%define RD42 [rsp]
%else
%define RD42 [GLOBAL(rd42)]
%endif


    movd xmm2, dword ptr arg(6) ;flimit
    punpcklwd xmm2, xmm2
    punpckldq xmm2, xmm2
    punpcklqdq xmm2, xmm2

    mov rsi, arg(0) ;src_ptr
    mov rdi, arg(1) ;dst_ptr

    movsxd rcx, DWORD PTR arg(4) ;rows
    movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line ; destination pitch?
    pxor xmm0, xmm0 ; mm0 = 00000000

.nextrow:

    xor rdx, rdx ; clear out rdx for use as loop counter
.nextcol:
    movq xmm3, QWORD PTR [rsi] ; mm4 = r0 p0..p7
    punpcklbw xmm3, xmm0 ; mm3 = p0..p3
    movdqa xmm1, xmm3 ; mm1 = p0..p3
    psllw xmm3, 2 ;

    movq xmm5, QWORD PTR [rsi + rax] ; mm4 = r1 p0..p7
    punpcklbw xmm5, xmm0 ; mm5 = r1 p0..p3
    paddusw xmm3, xmm5 ; mm3 += mm6

    ; thresholding
    movdqa xmm7, xmm1 ; mm7 = r0 p0..p3
    psubusw xmm7, xmm5 ; mm7 = r0 p0..p3 - r1 p0..p3
    psubusw xmm5, xmm1 ; mm5 = r1 p0..p3 - r0 p0..p3
    paddusw xmm7, xmm5 ; mm7 = abs(r0 p0..p3 - r1 p0..p3)
    pcmpgtw xmm7, xmm2

    movq xmm5, QWORD PTR [rsi + 2*rax] ; mm4 = r2 p0..p7
    punpcklbw xmm5, xmm0 ; mm5 = r2 p0..p3
    paddusw xmm3, xmm5 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
    psubusw xmm6, xmm5 ; mm6 = r0 p0..p3 - r2 p0..p3
    psubusw xmm5, xmm1 ; mm5 = r2 p0..p3 - r2 p0..p3
    paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r2 p0..p3)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds


    neg rax
    movq xmm5, QWORD PTR [rsi+2*rax] ; mm4 = r-2 p0..p7
    punpcklbw xmm5, xmm0 ; mm5 = r-2 p0..p3
    paddusw xmm3, xmm5 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
    psubusw xmm6, xmm5 ; mm6 = p0..p3 - r-2 p0..p3
    psubusw xmm5, xmm1 ; mm5 = r-2 p0..p3 - p0..p3
    paddusw xmm6, xmm5 ; mm6 = abs(r0 p0..p3 - r-2 p0..p3)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds

    movq xmm4, QWORD PTR [rsi+rax] ; mm4 = r-1 p0..p7
    punpcklbw xmm4, xmm0 ; mm4 = r-1 p0..p3
    paddusw xmm3, xmm4 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = r0 p0..p3
    psubusw xmm6, xmm4 ; mm6 = p0..p3 - r-2 p0..p3
    psubusw xmm4, xmm1 ; mm5 = r-1 p0..p3 - p0..p3
    paddusw xmm6, xmm4 ; mm6 = abs(r0 p0..p3 - r-1 p0..p3)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds


    paddusw xmm3, RD42 ; mm3 += round value
    psraw xmm3, 3 ; mm3 /= 8

    pand xmm1, xmm7 ; mm1 select vals > thresh from source
    pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
    paddusw xmm1, xmm7 ; combination

    packuswb xmm1, xmm0 ; pack to bytes
    movq QWORD PTR [rdi], xmm1 ;

    neg rax ; pitch is positive
    add rsi, 8
    add rdi, 8

    add rdx, 8
    cmp edx, dword arg(5) ;cols

    jl .nextcol

    ; done with the all cols, start the across filtering in place
    sub rsi, rdx
    sub rdi, rdx

    xor rdx, rdx
    movq mm0, QWORD PTR [rdi-8];

.acrossnextcol:
    movq xmm7, QWORD PTR [rdi +rdx -2]
    movd xmm4, DWORD PTR [rdi +rdx +6]

    pslldq xmm4, 8
    por xmm4, xmm7

    movdqa xmm3, xmm4
    psrldq xmm3, 2
    punpcklbw xmm3, xmm0 ; mm3 = p0..p3
    movdqa xmm1, xmm3 ; mm1 = p0..p3
    psllw xmm3, 2


    movdqa xmm5, xmm4
    psrldq xmm5, 3
    punpcklbw xmm5, xmm0 ; mm5 = p1..p4
    paddusw xmm3, xmm5 ; mm3 += mm6

    ; thresholding
    movdqa xmm7, xmm1 ; mm7 = p0..p3
    psubusw xmm7, xmm5 ; mm7 = p0..p3 - p1..p4
    psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
    paddusw xmm7, xmm5 ; mm7 = abs(p0..p3 - p1..p4)
    pcmpgtw xmm7, xmm2

    movdqa xmm5, xmm4
    psrldq xmm5, 4
    punpcklbw xmm5, xmm0 ; mm5 = p2..p5
    paddusw xmm3, xmm5 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = p0..p3
    psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
    psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
    paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds


    movdqa xmm5, xmm4 ; mm5 = p-2..p5
    punpcklbw xmm5, xmm0 ; mm5 = p-2..p1
    paddusw xmm3, xmm5 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = p0..p3
    psubusw xmm6, xmm5 ; mm6 = p0..p3 - p1..p4
    psubusw xmm5, xmm1 ; mm5 = p1..p4 - p0..p3
    paddusw xmm6, xmm5 ; mm6 = abs(p0..p3 - p1..p4)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds

    psrldq xmm4, 1 ; mm4 = p-1..p5
    punpcklbw xmm4, xmm0 ; mm4 = p-1..p2
    paddusw xmm3, xmm4 ; mm3 += mm5

    ; thresholding
    movdqa xmm6, xmm1 ; mm6 = p0..p3
    psubusw xmm6, xmm4 ; mm6 = p0..p3 - p1..p4
    psubusw xmm4, xmm1 ; mm5 = p1..p4 - p0..p3
    paddusw xmm6, xmm4 ; mm6 = abs(p0..p3 - p1..p4)
    pcmpgtw xmm6, xmm2
    por xmm7, xmm6 ; accumulate thresholds

    paddusw xmm3, RD42 ; mm3 += round value
    psraw xmm3, 3 ; mm3 /= 8

    pand xmm1, xmm7 ; mm1 select vals > thresh from source
    pandn xmm7, xmm3 ; mm7 select vals < thresh from blurred result
    paddusw xmm1, xmm7 ; combination

    packuswb xmm1, xmm0 ; pack to bytes
    movq QWORD PTR [rdi+rdx-8], mm0 ; store previous four bytes
    movdq2q mm0, xmm1

    add rdx, 8
    cmp edx, dword arg(5) ;cols
    jl .acrossnextcol;

    ; last 8 pixels
    movq QWORD PTR [rdi+rdx-8], mm0

    ; done with this row
    add rsi,rax ; next line
    mov eax, dword arg(3) ;dst_pixels_per_line ; destination pitch?
    add rdi,rax ; next destination
    mov eax, dword arg(2) ;src_pixels_per_line ; destination pitch?

    dec rcx ; decrement count
    jnz .nextrow ; next row

%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    add rsp,16
    pop rsp
%endif
    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop rbp
    ret
%undef RD42

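To make the routine above easier to follow: for each pixel it forms a blurred value as (4·p plus the four neighbours one and two steps away, plus the rd42 rounding constant) >> 3, but keeps the original pixel whenever any of those neighbours differs from it by more than flimit; the pass runs down each column and then across each row in place. A hedged scalar sketch of one 1-D pass, reconstructed from the assembly rather than copied from the original C fallback:

```c
#include <stdint.h>
#include <stdlib.h>

/* One 1-D pass of the thresholded 5-tap blur. `step` is the picture pitch
   for the vertical ("down") pass and 1 for the horizontal ("across") pass. */
static uint8_t blur_or_keep(const uint8_t *p, int step, int flimit) {
  const int offsets[4] = { -2 * step, -1 * step, 1 * step, 2 * step };
  int sum = 4 * p[0];
  for (int i = 0; i < 4; ++i) {
    const int q = p[offsets[i]];
    if (abs(p[0] - q) > flimit) return p[0];  /* edge detected: keep the pixel */
    sum += q;
  }
  return (uint8_t)((sum + 4) >> 3);  /* rd42 rounding, total weight 8 */
}
```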
;void vp10_mbpost_proc_down_xmm(unsigned char *dst,
;                               int pitch, int rows, int cols,int flimit)
extern sym(vp10_rv)
global sym(vp10_mbpost_proc_down_xmm) PRIVATE
sym(vp10_mbpost_proc_down_xmm):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub rsp, 128+16

    ; unsigned char d[16][8] at [rsp]
    ; create flimit2 at [rsp+128]
    mov eax, dword ptr arg(4) ;flimit
    mov [rsp+128], eax
    mov [rsp+128+4], eax
    mov [rsp+128+8], eax
    mov [rsp+128+12], eax
%define flimit4 [rsp+128]

%if ABI_IS_32BIT=0
    lea r8, [GLOBAL(sym(vp10_rv))]
%endif

    ;rows +=8;
    add dword arg(2), 8

    ;for(c=0; c<cols; c+=8)
.loop_col:
    mov rsi, arg(0) ; s
    pxor xmm0, xmm0 ;

    movsxd rax, dword ptr arg(1) ;pitch ;
    neg rax ; rax = -pitch

    lea rsi, [rsi + rax*8]; ; rdi = s[-pitch*8]
    neg rax


    pxor xmm5, xmm5
    pxor xmm6, xmm6 ;

    pxor xmm7, xmm7 ;
    mov rdi, rsi

    mov rcx, 15 ;

.loop_initvar:
    movq xmm1, QWORD PTR [rdi];
    punpcklbw xmm1, xmm0 ;

    paddw xmm5, xmm1 ;
    pmullw xmm1, xmm1 ;

    movdqa xmm2, xmm1 ;
    punpcklwd xmm1, xmm0 ;

    punpckhwd xmm2, xmm0 ;
    paddd xmm6, xmm1 ;

    paddd xmm7, xmm2 ;
    lea rdi, [rdi+rax] ;

    dec rcx
    jne .loop_initvar
    ;save the var and sum
    xor rdx, rdx
.loop_row:
    movq xmm1, QWORD PTR [rsi] ; [s-pitch*8]
    movq xmm2, QWORD PTR [rdi] ; [s+pitch*7]

    punpcklbw xmm1, xmm0
    punpcklbw xmm2, xmm0

    paddw xmm5, xmm2
    psubw xmm5, xmm1

    pmullw xmm2, xmm2
    movdqa xmm4, xmm2

    punpcklwd xmm2, xmm0
    punpckhwd xmm4, xmm0

    paddd xmm6, xmm2
    paddd xmm7, xmm4

    pmullw xmm1, xmm1
    movdqa xmm2, xmm1

    punpcklwd xmm1, xmm0
    psubd xmm6, xmm1

    punpckhwd xmm2, xmm0
    psubd xmm7, xmm2


    movdqa xmm3, xmm6
    pslld xmm3, 4

    psubd xmm3, xmm6
    movdqa xmm1, xmm5

    movdqa xmm4, xmm5
    pmullw xmm1, xmm1

    pmulhw xmm4, xmm4
    movdqa xmm2, xmm1

    punpcklwd xmm1, xmm4
    punpckhwd xmm2, xmm4

    movdqa xmm4, xmm7
    pslld xmm4, 4

    psubd xmm4, xmm7

    psubd xmm3, xmm1
    psubd xmm4, xmm2

    psubd xmm3, flimit4
    psubd xmm4, flimit4

    psrad xmm3, 31
    psrad xmm4, 31

    packssdw xmm3, xmm4
    packsswb xmm3, xmm0

    movq xmm1, QWORD PTR [rsi+rax*8]

    movq xmm2, xmm1
    punpcklbw xmm1, xmm0

    paddw xmm1, xmm5
    mov rcx, rdx

    and rcx, 127
%if ABI_IS_32BIT=1 && CONFIG_PIC=1
    push rax
    lea rax, [GLOBAL(sym(vp10_rv))]
    movdqu xmm4, [rax + rcx*2] ;vp10_rv[rcx*2]
    pop rax
%elif ABI_IS_32BIT=0
    movdqu xmm4, [r8 + rcx*2] ;vp10_rv[rcx*2]
%else
    movdqu xmm4, [sym(vp10_rv) + rcx*2]
%endif

    paddw xmm1, xmm4
    ;paddw xmm1, eight8s
    psraw xmm1, 4

    packuswb xmm1, xmm0
    pand xmm1, xmm3

    pandn xmm3, xmm2
    por xmm1, xmm3

    and rcx, 15
    movq QWORD PTR [rsp + rcx*8], xmm1 ;d[rcx*8]

    mov rcx, rdx
    sub rcx, 8

    and rcx, 15
    movq mm0, [rsp + rcx*8] ;d[rcx*8]

    movq [rsi], mm0
    lea rsi, [rsi+rax]

    lea rdi, [rdi+rax]
    add rdx, 1

    cmp edx, dword arg(2) ;rows
    jl .loop_row

    add dword arg(0), 8 ; s += 8
    sub dword arg(3), 8 ; cols -= 8
    cmp dword arg(3), 0
    jg .loop_col

    add rsp, 128+16
    pop rsp

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop rbp
    ret
%undef flimit4

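Roughly, the column filter above keeps a running sum and sum of squares over a 16-row window around each pixel and only replaces the pixel when the window is flat enough; the replacement mixes the window sum with a dither value from the vp10_rv table. A hedged scalar sketch of just the per-pixel decision (my reconstruction from the SSE2 code; the sliding-window bookkeeping is omitted):

```c
#include <stdint.h>

/* Per-pixel decision of the vertical mbpost filter: `sum` and `sumsq` cover
   the 16-row window around x, and `rv` is the dither entry taken from the
   vp10_rv table for this row (names here are illustrative). */
static uint8_t mbpost_down_pixel(uint8_t x, int sum, int sumsq, int rv,
                                 int flimit) {
  /* 15*sumsq - sum*sum is proportional to the window variance. */
  if (sumsq * 15 - sum * sum < flimit)
    return (uint8_t)((sum + x + rv) >> 4);  /* smooth area: windowed average */
  return x;                                 /* busy area: leave untouched */
}
```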
;void vp10_mbpost_proc_across_ip_xmm(unsigned char *src,
;                                    int pitch, int rows, int cols,int flimit)
global sym(vp10_mbpost_proc_across_ip_xmm) PRIVATE
sym(vp10_mbpost_proc_across_ip_xmm):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    SAVE_XMM 7
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

    ALIGN_STACK 16, rax
    sub rsp, 16

    ; create flimit4 at [rsp]
    mov eax, dword ptr arg(4) ;flimit
    mov [rsp], eax
    mov [rsp+4], eax
    mov [rsp+8], eax
    mov [rsp+12], eax
%define flimit4 [rsp]


    ;for(r=0;r<rows;r++)
.ip_row_loop:

    xor rdx, rdx ;sumsq=0;
    xor rcx, rcx ;sum=0;
    mov rsi, arg(0); s
    mov rdi, -8
.ip_var_loop:
    ;for(i=-8;i<=6;i++)
    ;{
    ;    sumsq += s[i]*s[i];
    ;    sum   += s[i];
    ;}
    movzx eax, byte [rsi+rdi]
    add ecx, eax
    mul al
    add edx, eax
    add rdi, 1
    cmp rdi, 6
    jle .ip_var_loop


    ;mov rax, sumsq
    ;movd xmm7, rax
    movd xmm7, edx

    ;mov rax, sum
    ;movd xmm6, rax
    movd xmm6, ecx

    mov rsi, arg(0) ;s
    xor rcx, rcx

    movsxd rdx, dword arg(3) ;cols
    add rdx, 8
    pxor mm0, mm0
    pxor mm1, mm1

    pxor xmm0, xmm0
.nextcol4:

    movd xmm1, DWORD PTR [rsi+rcx-8] ; -8 -7 -6 -5
    movd xmm2, DWORD PTR [rsi+rcx+7] ; +7 +8 +9 +10

    punpcklbw xmm1, xmm0 ; expanding
    punpcklbw xmm2, xmm0 ; expanding

    punpcklwd xmm1, xmm0 ; expanding to dwords
    punpcklwd xmm2, xmm0 ; expanding to dwords

    psubd xmm2, xmm1 ; 7--8 8--7 9--6 10--5
    paddd xmm1, xmm1 ; -8*2 -7*2 -6*2 -5*2

    paddd xmm1, xmm2 ; 7+-8 8+-7 9+-6 10+-5
    pmaddwd xmm1, xmm2 ; squared of 7+-8 8+-7 9+-6 10+-5

    paddd xmm6, xmm2
    paddd xmm7, xmm1

    pshufd xmm6, xmm6, 0 ; duplicate the last ones
    pshufd xmm7, xmm7, 0 ; duplicate the last ones

    psrldq xmm1, 4 ; 8--7 9--6 10--5 0000
    psrldq xmm2, 4 ; 8--7 9--6 10--5 0000

    pshufd xmm3, xmm1, 3 ; 0000 8--7 8--7 8--7 squared
    pshufd xmm4, xmm2, 3 ; 0000 8--7 8--7 8--7 squared

    paddd xmm6, xmm4
    paddd xmm7, xmm3

    pshufd xmm3, xmm1, 01011111b ; 0000 0000 9--6 9--6 squared
    pshufd xmm4, xmm2, 01011111b ; 0000 0000 9--6 9--6 squared

    paddd xmm7, xmm3
    paddd xmm6, xmm4

    pshufd xmm3, xmm1, 10111111b ; 0000 0000 8--7 8--7 squared
    pshufd xmm4, xmm2, 10111111b ; 0000 0000 8--7 8--7 squared

    paddd xmm7, xmm3
    paddd xmm6, xmm4

    movdqa xmm3, xmm6
    pmaddwd xmm3, xmm3

    movdqa xmm5, xmm7
    pslld xmm5, 4

    psubd xmm5, xmm7
    psubd xmm5, xmm3

    psubd xmm5, flimit4
    psrad xmm5, 31

    packssdw xmm5, xmm0
    packsswb xmm5, xmm0

    movd xmm1, DWORD PTR [rsi+rcx]
    movq xmm2, xmm1

    punpcklbw xmm1, xmm0
    punpcklwd xmm1, xmm0

    paddd xmm1, xmm6
    paddd xmm1, [GLOBAL(four8s)]

    psrad xmm1, 4
    packssdw xmm1, xmm0

    packuswb xmm1, xmm0
    pand xmm1, xmm5

    pandn xmm5, xmm2
    por xmm5, xmm1

    movd [rsi+rcx-8], mm0
    movq mm0, mm1

    movdq2q mm1, xmm5
    psrldq xmm7, 12

    psrldq xmm6, 12
    add rcx, 4

    cmp rcx, rdx
    jl .nextcol4

    ;s+=pitch;
    movsxd rax, dword arg(1)
    add arg(0), rax

    sub dword arg(2), 1 ;rows-=1
    cmp dword arg(2), 0
    jg .ip_row_loop

    add rsp, 16
    pop rsp

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    RESTORE_XMM
    UNSHADOW_ARGS
    pop rbp
    ret
%undef flimit4

;void vp10_plane_add_noise_wmt (unsigned char *start, unsigned char *noise,
;                               unsigned char blackclamp[16],
;                               unsigned char whiteclamp[16],
;                               unsigned char bothclamp[16],
;                               unsigned int width, unsigned int height, int pitch)
global sym(vp10_plane_add_noise_wmt) PRIVATE
sym(vp10_plane_add_noise_wmt):
    push rbp
    mov rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT rbx
    push rsi
    push rdi
    ; end prolog

.addnoise_loop:
    call sym(LIBVPX_RAND) WRT_PLT
    mov rcx, arg(1) ;noise
    and rax, 0xff
    add rcx, rax

    ; we rely on the fact that the clamping vectors are stored contiguously
    ; in black/white/both order. Note that we have to reload this here because
    ; rdx could be trashed by rand()
    mov rdx, arg(2) ; blackclamp


    mov rdi, rcx
    movsxd rcx, dword arg(5) ;[Width]
    mov rsi, arg(0) ;Pos
    xor rax,rax

.addnoise_nextset:
    movdqu xmm1,[rsi+rax] ; get the source

    psubusb xmm1, [rdx] ;blackclamp ; clamp both sides so we don't outrange adding noise
    paddusb xmm1, [rdx+32] ;bothclamp
    psubusb xmm1, [rdx+16] ;whiteclamp

    movdqu xmm2,[rdi+rax] ; get the noise for this line
    paddb xmm1,xmm2 ; add it in
    movdqu [rsi+rax],xmm1 ; store the result

    add rax,16 ; move to the next line

    cmp rax, rcx
    jl .addnoise_nextset

    movsxd rax, dword arg(7) ; Pitch
    add arg(0), rax ; Start += Pitch
    sub dword arg(6), 1 ; Height -= 1
    jg .addnoise_loop

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop rbp
    ret


SECTION_RODATA
align 16
rd42:
    times 8 dw 0x04
four8s:
    times 4 dd 8
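For reference, the add-noise routine above clamps each source byte away from the extremes and then adds a pseudo-randomly chosen row of the noise buffer. A rough scalar equivalent, reconstructed from the assembly with the 16-byte clamp vectors reduced to single values for clarity (the names are illustrative, not the original C fallback):

```c
#include <stdint.h>
#include <stdlib.h>

/* Scalar sketch of vp10_plane_add_noise: one noise offset per picture row,
   chosen by rand() & 0xff, added after clamping the source into range. */
static void plane_add_noise_sketch(uint8_t *start, const char *noise,
                                   uint8_t blackclamp, uint8_t whiteclamp,
                                   unsigned int width, unsigned int height,
                                   int pitch) {
  for (unsigned int row = 0; row < height; ++row) {
    const char *ref = noise + (rand() & 0xff);
    for (unsigned int col = 0; col < width; ++col) {
      int v = start[col];
      if (v < blackclamp) v = blackclamp;              /* keep headroom below */
      if (v > 255 - whiteclamp) v = 255 - whiteclamp;  /* and above */
      start[col] = (uint8_t)(v + ref[col]);            /* wrap like paddb */
    }
    start += pitch;
  }
}
```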
@@ -26,9 +26,6 @@
#include "vp10/common/alloccommon.h"
#include "vp10/common/loopfilter.h"
#include "vp10/common/onyxc_int.h"
#if CONFIG_VP9_POSTPROC
#include "vp10/common/postproc.h"
#endif
#include "vp10/common/quant_common.h"
#include "vp10/common/reconintra.h"

@@ -154,8 +151,8 @@ static int equal_dimensions(const YV12_BUFFER_CONFIG *a,
}

vpx_codec_err_t vp10_copy_reference_dec(VP10Decoder *pbi,
                                        VP9_REFFRAME ref_frame_flag,
                                        YV12_BUFFER_CONFIG *sd) {
                                       VP9_REFFRAME ref_frame_flag,
                                       YV12_BUFFER_CONFIG *sd) {
  VP10_COMMON *cm = &pbi->common;

  /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the

@@ -413,13 +410,9 @@ int vp10_receive_compressed_data(VP10Decoder *pbi,
  return retcode;
}

int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
                       vp10_ppflags_t *flags) {
int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) {
  VP10_COMMON *const cm = &pbi->common;
  int ret = -1;
#if !CONFIG_VP9_POSTPROC
  (void)*flags;
#endif

  if (pbi->ready_for_new_data == 1)
    return ret;

@@ -432,17 +425,8 @@ int vp10_get_raw_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,

  pbi->ready_for_new_data = 1;

#if CONFIG_VP9_POSTPROC
  if (!cm->show_existing_frame) {
    ret = vp10_post_proc_frame(cm, sd, flags);
  } else {
    *sd = *cm->frame_to_show;
    ret = 0;
  }
#else
  *sd = *cm->frame_to_show;
  ret = 0;
#endif  /*!CONFIG_POSTPROC*/
  vpx_clear_system_state();
  return ret;
}
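With the postproc branch gone, vp10_get_raw_frame no longer takes a vp10_ppflags_t argument. A hedged sketch of what a caller looks like after this change (the helper name is illustrative; the dx interface hunks later in this commit make the same call):

```c
#include "vp10/decoder/decoder.h"

/* Hedged sketch: fetching the output frame after this change.
   Returns 1 when a frame was produced. */
static int fetch_output_frame(VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd) {
  /* No postproc flags to pass; the decoder simply copies cm->frame_to_show
     into *sd with no deblocking or denoising applied. */
  return vp10_get_raw_frame(pbi, sd) == 0;
}
```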
@@ -20,7 +20,6 @@

#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"
#include "vp10/common/ppflags.h"
#include "vp10/decoder/dthread.h"

#ifdef __cplusplus

@@ -85,8 +84,7 @@ typedef struct VP10Decoder {
int vp10_receive_compressed_data(struct VP10Decoder *pbi,
                                 size_t size, const uint8_t **dest);

int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd,
                       vp10_ppflags_t *flags);
int vp10_get_raw_frame(struct VP10Decoder *pbi, YV12_BUFFER_CONFIG *sd);

vpx_codec_err_t vp10_copy_reference_dec(struct VP10Decoder *pbi,
                                        VP9_REFFRAME ref_frame_flag,
@@ -17,9 +17,6 @@
#include "vp10/common/alloccommon.h"
#include "vp10/common/filter.h"
#include "vp10/common/idct.h"
#if CONFIG_VP9_POSTPROC
#include "vp10/common/postproc.h"
#endif
#include "vp10/common/reconinter.h"
#include "vp10/common/reconintra.h"
#include "vp10/common/tile_common.h"

@@ -375,9 +372,6 @@ static void dealloc_compressor_data(VP10_COMP *cpi) {
  cpi->active_map.map = NULL;

  vp10_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
  vp10_free_postproc_buffers(cm);
#endif
  vp10_free_context_buffers(cm);

  vpx_free_frame_buffer(&cpi->last_frame_uf);

@@ -1969,9 +1963,6 @@ void vp10_remove_compressor(VP10_COMP *cpi) {

  vp10_remove_common(cm);
  vp10_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
  vp10_free_postproc_buffers(cm);
#endif
  vpx_free(cpi);

#if CONFIG_VP9_TEMPORAL_DENOISING

@@ -2961,31 +2952,6 @@ static void set_size_dependent_vars(VP10_COMP *cpi, int *q,
  // lagged coding, and if the relevant speed feature flag is set.
  if (oxcf->pass == 2 && cpi->sf.static_segmentation)
    configure_static_seg_features(cpi);

#if CONFIG_VP9_POSTPROC
  if (oxcf->noise_sensitivity > 0) {
    int l = 0;
    switch (oxcf->noise_sensitivity) {
      case 1:
        l = 20;
        break;
      case 2:
        l = 40;
        break;
      case 3:
        l = 60;
        break;
      case 4:
      case 5:
        l = 100;
        break;
      case 6:
        l = 150;
        break;
    }
    vp10_denoise(cpi->Source, cpi->Source, l);
  }
#endif  // CONFIG_VP9_POSTPROC
}

static void init_motion_estimation(VP10_COMP *cpi) {

@@ -4169,22 +4135,6 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
    {
      PSNR_STATS psnr2;
      double frame_ssim2 = 0, weight = 0;
#if CONFIG_VP9_POSTPROC
      if (vpx_alloc_frame_buffer(&cm->post_proc_buffer,
                                 recon->y_crop_width, recon->y_crop_height,
                                 cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VPX_HIGHBITDEPTH
                                 cm->use_highbitdepth,
#endif
                                 VPX_ENC_BORDER_IN_PIXELS,
                                 cm->byte_alignment) < 0) {
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate post processing buffer");
      }

      vp10_deblock(cm->frame_to_show, &cm->post_proc_buffer,
                   cm->lf.filter_level * 10 / 6);
#endif
      vpx_clear_system_state();

#if CONFIG_VPX_HIGHBITDEPTH

@@ -4315,20 +4265,13 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
  return 0;
}

int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                               vp10_ppflags_t *flags) {
int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
  VP10_COMMON *cm = &cpi->common;
#if !CONFIG_VP9_POSTPROC
  (void)flags;
#endif

  if (!cm->show_frame) {
    return -1;
  } else {
    int ret;
#if CONFIG_VP9_POSTPROC
    ret = vp10_post_proc_frame(cm, dest, flags);
#else
    if (cm->frame_to_show) {
      *dest = *cm->frame_to_show;
      dest->y_width = cm->width;

@@ -4339,7 +4282,6 @@ int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
    } else {
      ret = -1;
    }
#endif  // !CONFIG_VP9_POSTPROC
    vpx_clear_system_state();
    return ret;
  }
@@ -17,7 +17,6 @@
#include "vpx/vp8cx.h"

#include "vp10/common/alloccommon.h"
#include "vp10/common/ppflags.h"
#include "vp10/common/entropymode.h"
#include "vp10/common/thread_common.h"
#include "vp10/common/onyxc_int.h"

@@ -514,8 +513,7 @@ int vp10_get_compressed_data(VP10_COMP *cpi, unsigned int *frame_flags,
                             size_t *size, uint8_t *dest,
                             int64_t *time_stamp, int64_t *time_end, int flush);

int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest,
                               vp10_ppflags_t *flags);
int vp10_get_preview_raw_frame(VP10_COMP *cpi, YV12_BUFFER_CONFIG *dest);

int vp10_use_as_reference(VP10_COMP *cpi, int ref_frame_flags);
@@ -313,7 +313,7 @@ static void temporal_filter_iterate_c(VP10_COMP *cpi,

  for (mb_row = 0; mb_row < mb_rows; mb_row++) {
    // Source frames are extended to 16 pixels. This is different than
    //  L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS)
    //  L/A/G reference frames that have a border of 32 (VPXENCBORDERINPIXELS)
    // A 6/8 tap filter is used for motion search. This requires 2 pixels
    // before and 3 pixels after. So the largest Y mv on a border would
    // then be 16 - VPX_INTERP_EXTEND. The UV blocks are half the size of the
@@ -10,7 +10,6 @@

VP10_COMMON_SRCS-yes += vp10_common.mk
VP10_COMMON_SRCS-yes += vp10_iface_common.h
VP10_COMMON_SRCS-yes += common/ppflags.h
VP10_COMMON_SRCS-yes += common/alloccommon.c
VP10_COMMON_SRCS-yes += common/blockd.c
VP10_COMMON_SRCS-yes += common/debugmodes.c

@@ -64,15 +63,6 @@ VP10_COMMON_SRCS-yes += common/scan.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.h
VP10_COMMON_SRCS-yes += common/vp10_fwd_txfm.c

VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/postproc.c
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.h
VP10_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/mfqe.c
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
endif

ifneq ($(CONFIG_VPX_HIGHBITDEPTH),yes)
VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans4_dspr2.c
VP10_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/itrans8_dspr2.c

@@ -84,10 +74,6 @@ VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct4x4_msa.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct8x8_msa.c
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct16x16_msa.c

ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP10_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c
endif

VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_intrin_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_txfm_sse2.c
VP10_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp10_fwd_dct32x32_impl_sse2.h
@@ -1121,34 +1121,16 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,

static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
                                          va_list args) {
#if CONFIG_VP9_POSTPROC
  vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *);
  if (config != NULL) {
    ctx->preview_ppcfg = *config;
    return VPX_CODEC_OK;
  } else {
    return VPX_CODEC_INVALID_PARAM;
  }
#else
  (void)ctx;
  (void)args;
  return VPX_CODEC_INCAPABLE;
#endif
}


static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
  YV12_BUFFER_CONFIG sd;
  vp10_ppflags_t flags;
  vp10_zero(flags);

  if (ctx->preview_ppcfg.post_proc_flag) {
    flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag;
    flags.deblocking_level = ctx->preview_ppcfg.deblocking_level;
    flags.noise_level = ctx->preview_ppcfg.noise_level;
  }

  if (vp10_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) {
  if (vp10_get_preview_raw_frame(ctx->cpi, &sd) == 0) {
    yuvconfig2image(&ctx->preview_img, &sd, NULL);
    return &ctx->preview_img;
  } else {
@@ -29,7 +29,7 @@

#include "vp10/vp10_iface_common.h"

#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0)
#define VP9_CAP_POSTPROC 0

typedef vpx_codec_stream_info_t vp10_stream_info_t;

@@ -119,9 +119,6 @@ static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
          (FrameWorkerData *)worker->data1;
      vpx_get_worker_interface()->end(worker);
      vp10_remove_common(&frame_worker_data->pbi->common);
#if CONFIG_VP9_POSTPROC
      vp10_free_postproc_buffers(&frame_worker_data->pbi->common);
#endif
      vp10_decoder_remove(frame_worker_data->pbi);
      vpx_free(frame_worker_data->scratch_buffer);
#if CONFIG_MULTITHREAD

@@ -313,15 +310,6 @@ static void set_default_ppflags(vp8_postproc_cfg_t *cfg) {
  cfg->noise_level = 0;
}

static void set_ppflags(const vpx_codec_alg_priv_t *ctx,
                        vp10_ppflags_t *flags) {
  flags->post_proc_flag =
      ctx->postproc_cfg.post_proc_flag;

  flags->deblocking_level = ctx->postproc_cfg.deblocking_level;
  flags->noise_level = ctx->postproc_cfg.noise_level;
}

static int frame_worker_hook(void *arg1, void *arg2) {
  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1;
  const uint8_t *data = frame_worker_data->data;

@@ -554,7 +542,6 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,

static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
  YV12_BUFFER_CONFIG sd;
  vp10_ppflags_t flags = {0, 0, 0};
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;

@@ -567,7 +554,7 @@ static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {

  check_resync(ctx, frame_worker_data->pbi);

  if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
  if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
    VP10_COMMON *const cm = &frame_worker_data->pbi->common;
    RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
    ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx;

@@ -746,7 +733,6 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
  if (*iter == NULL && ctx->frame_workers != NULL) {
    do {
      YV12_BUFFER_CONFIG sd;
      vp10_ppflags_t flags = {0, 0, 0};
      const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
      VPxWorker *const worker =
          &ctx->frame_workers[ctx->next_output_worker_id];

@@ -754,8 +740,6 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
          (FrameWorkerData *)worker->data1;
      ctx->next_output_worker_id =
          (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
      if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
        set_ppflags(ctx, &flags);
      // Wait for the frame from worker thread.
      if (winterface->sync(worker)) {
        // Check if worker has received any frames.

@@ -764,7 +748,7 @@ static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
        frame_worker_data->received_frame = 0;
        check_resync(ctx, frame_worker_data->pbi);
      }
      if (vp10_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
      if (vp10_get_raw_frame(frame_worker_data->pbi, &sd) == 0) {
        VP10_COMMON *const cm = &frame_worker_data->pbi->common;
        RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
        release_last_output_frame(ctx);

@@ -878,21 +862,9 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,

static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx,
                                         va_list args) {
#if CONFIG_VP9_POSTPROC
  vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);

  if (data) {
    ctx->postproc_cfg_set = 1;
    ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data);
    return VPX_CODEC_OK;
  } else {
    return VPX_CODEC_INVALID_PARAM;
  }
#else
  (void)ctx;
  (void)args;
  return VPX_CODEC_INCAPABLE;
#endif
}

static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx,
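The practical effect of the interface hunks above is that the vp10 decoder no longer advertises VPX_CODEC_CAP_POSTPROC and the postproc control simply reports itself as unsupported. A hedged sketch of how that looks from the application side (the helper name and cfg values are illustrative; the control IDs and types are the public libvpx API from vpx/vp8.h):

```c
#include "vpx/vpx_decoder.h"
#include "vpx/vp8.h"  /* VP8_SET_POSTPROC, vp8_postproc_cfg_t */

/* Hedged sketch: asking the vp10 decoder to post process after this change. */
static void try_enable_postproc(vpx_codec_ctx_t *decoder) {
  vp8_postproc_cfg_t cfg = { VP8_DEBLOCK | VP8_DEMACROBLOCK, 4, 0 };
  const vpx_codec_err_t res =
      vpx_codec_control(decoder, VP8_SET_POSTPROC, &cfg);
  if (res == VPX_CODEC_INCAPABLE) {
    /* Expected here: the control is now a no-op for vp10. */
  }
}
```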
@@ -77,10 +77,6 @@ VP10_CX_SRCS-yes += encoder/aq_complexity.c
VP10_CX_SRCS-yes += encoder/aq_complexity.h
VP10_CX_SRCS-yes += encoder/skin_detection.c
VP10_CX_SRCS-yes += encoder/skin_detection.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h
VP10_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c
endif
VP10_CX_SRCS-yes += encoder/temporal_filter.c
VP10_CX_SRCS-yes += encoder/temporal_filter.h
VP10_CX_SRCS-yes += encoder/mbgraph.c
@@ -393,7 +393,7 @@ section .text

; On Android platforms use lrand48 when building postproc routines. Prior to L
; rand() was not available.
%if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1
%if CONFIG_POSTPROC=1
%ifdef __ANDROID__
extern sym(lrand48)
%define LIBVPX_RAND lrand48