Unify subtract function used in VP8/9
This commit replaces the vp8_-prefixed subtract functions with the common vpx_subtract_block function. It removes the now-redundant SIMD optimization code and unit tests. Change-Id: I42e086c32c93c6125e452dcaa6ed04337fe028d9
This commit is contained in:
Родитель
9cb3a13426
Коммит
0ede9f52b7
|
@ -1,123 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "third_party/googletest/src/include/gtest/gtest.h"
|
||||
#include "test/acm_random.h"
|
||||
#include "test/clear_system_state.h"
|
||||
#include "test/register_state_check.h"
|
||||
#include "./vpx_config.h"
|
||||
#include "./vp8_rtcd.h"
|
||||
#include "vp8/common/blockd.h"
|
||||
#include "vp8/encoder/block.h"
|
||||
#include "vpx_mem/vpx_mem.h"
|
||||
|
||||
typedef void (*SubtractBlockFunc)(BLOCK *be, BLOCKD *bd, int pitch);
|
||||
|
||||
namespace {
|
||||
|
||||
// Fixture for the block-subtract tests; the test parameter is the
// implementation (a SubtractBlockFunc) being exercised.
class SubtractBlockTest : public ::testing::TestWithParam<SubtractBlockFunc> {
 public:
  // Invoked after every test case; calls libvpx_test::ClearSystemState().
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
};
|
||||
|
||||
using libvpx_test::ACMRandom;
|
||||
|
||||
// Checks a single 4x4 block subtract. For every source stride listed in
// kSrcStride the source and predictor blocks are filled with random bytes, the
// function under test is run, and each output sample must satisfy
//   source == diff + predictor.
TEST_P(SubtractBlockTest, SimpleSubtract) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  BLOCK be;
  BLOCKD bd;
  // in libvpx, this stride is always 16
  const int kDiffPredStride = 16;
  // Source strides to exercise; the trailing 0 terminates the list.
  const int kSrcStride[] = {32, 16, 8, 4, 0};
  const int kBlockWidth = 4;
  const int kBlockHeight = 4;

  // Allocate... align to 16 for mmx/sse tests. The source buffer is sized for
  // the largest stride (kSrcStride[0]) and reused for the smaller ones.
  uint8_t *source = reinterpret_cast<uint8_t*>(
      vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
  be.src_diff = reinterpret_cast<int16_t*>(
      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
  bd.predictor = reinterpret_cast<unsigned char*>(
      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));

  // Report an allocation failure as a test failure instead of dereferencing a
  // null buffer below.
  const bool buffers_ok =
      source != NULL && be.src_diff != NULL && bd.predictor != NULL;
  EXPECT_TRUE(buffers_ok) << "vpx_memalign() failed";

  for (int i = 0; buffers_ok && kSrcStride[i] > 0; ++i) {
    // start at block0
    be.src = 0;
    be.base_src = &source;
    be.src_stride = kSrcStride[i];

    // Pre-fill the diff block with a recognizable pattern so stale output
    // cannot pass the check.
    int16_t *src_diff = be.src_diff;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        src_diff[c] = static_cast<int16_t>(0xa5a5u);
      }
      src_diff += kDiffPredStride;
    }

    // Fill the source block with random pixels.
    uint8_t *base_src = *be.base_src;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        base_src[c] = rnd.Rand8();
      }
      base_src += be.src_stride;
    }

    // Fill the predictor block with random pixels.
    uint8_t *predictor = bd.predictor;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        predictor[c] = rnd.Rand8();
      }
      predictor += kDiffPredStride;
    }

    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));

    // Rewind all three cursors and verify source == diff + predictor.
    base_src = *be.base_src;
    src_diff = be.src_diff;
    predictor = bd.predictor;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
        EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
                                                             << ", c = " << c;
      }
      src_diff += kDiffPredStride;
      predictor += kDiffPredStride;
      base_src += be.src_stride;
    }
  }

  // Release only what was actually allocated.
  if (be.src_diff != NULL) vpx_free(be.src_diff);
  if (source != NULL) vpx_free(source);
  if (bd.predictor != NULL) vpx_free(bd.predictor);
}
|
||||
|
||||
// The plain C implementation is instantiated unconditionally.
INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
                        ::testing::Values(vp8_subtract_b_c));

// Each SIMD implementation is instantiated only when the matching HAVE_*
// configuration macro is set.
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
                        ::testing::Values(vp8_subtract_b_neon));
#endif

#if HAVE_MMX
INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
                        ::testing::Values(vp8_subtract_b_mmx));
#endif

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
                        ::testing::Values(vp8_subtract_b_sse2));
#endif
|
||||
|
||||
} // namespace
|
|
@ -104,7 +104,6 @@ endif
|
|||
LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
|
||||
LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc
|
||||
|
|
|
@ -343,15 +343,6 @@ add_proto qw/int vp8_mbuverror/, "struct macroblock *mb";
|
|||
specialize qw/vp8_mbuverror mmx sse2/;
|
||||
$vp8_mbuverror_sse2=vp8_mbuverror_xmm;
|
||||
|
||||
add_proto qw/void vp8_subtract_b/, "struct block *be, struct blockd *bd, int pitch";
|
||||
specialize qw/vp8_subtract_b mmx sse2 neon/;
|
||||
|
||||
add_proto qw/void vp8_subtract_mby/, "short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride";
|
||||
specialize qw/vp8_subtract_mby mmx sse2 neon/;
|
||||
|
||||
add_proto qw/void vp8_subtract_mbuv/, "short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride";
|
||||
specialize qw/vp8_subtract_mbuv mmx sse2 neon/;
|
||||
|
||||
#
|
||||
# Motion search
|
||||
#
|
||||
|
|
|
@ -1,154 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2014 The WebM project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include "vp8/encoder/block.h"
|
||||
|
||||
void vp8_subtract_b_neon(
|
||||
BLOCK *be,
|
||||
BLOCKD *bd,
|
||||
int pitch) {
|
||||
unsigned char *src_ptr, *predictor;
|
||||
int src_stride;
|
||||
int16_t *src_diff;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||
uint16x8_t q10u16, q11u16, q12u16, q13u16;
|
||||
|
||||
src_ptr = *be->base_src + be->src;
|
||||
src_stride = be->src_stride;
|
||||
predictor = bd->predictor;
|
||||
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d4u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d6u8 = vld1_u8(src_ptr);
|
||||
|
||||
d1u8 = vld1_u8(predictor);
|
||||
predictor += pitch;
|
||||
d3u8 = vld1_u8(predictor);
|
||||
predictor += pitch;
|
||||
d5u8 = vld1_u8(predictor);
|
||||
predictor += pitch;
|
||||
d7u8 = vld1_u8(predictor);
|
||||
|
||||
q10u16 = vsubl_u8(d0u8, d1u8);
|
||||
q11u16 = vsubl_u8(d2u8, d3u8);
|
||||
q12u16 = vsubl_u8(d4u8, d5u8);
|
||||
q13u16 = vsubl_u8(d6u8, d7u8);
|
||||
|
||||
src_diff = be->src_diff;
|
||||
vst1_u16((uint16_t *)src_diff, vget_low_u16(q10u16));
|
||||
src_diff += pitch;
|
||||
vst1_u16((uint16_t *)src_diff, vget_low_u16(q11u16));
|
||||
src_diff += pitch;
|
||||
vst1_u16((uint16_t *)src_diff, vget_low_u16(q12u16));
|
||||
src_diff += pitch;
|
||||
vst1_u16((uint16_t *)src_diff, vget_low_u16(q13u16));
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_subtract_mby_neon(
|
||||
int16_t *diff,
|
||||
unsigned char *src,
|
||||
int src_stride,
|
||||
unsigned char *pred,
|
||||
int pred_stride) {
|
||||
int i;
|
||||
uint8x16_t q0u8, q1u8, q2u8, q3u8;
|
||||
uint16x8_t q8u16, q9u16, q10u16, q11u16;
|
||||
|
||||
for (i = 0; i < 8; i++) { // subtract_mby_loop
|
||||
q0u8 = vld1q_u8(src);
|
||||
src += src_stride;
|
||||
q2u8 = vld1q_u8(src);
|
||||
src += src_stride;
|
||||
q1u8 = vld1q_u8(pred);
|
||||
pred += pred_stride;
|
||||
q3u8 = vld1q_u8(pred);
|
||||
pred += pred_stride;
|
||||
|
||||
q8u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q1u8));
|
||||
q9u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q1u8));
|
||||
q10u16 = vsubl_u8(vget_low_u8(q2u8), vget_low_u8(q3u8));
|
||||
q11u16 = vsubl_u8(vget_high_u8(q2u8), vget_high_u8(q3u8));
|
||||
|
||||
vst1q_u16((uint16_t *)diff, q8u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q9u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q10u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q11u16);
|
||||
diff += 8;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void vp8_subtract_mbuv_neon(
|
||||
int16_t *diff,
|
||||
unsigned char *usrc,
|
||||
unsigned char *vsrc,
|
||||
int src_stride,
|
||||
unsigned char *upred,
|
||||
unsigned char *vpred,
|
||||
int pred_stride) {
|
||||
int i, j;
|
||||
unsigned char *src_ptr, *pred_ptr;
|
||||
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
|
||||
uint16x8_t q8u16, q9u16, q10u16, q11u16;
|
||||
|
||||
diff += 256;
|
||||
for (i = 0; i < 2; i++) {
|
||||
if (i == 0) {
|
||||
src_ptr = usrc;
|
||||
pred_ptr = upred;
|
||||
} else if (i == 1) {
|
||||
src_ptr = vsrc;
|
||||
pred_ptr = vpred;
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
d0u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d1u8 = vld1_u8(pred_ptr);
|
||||
pred_ptr += pred_stride;
|
||||
d2u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d3u8 = vld1_u8(pred_ptr);
|
||||
pred_ptr += pred_stride;
|
||||
d4u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d5u8 = vld1_u8(pred_ptr);
|
||||
pred_ptr += pred_stride;
|
||||
d6u8 = vld1_u8(src_ptr);
|
||||
src_ptr += src_stride;
|
||||
d7u8 = vld1_u8(pred_ptr);
|
||||
pred_ptr += pred_stride;
|
||||
|
||||
q8u16 = vsubl_u8(d0u8, d1u8);
|
||||
q9u16 = vsubl_u8(d2u8, d3u8);
|
||||
q10u16 = vsubl_u8(d4u8, d5u8);
|
||||
q11u16 = vsubl_u8(d6u8, d7u8);
|
||||
|
||||
vst1q_u16((uint16_t *)diff, q8u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q9u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q10u16);
|
||||
diff += 8;
|
||||
vst1q_u16((uint16_t *)diff, q11u16);
|
||||
diff += 8;
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
|
@ -8,6 +8,7 @@
|
|||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
|
||||
#include "vpx_config.h"
|
||||
#include "vp8_rtcd.h"
|
||||
|
@ -19,80 +20,29 @@
|
|||
#include "vpx_mem/vpx_mem.h"
|
||||
#include "rdopt.h"
|
||||
|
||||
// TODO(jingning,johannkoenig): use vpx_subtract_block to replace
|
||||
// codec specified vp9_subtract_ functions.
|
||||
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *src_ptr = (*(be->base_src) + be->src);
|
||||
short *diff_ptr = be->src_diff;
|
||||
unsigned char *pred_ptr = bd->predictor;
|
||||
int src_stride = be->src_stride;
|
||||
void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) {
|
||||
unsigned char *src_ptr = (*(be->base_src) + be->src);
|
||||
short *diff_ptr = be->src_diff;
|
||||
unsigned char *pred_ptr = bd->predictor;
|
||||
int src_stride = be->src_stride;
|
||||
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 4; r++)
|
||||
{
|
||||
for (c = 0; c < 4; c++)
|
||||
{
|
||||
diff_ptr[c] = src_ptr[c] - pred_ptr[c];
|
||||
}
|
||||
|
||||
diff_ptr += pitch;
|
||||
pred_ptr += pitch;
|
||||
src_ptr += src_stride;
|
||||
}
|
||||
vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride,
|
||||
pred_ptr, pitch);
|
||||
}
|
||||
|
||||
void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc,
|
||||
void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
|
||||
int src_stride, unsigned char *upred,
|
||||
unsigned char *vpred, int pred_stride)
|
||||
{
|
||||
short *udiff = diff + 256;
|
||||
short *vdiff = diff + 320;
|
||||
unsigned char *vpred, int pred_stride) {
|
||||
short *udiff = diff + 256;
|
||||
short *vdiff = diff + 320;
|
||||
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
for (c = 0; c < 8; c++)
|
||||
{
|
||||
udiff[c] = usrc[c] - upred[c];
|
||||
}
|
||||
|
||||
udiff += 8;
|
||||
upred += pred_stride;
|
||||
usrc += src_stride;
|
||||
}
|
||||
|
||||
for (r = 0; r < 8; r++)
|
||||
{
|
||||
for (c = 0; c < 8; c++)
|
||||
{
|
||||
vdiff[c] = vsrc[c] - vpred[c];
|
||||
}
|
||||
|
||||
vdiff += 8;
|
||||
vpred += pred_stride;
|
||||
vsrc += src_stride;
|
||||
}
|
||||
vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride);
|
||||
vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride);
|
||||
}
|
||||
|
||||
void vp8_subtract_mby_c(short *diff, unsigned char *src, int src_stride,
|
||||
unsigned char *pred, int pred_stride)
|
||||
{
|
||||
int r, c;
|
||||
|
||||
for (r = 0; r < 16; r++)
|
||||
{
|
||||
for (c = 0; c < 16; c++)
|
||||
{
|
||||
diff[c] = src[c] - pred[c];
|
||||
}
|
||||
|
||||
diff += 16;
|
||||
pred += pred_stride;
|
||||
src += src_stride;
|
||||
}
|
||||
void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
|
||||
unsigned char *pred, int pred_stride) {
|
||||
vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride);
|
||||
}
|
||||
|
||||
static void vp8_subtract_mb(MACROBLOCK *x)
|
||||
|
|
|
@ -19,6 +19,13 @@ extern "C" {
|
|||
#endif
|
||||
void vp8_encode_inter16x16(MACROBLOCK *x);
|
||||
|
||||
void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch);
|
||||
void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc,
|
||||
int src_stride, unsigned char *upred,
|
||||
unsigned char *vpred, int pred_stride);
|
||||
void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride,
|
||||
unsigned char *pred, int pred_stride);
|
||||
|
||||
void vp8_build_dcblock(MACROBLOCK *b);
|
||||
void vp8_transform_mb(MACROBLOCK *mb);
|
||||
void vp8_transform_mbuv(MACROBLOCK *x);
|
||||
|
|
|
@ -1,223 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
;                             short *diff, unsigned char *Predictor,
;                             int pitch);
;
; 4x4 residual: diff = z - Predictor, one row of four pixels per step.
; Pixels are widened to 16 bits by unpacking against a zero register before
; the word subtract. "pitch" is the row stride of both Predictor (in bytes)
; and diff (in shorts, hence the *2 scaling on the stores).
global sym(vp8_subtract_b_mmx_impl) PRIVATE
sym(vp8_subtract_b_mmx_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        mm7, mm7                    ; zero, used for byte->word unpack

    ; row 0
    movd        mm0, [rsi]
    movd        mm1, [rax]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi], mm0

    ; row 1
    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*2], mm0

    ; row 2
    movd        mm0, [rsi+rdx*2]
    movd        mm1, [rax+rcx*2]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*4], mm0

    ; row 3: advance the source by two rows and turn rcx into 3*pitch so
    ; the same addressing forms reach the fourth row.
    lea         rsi, [rsi+rdx*2]
    lea         rcx, [rcx+rcx*2]

    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        [rdi+rcx*2], mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;void vp8_subtract_mby_mmx(short *diff, unsigned char *src, int src_stride,
;                          unsigned char *pred, int pred_stride)
;
; 16x16 residual: diff = src - pred. One 16-pixel row per loop iteration,
; handled as two 8-byte halves; each half is widened to words against the
; zero register mm0 and produces 16 bytes of output. diff advances by
; 32 bytes (16 shorts) per row.
global sym(vp8_subtract_mby_mmx) PRIVATE
sym(vp8_subtract_mby_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; src
    movsxd      rdx, dword ptr arg(2)       ; src_stride
    mov         rax, arg(3)                 ; pred
    push        rbx                         ; rbx is used for pred_stride
    movsxd      rbx, dword ptr arg(4)       ; pred_stride

    pxor        mm0, mm0                    ; zero for unpacking
    mov         rcx, 16                     ; row counter

.next_row:
    ; pixels 0..7
    movq        mm1, [rsi]
    movq        mm3, [rax]

    movq        mm2, mm1
    movq        mm4, mm3

    punpcklbw   mm1, mm0
    punpcklbw   mm3, mm0

    punpckhbw   mm2, mm0
    punpckhbw   mm4, mm0

    psubw       mm1, mm3
    psubw       mm2, mm4

    movq        [rdi], mm1
    movq        [rdi+8], mm2

    ; pixels 8..15
    movq        mm1, [rsi+8]
    movq        mm3, [rax+8]

    movq        mm2, mm1
    movq        mm4, mm3

    punpcklbw   mm1, mm0
    punpcklbw   mm3, mm0

    punpckhbw   mm2, mm0
    punpckhbw   mm4, mm0

    psubw       mm1, mm3
    psubw       mm2, mm4

    movq        [rdi+16], mm1
    movq        [rdi+24], mm2

    add         rdi, 32
    lea         rax, [rax+rbx]
    lea         rsi, [rsi+rdx]
    dec         rcx
    jnz         .next_row

    pop         rbx
    pop         rdi
    pop         rsi
    ; begin epilog
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
;vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                      int src_stride, unsigned char *upred,
;                      unsigned char *vpred, int pred_stride)
;
; Two 8x8 chroma residuals. Output starts 256 shorts into diff; the U rows
; are written first and the V rows follow immediately, since rdi keeps
; advancing by 16 bytes (8 shorts) per row across both loops.
global sym(vp8_subtract_mbuv_mmx) PRIVATE
sym(vp8_subtract_mbuv_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; usrc
    movsxd      rdx, dword ptr arg(3)       ; src_stride
    mov         rax, arg(4)                 ; upred
    add         rdi, 256*2                  ; diff = diff + 256 (shorts)
    mov         rcx, 8                      ; row counter
    push        rbx                         ; rbx is used for pred_stride
    movsxd      rbx, dword ptr arg(6)       ; pred_stride

    pxor        mm7, mm7                    ; zero for unpacking

.next_u_row:
    movq        mm0, [rsi]
    movq        mm1, [rax]
    movq        mm3, mm0
    movq        mm4, mm1
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    punpckhbw   mm3, mm7
    punpckhbw   mm4, mm7
    psubw       mm0, mm1
    psubw       mm3, mm4
    movq        [rdi], mm0
    movq        [rdi+8], mm3
    add         rdi, 16
    add         rsi, rdx
    add         rax, rbx

    dec         rcx
    jnz         .next_u_row

    ; switch to the V plane; rdi, rdx and rbx carry over unchanged
    mov         rsi, arg(2)                 ; vsrc
    mov         rax, arg(5)                 ; vpred
    mov         rcx, 8

.next_v_row:
    movq        mm0, [rsi]
    movq        mm1, [rax]
    movq        mm3, mm0
    movq        mm4, mm1
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    punpckhbw   mm3, mm7
    punpckhbw   mm4, mm7
    psubw       mm0, mm1
    psubw       mm3, mm4
    movq        [rdi], mm0
    movq        [rdi+8], mm3
    add         rdi, 16
    add         rsi, rdx
    add         rax, rbx

    dec         rcx
    jnz         .next_v_row

    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    UNSHADOW_ARGS
    pop         rbp
    ret
|
|
@ -1,245 +0,0 @@
|
|||
;
|
||||
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
|
||||
;
|
||||
; Use of this source code is governed by a BSD-style license
|
||||
; that can be found in the LICENSE file in the root of the source
|
||||
; tree. An additional intellectual property rights grant can be found
|
||||
; in the file PATENTS. All contributing project authors may
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
|
||||
%include "vpx_ports/x86_abi_support.asm"
|
||||
|
||||
;void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
;                              short *diff, unsigned char *Predictor,
;                              int pitch);
;
; 4x4 residual: diff = z - Predictor, one row of four pixels per step.
; Despite the name, the arithmetic below is done in the MMX registers
; (mm0/mm1/mm7). "pitch" is the row stride of both Predictor (in bytes) and
; diff (in shorts, hence the *2 scaling on the stores).
global sym(vp8_subtract_b_sse2_impl) PRIVATE
sym(vp8_subtract_b_sse2_impl):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(2)                 ; diff
    mov         rax, arg(3)                 ; Predictor
    mov         rsi, arg(0)                 ; z
    movsxd      rdx, dword ptr arg(1)       ; src_stride
    movsxd      rcx, dword ptr arg(4)       ; pitch
    pxor        mm7, mm7                    ; zero, used for byte->word unpack

    ; row 0
    movd        mm0, [rsi]
    movd        mm1, [rax]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi], mm0

    ; row 1
    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*2], mm0

    ; row 2
    movd        mm0, [rsi+rdx*2]
    movd        mm1, [rax+rcx*2]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*4], mm0

    ; row 3: advance the source by two rows and turn rcx into 3*pitch so
    ; the same addressing forms reach the fourth row.
    lea         rsi, [rsi+rdx*2]
    lea         rcx, [rcx+rcx*2]

    movd        mm0, [rsi+rdx]
    movd        mm1, [rax+rcx]
    punpcklbw   mm0, mm7
    punpcklbw   mm1, mm7
    psubw       mm0, mm1
    movq        MMWORD PTR [rdi+rcx*2], mm0

    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
|
||||
;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, int src_stride,
;                           unsigned char *pred, int pred_stride)
;
; 16x16 residual: diff = src - pred, two rows per loop iteration.
;
; Widening trick: psubb yields the low byte of each difference. The high
; byte is 0xFF exactly when pred > src (unsigned); that mask is obtained by
; flipping the top bit of both operands with t80 (sixteen 0x80 bytes) and
; doing a signed pcmpgtb. Interleaving low bytes with the mask gives the
; sign-extended 16-bit differences.
;
; All row loads and stores use movdqa, i.e. the aligned forms.
global sym(vp8_subtract_mby_sse2) PRIVATE
sym(vp8_subtract_mby_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 5
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; src
    movsxd      rdx, dword ptr arg(2)       ; src_stride
    mov         rax, arg(3)                 ; pred
    movdqa      xmm4, [GLOBAL(t80)]         ; 0x80 in every byte
    push        rbx                         ; rbx is reused for pred_stride
    mov         rcx, 8                      ; do two lines at one time
    movsxd      rbx, dword ptr arg(4)       ; pred_stride

.next_row_pair:
    movdqa      xmm0, [rsi]                 ; src
    movdqa      xmm1, [rax]                 ; pred

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; low byte of each difference

    pxor        xmm1, xmm4                  ; convert to signed values
    pxor        xmm2, xmm4
    pcmpgtb     xmm1, xmm2                  ; obtain sign information

    movdqa      xmm2, xmm0
    punpcklbw   xmm0, xmm1                  ; put sign back to subtraction
    punpckhbw   xmm2, xmm1                  ; put sign back to subtraction

    movdqa      xmm3, [rsi + rdx]           ; src, second row
    movdqa      xmm5, [rax + rbx]           ; pred, second row

    lea         rsi, [rsi+rdx*2]
    lea         rax, [rax+rbx*2]

    movdqa      [rdi], xmm0
    movdqa      [rdi +16], xmm2

    movdqa      xmm1, xmm3
    psubb       xmm3, xmm5                  ; low byte of each difference

    pxor        xmm5, xmm4                  ; convert to signed values
    pxor        xmm1, xmm4
    pcmpgtb     xmm5, xmm1                  ; obtain sign information

    movdqa      xmm1, xmm3
    punpcklbw   xmm3, xmm5                  ; put sign back to subtraction
    punpckhbw   xmm1, xmm5                  ; put sign back to subtraction

    movdqa      [rdi +32], xmm3
    movdqa      [rdi +48], xmm1

    add         rdi, 64
    dec         rcx
    jnz         .next_row_pair

    pop         rbx
    pop         rdi
    pop         rsi
    ; begin epilog
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
;vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc,
;                       int src_stride, unsigned char *upred,
;                       unsigned char *vpred, int pred_stride)
;
; Two 8x8 chroma residuals. Output starts 256 shorts into diff; U is written
; first and V follows immediately, since rdi keeps advancing by 32 bytes per
; iteration across both loops. Each iteration packs two 8-pixel rows into
; one xmm register (punpcklqdq) and widens the byte differences to words
; with the same t80 / pcmpgtb sign-mask technique: psubb gives the low byte,
; the compare mask supplies the high byte.
global sym(vp8_subtract_mbuv_sse2) PRIVATE
sym(vp8_subtract_mbuv_sse2):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 7
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

    movdqa      xmm4, [GLOBAL(t80)]         ; 0x80 in every byte
    mov         rdi, arg(0)                 ; diff
    mov         rsi, arg(1)                 ; usrc
    movsxd      rdx, dword ptr arg(3)       ; src_stride
    mov         rax, arg(4)                 ; upred
    add         rdi, 256*2                  ; diff = diff + 256 (shorts)
    mov         rcx, 4                      ; 4 iterations x 2 rows
    push        rbx                         ; rbx is reused for pred_stride
    movsxd      rbx, dword ptr arg(6)       ; pred_stride

    ; u
.next_u_pair:
    movq        xmm0, [rsi]                 ; src
    movq        xmm2, [rsi+rdx]             ; src -- next line
    movq        xmm1, [rax]                 ; pred
    movq        xmm3, [rax+rbx]             ; pred -- next line
    lea         rsi, [rsi + rdx*2]
    lea         rax, [rax + rbx*2]

    punpcklqdq  xmm0, xmm2
    punpcklqdq  xmm1, xmm3

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; subtraction with sign missed

    pxor        xmm1, xmm4                  ; convert to signed values
    pxor        xmm2, xmm4
    pcmpgtb     xmm1, xmm2                  ; obtain sign information

    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm1
    punpcklbw   xmm0, xmm1                  ; put sign back to subtraction
    punpckhbw   xmm2, xmm3                  ; put sign back to subtraction

    movdqa      [rdi], xmm0                 ; store difference
    movdqa      [rdi +16], xmm2             ; store difference
    add         rdi, 32
    sub         rcx, 1
    jnz         .next_u_pair

    ; switch to the V plane; rdi, rdx, rbx and xmm4 carry over unchanged
    mov         rsi, arg(2)                 ; vsrc
    mov         rax, arg(5)                 ; vpred
    mov         rcx, 4

    ; v
.next_v_pair:
    movq        xmm0, [rsi]                 ; src
    movq        xmm2, [rsi+rdx]             ; src -- next line
    movq        xmm1, [rax]                 ; pred
    movq        xmm3, [rax+rbx]             ; pred -- next line
    lea         rsi, [rsi + rdx*2]
    lea         rax, [rax + rbx*2]

    punpcklqdq  xmm0, xmm2
    punpcklqdq  xmm1, xmm3

    movdqa      xmm2, xmm0
    psubb       xmm0, xmm1                  ; subtraction with sign missed

    pxor        xmm1, xmm4                  ; convert to signed values
    pxor        xmm2, xmm4
    pcmpgtb     xmm1, xmm2                  ; obtain sign information

    movdqa      xmm2, xmm0
    movdqa      xmm3, xmm1
    punpcklbw   xmm0, xmm1                  ; put sign back to subtraction
    punpckhbw   xmm2, xmm3                  ; put sign back to subtraction

    movdqa      [rdi], xmm0                 ; store difference
    movdqa      [rdi +16], xmm2             ; store difference
    add         rdi, 32
    sub         rcx, 1
    jnz         .next_v_pair

    pop         rbx
    ; begin epilog
    pop         rdi
    pop         rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret
|
||||
|
||||
SECTION_RODATA
|
||||
align 16
|
||||
t80:
|
||||
times 16 db 0x80
|
|
@ -65,14 +65,3 @@ int vp8_mbuverror_mmx(MACROBLOCK *mb)
|
|||
return vp8_mbuverror_mmx_impl(s_ptr, d_ptr);
|
||||
}
|
||||
|
||||
void vp8_subtract_b_mmx_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *z = *(be->base_src) + be->src;
|
||||
unsigned int src_stride = be->src_stride;
|
||||
short *diff = &be->src_diff[0];
|
||||
unsigned char *predictor = &bd->predictor[0];
|
||||
vp8_subtract_b_mmx_impl(z, src_stride, diff, predictor, pitch);
|
||||
}
|
||||
|
|
|
@ -30,14 +30,3 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
|
|||
return vp8_mbuverror_xmm_impl(s_ptr, d_ptr);
|
||||
}
|
||||
|
||||
void vp8_subtract_b_sse2_impl(unsigned char *z, int src_stride,
|
||||
short *diff, unsigned char *predictor,
|
||||
int pitch);
|
||||
void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
|
||||
{
|
||||
unsigned char *z = *(be->base_src) + be->src;
|
||||
unsigned int src_stride = be->src_stride;
|
||||
short *diff = &be->src_diff[0];
|
||||
unsigned char *predictor = &bd->predictor[0];
|
||||
vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
|
||||
}
|
||||
|
|
|
@ -82,7 +82,6 @@ VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c
|
|||
endif
|
||||
|
||||
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
|
||||
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
|
||||
VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/vp8_enc_stubs_mmx.c
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
|
||||
|
@ -94,7 +93,6 @@ ifeq ($(CONFIG_TEMPORAL_DENOISING),yes)
|
|||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c
|
||||
endif
|
||||
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
|
||||
VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c
|
||||
VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
|
||||
|
|
|
@ -25,5 +25,4 @@ VP8_CX_SRCS-$(HAVE_MEDIA) += encoder/arm/armv6/walsh_v6$(ASM)
|
|||
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c
|
||||
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c
|
||||
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c
|
||||
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/subtract_neon.c
|
||||
VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c
|
||||
|
|
Загрузка…
Ссылка в новой задаче